#ifndef DJDWJBHNDAYTNOXLFOBDSGAQAAYPWMXJGEBYIRKEAKAQUUWVGDUGGDKSDXUKSPCYYNTWTDNII
#define DJDWJBHNDAYTNOXLFOBDSGAQAAYPWMXJGEBYIRKEAKAQUUWVGDUGGDKSDXUKSPCYYNTWTDNII

#include "./operation.hpp" // make_unary_operator, negative, concatenate, tanh, ...
#include "./utils/range.hpp"
#include "./utils/better_assert.hpp"
#include "./utils/for_each.hpp"
#include "./utils/context_cast.hpp"

#include <algorithm>
#include <any>
#include <cmath>
#include <concepts>
#include <cstddef>
#include <memory>
#include <numeric>
#include <tuple>

/// Softmax activation function, an unary operator.
template <Expression Ex>
auto constexpr softmax( Ex const& ex ) noexcept
{
    return make_unary_operator
    (
        []<Tensor Tsor>( Tsor const& input ) noexcept
        {
            better_assert( !input.empty(), "softmax forward: input tensor is empty!" );
            Tsor x = deep_copy( input );
            std::size_t const last_dim = *(x.shape().rbegin());
            std::size_t const rest_dim = x.size() / last_dim;
            for ( auto idx : range( rest_dim ) )
            {
                auto [begin, end] = std::make_tuple( x.begin()+idx*last_dim, x.begin()+(idx+1)*last_dim );
                // subtract the row maximum before exponentiating, for numerical stability
                typename Tsor::value_type const mx = *std::max_element( begin, end );
                for_each( begin, end, [mx]( auto& v ){ v = std::exp( v - mx ); } );
                typename Tsor::value_type const sum = std::accumulate( begin, end, typename Tsor::value_type{0} );
                for_each( begin, end, [sum]( auto& v ){ v /= sum; } );
            }
            return x;
        },
        []<Tensor Tsor>( Tsor const&, Tsor const& output, Tsor const& grad ) noexcept
        {
            better_assert( !has_nan( grad ), "backprop: upcoming gradient for activation softmax contains NaN" );
            Tsor ans = deep_copy( grad ); // copy so the incoming gradient is not mutated
            for_each( ans.begin(), ans.end(), output.begin(), []( auto& a, auto o ){ a *= o * ( typename Tsor::value_type{1} - o ); } );
            return ans;
        },
        "Softmax"
    )( ex );
}
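
// Numerical-stability note (a worked identity, not upstream code): with
// m = max_j v_j, the forward pass above computes
//
//     softmax(v)_i = exp(v_i - m) / sum_j exp(v_j - m)
//                  = exp(v_i) / sum_j exp(v_j),
//
// so subtracting the row maximum changes nothing mathematically while keeping
// every exponent <= 0, away from std::exp overflow.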

/// Scaled Exponential Linear Unit (SELU) activation function, an unary operator.
/// If x>0, returns lambda*x; otherwise returns lambda*alpha*(exp(x)-1).
template <Expression Ex>
auto inline selu( Ex const& ex ) noexcept
{
    std::shared_ptr<std::any> forward_cache = std::make_shared<std::any>();
    std::shared_ptr<std::any> backward_cache = std::make_shared<std::any>();
    return make_unary_operator
    (
        [forward_cache]<Tensor Tsor>( Tsor const& input ) noexcept
        {
            typedef typename Tsor::value_type value_type;
            value_type const lambda = 1.0507;
            value_type const alpha = 1.67326;
            Tsor& ans = context_cast<Tsor>( forward_cache );
            ans.resize( input.shape() );
            std::copy( input.begin(), input.end(), ans.begin() );
            ans.map( [lambda, alpha]( auto& x ){ x = (x >= value_type{0}) ? (lambda * x) : (lambda * alpha * (std::exp(x) - value_type{1})); } );
            return ans;
        },
        [backward_cache]<Tensor Tsor>( Tsor const& input, Tsor const&, Tsor const& grad ) noexcept
        {
            typedef typename Tsor::value_type value_type;
            value_type const lambda = 1.0507;
            value_type const alpha = 1.67326;
            Tsor& ans = context_cast<Tsor>( backward_cache );
            ans.resize( input.shape() );
            for_each( ans.begin(), ans.end(), input.begin(), grad.begin(),
                      [lambda, alpha]( auto& a, auto i, auto g ){ a = (i >= value_type{0}) ? (g * lambda) : (g * lambda * alpha * std::exp(i)); } );
            return ans;
        },
        "Selu"
    )( ex );
}
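
// Usage sketch (hedged: assumes a `variable` expression wrapper and a
// `random<float>` tensor factory, as used elsewhere in this library):
//
//     auto x = variable{ random<float>( {32, 64} ) };
//     auto y = selu( x );
//
// lambda ~ 1.0507 and alpha ~ 1.67326 are the fixed-point constants from the
// SELU paper that make activations approximately self-normalising.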

/// Softplus function, an unary operator. Returns log(exp(x)+1).
template <Expression Ex>
auto softplus( Ex const& ex ) noexcept
{
    std::shared_ptr<std::any> forward_cache = std::make_shared<std::any>();
    std::shared_ptr<std::any> backward_cache = std::make_shared<std::any>();
    return make_unary_operator
    (
        [forward_cache]<Tensor Tsor>( Tsor const& input ) noexcept
        {
            Tsor& ans = context_cast<Tsor>( forward_cache );
            ans.resize( input.shape() );
            std::copy( input.begin(), input.end(), ans.begin() );
            ans.map( []( auto& x ){ x = std::log( typename Tsor::value_type{1} + std::exp(x) ); } );
            return ans;
        },
        [backward_cache]<Tensor Tsor>( Tsor const& input, Tsor const&, Tsor const& grad ) noexcept
        {
            Tsor& ans = context_cast<Tsor>( backward_cache );
            ans.resize( input.shape() );
            // note: the gradient of softplus is 1/(1+exp(-x)); the sign here must be '+'
            for_each( ans.begin(), ans.end(), input.begin(), grad.begin(),
                      []( auto& a, auto i, auto g ){ a = g / ( typename Tsor::value_type{1} + std::exp(-i) ); } );
            return ans;
        },
        "Softplus"
    )( ex );
}
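
// Derivative check (standard calculus, justifying the backward pass above):
//
//     d/dx log(1 + exp(x)) = exp(x) / (1 + exp(x)) = 1 / (1 + exp(-x)),
//
// i.e. the gradient of softplus is exactly the logistic sigmoid of the input.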

/// Softsign function, an unary operator. Returns x / (abs(x) + 1).
template <Expression Ex>
auto softsign( Ex const& ex ) noexcept
{
    std::shared_ptr<std::any> forward_cache = std::make_shared<std::any>();
    std::shared_ptr<std::any> backward_cache = std::make_shared<std::any>();
    return make_unary_operator
    (
        [forward_cache]<Tensor Tsor>( Tsor const& input ) noexcept
        {
            Tsor& ans = context_cast<Tsor>( forward_cache );
            ans.resize( input.shape() );
            std::copy( input.begin(), input.end(), ans.begin() );
            ans.map( []( auto& x ){ x /= typename Tsor::value_type{1} + std::abs(x); } );
            return ans;
        },
        [backward_cache]<Tensor Tsor>( Tsor const& input, Tsor const&, Tsor const& grad ) noexcept
        {
            Tsor& ans = context_cast<Tsor>( backward_cache );
            ans.resize( input.shape() );
            for_each( ans.begin(), ans.end(), input.begin(), grad.begin(),
                      []( auto& a, auto i, auto g ){ auto const tmp = typename Tsor::value_type{1} + std::abs(i); a = g / (tmp*tmp); } );
            return ans;
        },
        "Softsign"
    )( ex );
}
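
// Derivative check (quotient rule): for f(x) = x / (1 + |x|),
//
//     f'(x) = ( (1 + |x|) - x * sign(x) ) / (1 + |x|)^2 = 1 / (1 + |x|)^2,
//
// which is the (1 + |i|)^-2 factor applied in the backward pass above.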

/// Sigmoid function, an unary operator. Returns 1 / (exp(-x) + 1).
template <Expression Ex>
auto sigmoid( Ex const& ex ) noexcept
{
    std::shared_ptr<std::any> forward_cache = std::make_shared<std::any>();
    std::shared_ptr<std::any> backward_cache = std::make_shared<std::any>();
    return make_unary_operator
    (
        [forward_cache]<Tensor Tsor>( Tsor const& input ) noexcept
        {
            Tsor& ans = context_cast<Tsor>( forward_cache );
            ans.resize( input.shape() );
            std::copy( input.begin(), input.end(), ans.begin() );
            ans.map( []( auto& x ){ x = 1.0 / (1.0 + std::exp(-x)); } );
            return ans;
        },
        [backward_cache]<Tensor Tsor>( Tsor const&, Tsor const& output, Tsor const& grad ) noexcept
        {
            Tsor& ans = context_cast<Tsor>( backward_cache );
            ans.resize( output.shape() );
            for_each( ans.begin(), ans.end(), output.begin(), grad.begin(),
                      []( auto& a, auto o, auto g ){ a = g * o * ( typename Tsor::value_type{1} - o ); } );
            return ans;
        },
        "Sigmoid"
    )( ex );
}
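
// Backward-pass note: since sigma'(x) = sigma(x) * (1 - sigma(x)), the
// gradient is recovered from the cached forward output alone and no exp() is
// re-evaluated. A usage sketch (a `variable` wrapper and `random<float>`
// factory assumed, as elsewhere in this library):
//
//     auto y = sigmoid( variable{ random<float>( {10, 1} ) } );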

struct relu_context
{
    auto make_forward() const noexcept
    {
        return []( std::shared_ptr<std::any> forward_cache ) noexcept
        {
            return [forward_cache]<Tensor Tsor>( Tsor const& input ) noexcept
            {
                typedef typename Tsor::value_type value_type;
                Tsor& ans = context_cast<Tsor>( forward_cache );
                ans.resize( input.shape() );
                for_each( ans.begin(), ans.end(), input.begin(), []( auto& o, auto x ){ o = std::max(x, value_type{0}); } );
                return ans;
            };
        };
    }

    auto make_backward() const noexcept
    {
        return []<Tensor Tsor>( Tsor const& input, Tsor const&, Tsor const& grad ) noexcept
        {
            typedef typename Tsor::value_type value_type;
            Tsor ans = deep_copy( grad ); // gradient passes through only where the input was positive
            for_each( ans.begin(), ans.end(), input.begin(), []( auto& v, auto x ){ if ( x <= value_type{0} ) v = value_type{0}; } );
            return ans;
        };
    }
}; // struct relu_context

/// Relu function, an unary operator. Returns x if positive, 0 otherwise.
template <Expression Ex>
auto relu( Ex const& ex ) noexcept
{
    std::shared_ptr<std::any> forward_cache = std::make_shared<std::any>();
    return make_unary_operator( relu_context{}.make_forward()( forward_cache ), relu_context{}.make_backward(), "Relu" )( ex );
}

struct relu6_context
{
    auto make_forward() const noexcept
    {
        return []( std::shared_ptr<std::any> forward_cache ) noexcept
        {
            return [forward_cache]<Tensor Tsor>( Tsor const& input ) noexcept
            {
                typedef typename Tsor::value_type value_type;
                Tsor& ans = context_cast<Tsor>( forward_cache );
                ans.resize( input.shape() );
                for_each( ans.begin(), ans.end(), input.begin(), []( auto& o, auto x ){ o = std::min( value_type{6}, std::max(x, value_type{0}) ); } );
                return ans;
            };
        };
    }

    auto make_backward() const noexcept
    {
        return []<Tensor Tsor>( Tsor const& input, Tsor const&, Tsor const& grad ) noexcept
        {
            typedef typename Tsor::value_type value_type;
            Tsor ans = deep_copy( grad ); // gradient is blocked outside the active range (0, 6)
            for_each( ans.begin(), ans.end(), input.begin(), []( auto& v, auto x ){ if ( (x <= value_type{0}) || (x >= value_type{6}) ) v = value_type{0}; } );
            return ans;
        };
    }
}; // struct relu6_context

/// Rectified Linear 6 function, an unary operator. Returns min(max(features, 0), 6).
template <Expression Ex>
auto relu6( Ex const& ex ) noexcept
{
    std::shared_ptr<std::any> forward_cache = std::make_shared<std::any>();
    return make_unary_operator( relu6_context{}.make_forward()( forward_cache ), relu6_context{}.make_backward(), "Relu6" )( ex );
}

/// Leaky Rectified Linear function, an unary operator. Returns x if positive, factor*x otherwise.
template< typename T > requires std::floating_point<T>
auto leaky_relu( T const factor = 0.2 ) noexcept
{
    better_assert( factor > T{0}, "Expecting leaky_relu with a factor greater than 0, but got factor = ", factor );
    better_assert( factor < T{1}, "Expecting leaky_relu with a factor less than 1, but got factor = ", factor );
    return [factor]<Expression Ex>( Ex const& ex ) noexcept
    {
        std::shared_ptr<std::any> forward_cache = std::make_shared<std::any>();
        return make_unary_operator
        (
            [factor, forward_cache]<Tensor Tsor>( Tsor const& input ) noexcept
            {
                Tsor& ans = context_cast<Tsor>( forward_cache );
                ans.resize( input.shape() );
                for_each( ans.begin(), ans.end(), input.begin(), [factor]( auto& v_out, auto v_in ){ v_out = std::max( T{v_in}, T{factor*v_in} ); } );
                return ans;
            },
            [factor]<Tensor Tsor>( Tsor const& input, Tsor const&, Tsor const& grad ) noexcept
            {
                typedef typename Tsor::value_type value_type;
                Tsor ans = deep_copy( grad );
                for_each( ans.begin(), ans.end(), input.begin(),
                          [factor]( value_type& v_back, value_type const v_in ){ v_back = (v_in > value_type{0}) ? v_back : factor*v_back; } );
                return ans;
            },
            "LeakyRelu"
        )( ex );
    };
}
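
// Unlike the operators above, leaky_relu is a factory: it takes the slope
// first and returns a unary operator over expressions. A sketch (expression
// `x` assumed as in the earlier examples):
//
//     auto act = leaky_relu( 0.1f ); // factor must lie in (0, 1)
//     auto y   = act( x );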

template <Expression Ex>
auto negative_relu( Ex const& ex ) noexcept
{
    return negative( relu( ex ) ); // -relu(x)
}

/// Exponential Linear function, an unary operator. Returns x if positive, alpha*(exp(x)-1) otherwise.
template< typename T = float > requires std::floating_point<T>
auto elu( T const alpha = 1.0 ) noexcept
{
    return [alpha]<Expression Ex>( Ex const& ex ) noexcept
    {
        std::shared_ptr<std::any> forward_cache = std::make_shared<std::any>();
        return make_unary_operator
        (
            [alpha, forward_cache]<Tensor Tsor>( Tsor const& input ) noexcept
            {
                typedef typename Tsor::value_type value_type;
                Tsor& ans = context_cast<Tsor>( forward_cache );
                ans.resize( input.shape() );
                for_each( ans.begin(), ans.end(), input.begin(),
                          [alpha]( auto& v_out, auto v_in ){ v_out = (v_in > value_type{0}) ? v_in : (alpha * (std::exp(v_in) - value_type{1})); } );
                return ans;
            },
            [alpha]<Tensor Tsor>( Tsor const& input, Tsor const&, Tsor const& grad ) noexcept
            {
                typedef typename Tsor::value_type value_type;
                Tsor ans = deep_copy( grad );
                // d/dx elu(x) = 1 for x >= 0, alpha*exp(x) otherwise; the exp takes the input, not the gradient
                for_each( ans.begin(), ans.end(), input.begin(),
                          [alpha]( value_type& v_back, value_type const v_in ){ v_back = (v_in >= value_type{0}) ? v_back : v_back * alpha * std::exp(v_in); } );
                return ans;
            },
            "Elu"
        )( ex );
    };
}
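
// elu follows the same factory pattern as leaky_relu; alpha defaults to 1.0:
//
//     auto y = elu( 1.0f )( x ); // sketch; expression `x` assumed as above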

/// Exponential function, an unary operator. Returns exp(x).
template <Expression Ex>
auto inline exponential( Ex const& ex ) noexcept
{
    std::shared_ptr<std::any> forward_cache = std::make_shared<std::any>();
    return make_unary_operator
    (
        [forward_cache]<Tensor Tsor>( Tsor const& input ) noexcept
        {
            Tsor& ans = context_cast<Tsor>( forward_cache );
            ans.resize( input.shape() );
            std::copy( input.begin(), input.end(), ans.begin() );
            ans.map( []( auto& x ){ x = std::exp(x); } );
            better_assert( !has_nan( ans ), "exponential operator forward output contains nan." );
            better_assert( !has_inf( ans ), "exponential operator forward output contains inf." );
            return ans;
        },
        []<Tensor Tsor>( Tsor const&, Tsor const& output, Tsor const& grad ) noexcept
        {
            Tsor ans = deep_copy( grad ); // copy so the incoming gradient is not mutated
            for_each( ans.begin(), ans.end(), output.begin(), []( auto& a, auto o ){ a *= o; } );
            return ans;
        },
        "Exponential"
    )( ex );
}
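
// Backward-pass note: d/dx exp(x) = exp(x), which is precisely the cached
// forward output, so the gradient is just `grad * output` elementwise, as the
// for_each above computes.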

/// Hard Sigmoid function, an unary operator. Piecewise linear approximation of the sigmoid function.
template <Expression Ex>
auto hard_sigmoid( Ex const& ex ) noexcept
{
    std::shared_ptr<std::any> forward_cache = std::make_shared<std::any>();
    return make_unary_operator
    (
        [forward_cache]<Tensor Tsor>( Tsor const& input ) noexcept
        {
            typedef typename Tsor::value_type value_type;
            Tsor& ans = context_cast<Tsor>( forward_cache );
            ans.resize( input.shape() );
            std::copy( input.begin(), input.end(), ans.begin() );
            ans.map( []( auto& x ){ x = ( x > value_type{1} ) ? value_type{1} : ( x < value_type{-1} ) ? value_type{0} : (x+value_type{1})/value_type{2}; } );
            return ans;
        },
        []<Tensor Tsor>( Tsor const& input, Tsor const&, Tsor const& grad ) noexcept
        {
            typedef typename Tsor::value_type value_type;
            Tsor ans = deep_copy( grad );
            for_each( ans.begin(), ans.end(), input.begin(),
                      []( auto& a, auto x ){ a = ((x > value_type{1}) || (x < value_type{-1})) ? value_type{0} : (a / value_type{2}); } );
            return ans;
        },
        "HardSigmoid"
    )( ex );
}
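
// Worked piecewise form of the forward pass above:
//
//     hard_sigmoid(x) = 0        for x < -1,
//                       (x+1)/2  for -1 <= x <= 1,
//                       1        for x > 1,
//
// so the derivative is the constant 1/2 inside (-1, 1) and 0 outside, which
// is exactly what the backward lambda applies.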

/// Gaussian Error Linear Unit, an unary operator. GAUSSIAN ERROR LINEAR UNITS (GELUS): https://arxiv....
template <Expression Ex>
auto inline gelu( Ex const& ex ) noexcept
{
    auto _gelu = []<typename T>( T x )
    {
        auto const ans = 0.5 * x * ( 1.0 + std::tanh( 0.79788456080286535588 * x * ( 1.0 + 0.044715*x*x ) ) );
        return static_cast<T>( ans );
    };

    auto sech_2 = []( auto x )
    {
        return 1.0 - std::pow( std::tanh(x), 2 );
    };

    auto _dgelu = [sech_2]<typename T>( T x )
    {
        auto const sq_2_pi_x = 0.79788456080286535588 * x;
        auto const _xx = x * x;
        auto const ans = 0.5 * ( 1.0 + std::tanh( sq_2_pi_x * ( 1.0 + 0.044715 * _xx ) ) )
                       + 0.5 * sq_2_pi_x * sech_2( sq_2_pi_x * ( 1.0 + 0.044715 * _xx ) ) * ( 1.0 + 0.134145 * _xx );
        return static_cast<T>( ans );
    };

    std::shared_ptr<std::any> forward_cache = std::make_shared<std::any>();
    return make_unary_operator
    (
        [_gelu, forward_cache]<Tensor Tsor>( Tsor const& input ) noexcept
        {
            Tsor& ans = context_cast<Tsor>( forward_cache );
            ans.resize( input.shape() );
            std::copy( input.begin(), input.end(), ans.begin() );
            ans.map( [_gelu]( auto& x ){ x = _gelu(x); } );
            return ans;
        },
        [_dgelu]<Tensor Tsor>( Tsor const& input, Tsor const&, Tsor const& grad ) noexcept
        {
            Tsor ans = deep_copy( input );
            for_each( ans.begin(), ans.end(), [&_dgelu]( auto& x ){ x = _dgelu(x); } );
            // chain rule: multiply elementwise by the incoming gradient
            for_each( ans.begin(), ans.end(), grad.begin(), []( auto& a, auto g ){ a *= g; } );
            return ans;
        },
        "Gelu"
    )( ex );
}

/// Swish activation function.
template< Expression Ex >
auto swish( Ex const& ex ) noexcept
{
    // swish(x) = x * sigmoid(x), composed from existing operators
    return hadamard_product( ex, sigmoid( ex ) );
}

/// An alias name of activation swish.
template< Expression Ex >
auto silu( Ex const& ex ) noexcept
{
    return swish( ex );
}

/// Concatenated Rectified Linear Units, an activation function which preserves both positive and negative information.
template< Expression Ex >
auto crelu( Ex const& ex ) noexcept
{
    // crelu(x) concatenates relu(x) with relu(-x)
    return concatenate( relu( ex ), relu( negative( ex ) ) );
}

/// Tank shrink function.
template< Expression Ex >
auto tank_shrink( Ex const& ex ) noexcept
{
    return ex - tanh( ex );
}

/// Mish function.
template< Expression Ex >
auto mish( Ex const& ex ) noexcept
{
    // mish(x) = x * tanh( softplus(x) )
    return hadamard_product( ex, tanh( softplus( ex ) ) );
}

/// Lisht function.
template< Expression Ex >
auto lisht( Ex const& ex ) noexcept
{
    // lisht(x) = x * tanh(x)
    return hadamard_product( ex, tanh( ex ) );
}

#endif//DJDWJBHNDAYTNOXLFOBDSGAQAAYPWMXJGEBYIRKEAKAQUUWVGDUGGDKSDXUKSPCYYNTWTDNII