ceras: yet another deep learning engine
activation.hpp
#ifndef DJDWJBHNDAYTNOXLFOBDSGAQAAYPWMXJGEBYIRKEAKAQUUWVGDUGGDKSDXUKSPCYYNTWTDNII
#define DJDWJBHNDAYTNOXLFOBDSGAQAAYPWMXJGEBYIRKEAKAQUUWVGDUGGDKSDXUKSPCYYNTWTDNII

#include "./operation.hpp"
#include "./tensor.hpp"
#include "./utils/range.hpp"
#include "./utils/better_assert.hpp"
#include "./utils/for_each.hpp"
#include "./utils/context_cast.hpp"

// standard headers used directly in this file
#include <algorithm>
#include <any>
#include <cmath>
#include <concepts>
#include <memory>
#include <numeric>
#include <tuple>

namespace ceras
{

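    ///
    /// Softmax activation function, an unary operator. Normalizes the last
    /// dimension of the input into a probability distribution:
    /// softmax(x)_i = exp(x_i) / sum_j exp(x_j).
    ///
    /// Usage sketch (assuming `x` is any Expression, e.g. a variable or a
    /// place_holder from operation.hpp):
    /// @code
    /// auto probabilities = softmax( x );
    /// @endcode
    ///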
    template <Expression Ex>
    auto constexpr softmax( Ex const& ex ) noexcept
    {
        return make_unary_operator( []<Tensor Tsor>( Tsor const& input ) noexcept
            {
                better_assert( !input.empty(), "softmax forward: input tensor is empty!" );
                Tsor x = deep_copy( input );
                std::size_t const last_dim = *(x.shape().rbegin());
                std::size_t const rest_dim = x.size() / last_dim;
                for ( auto idx : range( rest_dim ) )
                {
                    auto [begin, end] = std::make_tuple( x.begin()+idx*last_dim, x.begin()+(idx+1)*last_dim );
                    // subtract the row maximum before exponentiating, for numerical stability
                    typename Tsor::value_type const mx = *std::max_element( begin, end );
                    for_each( begin, end, [mx]( auto& v ){ v = std::exp( v-mx ); } );
                    typename Tsor::value_type const sum = std::accumulate( begin, end, typename Tsor::value_type{0} );
                    for_each( begin, end, [sum]( auto& v ){ v /= sum; } );
                }
                return x;
            },
            []<Tensor Tsor>( Tsor const&, Tsor const& output, Tsor const& grad ) noexcept
            {
                better_assert( !has_nan( grad ), "backprop: upcoming gradient for activation softmax contains NaN" );
                // note: only the diagonal of the softmax Jacobian, o_i * (1 - o_i), is applied here;
                // the cross terms -o_i * o_j are neglected
                Tsor ans = grad;
                for_each( ans.begin(), ans.end(), output.begin(), []( auto& a, auto o ){ a *= o * ( typename Tsor::value_type{1} - o ); } );
                return ans;
            },
            "Softmax" )( ex );
    }

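    ///
    /// Scaled Exponential Linear Unit (SELU) activation function, an unary
    /// operator. If x >= 0, returns lambda * x; otherwise returns
    /// lambda * alpha * (exp(x) - 1), with lambda = 1.0507 and alpha = 1.67326.
    ///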
    template <Expression Ex>
    auto inline selu( Ex const& ex ) noexcept
    {
        std::shared_ptr<std::any> forward_cache = std::make_shared<std::any>();
        std::shared_ptr<std::any> backward_cache = std::make_shared<std::any>();

        return make_unary_operator( [forward_cache]<Tensor Tsor>( Tsor const& input ) noexcept
            {
                typedef typename Tsor::value_type value_type;
                value_type const lambda = 1.0507;
                value_type const alpha = 1.67326;
                Tsor& ans = context_cast<Tsor>( forward_cache );
                ans.resize( input.shape() );
                std::copy( input.begin(), input.end(), ans.begin() );
                // if x >= 0: \lambda x
                // if x <  0: \lambda \alpha (exp(x) - 1)
                ans.map( [lambda, alpha]( auto& x ){ x = (x >= value_type{0}) ? (lambda * x) : (lambda * alpha * (std::exp(x) - value_type{1})); } );
                return ans;
            },
            [backward_cache]<Tensor Tsor>( Tsor const& input, Tsor const&, Tsor const& grad ) noexcept
            {
                typedef typename Tsor::value_type value_type;
                value_type const lambda = 1.0507;
                value_type const alpha = 1.67326;
                Tsor& ans = context_cast<Tsor>( backward_cache );
                ans.resize( input.shape() );
                // derivative: if x >= 0: \lambda
                //             if x <  0: \lambda \alpha exp( x )
                for_each( ans.begin(), ans.end(), input.begin(), grad.begin(), [lambda, alpha]( auto& a, auto i, auto g ){ a = (i >= value_type{0}) ? (g * lambda) : (g * lambda * alpha * std::exp(i)); } );
                return ans;
            },
            "SeLU" )( ex );
    }

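    ///
    /// Softplus function, an unary operator. Returns log(exp(x)+1), a smooth
    /// approximation of relu.
    ///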
    template <Expression Ex>
    auto inline softplus( Ex const& ex ) noexcept
    {
        std::shared_ptr<std::any> forward_cache = std::make_shared<std::any>();
        std::shared_ptr<std::any> backward_cache = std::make_shared<std::any>();

        return make_unary_operator( [forward_cache]<Tensor Tsor>( Tsor const& input ) noexcept
            {
                Tsor& ans = context_cast<Tsor>( forward_cache );
                ans.resize( input.shape() );
                std::copy( input.begin(), input.end(), ans.begin() );
                ans.map( []( auto& x ){ x = std::log( 1.0 + std::exp(x) ); } ); // ln( 1 + e^x )
                return ans;
            },
            [backward_cache]<Tensor Tsor>( Tsor const& input, Tsor const&, Tsor const& grad ) noexcept
            {
                Tsor& ans = context_cast<Tsor>( backward_cache );
                ans.resize( input.shape() );
                // derivative of softplus is the sigmoid: 1 / ( 1 + exp(-x) )
                for_each( ans.begin(), ans.end(), input.begin(), grad.begin(), []( auto& a, auto i, auto g ){ a = g / ( typename Tsor::value_type{1} + std::exp(-i) ); } );
                return ans;
            },
            "SoftPlus" )( ex );
    }

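    ///
    /// Softsign function, an unary operator. Returns x / (abs(x) + 1).
    ///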
    template <Expression Ex>
    auto inline softsign( Ex const& ex ) noexcept
    {
        std::shared_ptr<std::any> forward_cache = std::make_shared<std::any>();
        std::shared_ptr<std::any> backward_cache = std::make_shared<std::any>();

        return make_unary_operator( [forward_cache]<Tensor Tsor>( Tsor const& input ) noexcept
            {
                Tsor& ans = context_cast<Tsor>( forward_cache );
                ans.resize( input.shape() );
                std::copy( input.begin(), input.end(), ans.begin() );
                ans.map( []( auto& x ){ x /= typename Tsor::value_type{1} + std::abs(x); } ); // x / ( 1 + |x| )
                return ans;
            },
            [backward_cache]<Tensor Tsor>( Tsor const& input, Tsor const&, Tsor const& grad ) noexcept
            {
                Tsor& ans = context_cast<Tsor>( backward_cache );
                ans.resize( input.shape() );
                // derivative: 1 / ( 1 + |x| )^2
                for_each( ans.begin(), ans.end(), input.begin(), grad.begin(), []( auto& a, auto i, auto g ){ auto tmp = typename Tsor::value_type{1} + std::abs(i); a = g / (tmp*tmp); } );
                return ans;
            },
            "SoftSign" )( ex );
    }

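    ///
    /// Sigmoid function, an unary operator. Returns 1 / (exp(-x) + 1).
    ///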
    template <Expression Ex>
    auto inline sigmoid( Ex const& ex ) noexcept
    {
        std::shared_ptr<std::any> forward_cache = std::make_shared<std::any>();
        std::shared_ptr<std::any> backward_cache = std::make_shared<std::any>();
        return make_unary_operator( [forward_cache]<Tensor Tsor>( Tsor const& input ) noexcept
            {
                Tsor& ans = context_cast<Tsor>( forward_cache );
                ans.resize( input.shape() );
                std::copy( input.begin(), input.end(), ans.begin() );
                ans.map( []( auto& x ){ x = 1.0 / ( 1.0 + std::exp(-x) ); } );
                return ans;
            },
            [backward_cache]<Tensor Tsor>( Tsor const&, Tsor const& output, Tsor const& grad ) noexcept
            {
                Tsor& ans = context_cast<Tsor>( backward_cache );
                ans.resize( output.shape() );
                // derivative: o * ( 1 - o ), with o the forward output
                for_each( ans.begin(), ans.end(), output.begin(), grad.begin(), []( auto& a, auto o, auto g ){ a = g * o * ( typename Tsor::value_type{1} - o ); } );
                return ans;
            },
            "Sigmoid" )( ex );
    }


    namespace
    {
        struct relu_context
        {
            auto make_forward() const noexcept
            {
                return []( std::shared_ptr<std::any> forward_cache ) noexcept
                {
                    return [forward_cache]<Tensor Tsor>( Tsor const& input ) noexcept
                    {
                        typedef typename Tsor::value_type value_type;
                        Tsor& ans = context_cast<Tsor>( forward_cache );
                        ans.resize( input.shape() );

                        for_each( ans.begin(), ans.end(), input.begin(), []( auto& o, auto x ){ o = std::max( x, value_type{0} ); } );

                        return ans;
                    };
                };
            }

            auto make_backward() const noexcept
            {
                return []<Tensor Tsor>( Tsor const& input, Tsor const&, Tsor const& grad ) noexcept
                {
                    typedef typename Tsor::value_type value_type;
                    Tsor ans = grad; // shallow copy
                    for_each( ans.begin(), ans.end(), input.begin(), []( auto& v, auto x ){ if ( x <= value_type{0} ) v = value_type{0}; } );
                    return ans;
                };
            }
        }; // relu_context

    }//anonymous namespace

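    ///
    /// Relu function, an unary operator. Returns x if positive, 0 otherwise.
    ///
    /// Usage sketch (assuming `x` is any Expression):
    /// @code
    /// auto y = relu( x );
    /// @endcode
    ///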
    template <Expression Ex>
    auto relu( Ex const& ex ) noexcept
    {
        std::shared_ptr<std::any> forward_cache = std::make_shared<std::any>();
        return make_unary_operator( relu_context{}.make_forward()( forward_cache ), relu_context{}.make_backward(), "Relu" )( ex );
    }


    namespace
    {
        struct relu6_context
        {
            auto make_forward() const noexcept
            {
                return []( std::shared_ptr<std::any> forward_cache ) noexcept
                {
                    return [forward_cache]<Tensor Tsor>( Tsor const& input ) noexcept
                    {
                        typedef typename Tsor::value_type value_type;
                        Tsor& ans = context_cast<Tsor>( forward_cache );
                        ans.resize( input.shape() );
                        for_each( ans.begin(), ans.end(), input.begin(), []( auto& o, auto x ){ o = std::min( value_type{6}, std::max( x, value_type{0} ) ); } );
                        return ans;
                    };
                };
            }

            auto make_backward() const noexcept
            {
                return []<Tensor Tsor>( Tsor const& input, Tsor const&, Tsor const& grad ) noexcept
                {
                    typedef typename Tsor::value_type value_type;
                    Tsor ans = grad; // shallow copy
                    // the gradient passes through only where the forward input was in the linear region (0, 6)
                    for_each( ans.begin(), ans.end(), input.begin(), []( auto& v, auto x ){ if ( (x <= value_type{0}) || (x >= value_type{6}) ) v = value_type{0}; } );
                    return ans;
                };
            }
        }; // relu6_context

    }//anonymous namespace

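    ///
    /// Rectified Linear 6 function, an unary operator. Returns min(max(x, 0), 6).
    ///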
    template <Expression Ex>
    auto relu6( Ex const& ex ) noexcept
    {
        std::shared_ptr<std::any> forward_cache = std::make_shared<std::any>();
        return make_unary_operator( relu6_context{}.make_forward()( forward_cache ), relu6_context{}.make_backward(), "Relu6" )( ex );
    }


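    ///
    /// Leaky Rectified Linear function, an unary operator. Returns x if positive,
    /// factor * x otherwise, with factor expected in (0, 1).
    ///
    /// Note that leaky_relu( factor ) returns a closure; apply it to an
    /// expression (assuming `x` is any Expression):
    /// @code
    /// auto y = leaky_relu( 0.1f )( x );
    /// @endcode
    ///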
    template< typename T > requires std::floating_point<T>
    auto leaky_relu( T const factor=0.2 ) noexcept
    {
        better_assert( factor > T{0}, "Expecting leaky_relu with a factor greater than 0, but got factor = ", factor );
        better_assert( factor < T{1}, "Expecting leaky_relu with a factor less than 1, but got factor = ", factor );
        return [factor]<Expression Ex>( Ex const& ex ) noexcept
        {
            std::shared_ptr<std::any> forward_cache = std::make_shared<std::any>();
            return make_unary_operator( [factor, forward_cache]<Tensor Tsor>( Tsor const& input ) noexcept
                {
                    Tsor& ans = context_cast<Tsor>( forward_cache );
                    ans.resize( input.shape() );
                    // max( x, factor*x ) selects x for x >= 0 and factor*x otherwise, since factor is in (0, 1)
                    for_each( ans.begin(), ans.end(), input.begin(), [factor]( auto& v_out, auto v_in ){ v_out = std::max( v_in, static_cast<decltype(v_in)>( factor * v_in ) ); } );
                    return ans;
                },
                [factor]<Tensor Tsor>( Tsor const& input, Tsor const&, Tsor const& grad ) noexcept
                {
                    typedef typename Tsor::value_type value_type;
                    Tsor ans = grad; // OK for shallow copy
                    for_each( ans.begin(), ans.end(), input.begin(), [factor]( value_type& v_back, value_type const v_in ){ v_back = (v_in > value_type{0}) ? v_back : static_cast<value_type>( factor * v_back ); } );
                    return ans;
                },
                "LeakyRelu" )( ex );
        };
    }

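    ///
    /// Negative relu, an unary operator. Returns -relu(x).
    ///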
    template <Expression Ex>
    auto negative_relu( Ex const& ex ) noexcept
    {
        return negative( relu( ex ) );
    }


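    ///
    /// Exponential Linear function, an unary operator. Returns x if positive,
    /// alpha * (exp(x) - 1) otherwise. As with leaky_relu, elu( alpha ) returns
    /// a closure to be applied to an expression: elu( 1.0f )( x ).
    ///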
    template< typename T=float > requires std::floating_point<T>
    auto elu( T const alpha=1.0 ) noexcept
    {
        return [alpha]<Expression Ex>( Ex const& ex ) noexcept
        {
            std::shared_ptr<std::any> forward_cache = std::make_shared<std::any>();
            return make_unary_operator( [alpha, forward_cache]<Tensor Tsor>( Tsor const& input ) noexcept
                {
                    typedef typename Tsor::value_type value_type;
                    Tsor& ans = context_cast<Tsor>( forward_cache );
                    ans.resize( input.shape() );
                    for_each( ans.begin(), ans.end(), input.begin(), [alpha]( auto& v_out, auto v_in ){ v_out = (v_in > value_type{0}) ? v_in : (alpha * (std::exp(v_in) - value_type{1})); } );
                    return ans;
                },
                [alpha]<Tensor Tsor>( Tsor const& input, Tsor const&, Tsor const& grad ) noexcept
                {
                    typedef typename Tsor::value_type value_type;
                    Tsor ans = grad; // OK for shallow copy
                    // derivative: 1 for x >= 0, alpha * exp(x) otherwise; scale the incoming gradient accordingly
                    for_each( ans.begin(), ans.end(), input.begin(), [alpha]( value_type& v_back, value_type const v_in ){ v_back = (v_in >= value_type{0}) ? v_back : static_cast<value_type>( v_back * alpha * std::exp( v_in ) ); } );
                    return ans;
                },
                "ELU" )( ex );
        };
    }

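    ///
    /// Exponential function, an unary operator. Returns exp(x).
    ///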
    template <Expression Ex>
    auto inline exponential( Ex const& ex ) noexcept
    {
        std::shared_ptr<std::any> forward_cache = std::make_shared<std::any>();

        return make_unary_operator( [forward_cache]<Tensor Tsor>( Tsor const& input ) noexcept
            {
                Tsor& ans = context_cast<Tsor>( forward_cache );
                ans.resize( input.shape() );
                std::copy( input.begin(), input.end(), ans.begin() );
                ans.map( []( auto& x ){ x = std::exp(x); } ); // exp(x)
                better_assert( !has_nan( ans ), "exponential operator forward output contains nan." );
                better_assert( !has_inf( ans ), "exponential operator forward output contains inf." );
                return ans;
            },
            []<Tensor Tsor>( Tsor const&, Tsor const& output, Tsor const& grad ) noexcept
            {
                // derivative of exp(x) is exp(x), i.e. the forward output itself
                Tsor ans = grad;
                for_each( ans.begin(), ans.end(), output.begin(), []( auto& a, auto o ){ a *= o; } );
                return ans;
            },
            "Exponential" )( ex );
    }

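    ///
    /// Hard Sigmoid function, an unary operator. A piecewise linear
    /// approximation of the sigmoid: 0 if x < -1, 1 if x > 1, and (x + 1) / 2
    /// in between.
    ///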
    template <Expression Ex>
    auto inline hard_sigmoid( Ex const& ex ) noexcept
    {
        std::shared_ptr<std::any> forward_cache = std::make_shared<std::any>();

        return make_unary_operator( [forward_cache]<Tensor Tsor>( Tsor const& input ) noexcept
            {
                typedef typename Tsor::value_type value_type;
                Tsor& ans = context_cast<Tsor>( forward_cache );
                ans.resize( input.shape() );
                std::copy( input.begin(), input.end(), ans.begin() );
                // clamp (x + 1) / 2 to [0, 1]
                ans.map( []( auto& x ){ x = ( x > value_type{1} ) ? value_type{1} : ( x < value_type{-1} ) ? value_type{0} : (x + value_type{1}) / value_type{2}; } );
                return ans;
            },
            []<Tensor Tsor>( Tsor const& input, Tsor const&, Tsor const& grad ) noexcept
            {
                typedef typename Tsor::value_type value_type;
                Tsor ans = grad;
                // derivative is 1/2 inside (-1, 1) and 0 outside
                for_each( ans.begin(), ans.end(), input.begin(), []( auto& a, auto x ){ a = ((x > value_type{1}) || (x < value_type{-1})) ? value_type{0} : (a / value_type{2}); } );
                return ans;
            },
            "HardSigmoid" )( ex );
    }

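    ///
    /// Gaussian Error Linear Unit (GELU), an unary operator, using the tanh
    /// approximation gelu(x) ~ 0.5 x (1 + tanh( sqrt(2/pi) (x + 0.044715 x^3) ))
    /// from the paper "Gaussian Error Linear Units (GELUs)".
    ///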
    template <Expression Ex>
    auto inline gelu( Ex const& ex ) noexcept
    {
        // tanh approximation: gelu(x) = 0.5 x ( 1 + tanh( sqrt(2/pi) x (1 + 0.044715 x^2) ) )
        auto _gelu = []<typename T>( T x )
        {
            auto const ans = 0.5 * x * ( 1.0 + std::tanh( 0.79788456080286535588 * x * ( 1.0 + 0.044715*x*x ) ) );
            return static_cast<T>( ans );
        };
        auto sech_2 = []( auto x )
        {
            return 1.0 - std::pow( std::tanh(x), 2 );
        };
        // derivative of the tanh approximation:
        // with u = sqrt(2/pi) x (1 + 0.044715 x^2),
        // d gelu / d x = 0.5 (1 + tanh(u)) + 0.5 sqrt(2/pi) x (1 + 0.134145 x^2) sech^2(u)
        auto _dgelu = [sech_2]<typename T>( T x )
        {
            auto const sq_2_pi_x = 0.79788456080286535588 * x;
            auto const _xx = x * x;
            auto const u = sq_2_pi_x * ( 1.0 + 0.044715 * _xx );
            auto const ans = 0.5 * ( 1.0 + std::tanh( u ) ) + 0.5 * sq_2_pi_x * ( 1.0 + 0.134145 * _xx ) * sech_2( u );
            return static_cast<T>( ans );
        };

        std::shared_ptr<std::any> forward_cache = std::make_shared<std::any>();

        return make_unary_operator( [forward_cache, _gelu]<Tensor Tsor>( Tsor const& input ) noexcept
            {
                Tsor& ans = context_cast<Tsor>( forward_cache );
                ans.resize( input.shape() );
                std::copy( input.begin(), input.end(), ans.begin() );
                ans.map( [_gelu]( auto& x ){ x = _gelu(x); } );
                return ans;
            },
            [_dgelu]<Tensor Tsor>( Tsor const& input, Tsor const&, Tsor const& grad ) noexcept
            {
                Tsor ans = grad;
                // chain rule: scale the incoming gradient by gelu'(x) evaluated at the forward input
                for_each( ans.begin(), ans.end(), input.begin(), [_dgelu]( auto& a, auto x ){ a *= _dgelu(x); } );
                return ans;
            },
            "GeLU" )( ex );
    }


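    ///
    /// Swish activation function: x * sigmoid(x).
    ///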
    template< Expression Ex >
    auto swish( Ex const& ex ) noexcept
    {
        return hadamard_product( ex, sigmoid( ex ) );
    }

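    ///
    /// An alias name of activation swish.
    ///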
    template< Expression Ex >
    auto silu( Ex const& ex ) noexcept
    {
        return swish( ex );
    }

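    ///
    /// Concatenated Rectified Linear Units, an activation function which
    /// preserves both positive and negative phase information by concatenating
    /// relu(x) and relu(-x) along the last axis.
    ///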
    template< Expression Ex >
    auto crelu( Ex const& ex ) noexcept
    {
        return concatenate(-1)( relu(ex), relu(-ex) );
    }

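    ///
    /// Tanh shrink function. Returns x - tanh(x).
    ///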
    template< Expression Ex >
    auto tanh_shrink( Ex const& ex ) noexcept
    {
        return ex - tanh( ex );
    }


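    ///
    /// Mish function. Returns x * tanh(softplus(x)).
    ///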
    template< Expression Ex >
    auto mish( Ex const& ex ) noexcept
    {
        return ex * tanh( softplus( ex ) );
    }


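    ///
    /// Lisht function. Returns x * tanh(x).
    ///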
    template< Expression Ex >
    auto lisht( Ex const& ex ) noexcept
    {
        return ex * tanh( ex );
    }

}//namespace ceras

#endif//DJDWJBHNDAYTNOXLFOBDSGAQAAYPWMXJGEBYIRKEAKAQUUWVGDUGGDKSDXUKSPCYYNTWTDNII