ceras
yet another deep learning engine
tensor.hpp
Go to the documentation of this file.
1 #ifndef HQKGLAXWWVFBFHQNHBVTQJKGUFTPCQPTPXDVNOSBDJIBHITCEKDISJYNAMCPLJDURURDAISFV
2 #define HQKGLAXWWVFBFHQNHBVTQJKGUFTPCQPTPXDVNOSBDJIBHITCEKDISJYNAMCPLJDURURDAISFV
3 
#include <algorithm>
#include <utility>

#include "./includes.hpp"
#include "./utils/better_assert.hpp"
#include "./utils/range.hpp"
#include "./utils/stride_iterator.hpp"
#include "./utils/for_each.hpp"
#include "./utils/buffered_allocator.hpp"
#include "./utils/debug.hpp"
#include "./utils/id.hpp"
#include "./utils/list.hpp"
#include "./backend/cuda.hpp"
#include "./backend/cblas.hpp"
15 
16 namespace ceras
17 {
    // random_seed for random numbers
    // NOTE(review): these are non-inline namespace-scope statics in a header,
    // so every translation unit gets its own seed and generator -- presumably
    // intentional (thread/TU-local randomness), but confirm; `inline` would
    // give a single shared instance instead.
    static unsigned long random_seed = std::chrono::system_clock::now().time_since_epoch().count();
    // static random number random_generator
    static std::mt19937 random_generator{random_seed};

    // Default allocator used by tensor storage.
    template< typename T >
    //using default_allocator = buffered_allocator<T, 256>;
    using default_allocator = std::allocator<T>;
    //
27 
28  // Beware: shallow copy
29  // TODO: impl tensor_view, enabling stride dataset
30  template< typename T, typename Allocator = default_allocator<T> >
31  struct tensor : enable_id<tensor<T, Allocator>, "Tensor">
32  {
        typedef T value_type;                               // element type
        typedef Allocator allocator;                        // allocator for the underlying vector
        typedef std::vector<T, Allocator> vector_type;      // contiguous element storage
        typedef std::shared_ptr<vector_type> shared_vector; // storage shared between copies (shallow-copy semantics)
        typedef tensor self_type;

        std::vector<unsigned long> shape_;   // dimensions, e.g. {batch, rows, cols}
        unsigned long memory_offset_;        // element offset into *vector_ where this view starts (see slice())
        shared_vector vector_; //shared across different instances
42 
        // Iterator interface: iterators are raw pointers into the flattened
        // element sequence, so standard algorithms work directly on a tensor.
        constexpr auto begin() noexcept
        {
            return data();
        }

        constexpr auto begin() const noexcept
        {
            return data();
        }

        constexpr auto cbegin() const noexcept
        {
            return begin();
        }

        // One past the last element of this view.
        constexpr auto end() noexcept
        {
            return begin() + size();
        }

        constexpr auto end() const noexcept
        {
            return begin() + size();
        }

        constexpr auto cend() const noexcept
        {
            return end();
        }
72 
        // Fills every element with `val` (default 0) and returns *this.
        constexpr self_type& reset( T val = T{0} )
        {
            std::fill_n( data(), size(), val );
            return *this;
        }

        // Number of dimensions (rank), i.e. the length of the shape vector.
        constexpr unsigned long ndim() const noexcept
        {
            return shape_.size();
        }

        // Copies the contents of `other` into this tensor, resizing first.
        // Unlike copy construction/assignment this does NOT share storage.
        constexpr self_type& deep_copy( self_type const& other )
        {
            (*this).resize( other.shape() );
            std::copy_n( other.data(), size(), (*this).data() );
            return *this;
        }

        // Returns a new tensor with its own storage holding a copy of the data.
        constexpr self_type const deep_copy() const
        {
            self_type ans{ shape_ };
            std::copy_n( data(), size(), ans.data() );
            return ans;
        }

        // Alias for deep_copy().
        constexpr self_type const copy() const
        {
            return deep_copy();
        }

        // 1-D view
        // Unchecked linear access into the flattened element sequence.
        constexpr value_type& operator[]( unsigned long idx )
        {
            return *(data()+idx);
        }

        // 1-D view
        constexpr value_type const& operator[]( unsigned long idx ) const
        {
            return *(data()+idx);
        }
124 
        // Default constructor: rank-0 tensor with its own (empty) storage.
        tensor() : shape_{std::vector<unsigned long>{}}, memory_offset_{0}, vector_{std::make_shared<vector_type>()}
        {
        }

        //
        // tensor<double> A{ {2,2}, { 1.0, 1.0, 1.0, 1.0} };
        //
        // Shape + explicit element list; the list length must match prod(shape).
        constexpr tensor( std::vector<unsigned long> const& shape, std::initializer_list<T> init, const Allocator& alloc = Allocator() ) : shape_{ shape }, memory_offset_{0}, vector_{std::make_shared<vector_type>(init, alloc)}
        {
            better_assert( (*vector_).size() == std::accumulate( shape_.begin(), shape_.end(), 1UL, [](auto x, auto y){ return x*y; } ), "Expecting vector has same size as the shape indicates." );
        }

        // Shape-only constructor: allocates prod(shape) zero-initialized elements.
        constexpr tensor( std::vector<unsigned long> const& shape ):shape_{shape}, memory_offset_{0}, vector_{ std::make_shared<vector_type>( std::accumulate( shape_.begin(), shape_.end(), 1UL, []( auto x, auto y ){ return x*y; } ), T{0} ) }
        {
        }
140 
141  constexpr tensor( std::vector<unsigned long> const& shape, T init ):shape_{shape}, memory_offset_{0}, vector_{ std::make_shared<vector_type>( std::accumulate( shape_.begin(), shape_.end(), 1UL, []( auto x, auto y ){ return x*y; } ), T{0} ) }
142  {
143  std::fill( begin(), end(), init );
144  }
145 
        // Shallow view constructor: shares `other`'s storage but starts at the
        // given element offset (used by slice()).
        constexpr tensor( tensor const& other, unsigned long memory_offset ) : shape_{ other.shape_ }, memory_offset_{ memory_offset }, vector_{ other.vector_ } {}

        // Copy constructor -- SHALLOW: the new tensor shares storage with, and
        // adopts the id_ (from enable_id) of, the source tensor.
        constexpr tensor( self_type const& other ) noexcept : shape_{ other.shape_ }, memory_offset_{ other.memory_offset_ }
        {
            vector_ = other.vector_;
            (*this).id_ = other.id_;
        }
153 
154  constexpr tensor( self_type && other ) noexcept : shape_{ other.shape_ }, memory_offset_{ other.memory_offset_ }
155  {
156  vector_ = other.vector_;
157  (*this).id_ = other.id_;
158  }
159 
        // Copy assignment -- SHALLOW: shares storage and adopts other's id_.
        // shared_ptr copy-assignment makes self-assignment safe here.
        constexpr self_type& operator = ( self_type const& other ) noexcept
        {
            shape_ = other.shape_;
            memory_offset_ = other.memory_offset_;
            vector_ = other.vector_;
            (*this).id_ = other.id_;
            return *this;
        }
168  constexpr self_type& operator = ( self_type && other ) noexcept
169  {
170  shape_ = other.shape_;
171  memory_offset_ = other.memory_offset_;
172  vector_ = other.vector_;
173  (*this).id_ = other.id_;
174  return *this;
175  }
176 
        // Returns the dimensions of the tensor, e.g. {batch, rows, cols}.
        constexpr std::vector< unsigned long > const& shape() const noexcept { return shape_; }

        // Number of elements reachable from this view: the size of the shared
        // storage minus this view's starting offset (a slice shares the vector
        // but begins later in it).  Returns 0 when the storage pointer is
        // empty (e.g. after being moved from).
        constexpr unsigned long size() const noexcept
        {
            if ( !vector_ )
                return 0;
            return (*vector_).size() - memory_offset_;
        }

        // Resizes to hold exactly prod(new_shape) elements.
        // NOTE(review): the underlying vector is shared, so resizing also
        // affects every other tensor sharing this storage; the memory offset
        // is reset to 0 only when the element count actually changes.
        constexpr self_type& resize( std::vector< unsigned long > const& new_shape )
        {
            unsigned long const new_size = std::accumulate( new_shape.begin(), new_shape.end(), 1UL, [](auto x, auto y){ return x*y; } );
            if( (*this).size() != new_size )
            {
                (*vector_).resize(new_size);
                memory_offset_ = 0UL;
            }
            (*this).shape_ = new_shape;
            return *this;
        }

        //reshape is resize
        // Changes the shape without touching the data when the element count
        // is unchanged; otherwise falls back to resize() (which reallocates,
        // so the assert below can never fire -- it is a belt-and-braces check).
        constexpr self_type& reshape( std::vector< unsigned long > const& new_shape )
        {
            unsigned long const new_size = std::accumulate( new_shape.begin(), new_shape.end(), 1UL, [](auto x, auto y){ return x*y; } );
            if ( (*this).size() != new_size ) return resize( new_shape );

            better_assert( (*this).size() == new_size, "reshape: expecting same size, but the original size is ", (*this).size(), ", and the new size is ", new_size );
            (*this).shape_ = new_shape;
            return *this;
        }
208 
209  //mapping a smaller tensor on a larger one
210  constexpr self_type& shrink_to( std::vector< unsigned long > const& new_shape )
211  {
212  unsigned long const new_size = std::accumulate( new_shape.begin(), new_shape.end(), 1UL, [](auto x, auto y){ return x*y; } );
213  better_assert( (*this).size() >= new_size, "reshape: expecting smaller size, but the original size is ", (*this).size(), ", and the new size is ", new_size );
214  (*this).shape_ = new_shape;
215  return *this;
216  }
217 
        //adjust the memory offset
        // Moves this view's starting position inside the shared storage.
        constexpr self_type& creep_to( unsigned long new_memory_offset )
        {
            (*this).memory_offset_ = new_memory_offset;
            return *this;
        }

        // A tensor is empty when it has no dimensions at all (rank 0).
        [[nodiscard]] constexpr bool empty() const noexcept
        {
            return 0 == shape_.size();
        }

        // Pointer to the first element of this view (storage start + offset).
        constexpr value_type* data() noexcept
        {
            return (*vector_).data() + memory_offset_;
        }

        constexpr const value_type* data() const noexcept
        {
            return (*vector_).data() + memory_offset_;
        }
239 
        // element-wise operation
        //
        // tensor<double> x{...};
        // x.map( []( double v ){ return 1.0/v+1.0; } );
        //
        // Applies f in place to every element; f mutates via its reference
        // parameter (its return value is ignored).
        template< typename Function >
        constexpr self_type& map( Function const& f )
        {
            for_each( (*this).data(), (*this).data()+(*this).size(), [&f]( auto& v ){ f(v); } );
            return *this;
        }

        // Element-wise in-place addition; shapes must match exactly.
        constexpr self_type& operator += ( self_type const& other )
        {
            //better_assert( shape() == other.shape(), "Error with tensor::operator += : Shape mismatch! -- current shape is ", shape(), " and other tensor shape is ", other.shape() );
            better_assert( shape() == other.shape(), "Error with tensor::operator += : Shape mismatch!" );
            std::transform( data(), data()+size(), other.data(), data(), []( auto x, auto y ){ return x+y; } );
            return *this;
        }

        // Adds scalar x to every element in place.
        constexpr self_type& operator += ( value_type x )
        {
            std::for_each( data(), data()+size(), [x]( value_type& v ){ v += x; } );
            return *this;
        }

        // Element-wise in-place subtraction; shapes must match exactly.
        constexpr self_type& operator -= ( self_type const& other )
        {
            better_assert( shape() == other.shape(), "Error with tensor::operator -=: Shape not match!" );
            std::transform( data(), data()+size(), other.data(), data(), []( auto x, auto y ){ return x-y; } );
            return *this;
        }

        // Subtracts scalar x from every element in place.
        constexpr self_type& operator -= ( value_type x )
        {
            std::for_each( data(), data()+size(), [x]( auto& v ){ v -= x; } );
            return *this;
        }

        // Element-wise (Hadamard) in-place product; shapes must match exactly.
        constexpr self_type& operator *= ( self_type const& other )
        {
            better_assert( shape() == other.shape(), "Shape not match!" );
            std::transform( data(), data()+size(), other.data(), data(), []( auto x, auto y ){ return x*y; } );
            return *this;
        }

        // Multiplies every element by scalar x in place.
        constexpr self_type& operator *= ( value_type x )
        {
            std::for_each( data(), data()+size(), [x]( auto& v ){ v *= x; } );
            return *this;
        }

        // Element-wise in-place division; shapes must match exactly.
        constexpr self_type& operator /= ( self_type const& other )
        {
            better_assert( shape() == other.shape(), "Shape not match!" );
            std::transform( data(), data()+size(), other.data(), data(), []( auto x, auto y ){ return x/y; } );
            return *this;
        }

        // Divides every element by scalar x in place.
        constexpr self_type& operator /= ( value_type x )
        {
            std::for_each( data(), data()+size(), [x]( auto& v ){ v /= x; } );
            return *this;
        }

        // Unary minus: returns a negated deep copy; *this is untouched.
        constexpr self_type const operator - () const
        {
            self_type ans = (*this).deep_copy();
            std::for_each( ans.data(), ans.data()+size(), []( auto& v ){ v = -v; } );
            return ans;
        }
311 
        // Extracts the single element of a one-element tensor.
        constexpr value_type as_scalar() const noexcept
        {
            better_assert( size() == 1, "Expecting tensor has a single value, but got ", size() );
            return *begin();
        }

        // Returns a new tensor of the same shape with elements converted to U
        // via the implicit T -> U conversion performed by std::copy.
        template< typename U >
        constexpr auto as_type() const noexcept
        {
            tensor<U> ans{ (*this).shape() };
            std::copy( (*this).begin(), (*this).end(), ans.begin() );
            return ans;
        }

        // Returns a SHALLOW sub-tensor covering rows [m, n) of the first
        // dimension: the result shares storage with *this, only the leading
        // dimension and the memory offset change -- no elements are copied.
        tensor slice( unsigned long m, unsigned long n ) const noexcept
        {
            better_assert( m < n, "starting dimension larger than then ending dimension." );
            better_assert( !shape_.empty(), "Cannot slice an empty tensor." );

            unsigned long first_dim = shape_[0];
            better_assert( n <= first_dim, "this tensor only has ", first_dim, " at the first dimension, too small for n = ", n );

            // elements per step along the first dimension
            unsigned long rest_dims = std::accumulate( shape_.begin()+1, shape_.end(), 1UL, []( auto x, auto y ){ return x*y; } );

            tensor ans = *this;
            ans.shape_[0] = n - m;
            // offsets accumulate, so slicing a slice also works
            ans.memory_offset_ = rest_dims * m + memory_offset_;
            return ans;
        }
341 
342  };
343 
344  template <typename T, typename A=default_allocator<T> >
345  constexpr tensor<T, A> as_tensor( T val ) noexcept
346  {
347  tensor<T, A> ans{ {1,} };
348  ans[0] = val;
349  return ans;
350  }
351 
    // Type trait: is_tensor_v<T> is true exactly for instantiations of tensor.
    template< typename T >
    struct is_tensor : std::false_type {};

    template< typename T, typename A >
    struct is_tensor< tensor< T, A> > : std::true_type {};

    template< class T >
    inline constexpr bool is_tensor_v = is_tensor<T>::value;

    // Concept constraining the free functions below to tensor types.
    template< typename T >
    concept Tensor = is_tensor_v<T>;
363 
    // Streams a human-readable dump of the tensor: its shape line followed by
    // the data, one row of the last dimension per line.  Output is buffered in
    // a local ostringstream (inheriting the caller's flags/locale/precision)
    // so it reaches os_ as a single insertion.
    template< Tensor Tsor, typename CharT, typename Traits >
    std::basic_ostream<CharT, Traits>& operator << ( std::basic_ostream<CharT, Traits>& os_, Tsor const& tsor )
    {
        typedef typename Tsor::value_type value_type;
        std::basic_ostringstream<CharT, Traits> os;
        os.flags(os_.flags());
        os.imbue(os_.getloc());
        os.precision(os_.precision());
        //shape
        os << "shape: [ ";
        std::copy( tsor.shape_.begin(), tsor.shape_.end(), std::ostream_iterator<unsigned long>{os, " "} );
        os << "]\n";

        //data
        os << "data:\n{\n";
        if ( tsor.shape().size() < 2 )
        {
            // rank 0/1: a single line of elements
            std::copy( tsor.data(), tsor.data()+tsor.size(), std::ostream_iterator<value_type>{os, "\t"} );
            os << "\n";
        }
        else
        {
            // rank >= 2: flatten all leading dimensions into rows of last_dim
            auto const& shape = tsor.shape();
            unsigned long const dims = shape.size();
            unsigned long const last_dim = shape[dims-1];
            unsigned long const abbreviated_rows= std::reduce( shape.begin(), shape.begin()+dims-1, 1UL, []( unsigned long x, unsigned long y ){ return x*y; } );
            for ( auto idx = 0UL; idx != abbreviated_rows; ++idx )
            {
                std::copy( tsor.data()+idx*last_dim, tsor.data()+(idx+1)*last_dim, std::ostream_iterator<value_type>{os, "\t"} );
                os << "\n";
            }
        }
        os << "}\n";

        return os_ << os.str();
    }
400 
401  template< typename T >
402  struct view_1d
403  {
404  T* data;
405  unsigned long dims;
406 
407  constexpr T& operator[]( unsigned long idx ) noexcept { return data[idx]; }
408  constexpr T const& operator[]( unsigned long idx ) const noexcept { return data[idx]; }
409  };// view_1d
410 
411  template< typename T >
412  using array = view_1d<T>;
413 
414 
    // A non-owning 2-D, row-major view over a contiguous buffer, with a
    // transposed flag that swaps the stride used by operator[].
    // NOTE(review): the `row_type` typedef and the `bool transposed_;` member
    // referenced below are declared on lines elided from this excerpt --
    // confirm against the full source.
    template< typename T >
    struct view_2d
    {
        typedef T value_type;
        typedef const value_type* const_row_type;
        typedef stride_iterator<value_type*> col_type;        // strided iterator walking down one column
        typedef stride_iterator<const value_type*> const_col_type;

        T* data_;            // borrowed pointer -- the view never owns memory
        unsigned long row_;
        unsigned long col_;


        // Views an existing tensor's storage as row x col.
        template<typename A>
        constexpr view_2d( tensor<T, A>& tsor, unsigned long row, unsigned long col, bool transposed=false ) noexcept : data_{tsor.data()}, row_{row}, col_{col}, transposed_{transposed} {}

        constexpr view_2d( T* data, unsigned long row, unsigned long col, bool transposed=false ) noexcept : data_{data}, row_{row}, col_{col}, transposed_{transposed} {}

        // should have a template specialization for view_2d of const T*, but here ommited with 'const_cast' as operator [] should be specialized in that case
        constexpr view_2d( const T* data, unsigned long row, unsigned long col, bool transposed=false ) noexcept : data_{const_cast<T*>(data)}, row_{row}, col_{col}, transposed_{transposed} {}

        // Returns a pointer to the start of row `index`; when transposed_ is
        // set the stride is row_ instead of col_.
        constexpr T* operator[]( unsigned long index )
        {
            if ( transposed_ )
                return data_ + index * row_;
            return data_ + index * col_;
        }

        constexpr const T* operator[]( unsigned long index ) const
        {
            if ( transposed_ )
                return data_ + index * row_;
            return data_ + index * col_;
        }

        constexpr auto shape() const noexcept { return std::make_pair( row_, col_ ); }
        constexpr unsigned long size() const noexcept { return row_ * col_; }

        constexpr T* data() noexcept { return data_; }
        constexpr const T* data() const noexcept { return data_; }

        constexpr T* begin() noexcept { return data_; }
        constexpr const T* end() const noexcept { return begin()+size(); }

        constexpr unsigned long row() const noexcept { return row_; }
        constexpr unsigned long col() const noexcept { return col_; }

        // Row iteration: contiguous [row_begin, row_end) per row.
        constexpr row_type row_begin( unsigned long index = 0 ) noexcept { return begin() + index * col(); }
        constexpr row_type row_end( unsigned long index = 0 ) noexcept { return begin() + (index+1) * col(); }

        constexpr const_row_type row_begin( unsigned long index = 0 ) const noexcept { return begin() + index * col(); }
        constexpr const_row_type row_end( unsigned long index = 0 ) const noexcept { return begin() + (index+1) * col(); }

        // Column iteration via stride_iterator stepping col() elements a time.
        constexpr col_type col_begin( unsigned long index = 0 ) noexcept { return col_type{ begin() + index, col() }; }
        constexpr col_type col_end( unsigned long index = 0 ) noexcept { return col_begin(index) + row(); }

        constexpr const_col_type col_begin( unsigned long index = 0 ) const noexcept { return const_col_type{ begin() + index, col() }; }
        constexpr const_col_type col_end( unsigned long index = 0 ) const noexcept { return col_begin(index) + row(); }
    };
476 
477  template< typename T >
479 
480  // C <= A * B
481  // where A or A' is [m x n], B or B' is [n x k] and C is [m x k]
482  template< typename T > requires std::floating_point<T>
483  void gemm_cpu( T const* A, bool a_transposed, T const* B, bool b_transposed, unsigned long m, unsigned long n, unsigned long k, T* C )
484  {
485  auto a_view = view_2d{ A, m, n, a_transposed };
486  auto b_view = view_2d{ B, n, k, b_transposed };
487  auto c_view = view_2d{ C, m, k };
488 
489  std::fill_n( C, m*k, T{0} );
490 
491  if ( a_transposed == false && b_transposed == false )
492  for ( auto r = 0UL; r != m; ++r )
493  for ( auto c = 0UL; c != k; ++c )
494  for ( auto idx = 0UL; idx != n; ++idx )
495  c_view[r][c] += a_view[r][idx] * b_view[idx][c];
496  else if ( a_transposed == false && b_transposed == true )
497  for ( auto r = 0UL; r != m; ++r )
498  for ( auto c = 0UL; c != k; ++c )
499  for ( auto idx = 0UL; idx != n; ++idx )
500  c_view[r][c] += a_view[r][idx] * b_view[c][idx];
501  else if ( a_transposed == true && b_transposed == false )
502  for ( auto r = 0UL; r != m; ++r )
503  for ( auto c = 0UL; c != k; ++c )
504  for ( auto idx = 0UL; idx != n; ++idx )
505  c_view[r][c] += a_view[idx][r] * b_view[idx][c];
506  else
507  for ( auto r = 0UL; r != m; ++r )
508  for ( auto c = 0UL; c != k; ++c )
509  for ( auto idx = 0UL; idx != n; ++idx )
510  c_view[r][c] += a_view[idx][r] * b_view[c][idx];
511  }
512 
    // this function is used to update the threshold 'cuda_gemm_threshold' defined in '../config.hpp', only considering float case
    // NOTE(review): the function signature (original line 514) is elided from
    // this excerpt; the braces below form its body.  Strategy: with CUDA off,
    // pin the threshold so high it is never reached; with CUDA on, warm up the
    // GPU, then grow the problem size until the CPU kernel becomes slower than
    // the CUDA kernel, and record that crossover as the threshold.
    {
        if constexpr( cuda_mode == 0 )
        {
            cuda_gemm_threshold = std::numeric_limits<unsigned long>::max(); // very larger threshold to stop from using CUDA
        }
        else
        {
            //warm-up GPU
            {
                auto A = tensor<float>({128, 128});
                auto B = tensor<float>({128, 128});
                auto C = tensor<float>({128, 128});
                cuda_gemm( A.data(), false, B.data(), false, 128, 128, 128, C.data() );
            }

            unsigned long dim = 16;
            unsigned long increasement = 16;

            // grow dim until the GPU wins, then use dim^3 (the operation
            // count of a dim x dim gemm) as the dispatch threshold
            while ( true )
            {
                auto A = tensor<float>( {dim*dim,} );
                auto B = tensor<float>( {dim*dim,} );
                auto C = tensor<float>( {dim*dim,} );
                unsigned long t_gpu = time_it( [&](){ cuda_gemm( A.data(), false, B.data(), false, dim, dim, dim, C.data() ); });
                unsigned long t_cpu = time_it( [&](){ gemm_cpu( A.data(), false, B.data(), false, dim, dim, dim, C.data() ); });

                if ( t_cpu > t_gpu ) break;

                dim += increasement;
            }

            cuda_gemm_threshold = dim * dim * dim;
        }
    }
549 
    // C <= A * B
    // where A or A' is [m x n], B or B' is [n x k] and C is [m x k]
    // Dispatch wrapper: picks CUDA, CBLAS or the naive CPU kernel based on the
    // compile-time mode flags and (for CUDA) the m*n*k operation count.
    template< typename T > requires std::floating_point<T>
    void gemm( T const* A, bool a_transposed, T const* B, bool b_transposed, unsigned long m, unsigned long n, unsigned long k, T* C )
    {
        // NOTE(review): the statement controlled by this `if` (original line
        // 556, presumably the call that computes cuda_gemm_threshold on first
        // use) is elided from this excerpt -- confirm against the full source.
        if ( cuda_gemm_threshold == 0 ) // global variable defined in config.h

        if constexpr( cuda_mode )
        {
            unsigned long const operations = m * n * k;

            // large problems go to the GPU, small ones stay on the CPU
            if ( operations >= cuda_gemm_threshold )
                cuda_gemm( A, a_transposed, B, b_transposed, m, n, k, C );
            else
                gemm_cpu( A, a_transposed, B, b_transposed, m, n, k, C );
        }
        else if constexpr( cblas_mode )
        {
            cblas_gemm( A, a_transposed, B, b_transposed, m, n, k, C );
        }
        else
        {
            gemm_cpu( A, a_transposed, B, b_transposed, m, n, k, C );
        }
    }
576 
    // ans <= x * y for 2-D views, with shape compatibility checks.
    template< typename T > requires std::floating_point<T> // this one only for non-transposed 2d View
    void gemm( view_2d<T> const& x, view_2d<T> const& y, view_2d<T>& ans ) //note: direct copy of x and y
    {
        auto const [x_row, x_col] = x.shape();
        auto const [y_row, y_col] = y.shape();
        auto const [a_row, a_col] = ans.shape();

        better_assert( x_row == a_row );
        better_assert( y_col == a_col );
        better_assert( x_col == y_row, "Expecting x_col == y_row, but x_col = ", x_col, ", and y_row = ", y_row );

        // NOTE(review): despite the "non-transposed" remark above, the views'
        // stored transposed_ flags are forwarded to the pointer-level gemm.
        gemm( x.data(), x.transposed_, y.data(), y.transposed_, x_row, x_col, y_col, ans.data() );
    }
590 
    // always prefer channel-last data format
    // Example:
    //
    // ( 3x2 ) + ( 1x2 ) --> ( 3x2 )
    //
    // [ 1, 2 ] [ 0, 3 ]
    // [ 3, 4 ] + [ -1, 1 ] = [ 2, 5 ]
    // [ 5, 6 ] [ 4, 7 ]
    //
    //template< typename T, typename A >
    //tensor<T, A> add( tensor<T, A> const& lhs, tensor<T, A> const& rhs ) noexcept
    //TODO: fix cases like [31, 1, 31, 31, 1] + [31, 1, 1, 31]
    // Broadcasting addition: the smaller operand is tiled over the larger one,
    // which requires the larger size to be an exact multiple of the smaller.
    template< Tensor Tsor >
    Tsor add( Tsor const& lhs, Tsor const& rhs ) noexcept
    {
        unsigned long const l_size = lhs.size();
        unsigned long const r_size = rhs.size();
        // normalize so that lhs is always the larger operand
        if ( l_size < r_size ) return rhs + lhs;

        unsigned long const repeats = l_size / r_size;
        better_assert( (r_size * repeats) == l_size, "Dimension does not match!" );

        Tsor ans = lhs.deep_copy();
        for ( auto idx : range( repeats ) )
            for ( auto jdx : range( r_size ) )
                ans[idx*r_size+jdx] = lhs[idx*r_size+jdx] + rhs[jdx];

        return ans;
    }
620 
621  template< Tensor Tsor >
622  Tsor operator + ( Tsor const& lhs, Tsor const& rhs ) noexcept
623  {
624  return add( lhs, rhs );
625  }
626 
627  template< Tensor Tsor >
628  Tsor operator + ( typename Tsor::value_type const& lhs, Tsor const& rhs ) noexcept
629  {
630  auto ans = rhs.deep_copy();
631  ans.map( [lhs]( auto& v ){ v += lhs; } );
632  return ans;
633  }
634 
635  template< Tensor Tsor >
636  Tsor operator + ( Tsor const& lhs, typename Tsor::value_type const& rhs ) noexcept
637  {
638  return rhs + lhs;
639  }
640 
641  template< Tensor Tsor >
642  Tsor minus( Tsor const& lhs, Tsor const& rhs ) noexcept
643  {
644  return add( lhs, -rhs );
645  }
646 
647  template< Tensor Tsor >
648  Tsor operator - ( Tsor const& lhs, Tsor const& rhs ) noexcept
649  {
650  return minus( lhs, rhs );
651  }
652 
653  template< Tensor Tsor >
654  Tsor operator - ( typename Tsor::value_type const& lhs, Tsor const& rhs ) noexcept
655  {
656  auto ans = rhs.deep_copy();
657  ans.map( [lhs]( auto& v ){ v = lhs - v; } );
658  return ans;
659  }
660 
661  template< Tensor Tsor >
662  Tsor operator - ( Tsor const& lhs, typename Tsor::value_type const& rhs ) noexcept
663  {
664  auto ans = lhs.deep_copy();
665  ans.map( [rhs]( auto& v ){ v -= rhs; } );
666  return ans;
667  }
668 
669  template< Tensor Tsor >
670  Tsor operator * ( typename Tsor::value_type const& lhs, Tsor const& rhs ) noexcept
671  {
672  auto ans = rhs.deep_copy();
673  ans.map( [lhs]( auto& v ){ v *= lhs; } );
674  return ans;
675  }
676 
677  template< Tensor Tsor >
678  Tsor operator * ( Tsor const& lhs, typename Tsor::value_type const& rhs ) noexcept
679  {
680  return rhs * lhs;
681  }
682 
683  template< Tensor Tsor >
684  Tsor operator / ( Tsor const& lhs, typename Tsor::value_type const& rhs ) noexcept
685  {
686  auto ans = lhs.deep_copy();
687  ans.map( [rhs]( auto& v ){ v /= rhs; } );
688  return ans;
689  }
690 
691  template< Tensor Tsor >
692  Tsor reshape( Tsor const& ts, std::vector<unsigned long> const& new_shape )
693  {
694  Tsor ans = ts;
695  return ans.reshape( new_shape );
696  }
697 
    // Matrix multiplication into a caller-provided result tensor.
    // 1-D operands are promoted: lhs to a row vector, rhs to a column vector;
    // otherwise both must be 2-D and ans is resized to [lhs_rows x rhs_cols].
    template< Tensor Tsor >
    void multiply( Tsor const& lhs, Tsor const& rhs, Tsor& ans ) noexcept
    {
        if ( 1 == lhs.ndim() )
            return multiply( reshape( lhs, {1UL, lhs.size()} ), rhs, ans );

        if ( 1 == rhs.ndim() )
            return multiply( lhs, reshape( rhs, {lhs.size(), 1UL} ), ans );

        better_assert( 2 == rhs.ndim(), "expecting rhs tensor has 2 dimensions, but got ", rhs.ndim() );

        if ( 2 == lhs.ndim() )
        {
            typedef typename Tsor::value_type value_type;
            auto const& lhs_shape = lhs.shape();
            auto const& rhs_shape = rhs.shape();

            // wrap the flat storage as 2-D views and dispatch to gemm
            view_2d<value_type> const x{ lhs.data(), lhs_shape[0], lhs_shape[1] };
            view_2d<value_type> const y{ rhs.data(), rhs_shape[0], rhs_shape[1] };
            auto const [row, col] = std::make_pair( lhs_shape[0], rhs_shape[1] );
            ans.resize( {row, col} );
            view_2d<value_type> z{ ans.data(), row, col };
            gemm( x, y, z );
            return;
        }
        better_assert( false, "dimension not match, lhs dimension is ", lhs.ndim() );
    }
725 
726  template< Tensor Tsor >
727  Tsor multiply( Tsor const& lhs, Tsor const& rhs ) noexcept
728  {
729  Tsor ans;
730  multiply( lhs, rhs, ans );
731  return ans;
732  }
733 
734  template< Tensor Tsor >
735  Tsor operator * ( Tsor const& lhs, Tsor const& rhs ) noexcept
736  {
737  return multiply( lhs, rhs );
738  }
739 
    // caution: only valid for channel last case
    // Broadcasting elementwise product: the smaller operand is tiled over the
    // larger one; the larger size must be an exact multiple of the smaller.
    template< Tensor Tsor >
    Tsor elementwise_product( Tsor const& lhs, Tsor const& rhs ) noexcept
    {
        unsigned long const l_size = lhs.size();
        unsigned long const r_size = rhs.size();
        // normalize so that lhs is always the larger operand
        if ( l_size < r_size ) return elementwise_product(rhs, lhs);

        unsigned long const repeats = l_size / r_size;
        better_assert( (r_size * repeats) == l_size, "Dimension is not match!" );

        Tsor ans = lhs.deep_copy();
        for ( auto idx : range( repeats ) )
            for ( auto jdx : range( r_size ) )
            {
                ans[idx*r_size+jdx] *= rhs[jdx];
            }

        return ans;
    }

    // Alias for elementwise_product.
    template< Tensor Tsor >
    Tsor hadamard_product( Tsor const& lhs, Tsor const& rhs ) noexcept
    {
        return elementwise_product( lhs, rhs );
    }

    // Elementwise division; unlike the product above this does NOT broadcast,
    // the shapes must match exactly.
    template< Tensor Tsor >
    Tsor elementwise_divide( Tsor const& lhs, Tsor const& rhs ) noexcept
    {
        better_assert( lhs.shape() == rhs.shape(), "Shape not match!" );
        Tsor ans{ lhs.shape() };
        for_each( lhs.begin(), lhs.end(), rhs.begin(), ans.begin(), []( auto x, auto y, auto& z ){ z = x/y; } );
        return ans;
    }
775 
    // Stacks n copies of tsor along a new leading dimension: the result shape
    // is {n, ...original shape...}.
    template< Tensor Tsor >
    Tsor repeat( Tsor const& tsor, unsigned long n )
    {
        Tsor ans{ {n, tsor.size()} };
        {
            // copy the source once per repetition
            auto itor = ans.data();
            for ( auto idx : range(n) )
                std::copy( tsor.begin(), tsor.end(), itor + idx * tsor.size() );

            // prepend n to the original shape
            std::vector<unsigned long> new_shape;
            new_shape.push_back( n );
            std::copy( tsor.shape().begin(), tsor.shape().end(), std::back_inserter( new_shape ) );
            ans.reshape( new_shape );
        }

        return ans;
    }
793 
    // Sums all elements into a one-element tensor of shape {1}.
    template< Tensor Tsor >
    Tsor reduce_sum( Tsor const& tsor )
    {
        auto result = std::reduce( tsor.data(), tsor.data()+tsor.size(), typename Tsor::value_type{0} );
        return Tsor{ std::vector<unsigned long>{1}, {result,} };
    }

    // Mean of all elements as a one-element tensor (sum / element count).
    template< Tensor Tsor >
    Tsor reduce_mean( Tsor const& tsor )
    {
        auto ans = reduce_sum( tsor );
        ans /= tsor.size();
        return ans;
    }
808 
809  template< Tensor Tsor >
810  Tsor clip( Tsor& tsor, typename Tsor::value_type lower = 0, typename Tsor::value_type upper = 1 )
811  {
812  std::for_each( tsor.data(), tsor.data()+tsor.size(), [lower, upper]( typename Tsor::value_type& v ){ v = std::min( upper, v ); v = std::max( lower, v ); } );
813  return tsor;
814  }
815 
    // Removes all dimensions of extent 1 from the shape (shallow copy of the
    // data).  A one-element tensor squeezes to shape {1}; an empty tensor is
    // returned unchanged.
    template< Tensor Tsor >
    Tsor squeeze( Tsor const& tsor )
    {
        Tsor ans{ tsor };

        if ( 0 == tsor.size() )
            return ans;

        if ( 1 == tsor.size() )
        {
            ans.reshape( {1,} );
            return ans;
        }

        // keep only the non-degenerate dimensions
        std::vector<unsigned long> new_shape;
        for ( auto s : tsor.shape() )
            if (s > 1 )
                new_shape.push_back( s );
        ans.reshape( new_shape );
        return ans;
    }
837 
    // Tensor of the given shape filled with N(mean, stddev) samples drawn from
    // the file-level random_generator.
    template< typename T, typename A=default_allocator<T> >
    tensor<T,A> randn( std::vector<unsigned long> const& shape, T mean=T{0}, T stddev=T{1} )
    {
        std::normal_distribution<T> distribution( mean, stddev );
        tensor<T,A> ans{ shape };
        std::generate( ans.data(), ans.data()+ans.size(), [&distribution](){ return distribution(random_generator); } );
        return ans;
    }

    // Normal samples restricted to [lower, upper] by rejection sampling: each
    // draw outside the interval is discarded and retried.
    // NOTE(review): never terminates if [lower, upper] has (near-)zero
    // probability mass under N(mean, stddev) -- callers must pass a sane range.
    template< typename T, typename A=default_allocator<T> >
    tensor<T,A> truncated_normal( std::vector<unsigned long> const& shape, T mean=T{0}, T stddev=T{1}, T lower=T{0}, T upper=T{1} )
    {
        std::normal_distribution<T> distribution( mean, stddev );
        tensor<T,A> ans{ shape };
        for ( auto& v : ans )
        {
            for ( ;; )
            {
                T x = distribution(random_generator);
                if ( x >= lower && x <= upper )
                {
                    v = x;
                    break;
                }
            }
        }

        return ans;
    }

    // Tensor of the given shape filled with uniform samples from [min, max).
    template< typename T, typename A=default_allocator<T> >
    tensor<T,A> random( std::vector<unsigned long> const& shape, T min=T{0}, T max=T{1} )
    {
        std::uniform_real_distribution<T> distribution( min, max );
        tensor<T,A> ans{ shape };
        std::generate( ans.data(), ans.data()+ans.size(), [&distribution](){ return distribution(random_generator); } );
        return ans;
    }
876 
    // Uniform random tensor with the same shape/value type as tsor.
    template< Tensor Tsor >
    Tsor random_like( Tsor const& tsor, typename Tsor::value_type min = 0, typename Tsor::value_type max = 1 )
    {
        return random<typename Tsor::value_type, typename Tsor::allocator>( tsor.shape(), min, max );
    }

    // Normal random tensor with the same shape/value type as tsor.
    template< Tensor Tsor >
    Tsor randn_like( Tsor const& tsor, typename Tsor::value_type mean = 0, typename Tsor::value_type stddev = 1 )
    {
        return randn<typename Tsor::value_type, typename Tsor::allocator>( tsor.shape(), mean, stddev );
    }

    // TODO glorot_normal
    //
    // Glorot, Xavier, and Yoshua Bengio. “Understanding the Difficulty of Training Deep Feedforward Neural Networks.” In Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics, 249–256, 2010.
    // Glorot/Xavier uniform init: samples from [-b, b] with
    // b = sqrt( 6 / (fan_in + fan_out) ), taking the first two shape entries
    // as fan_in/fan_out.
    // NOTE(review): dereferences shape.begin()+1 unchecked -- a shape with
    // fewer than 2 dimensions is undefined behavior here; confirm callers.
    template< typename T, typename A=default_allocator<T> >
    tensor<T,A> glorot_uniform( std::initializer_list<unsigned long> shape )
    {
        T prev_dim = *(shape.begin());
        T next_dim = *(shape.begin()+1);
        T const bound = std::sqrt( T{6} / (std::max(T{1}, prev_dim+next_dim)) );
        return random<T,A>( shape, -bound, bound );
    }
900 
901  template< Tensor Tsor >
902  Tsor deep_copy( Tsor const& tsor )
903  {
904  return tsor.deep_copy();
905  }
906 
907  template< Tensor Tsor >
908  Tsor copy( Tsor const& tsor )
909  {
910  return deep_copy( tsor );
911  }
912 
    // Concatenates two tensors along `axis` (-1 means the last axis).  All
    // other dimensions must match.  A lower-rank rhs is first tiled with
    // repeat() so its rank matches lhs.
    template< Tensor Tsor >
    Tsor concatenate( Tsor const& lhs, Tsor const& rhs, unsigned long axis=0 ) noexcept
    {
        // normalize so that lhs always has the higher (or equal) rank
        if ( lhs.ndim() < rhs.ndim() )
            return concatenate( rhs, lhs, axis );

        // axis alignment
        if ( lhs.ndim() > rhs.ndim() )
        {
            // tile rhs over lhs's extra leading dimensions, then retry
            unsigned long const dims_to_repeat = std::accumulate( lhs.shape().begin(), lhs.shape().begin()+lhs.ndim()-rhs.ndim(), 1UL, [](auto x, auto y ){ return x*y; } );
            auto new_rhs = repeat( rhs, dims_to_repeat );
            std::vector<unsigned long> new_shape{ lhs.shape().begin(), lhs.shape().begin()+lhs.ndim()-rhs.ndim() };
            std::copy( rhs.shape().begin(), rhs.shape().end(), std::back_inserter( new_shape ) );
            new_rhs.reshape( new_shape );
            return concatenate( lhs, new_rhs, axis );
        }

        auto l_shape = lhs.shape();
        auto r_shape = rhs.shape();
        better_assert( (l_shape.size() == r_shape.size()), "dimension not match, lhs dim is ", l_shape.size(), " and last dim ", *(l_shape.rbegin()), ", but rhs dim is ", r_shape.size(), " where the last dim ", *(r_shape.rbegin()) );
        axis = (axis == (unsigned long)(-1)) ? (l_shape.size()-1) : axis;
        better_assert( (l_shape.size() > axis), "axis is too large: axis-", axis, " but allowed range-[0,", l_shape.size()-1, "]" );
        // every dimension except `axis` must agree
        better_assert( (std::vector<unsigned long>{l_shape.begin(), l_shape.begin()+axis} == std::vector<unsigned long>{r_shape.begin(), r_shape.begin()+axis}) );
        better_assert( (std::vector<unsigned long>{l_shape.begin()+axis+1, l_shape.end()} == std::vector<unsigned long>{r_shape.begin()+axis+1, r_shape.end()}) );

        // one interleaved copy per combination of the leading dimensions
        unsigned long const memory_copy_times = std::max( 1UL, std::accumulate( l_shape.begin(), l_shape.begin()+axis, 1UL, []( auto x, auto y ){ return x*y; } ) );
        unsigned long const l_memory_stride = std::accumulate( l_shape.begin()+axis, l_shape.end(), 1UL, []( auto x, auto y ){ return x*y; } );
        unsigned long const r_memory_stride = std::accumulate( r_shape.begin()+axis, r_shape.end(), 1UL, []( auto x, auto y ){ return x*y; } );

        std::vector<unsigned long> result_shape = l_shape;
        result_shape[axis] += r_shape[axis];
        Tsor ans{ result_shape };
        auto target_memory_position = ans.data();

        // interleave one lhs-block then one rhs-block per leading index
        for ( auto idx = 0UL; idx != memory_copy_times; ++idx )
        {
            std::copy_n( lhs.data() + l_memory_stride*idx, l_memory_stride, target_memory_position );
            target_memory_position += l_memory_stride;
            std::copy_n( rhs.data() + r_memory_stride*idx, r_memory_stride, target_memory_position );
            target_memory_position += r_memory_stride;
        }

        return ans;
    }
957 
    ///
    /// @brief Tiles a 2-D tensor @p row_rep times vertically and @p col_rep times
    ///        horizontally, producing a (row*row_rep, col*col_rep) tensor.
    ///
    template< Tensor Tsor >
    Tsor repmat( Tsor const& tsor, unsigned long row_rep, unsigned long col_rep )
    {
        better_assert( tsor.shape().size() == 2, "Only 2D array has repmat method, the input array has ", tsor.shape().size(), " dimensions!" );
        auto const& old_shape = tsor.shape();
        auto const [old_row, old_col] = std::make_pair( old_shape[0], old_shape[1] );

        Tsor ans{ {old_row*row_rep, old_col*col_rep} };
        // Copy one source row at a time into its tiled destination:
        //   rdx -- which vertical tile, cdx -- which horizontal tile,
        //   odx -- which row of the original matrix.
        // Destination offset = start of vertical tile rdx
        //                    + row odx within that tile (result row width is old_col*col_rep)
        //                    + horizontal tile cdx.
        for ( auto rdx = 0UL; rdx != row_rep; ++rdx )
            for ( auto cdx = 0UL; cdx != col_rep; ++cdx )
                for ( auto odx = 0UL; odx != old_row; ++odx )
                    std::copy_n( tsor.data() + odx * old_col, old_col, ans.data() + old_row * old_col * rdx * col_rep + odx * old_col * col_rep + old_col * cdx );

        return ans;
    }
974 
975  template< Tensor Tsor >
976  constexpr bool empty( Tsor const& tsor ) noexcept
977  {
978  return tsor.size() == 0;
979  }
980 
981  template< typename T, typename A=default_allocator<T> >
982  constexpr tensor<T,A> zeros( std::vector<unsigned long> const& shape )
983  {
984  return {shape};
985  }
986 
987  template< Tensor Tsor >
988  constexpr Tsor zeros_like( Tsor const& tsor )
989  {
990  return {tsor.shape()};
991  }
992 
993  template< typename T, typename A=default_allocator<T> >
994  constexpr tensor<T,A> ones( std::vector<unsigned long> const& shape )
995  {
996  tensor<T, A> ans{ shape };
997  std::fill( ans.data(), ans.data() + ans.size(), T{1} );
998  return ans;
999  }
1000 
1001  template< Tensor Tsor >
1002  constexpr Tsor ones_like( Tsor const& tsor )
1003  {
1004  return ones<typename Tsor::value_type, typename Tsor::allocator>( tsor.shape() );
1005  }
1006 
1007  template< Tensor Tsor >
1008  auto max( Tsor const& tsor )
1009  {
1010  typedef typename Tsor::value_type value_type;
1011  better_assert( tsor.size() != 0, "tensor::max error: input tensor should not be empty!" );
1012  if ( tsor.size() == 0 ) return value_type{0};
1013  value_type ans = std::numeric_limits<value_type>::min();
1014  for ( auto idx : range( tsor.size() ) )
1015  ans = std::max( tsor[idx], ans );
1016  return ans;
1017  }
1018 
1019  template< Tensor Tsor >
1020  auto amax( Tsor const& tsor )
1021  {
1022  return max( tsor );
1023  }
1024 
1025  template< Tensor Tsor >
1026  auto min( Tsor const& tsor )
1027  {
1028  typedef typename Tsor::value_type value_type;
1029  better_assert( tsor.size() != 0, "tensor::min error: input tensor should not be empty!" );
1030  if ( tsor.size() == 0 ) return value_type{0};
1031  value_type ans = std::numeric_limits<value_type>::max();
1032  for ( auto idx : range( tsor.size() ) )
1033  ans = std::min( tsor[idx], ans );
1034  return ans;
1035  }
1036 
1037  template< Tensor Tsor >
1038  auto amin( Tsor const& tsor )
1039  {
1040  return min( tsor );
1041  }
1042 
1043  template< Tensor Tsor >
1044  auto sum( Tsor const& tsor )
1045  {
1046  typedef typename Tsor::value_type value_type;
1047  better_assert( tsor.size() != 0, "tensor::sum error: input tensor should not be empty!" );
1048  return std::accumulate( tsor.data(), tsor.data()+tsor.size(), value_type{0} );
1049  }
1050 
1051  template< Tensor Tsor >
1052  auto mean( Tsor const& tsor )
1053  {
1054  better_assert( tsor.size() != 0, "tensor::mean error: input tensor should not be empty!" );
1055  if ( 0 == tsor.size() ) return typename Tsor::value_type{0};
1056  return sum( tsor ) / tsor.size();
1057  }
1058 
1059  template< Tensor Tsor >
1060  auto norm( Tsor const& tsor )
1061  {
1062  typedef typename Tsor::value_type value_type;
1063  better_assert( tsor.size() != 0, "tensor::sum error: input tensor should not be empty!" );
1064  return std::sqrt( std::accumulate( tsor.data(), tsor.data()+tsor.size(), value_type{0}, []( value_type x, value_type y ){ return x + y*y; } ) ) / static_cast<value_type>( tsor.size() );
1065  }
1066 
1067  template< Tensor Tsor >
1068  Tsor abs( Tsor const& tsor )
1069  {
1070  auto ans = tsor.deep_copy();
1071  ans.map( []( auto& x ){ x = std::abs( x ); } );
1072  return ans;
1073  }
1074 
    ///
    /// @brief Row-wise softmax over the last dimension.
    ///
    /// The tensor is viewed as a (size/last_dim, last_dim) matrix and softmax
    /// is applied to each row independently.
    ///
    template< Tensor Tsor >
    Tsor softmax( Tsor const& tsor )
    {
        typedef typename Tsor::value_type value_type;
        better_assert( !tsor.empty(), "softmax argument is an empty tensor. " );
        Tsor ans = tsor.deep_copy();
        unsigned long const last_dim = *(tsor.shape().rbegin());
        unsigned long const rem_dim = tsor.size() / last_dim;
        // 2-D view over the copied data; mat[idx] is a pointer to row idx,
        // mat[idx+1] is one-past-the-end of that row.
        view_2d<value_type> mat{ ans.data(), rem_dim, last_dim };
        for ( auto idx : range( rem_dim ) )
        {
            // Subtract the row maximum first for numerical stability.
            value_type const mx = *std::max_element( mat[idx], mat[idx+1] );
            for_each( mat[idx], mat[idx+1], [mx]( auto& v ){ v -= mx; } );
            value_type const ac = std::accumulate( mat[idx], mat[idx+1], value_type{0}, []( value_type init, value_type val ){ return init + std::exp(val); } );
            // eps (project-wide constant) guards against a zero denominator.
            for_each( mat[idx], mat[idx+1], [ac]( auto& v ){ v = std::exp(v) / (ac+eps); } );
        }
        return ans;
    }
1093 
1094  template< Tensor Tsor >
1095  bool has_nan( Tsor const& tsor )
1096  {
1097  return (tsor.data() + tsor.size()) != std::find_if( tsor.data(), tsor.data()+tsor.size(), []( auto const& v ){ return std::isnan( v ); } );
1098  }
1099 
1100  template< Tensor Tsor >
1101  bool has_inf( Tsor const& tsor )
1102  {
1103  return (tsor.data() + tsor.size()) != std::find_if( tsor.data(), tsor.data()+tsor.size(), []( auto const& v ){ return std::isinf( v ); } );
1104  }
1105 
1106  template< Tensor Tsor >
1107  bool is_valid( Tsor const& tsor )
1108  {
1109  return (!has_nan(tsor)) && (!has_inf(tsor));
1110  }
1111 
    ///
    /// @brief Generic reduction of tensor @p ts along @p axis with binary
    ///        functor @p func seeded by @p init.
    ///
    /// @param ts        Input tensor; returned unchanged when empty.
    /// @param axis      Axis to reduce; -1 selects the last axis.
    /// @param init      Seed value of each reduction.
    /// @param func      Binary reduction functor (must be associative for
    ///                  std::reduce to give deterministic results).
    /// @param keepdims  When true the reduced axis is kept with extent 1;
    ///                  otherwise it is removed from the shape.
    ///
    template< Tensor Tsor, typename Function >
    Tsor reduce( Tsor const& ts, unsigned long axis, typename Tsor::value_type const& init, Function const& func, bool keepdims=false ) noexcept
    {
        if ( ts.empty() ) return ts;

        axis = (axis == static_cast<unsigned long>( -1 )) ? ts.ndim()-1 : axis;
        better_assert( axis < ts.ndim(), "Error with tensor::reduce, input axis ", axis, " is too large for a tensor with ", ts.ndim(), " dimensions." );

        std::vector<unsigned long> _shape = ts.shape();
        // pres = product of dims before the axis, post = product after it;
        // together they enumerate every 1-D slice running along `axis`.
        unsigned long const pres = std::reduce( _shape.begin(), _shape.begin()+axis, 1Ul, []( unsigned long x, unsigned long y ){ return x*y; } );
        unsigned long const post = std::reduce( _shape.begin()+axis+1, _shape.end(), 1Ul, []( unsigned long x, unsigned long y ){ return x*y; } );

        unsigned long const n = _shape[axis];
        _shape[axis] = 1UL;

        Tsor ans{ _shape };
        auto itor = ans.begin();
        for ( auto idx : range( pres ) )
            for ( auto jdx : range( post ) )
            {
                // Elements of one slice are `post` apart in memory, so walk
                // them with a stride iterator.
                auto start = ts.begin() + idx * post * n + jdx;
                stride_iterator si{ start, static_cast<std::int64_t>(post) };
                *itor++ = std::reduce( si, si+n, init, func );
            }

        if ( !keepdims )
        {
            // Squeeze the (now size-1) axis out of the shape in place.
            std::copy( _shape.begin()+axis+1, _shape.end(), _shape.begin()+axis );
            _shape.resize( _shape.size() - 1 );
            ans.reshape( _shape );
        }

        return ans;
    }
1146 
1147  template <Tensor Tsor>
1148  Tsor sum( Tsor const& ts, unsigned long axis, bool keepdims=false ) noexcept
1149  {
1150  return reduce( ts, axis, typename Tsor::value_type{0}, []( auto const& a, auto const& b ){ return a+b; }, keepdims );
1151  }
1152 
1153  template <Tensor Tsor> requires std::floating_point<typename Tsor::value_type>
1154  Tsor mean( Tsor const& ts, unsigned long axis, bool keepdims=false ) noexcept
1155  {
1156  typedef typename Tsor::value_type value_type;
1157  axis = ( axis == static_cast<unsigned long>( -1 ) ) ? ts.ndim()-1 : axis;
1158  auto const& _shape = ts.shape();
1159  return reduce( ts, axis, value_type{0}, []( auto const& a, auto const& b ){ return a+b; }, keepdims ) / static_cast<value_type>( _shape[axis] );
1160  }
1161 
1162  template <Tensor Tsor> requires std::floating_point<typename Tsor::value_type>
1163  Tsor variance( Tsor const& ts, unsigned long axis, bool keepdims=false ) noexcept
1164  {
1165  Tsor x = mean( ts, axis, true );
1166  x = x - ts;
1167  for_each( x.begin(), x.end(), [](auto& v){ v *= v; } );
1168  return mean( x, axis, keepdims );
1169  }
1170 
1171  template <Tensor Tsor> requires std::floating_point<typename Tsor::value_type>
1172  Tsor standard_deviation( Tsor const& ts, unsigned long axis, bool keepdims=false ) noexcept
1173  {
1174  Tsor x = variance( ts, axis, keepdims );
1175  for_each( x.begin(), x.end(), [](auto& v){ v = std::sqrt(v); } );
1176  return x;
1177  }
1178 
    ///
    /// @brief Sum of squared deviations from the mean over the whole tensor.
    ///
    /// NOTE(review): despite the name, this returns the *sum* of squared
    /// deviations, not divided by the element count -- std() below builds on
    /// this value, so confirm the intended semantics before normalising.
    ///
    template <Tensor Tsor> requires std::floating_point<typename Tsor::value_type>
    typename Tsor::value_type var( Tsor const& ts ) noexcept
    {
        auto x = ts - mean(ts);
        return std::inner_product( x.begin(), x.end(), x.begin(), typename Tsor::value_type{0} );
    }
1185 
1186  template <Tensor Tsor> requires std::floating_point<typename Tsor::value_type>
1187  typename Tsor::value_type std( Tsor const& ts ) noexcept
1188  {
1189  return std::sqrt( var(ts) );
1190  }
1191 
1192  template <Tensor Tsor>
1193  Tsor max( Tsor const& ts, unsigned long axis, bool keepdims=false ) noexcept
1194  {
1195  return reduce( ts, axis, std::numeric_limits<typename Tsor::value_type>::min(), []( auto const& a, auto const& b ){ return a > b ? a : b; }, keepdims );
1196  }
1197 
1198  template <Tensor Tsor>
1199  Tsor min( Tsor const& ts, unsigned long axis, bool keepdims=false ) noexcept
1200  {
1201  return reduce( ts, axis, std::numeric_limits<typename Tsor::value_type>::max(), []( auto const& a, auto const& b ){ return a < b ? a : b; }, keepdims );
1202  }
1203 
1204  template < typename T, typename A=default_allocator<T> > requires std::floating_point<T>
1205  tensor<T,A> linspace( T start, T stop, unsigned long num, bool endpoint=true ) noexcept
1206  {
1207  better_assert( num > 1, "tensor::linspace: expecting number larger than 1, but got ", num );
1208 
1209  unsigned long const segs = endpoint ? num-1 : num;
1210  T const distance = ( stop - start ) / segs;
1211 
1212  tensor<T,A> ans{ {num,} };
1213  for ( auto idx : range( num ) )
1214  ans[idx] = start + distance * idx; // 1D view of the tensor
1215 
1216  return ans;
1217  }
1218 
1219  template<class _Tp, class _CharT, class _Traits, class _Alloc>
1220  std::basic_istream<_CharT, _Traits>& read_tensor(std::basic_istream<_CharT, _Traits>& __is, tensor<_Tp, _Alloc>& __x)
1221  {
1222  better_assert( __is.good(), "Error with the istream!" );
1223 
1224  // read the first line to extract shape
1225  std::vector<unsigned long> shape;
1226  {
1227  std::string s_shape;
1228  std::getline( __is, s_shape );
1229  std::stringstream ss( s_shape );
1230  std::copy( std::istream_iterator<unsigned long>( ss ), std::istream_iterator<unsigned long>(), std::back_inserter( shape ) );
1231  }
1232 
1233  // read data
1234  std::vector< _Tp > buff;
1235  {
1236  std::string cache;
1237  std::getline( __is, cache );
1238  std::stringstream ss( cache );
1239  std::copy( std::istream_iterator< _Tp >( ss ), std::istream_iterator< _Tp >(), std::back_inserter( buff ) );
1240  }
1241 
1242  // copy and return
1243  tensor<_Tp, _Alloc> ans{ shape };
1244  __x.resize( shape );
1245  {
1246  better_assert( __x.size() == buff.size(), "tensor::loadtxt: shape suggests size of ", __x.size(), " but got ", buff.size() );
1247  std::copy( buff.begin(), buff.end(), __x.begin() );
1248  }
1249 
1250  return __is;
1251  }
1252 
1253  template<class _Tp, class _CharT, class _Traits, class _Alloc>
1254  std::basic_ostream<_CharT, _Traits>& write_tensor(std::basic_ostream<_CharT, _Traits>& __os, tensor<_Tp, _Alloc> const& __x)
1255  {
1256  std::basic_ostringstream<_CharT, _Traits> __s;
1257  __s.flags(__os.flags());
1258  __s.imbue(__os.getloc());
1259  __s.precision(__os.precision());
1260 
1261  {//write shape
1262  auto const& shape = __x.shape();
1263  std::copy( shape.begin(), shape.end(), std::ostream_iterator<unsigned long>{ __os, " " } );
1264  __os << "\n";
1265  }
1266  {//write data
1267  std::copy( __x.begin(), __x.end(), std::ostream_iterator<_Tp>{ __os, " " } );
1268  }
1269  __os << "\n";
1270 
1271  return __os;
1272  }
1273 
1274  //
1275  // file format:
1276  //
1277  // first line: shape
1278  // second line: data
1279  //
1280  // example of a tensor of shape (2, 3):
1281  //
1282  // 2 3
1283  // 0.910905 0.525709 0.584262 0.34063 0.613034 0.0803866
1284  //
1285  template < typename T, typename A=default_allocator<T> >
1286  tensor<T,A> load_tensor( std::string const& file_name )
1287  {
1288  tensor<T, A> ans;
1289  std::ifstream ifs{ file_name };
1290  read_tensor( ifs, ans );
1291  ifs.close();
1292  return ans;
1293  }
1294 
1295  template< Tensor Tsor >
1296  void save_tensor( std::string const& file_name, Tsor const& tsor )
1297  {
1298  std::ofstream ofs{ file_name };
1299  write_tensor( ofs, tsor );
1300  ofs.close();
1301  }
1302 
    ///
    /// @brief Non-owning 3-D view over a contiguous 1-D buffer, indexed as
    ///        [row][col][channel].
    ///
    /// Lifetime note: the view does not own `data`; the caller must keep the
    /// underlying buffer alive for as long as the view is used.
    ///
    template< typename T >
    struct view_3d
    {
        T* data_;               // start of the viewed buffer (not owned)
        unsigned long row_;     // extent of the first (slowest) dimension
        unsigned long col_;     // extent of the second dimension
        unsigned long channel_; // extent of the last (fastest) dimension

        constexpr view_3d( T* data, unsigned long row, unsigned long col, unsigned long channel ) noexcept : data_{data}, row_{row}, col_{col}, channel_{channel} {}

        // Slice out plane `index` as a 2-D (col x channel) view.
        constexpr auto operator[]( unsigned long index ) noexcept
        {
            return view_2d{ data_+index*col_*channel_, col_, channel_ };
        }

        constexpr auto operator[]( unsigned long index ) const noexcept
        {
            return view_2d{ data_+index*col_*channel_, col_, channel_ };
        }
    };
1323 
1324  template< typename T >
1325  using cube = view_3d<T>;
1326 
    ///
    /// @brief Non-owning 4-D view over a contiguous 1-D buffer, indexed as
    ///        [batch][row][col][channel].
    ///
    /// Lifetime note: the view does not own `data`; the caller must keep the
    /// underlying buffer alive for as long as the view is used.
    ///
    template< typename T >
    struct view_4d
    {
        T* data_;                  // start of the viewed buffer (not owned)
        unsigned long batch_size_; // extent of the first (slowest) dimension
        unsigned long row_;        // extent of the second dimension
        unsigned long col_;        // extent of the third dimension
        unsigned long channel_;    // extent of the last (fastest) dimension

        constexpr view_4d( T* data=nullptr, unsigned long batch_size=0, unsigned long row=0, unsigned long col=0, unsigned long channel=0 ) noexcept : data_{data}, batch_size_{batch_size}, row_{row}, col_{col}, channel_{channel} {}

        // Slice out batch element `index` as a 3-D (row x col x channel) view.
        constexpr auto operator[]( unsigned long index ) noexcept
        {
            return view_3d{ data_+index*row_*col_*channel_, row_, col_, channel_ };
        }

        constexpr auto operator[]( unsigned long index ) const noexcept
        {
            return view_3d{ data_+index*row_*col_*channel_, row_, col_, channel_ };
        }
    }; // struct view_4d
1387 
1388  template<typename T >
1390 
1391 
1392  template<typename T, unsigned long N>
1393  struct view;
1394 
    /// 1-D specialisation of view: behaves exactly like view_1d.
    template<typename T>
    struct view<T, 1> : view_1d<T>
    {
        using view_1d<T>::view_1d; // inherit all view_1d constructors
    };
1400 
    /// 2-D specialisation of view: behaves exactly like view_2d.
    template<typename T>
    struct view<T, 2> : view_2d<T>
    {
        using view_2d<T>::view_2d; // inherit all view_2d constructors
    };
1406 
    /// 3-D specialisation of view: behaves exactly like view_3d.
    template<typename T>
    struct view<T, 3> : view_3d<T>
    {
        using view_3d<T>::view_3d; // inherit all view_3d constructors
    };
1412 
    /// 4-D specialisation of view: behaves exactly like view_4d, plus a
    /// convenience constructor taking the shape as a std::array.
    template<typename T>
    struct view<T, 4> : view_4d<T>
    {
        using view_4d<T>::view_4d; // inherit all view_4d constructors

        // Unpack shape as {batch, row, col, channel}.
        view( T* data, std::array<unsigned long, 4> const& shape ) noexcept : view_4d<T>{ data, shape[0], shape[1], shape[2], shape[3] } {}
    };
1420 
    ///
    /// @brief N-dimensional non-owning view over a contiguous 1-D buffer.
    ///
    /// Indexing with operator[] peels off the first dimension and yields a
    /// view<T, N-1>; the recursion terminates at the 1..4-D specialisations
    /// declared above. The view does not own `data`.
    ///
    template< typename T, unsigned long N >
    struct view
    {
        T* data_;                            // start of the viewed buffer (not owned)
        std::array<unsigned long, N> shape_; // extent of each dimension

        constexpr view( T* data, std::array<unsigned long, N> const& shape ) noexcept : data_{ data }, shape_{ shape } {}

        // Slice `index` along the first dimension, producing an (N-1)-D view.
        view<T, N-1> operator []( unsigned long index ) noexcept
        {
            unsigned long first_dim = shape_[0];
            better_assert( index < first_dim, "Expecting a dimension smaller than ", first_dim, " but got ", index );
            // Offset = index * (product of the remaining dimensions).
            unsigned long offsets = index * std::accumulate( shape_.begin()+1, shape_.end(), 1UL, [](unsigned long a, unsigned long b){ return a*b; } );

            std::array<unsigned long, N-1> new_shape;
            std::copy( shape_.begin()+1, shape_.end(), new_shape.begin() );
            return view<T, N-1>{ data_+offsets, new_shape };
        }

        view<T, N-1> operator []( unsigned long index ) const noexcept
        {
            unsigned long first_dim = shape_[0];
            better_assert( index < first_dim, "Expecting a dimension smaller than ", first_dim, " but got ", index );
            unsigned long offsets = index * std::accumulate( shape_.begin()+1, shape_.end(), 1UL, [](unsigned long a, unsigned long b){ return a*b; } );

            std::array<unsigned long, N-1> new_shape;
            std::copy( shape_.begin()+1, shape_.end(), new_shape.begin() );
            return view<T, N-1>{ data_+offsets, new_shape };
        }

    }; // struct view
1461 
1462 
1463 }//namespace ceras
1464 
1465 #endif//HQKGLAXWWVFBFHQNHBVTQJKGUFTPCQPTPXDVNOSBDJIBHITCEKDISJYNAMCPLJDURURDAISFV
1466 
Definition: activation.hpp:12
static std::mt19937 random_generator
Definition: tensor.hpp:21
constexpr tensor< T, A > ones(std::vector< unsigned long > const &shape)
Definition: tensor.hpp:994
auto amax(Tsor const &tsor)
Definition: tensor.hpp:1020
requires std::floating_point< typename Tsor::value_type > Tsor variance(Tsor const &ts, unsigned long axis, bool keepdims=false) noexcept
Definition: tensor.hpp:1163
requires std::floating_point< typename Tsor::value_type > Tsor::value_type std(Tsor const &ts) noexcept
Definition: tensor.hpp:1187
requires std::floating_point< typename Tsor::value_type > Tsor mean(Tsor const &ts, unsigned long axis, bool keepdims=false) noexcept
Definition: tensor.hpp:1154
Tsor operator*(Tsor const &lhs, Tsor const &rhs) noexcept
Definition: tensor.hpp:735
void update_cuda_gemm_threshold()
Definition: tensor.hpp:514
tensor< T, A > random(std::vector< unsigned long > const &shape, T min=T{0}, T max=T{1})
Definition: tensor.hpp:869
tensor< T, A > randn(std::vector< unsigned long > const &shape, T mean=T{0}, T stddev=T{1})
Definition: tensor.hpp:839
tensor< T, A > truncated_normal(std::vector< unsigned long > const &shape, T mean=T{0}, T stddev=T{1}, T lower=T{0}, T upper=T{1})
Definition: tensor.hpp:848
constexpr bool empty(Tsor const &tsor) noexcept
Definition: tensor.hpp:976
Tsor reduce(Tsor const &ts, unsigned long axis, typename Tsor::value_type const &init, Function const &func, bool keepdims=false) noexcept
Definition: tensor.hpp:1113
Tsor concatenate(Tsor const &lhs, Tsor const &rhs, unsigned long axis=0) noexcept
Definition: tensor.hpp:914
std::basic_istream< _CharT, _Traits > & read_tensor(std::basic_istream< _CharT, _Traits > &__is, tensor< _Tp, _Alloc > &__x)
Definition: tensor.hpp:1220
requires std::floating_point< T > tensor< T, A > linspace(T start, T stop, unsigned long num, bool endpoint=true) noexcept
Definition: tensor.hpp:1205
bool is_valid(Tsor const &tsor)
Definition: tensor.hpp:1107
Tsor hadamard_product(Tsor const &lhs, Tsor const &rhs) noexcept
Definition: tensor.hpp:762
Tsor add(Tsor const &lhs, Tsor const &rhs) noexcept
Definition: tensor.hpp:604
requires std::floating_point< typename Tsor::value_type > Tsor standard_deviation(Tsor const &ts, unsigned long axis, bool keepdims=false) noexcept
Definition: tensor.hpp:1172
std::basic_ostream< CharT, Traits > & operator<<(std::basic_ostream< CharT, Traits > &os_, Tsor const &tsor)
Definition: tensor.hpp:365
constexpr bool is_tensor_v
Definition: tensor.hpp:359
Tsor reduce_mean(Tsor const &tsor)
Definition: tensor.hpp:802
Tsor sum(Tsor const &ts, unsigned long axis, bool keepdims=false) noexcept
Definition: tensor.hpp:1148
bool has_nan(Tsor const &tsor)
Definition: tensor.hpp:1095
Tsor operator/(Tsor const &lhs, typename Tsor::value_type const &rhs) noexcept
Definition: tensor.hpp:684
Tsor elementwise_product(Tsor const &lhs, Tsor const &rhs) noexcept
Definition: tensor.hpp:742
tensor< T, A > glorot_uniform(std::initializer_list< unsigned long > shape)
Definition: tensor.hpp:893
concept Tensor
Definition: tensor.hpp:362
Tsor operator+(Tsor const &lhs, typename Tsor::value_type const &rhs) noexcept
Definition: tensor.hpp:636
std::allocator< T > default_allocator
Definition: tensor.hpp:25
Tsor elementwise_divide(Tsor const &lhs, Tsor const &rhs) noexcept
Definition: tensor.hpp:768
constexpr tensor< T, A > as_tensor(T val) noexcept
Definition: tensor.hpp:345
static unsigned long random_seed
Definition: tensor.hpp:19
Tsor min(Tsor const &ts, unsigned long axis, bool keepdims=false) noexcept
Definition: tensor.hpp:1199
Tsor deep_copy(Tsor const &tsor)
Definition: tensor.hpp:902
auto norm(Tsor const &tsor)
Definition: tensor.hpp:1060
Tsor random_like(Tsor const &tsor, typename Tsor::value_type min=0, typename Tsor::value_type max=1)
Definition: tensor.hpp:878
void save_tensor(std::string const &file_name, Tsor const &tsor)
Definition: tensor.hpp:1296
auto amin(Tsor const &tsor)
Definition: tensor.hpp:1038
view_1d< T > array
Definition: tensor.hpp:412
constexpr tensor< T, A > zeros(std::vector< unsigned long > const &shape)
Definition: tensor.hpp:982
Tsor repmat(Tsor const &tsor, unsigned long row_rep, unsigned long col_rep)
Definition: tensor.hpp:959
std::basic_ostream< _CharT, _Traits > & write_tensor(std::basic_ostream< _CharT, _Traits > &__os, tensor< _Tp, _Alloc > const &__x)
Definition: tensor.hpp:1254
Tsor randn_like(Tsor const &tsor, typename Tsor::value_type mean=0, typename Tsor::value_type stddev=1)
Definition: tensor.hpp:884
Tsor operator-(Tsor const &lhs, typename Tsor::value_type const &rhs) noexcept
Definition: tensor.hpp:662
Tsor max(Tsor const &ts, unsigned long axis, bool keepdims=false) noexcept
Definition: tensor.hpp:1193
requires std::floating_point< T > void gemm(view_2d< T > const &x, view_2d< T > const &y, view_2d< T > &ans)
Definition: tensor.hpp:578
tensor< T, A > load_tensor(std::string const &file_name)
Definition: tensor.hpp:1286
Tsor copy(Tsor const &tsor)
Definition: tensor.hpp:908
requires std::floating_point< T > void gemm_cpu(T const *A, bool a_transposed, T const *B, bool b_transposed, unsigned long m, unsigned long n, unsigned long k, T *C)
Definition: tensor.hpp:483
bool has_inf(Tsor const &tsor)
Definition: tensor.hpp:1101
Tsor softmax(Tsor const &tsor)
Definition: tensor.hpp:1076
Tsor multiply(Tsor const &lhs, Tsor const &rhs) noexcept
Definition: tensor.hpp:727
Tsor squeeze(Tsor const &tsor)
Definition: tensor.hpp:817
requires std::floating_point< typename Tsor::value_type > Tsor::value_type var(Tsor const &ts) noexcept
Definition: tensor.hpp:1180
Tsor minus(Tsor const &lhs, Tsor const &rhs) noexcept
Definition: tensor.hpp:642
auto reduce_sum(unsigned long axis) noexcept
Reduce sum elements along an axis.
Definition: operation.hpp:2415
auto repeat(unsigned long repeats, unsigned long axis=-1) noexcept
Repeats elements along an axis.
Definition: operation.hpp:2055
auto ones_like(Ex const &ex) noexcept
Definition: operation.hpp:1744
auto reshape(std::vector< unsigned long > const &new_shape, bool include_batch_flag=true) noexcept
Definition: operation.hpp:737
constexpr auto exp(Ex const &ex) noexcept
Computes Exp of the given expression.
Definition: operation.hpp:2926
constexpr auto sqrt(Ex const &ex) noexcept
Computes Sqrt of the given expression.
Definition: operation.hpp:3657
requires std::floating_point< Float > constexpr auto clip(Float lower, Float upper=std::numeric_limits< Float >::max()) noexcept
Definition: operation.hpp:702
constexpr auto abs(Ex const &ex) noexcept
Computes Abs of the given expression.
Definition: operation.hpp:2447
*auto y
Definition: operation.hpp:627
auto zeros_like(Ex const &ex) noexcept
Definition: operation.hpp:1764
Definition: tensor.hpp:353
Definition: tensor.hpp:32
unsigned long memory_offset_
Definition: tensor.hpp:40
constexpr value_type as_scalar() const noexcept
Definition: tensor.hpp:312
constexpr auto end() const noexcept
Definition: tensor.hpp:63
constexpr tensor(std::vector< unsigned long > const &shape)
Definition: tensor.hpp:137
constexpr auto as_type() const noexcept
Definition: tensor.hpp:319
T value_type
Definition: tensor.hpp:33
constexpr value_type & operator[](unsigned long idx)
Definition: tensor.hpp:114
shared_vector vector_
Definition: tensor.hpp:41
Allocator allocator
Definition: tensor.hpp:34
constexpr const value_type * data() const noexcept
Definition: tensor.hpp:235
constexpr self_type const copy() const
Definition: tensor.hpp:108
constexpr auto end() noexcept
Definition: tensor.hpp:58
constexpr self_type & reshape(std::vector< unsigned long > const &new_shape)
Definition: tensor.hpp:199
constexpr unsigned long size() const noexcept
Definition: tensor.hpp:179
constexpr self_type const deep_copy() const
Definition: tensor.hpp:101
tensor slice(unsigned long m, unsigned long n) const noexcept
Definition: tensor.hpp:326
constexpr auto cbegin() const noexcept
Definition: tensor.hpp:53
tensor self_type
Definition: tensor.hpp:37
constexpr self_type & shrink_to(std::vector< unsigned long > const &new_shape)
Definition: tensor.hpp:210
constexpr tensor(tensor const &other, unsigned long memory_offset)
Definition: tensor.hpp:146
constexpr self_type & map(Function const &f)
Definition: tensor.hpp:246
constexpr self_type & reset(T val=T{0})
Definition: tensor.hpp:83
constexpr tensor(std::vector< unsigned long > const &shape, T init)
Definition: tensor.hpp:141
constexpr self_type & resize(std::vector< unsigned long > const &new_shape)
Definition: tensor.hpp:186
constexpr tensor(std::vector< unsigned long > const &shape, std::initializer_list< T > init, const Allocator &alloc=Allocator())
Definition: tensor.hpp:132
constexpr auto begin() noexcept
Definition: tensor.hpp:43
constexpr unsigned long ndim() const noexcept
Definition: tensor.hpp:89
std::vector< unsigned long > shape_
Definition: tensor.hpp:39
std::vector< T, Allocator > vector_type
Definition: tensor.hpp:35
constexpr tensor(self_type &&other) noexcept
Definition: tensor.hpp:154
std::shared_ptr< vector_type > shared_vector
Definition: tensor.hpp:36
constexpr bool empty() const noexcept
Definition: tensor.hpp:225
constexpr tensor(self_type const &other) noexcept
Definition: tensor.hpp:148
tensor()
Definition: tensor.hpp:125
constexpr auto begin() const noexcept
Definition: tensor.hpp:48
constexpr std::vector< unsigned long > const & shape() const noexcept
Definition: tensor.hpp:177
constexpr auto cend() const noexcept
Definition: tensor.hpp:68
constexpr self_type & creep_to(unsigned long new_memory_offset)
Definition: tensor.hpp:219
constexpr value_type const & operator[](unsigned long idx) const
Definition: tensor.hpp:120
constexpr self_type & deep_copy(self_type const &other)
Definition: tensor.hpp:94
constexpr value_type * data() noexcept
Definition: tensor.hpp:230
view(T *data, std::array< unsigned long, 4 > const &shape) noexcept
Definition: tensor.hpp:1418
Definition: tensor.hpp:403
unsigned long dims
Definition: tensor.hpp:405
T * data
Definition: tensor.hpp:404
constexpr T & operator[](unsigned long idx) noexcept
Definition: tensor.hpp:407
constexpr T const & operator[](unsigned long idx) const noexcept
Definition: tensor.hpp:408
Definition: tensor.hpp:417
constexpr const T * data() const noexcept
Definition: tensor.hpp:456
constexpr row_type row_end(unsigned long index=0) noexcept
Definition: tensor.hpp:465
constexpr unsigned long col() const noexcept
Definition: tensor.hpp:462
constexpr const T * end() const noexcept
Definition: tensor.hpp:459
stride_iterator< value_type * > col_type
Definition: tensor.hpp:421
constexpr unsigned long row() const noexcept
Definition: tensor.hpp:461
constexpr T * begin() noexcept
Definition: tensor.hpp:458
constexpr const_row_type row_begin(unsigned long index=0) const noexcept
Definition: tensor.hpp:467
constexpr view_2d(tensor< T, A > &tsor, unsigned long row, unsigned long col, bool transposed=false) noexcept
Definition: tensor.hpp:431
constexpr col_type col_begin(unsigned long index=0) noexcept
Definition: tensor.hpp:470
constexpr col_type col_end(unsigned long index=0) noexcept
Definition: tensor.hpp:471
constexpr view_2d(T *data, unsigned long row, unsigned long col, bool transposed=false) noexcept
Definition: tensor.hpp:433
constexpr T * operator[](unsigned long index)
Definition: tensor.hpp:438
constexpr const T * operator[](unsigned long index) const
Definition: tensor.hpp:445
constexpr row_type row_begin(unsigned long index=0) noexcept
Definition: tensor.hpp:464
T * data_
Definition: tensor.hpp:424
bool transposed_
Definition: tensor.hpp:427
constexpr const_col_type col_begin(unsigned long index=0) const noexcept
Definition: tensor.hpp:473
constexpr unsigned long size() const noexcept
Definition: tensor.hpp:453
constexpr const_row_type row_end(unsigned long index=0) const noexcept
Definition: tensor.hpp:468
const value_type * const_row_type
Definition: tensor.hpp:420
unsigned long row_
Definition: tensor.hpp:425
T value_type
Definition: tensor.hpp:418
constexpr T * data() noexcept
Definition: tensor.hpp:455
constexpr const_col_type col_end(unsigned long index=0) const noexcept
Definition: tensor.hpp:474
stride_iterator< const value_type * > const_col_type
Definition: tensor.hpp:422
constexpr auto shape() const noexcept
Definition: tensor.hpp:452
value_type * row_type
Definition: tensor.hpp:419
unsigned long col_
Definition: tensor.hpp:426
constexpr view_2d(const T *data, unsigned long row, unsigned long col, bool transposed=false) noexcept
Definition: tensor.hpp:436
Definition: tensor.hpp:1305
constexpr auto operator[](unsigned long index) noexcept
Definition: tensor.hpp:1313
T * data_
Definition: tensor.hpp:1306
constexpr auto operator[](unsigned long index) const noexcept
Definition: tensor.hpp:1318
unsigned long row_
Definition: tensor.hpp:1307
constexpr view_3d(T *data, unsigned long row, unsigned long col, unsigned long channel) noexcept
Definition: tensor.hpp:1311
unsigned long col_
Definition: tensor.hpp:1308
unsigned long channel_
Definition: tensor.hpp:1309
Definition: tensor.hpp:1332
unsigned long channel_
The channel of the 4-D tensor, also the last dimension of the tensor.
Definition: tensor.hpp:1337
unsigned long col_
The column of the 4-D tensor, also the third dimension of the tensor.
Definition: tensor.hpp:1336
constexpr auto operator[](unsigned long index) noexcept
Definition: tensor.hpp:1362
T * data_
The pointer to the start position of the 1-D array.
Definition: tensor.hpp:1333
unsigned long batch_size_
The batch size of the 4-D tensor, also the first dimension of the tensor.
Definition: tensor.hpp:1334
constexpr view_4d(T *data=nullptr, unsigned long batch_size=0, unsigned long row=0, unsigned long col=0, unsigned long channel=0) noexcept
Definition: tensor.hpp:1347
unsigned long row_
The row of the 4-D tensor, also the second dimension of the tensor.
Definition: tensor.hpp:1335
constexpr auto operator[](unsigned long index) const noexcept
Definition: tensor.hpp:1382
N-Dimentional view of 1D memory.
Definition: tensor.hpp:1432
constexpr view(T *data, std::array< unsigned long, N > const &shape) noexcept
Definition: tensor.hpp:1436
T * data_
Definition: tensor.hpp:1433
std::array< unsigned long, N > shape_
Definition: tensor.hpp:1434