Point Cloud Library (PCL)  1.9.1-dev
sparse_matrix.hpp
1 /*
2 Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without modification,
6 are permitted provided that the following conditions are met:
7 
8 Redistributions of source code must retain the above copyright notice, this list of
9 conditions and the following disclaimer. Redistributions in binary form must reproduce
10 the above copyright notice, this list of conditions and the following disclaimer
11 in the documentation and/or other materials provided with the distribution.
12 
13 Neither the name of the Johns Hopkins University nor the names of its contributors
14 may be used to endorse or promote products derived from this software without specific
15 prior written permission.
16 
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
18 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES
19 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
20 SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
22 TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
23 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
25 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
26 DAMAGE.
27 */
28 
29 #include <float.h>
30 #ifdef _WIN32
31 # ifndef WIN32_LEAN_AND_MEAN
32 # define WIN32_LEAN_AND_MEAN
33 # endif // WIN32_LEAN_AND_MEAN
34 # ifndef NOMINMAX
35 # define NOMINMAX
36 # endif // NOMINMAX
37 # include <windows.h>
38 #endif //_WIN32
39 
40 ///////////////////
41 // SparseMatrix //
42 ///////////////////
43 ///////////////////////////////////////////
44 // Static Allocator Methods and Members //
45 ///////////////////////////////////////////
46 
47 namespace pcl
48 {
49  namespace poisson
50  {
51 
52 
// Allocator state is static: it is shared by every SparseMatrix<T> of a given T.
template<class T> int SparseMatrix<T>::UseAlloc=0;
template<class T> Allocator<MatrixEntry<T> > SparseMatrix<T>::internalAllocator;
// Returns non-zero when row storage comes from the pooled allocator instead of malloc().
template<class T> int SparseMatrix<T>::UseAllocator(void){return UseAlloc;}
56  template<class T>
57  void SparseMatrix<T>::SetAllocator( int blockSize )
58  {
59  if(blockSize>0){
60  UseAlloc=1;
61  internalAllocator.set(blockSize);
62  }
63  else{UseAlloc=0;}
64  }
65  ///////////////////////////////////////
66  // SparseMatrix Methods and Members //
67  ///////////////////////////////////////
68 
69  template< class T >
71  {
72  _contiguous = false;
73  _maxEntriesPerRow = 0;
74  rows = 0;
75  rowSizes = NULL;
76  m_ppElements = NULL;
77  }
78 
// Convenience constructors: delegate to the default constructor, then allocate
// either r empty rows or a contiguous r-by-maxEntriesPerRow entry slab.
template< class T > SparseMatrix< T >::SparseMatrix( int rows ) : SparseMatrix< T >() { Resize( rows ); }
template< class T > SparseMatrix< T >::SparseMatrix( int rows , int maxEntriesPerRow ) : SparseMatrix< T >() { Resize( rows , maxEntriesPerRow ); }
81 
82  template< class T >
84  {
85  if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
86  else Resize( M.rows );
87  for( int i=0 ; i<rows ; i++ )
88  {
89  SetRowSize( i , M.rowSizes[i] );
90  memcpy( (*this)[i] , M[i] , sizeof( MatrixEntry< T > ) * rowSizes[i] );
91  }
92  }
93  template<class T>
94  int SparseMatrix<T>::Entries( void ) const
95  {
96  int e = 0;
97  for( int i=0 ; i<rows ; i++ ) e += int( rowSizes[i] );
98  return e;
99  }
100  template<class T>
102  {
103  if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
104  else Resize( M.rows );
105  for( int i=0 ; i<rows ; i++ )
106  {
107  SetRowSize( i , M.rowSizes[i] );
108  memcpy( (*this)[i] , M[i] , sizeof( MatrixEntry< T > ) * rowSizes[i] );
109  }
110  return *this;
111  }
112 
113  template<class T>
115 
116  template< class T >
117  bool SparseMatrix< T >::write( const char* fileName ) const
118  {
119  FILE* fp = fopen( fileName , "wb" );
120  if( !fp ) return false;
121  bool ret = write( fp );
122  fclose( fp );
123  return ret;
124  }
125  template< class T >
126  bool SparseMatrix< T >::read( const char* fileName )
127  {
128  FILE* fp = fopen( fileName , "rb" );
129  if( !fp ) return false;
130  bool ret = read( fp );
131  fclose( fp );
132  return ret;
133  }
template< class T >
bool SparseMatrix< T >::write( FILE* fp ) const
{
    // Binary layout: [rows][rowSizes[0..rows-1]][row 0 entries]...[row rows-1 entries].
    // Returns false as soon as any fwrite falls short.
    if( fwrite( &rows , sizeof( int ) , 1 , fp )!=1 ) return false;
    if( fwrite( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false;
    for( int i=0 ; i<rows ; i++ ) if( fwrite( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false;
    return true;
}
template< class T >
bool SparseMatrix< T >::read( FILE* fp )
{
    // Inverse of write(FILE*): reads the row count, per-row sizes, then each
    // row's entries. Returns false on any short read.
    int r;
    if( fread( &r , sizeof( int ) , 1 , fp )!=1 ) return false;
    Resize( r );
    if( fread( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false;
    for( int i=0 ; i<rows ; i++ )
    {
        // SetRowSize() allocates the row; rowSizes[i] is zeroed first so the
        // allocator path does not try to free a not-yet-allocated row.
        r = rowSizes[i];
        rowSizes[i] = 0;
        SetRowSize( i , r );
        if( fread( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false;
    }
    return true;
}
158 
159 
160  template< class T >
162  {
163  if( rows>0 )
164  {
165 
166  if( !UseAlloc )
167  if( _contiguous ){ if( _maxEntriesPerRow ) free( m_ppElements[0] ); }
168  else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) free( m_ppElements[i] ); }
169  free( m_ppElements );
170  free( rowSizes );
171  }
172  rows = r;
173  if( r )
174  {
175  rowSizes = ( int* )malloc( sizeof( int ) * r );
176  memset( rowSizes , 0 , sizeof( int ) * r );
177  m_ppElements = ( MatrixEntry< T >** )malloc( sizeof( MatrixEntry< T >* ) * r );
178  }
179  _contiguous = false;
180  _maxEntriesPerRow = 0;
181  }
template< class T >
void SparseMatrix< T >::Resize( int r , int e )
{
    // Re-allocates the matrix as r rows backed by ONE contiguous slab of r*e
    // entries (at most e entries per row).
    if( rows>0 )
    {
        // Note: the 'else' binds to 'if( _contiguous )'; the two free() calls
        // below always run whenever rows>0.
        if( !UseAlloc )
            if( _contiguous ){ if( _maxEntriesPerRow ) free( m_ppElements[0] ); }
            else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) free( m_ppElements[i] ); }
        free( m_ppElements );
        free( rowSizes );
    }
    rows = r;
    if( r )
    {
        rowSizes = ( int* )malloc( sizeof( int ) * r );
        memset( rowSizes , 0 , sizeof( int ) * r );
        m_ppElements = ( MatrixEntry< T >** )malloc( sizeof( MatrixEntry< T >* ) * r );
        // Carve the single slab into r equal stretches of e entries each.
        m_ppElements[0] = ( MatrixEntry< T >* )malloc( sizeof( MatrixEntry< T > ) * r * e );
        for( int i=1 ; i<r ; i++ ) m_ppElements[i] = m_ppElements[i-1] + e;
    }
    _contiguous = true;
    _maxEntriesPerRow = e;
}
205 
206  template<class T>
207  void SparseMatrix< T >::SetRowSize( int row , int count )
208  {
209  if( _contiguous )
210  {
211  if (count > _maxEntriesPerRow)
212  {
213  POISSON_THROW_EXCEPTION (pcl::poisson::PoissonBadArgumentException, "Attempted to set row size on contiguous matrix larger than max row size: (requested)"<< count << " > (maximum)" << _maxEntriesPerRow );
214  }
215  rowSizes[row] = count;
216  }
217  else if( row>=0 && row<rows )
218  {
219  if( UseAlloc ) m_ppElements[row] = internalAllocator.newElements(count);
220  else
221  {
222  if( rowSizes[row] ) free( m_ppElements[row] );
223  if( count>0 ) m_ppElements[row] = ( MatrixEntry< T >* )malloc( sizeof( MatrixEntry< T > ) * count );
224  }
225  }
226  }
227 
228 
229  template<class T>
231  {
232  Resize(this->m_N, this->m_M);
233  }
234 
235  template<class T>
237  {
238  SetZero();
239  for(int ij=0; ij < Min( this->Rows(), this->Columns() ); ij++)
240  (*this)(ij,ij) = T(1);
241  }
242 
243  template<class T>
245  {
246  SparseMatrix<T> M(*this);
247  M *= V;
248  return M;
249  }
250 
251  template<class T>
253  {
254  for (int i=0; i<this->Rows(); i++)
255  {
256  for(int ii=0;ii<m_ppElements[i].size();i++){m_ppElements[i][ii].Value*=V;}
257  }
258  return *this;
259  }
260 
261  template<class T>
263  {
264  SparseMatrix<T> R( this->Rows(), M.Columns() );
265  for(int i=0; i<R.Rows(); i++){
266  for(int ii=0;ii<m_ppElements[i].size();ii++){
267  int N=m_ppElements[i][ii].N;
268  T Value=m_ppElements[i][ii].Value;
269  for(int jj=0;jj<M.m_ppElements[N].size();jj++){
270  R(i,M.m_ppElements[N][jj].N) += Value * M.m_ppElements[N][jj].Value;
271  }
272  }
273  }
274  return R;
275  }
276 
277  template<class T>
278  template<class T2>
280  {
281  Vector<T2> R( rows );
282 
283  for (int i=0; i<rows; i++)
284  {
285  T2 temp=T2();
286  for(int ii=0;ii<rowSizes[i];ii++){
287  temp+=m_ppElements[i][ii].Value * V.m_pV[m_ppElements[i][ii].N];
288  }
289  R(i)=temp;
290  }
291  return R;
292  }
293 
294  template<class T>
295  template<class T2>
296  void SparseMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , int threads ) const
297  {
298 #pragma omp parallel for num_threads( threads ) schedule( static )
299  for( int i=0 ; i<rows ; i++ )
300  {
301  T2 temp = T2();
302  temp *= 0;
303  for( int j=0 ; j<rowSizes[i] ; j++ ) temp += m_ppElements[i][j].Value * In.m_pV[m_ppElements[i][j].N];
304  Out.m_pV[i] = temp;
305  }
306  }
307 
308  template<class T>
310  {
311  return Multiply(M);
312  }
313  template<class T>
314  template<class T2>
316  {
317  return Multiply(V);
318  }
319 
320  template<class T>
322  {
323  SparseMatrix<T> M( this->Columns(), this->Rows() );
324 
325  for (int i=0; i<this->Rows(); i++)
326  {
327  for(int ii=0;ii<m_ppElements[i].size();ii++){
328  M(m_ppElements[i][ii].N,i) = m_ppElements[i][ii].Value;
329  }
330  }
331  return M;
332  }
333 
334  template<class T>
335  template<class T2>
336  int SparseMatrix<T>::SolveSymmetric( const SparseMatrix<T>& M , const Vector<T2>& b , int iters , Vector<T2>& solution , const T2 eps , int reset , int threads )
337  {
338  if( reset )
339  {
340  solution.Resize( b.Dimensions() );
341  solution.SetZero();
342  }
343  Vector< T2 > r;
344  r.Resize( solution.Dimensions() );
345  M.Multiply( solution , r );
346  r = b - r;
347  Vector< T2 > d = r;
348  double delta_new , delta_0;
349  for( int i=0 ; i<r.Dimensions() ; i++ ) delta_new += r.m_pV[i] * r.m_pV[i];
350  delta_0 = delta_new;
351  if( delta_new<eps ) return 0;
352  int ii;
353  Vector< T2 > q;
354  q.Resize( d.Dimensions() );
355  for( ii=0; ii<iters && delta_new>eps*delta_0 ; ii++ )
356  {
357  M.Multiply( d , q , threads );
358  double dDotQ = 0 , alpha = 0;
359  for( int i=0 ; i<d.Dimensions() ; i++ ) dDotQ += d.m_pV[i] * q.m_pV[i];
360  alpha = delta_new / dDotQ;
361 #pragma omp parallel for num_threads( threads ) schedule( static )
362  for( int i=0 ; i<r.Dimensions() ; i++ ) solution.m_pV[i] += d.m_pV[i] * T2( alpha );
363  if( !(ii%50) )
364  {
365  r.Resize( solution.Dimensions() );
366  M.Multiply( solution , r , threads );
367  r = b - r;
368  }
369  else
370 #pragma omp parallel for num_threads( threads ) schedule( static )
371  for( int i=0 ; i<r.Dimensions() ; i++ ) r.m_pV[i] = r.m_pV[i] - q.m_pV[i] * T2(alpha);
372 
373  double delta_old = delta_new , beta;
374  delta_new = 0;
375  for( int i=0 ; i<r.Dimensions() ; i++ ) delta_new += r.m_pV[i]*r.m_pV[i];
376  beta = delta_new / delta_old;
377 #pragma omp parallel for num_threads( threads ) schedule( static )
378  for( int i=0 ; i<d.Dimensions() ; i++ ) d.m_pV[i] = r.m_pV[i] + d.m_pV[i] * T2( beta );
379  }
380  return ii;
381  }
382 
// Solve for x s.t. M(x)=b by solving for x s.t. M^tM(x)=M^t(b)
template<class T>
int SparseMatrix<T>::Solve(const SparseMatrix<T>& M,const Vector<T>& b,int iters,Vector<T>& solution,const T eps){
    // Conjugate gradient on the normal equations, so M need not be symmetric.
    // Returns the number of iterations actually performed.
    SparseMatrix mTranspose=M.Transpose();
    Vector<T> bb=mTranspose*b;       // right-hand side of the normal equations
    Vector<T> d,r,Md;
    T alpha,beta,rDotR;
    int i;

    solution.Resize(M.Columns());
    solution.SetZero();

    d=r=bb;
    rDotR=r.Dot(r);                  // squared residual norm
    for(i=0;i<iters && rDotR>eps;i++){
        T temp;
        Md=mTranspose*(M*d);         // apply M^t M to the search direction
        alpha=rDotR/d.Dot(Md);
        solution+=d*alpha;
        r-=Md*alpha;
        temp=r.Dot(r);
        beta=temp/rDotR;
        rDotR=temp;
        d=r+d*beta;                  // next conjugate direction
    }
    return i;
}
410 
411 
412 
413 
414  ///////////////////////////
415  // SparseSymmetricMatrix //
416  ///////////////////////////
417  template<class T>
418  template<class T2>
420  template<class T>
421  template<class T2>
423  {
425 
426  for(int i=0; i<SparseMatrix<T>::rows; i++){
427  for(int ii=0;ii<SparseMatrix<T>::rowSizes[i];ii++){
428  int j=SparseMatrix<T>::m_ppElements[i][ii].N;
429  R(i)+=SparseMatrix<T>::m_ppElements[i][ii].Value * V.m_pV[j];
430  R(j)+=SparseMatrix<T>::m_ppElements[i][ii].Value * V.m_pV[i];
431  }
432  }
433  return R;
434  }
435 
436  template<class T>
437  template<class T2>
438  void SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , bool addDCTerm ) const
439  {
440  Out.SetZero();
441  const T2* in = &In[0];
442  T2* out = &Out[0];
443  T2 dcTerm = T2( 0 );
444  if( addDCTerm )
445  {
446  for( int i=0 ; i<SparseMatrix<T>::rows ; i++ ) dcTerm += in[i];
447  dcTerm /= SparseMatrix<T>::rows;
448  }
449  for( int i=0 ; i<this->SparseMatrix<T>::rows ; i++ )
450  {
452  const MatrixEntry<T>* end = temp + SparseMatrix<T>::rowSizes[i];
453  const T2& in_i_ = in[i];
454  T2 out_i = T2(0);
455  for( ; temp!=end ; temp++ )
456  {
457  int j=temp->N;
458  T2 v=temp->Value;
459  out_i += v * in[j];
460  out[j] += v * in_i_;
461  }
462  out[i] += out_i;
463  }
464  if( addDCTerm ) for( int i=0 ; i<SparseMatrix<T>::rows ; i++ ) out[i] += dcTerm;
465  }
template<class T>
template<class T2>
void SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , MapReduceVector< T2 >& OutScratch , bool addDCTerm ) const
{
    // Parallel Out = A * In using map-reduce: each thread accumulates into its
    // own scratch vector (so the symmetric out[j] updates need no atomics),
    // and the scratch vectors are summed into Out at the end.
    int dim = int( In.Dimensions() );
    const T2* in = &In[0];
    int threads = OutScratch.threads();
    if( addDCTerm )
    {
        T2 dcTerm = 0;
#pragma omp parallel for num_threads( threads ) reduction ( + : dcTerm )
        for( int t=0 ; t<threads ; t++ )
        {
            T2* out = OutScratch[t];
            memset( out , 0 , sizeof( T2 ) * dim );
            // Thread t processes its contiguous share of the rows.
            for( int i=(SparseMatrix<T>::rows*t)/threads ; i<(SparseMatrix<T>::rows*(t+1))/threads ; i++ )
            {
                const T2& in_i_ = in[i];
                T2& out_i_ = out[i];
                for( const MatrixEntry< T > *temp = SparseMatrix<T>::m_ppElements[i] , *end = temp+SparseMatrix<T>::rowSizes[i] ; temp!=end ; temp++ )
                {
                    int j = temp->N;
                    T2 v = temp->Value;
                    out_i_ += v * in[j];
                    out[j] += v * in_i_;
                }
                dcTerm += in_i_;
            }
        }
        dcTerm /= dim;
        dim = int( Out.Dimensions() );
        T2* out = &Out[0];
        // Reduce: sum the per-thread scratch vectors plus the DC term.
#pragma omp parallel for num_threads( threads ) schedule( static )
        for( int i=0 ; i<dim ; i++ )
        {
            T2 _out = dcTerm;
            for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
            out[i] = _out;
        }
    }
    else
    {
#pragma omp parallel for num_threads( threads )
        for( int t=0 ; t<threads ; t++ )
        {
            T2* out = OutScratch[t];
            memset( out , 0 , sizeof( T2 ) * dim );
            for( int i=(SparseMatrix<T>::rows*t)/threads ; i<(SparseMatrix<T>::rows*(t+1))/threads ; i++ )
            {
                const T2& in_i_ = in[i];
                T2& out_i_ = out[i];
                for( const MatrixEntry< T > *temp = SparseMatrix<T>::m_ppElements[i] , *end = temp+SparseMatrix<T>::rowSizes[i] ; temp!=end ; temp++ )
                {
                    int j = temp->N;
                    T2 v = temp->Value;
                    out_i_ += v * in[j];
                    out[j] += v * in_i_;
                }
            }
        }
        dim = int( Out.Dimensions() );
        T2* out = &Out[0];
        // Reduce: sum the per-thread scratch vectors into Out.
#pragma omp parallel for num_threads( threads ) schedule( static )
        for( int i=0 ; i<dim ; i++ )
        {
            T2 _out = T2(0);
            for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
            out[i] = _out;
        }
    }
}
537  template<class T>
538  template<class T2>
539  void SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , std::vector< T2* >& OutScratch , const std::vector< int >& bounds ) const
540  {
541  int dim = In.Dimensions();
542  const T2* in = &In[0];
543  int threads = OutScratch.size();
544 #pragma omp parallel for num_threads( threads )
545  for( int t=0 ; t<threads ; t++ )
546  for( int i=0 ; i<dim ; i++ ) OutScratch[t][i] = T2(0);
547 #pragma omp parallel for num_threads( threads )
548  for( int t=0 ; t<threads ; t++ )
549  {
550  T2* out = OutScratch[t];
551  for( int i=bounds[t] ; i<bounds[t+1] ; i++ )
552  {
554  const MatrixEntry<T>* end = temp + SparseMatrix<T>::rowSizes[i];
555  const T2& in_i_ = in[i];
556  T2& out_i_ = out[i];
557  for( ; temp!=end ; temp++ )
558  {
559  int j = temp->N;
560  T2 v = temp->Value;
561  out_i_ += v * in[j];
562  out[j] += v * in_i_;
563  }
564  }
565  }
566  T2* out = &Out[0];
567 #pragma omp parallel for num_threads( threads ) schedule( static )
568  for( int i=0 ; i<Out.Dimensions() ; i++ )
569  {
570  T2& _out = out[i];
571  _out = T2(0);
572  for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
573  }
574  }
575 #if defined _WIN32 && !defined __MINGW32__
576 #ifndef _AtomicIncrement_
577 #define _AtomicIncrement_
    // Lock-free *ptr += addend for floats, built on the Win32 interlocked CAS.
    // NOTE(review): _newValue aliases the bits of newValue (type-punned via
    // LONG), so after a failed CAS the loop restarts from the value the CAS
    // returned — confirm this aliasing is intended before touching it.
    inline void AtomicIncrement( volatile float* ptr , float addend )
    {
        float newValue = *ptr;
        LONG& _newValue = *( (LONG*)&newValue );
        LONG _oldValue;
        for( ;; )
        {
            _oldValue = _newValue;
            newValue += addend;
            _newValue = InterlockedCompareExchange( (LONG*) ptr , _newValue , _oldValue );
            if( _newValue==_oldValue ) break;
        }
    }
    // Lock-free *ptr += addend for doubles; 64-bit analogue of the float
    // version above, using InterlockedCompareExchange64.
    inline void AtomicIncrement( volatile double* ptr , double addend )
    //inline void AtomicIncrement( double* ptr , double addend )
    {
        double newValue = *ptr;
        LONGLONG& _newValue = *( (LONGLONG*)&newValue );
        LONGLONG _oldValue;
        do
        {
            _oldValue = _newValue;
            newValue += addend;
            _newValue = InterlockedCompareExchange64( (LONGLONG*) ptr , _newValue , _oldValue );
        }
        while( _newValue!=_oldValue );
    }
605 #endif // _AtomicIncrement_
    // Out = A * In (symmetric storage, float), parallelized over rows.
    // Cross-row writes out[j] are protected with AtomicIncrement(). When
    // 'partition' is given, thread t handles rows [partition[t], partition[t+1]).
    template< class T >
    void MultiplyAtomic( const SparseSymmetricMatrix< T >& A , const Vector< float >& In , Vector< float >& Out , int threads , const int* partition=NULL )
    {
        Out.SetZero();
        const float* in = &In[0];
        float* out = &Out[0];
        if( partition )
#pragma omp parallel for num_threads( threads )
            for( int t=0 ; t<threads ; t++ )
                for( int i=partition[t] ; i<partition[t+1] ; i++ )
                {
                    const MatrixEntry< T >* temp = A[i];
                    const MatrixEntry< T >* end = temp + A.rowSizes[i];
                    const float& in_i = in[i];
                    float out_i = 0.;
                    for( ; temp!=end ; temp++ )
                    {
                        // Each stored entry (i,j) contributes to out[i] and out[j].
                        int j = temp->N;
                        float v = temp->Value;
                        out_i += v * in[j];
                        AtomicIncrement( out+j , v * in_i );
                    }
                    AtomicIncrement( out+i , out_i );
                }
        else
#pragma omp parallel for num_threads( threads )
            for( int i=0 ; i<A.rows ; i++ )
            {
                const MatrixEntry< T >* temp = A[i];
                const MatrixEntry< T >* end = temp + A.rowSizes[i];
                const float& in_i = in[i];
                float out_i = 0.f;
                for( ; temp!=end ; temp++ )
                {
                    int j = temp->N;
                    float v = temp->Value;
                    out_i += v * in[j];
                    AtomicIncrement( out+j , v * in_i );
                }
                AtomicIncrement( out+i , out_i );
            }
    }
    // Double-precision twin of the float MultiplyAtomic above.
    // NOTE(review): here the entry value is read as 'T v' (not double),
    // unlike the float overload — confirm the mixed precision is intended.
    template< class T >
    void MultiplyAtomic( const SparseSymmetricMatrix< T >& A , const Vector< double >& In , Vector< double >& Out , int threads , const int* partition=NULL )
    {
        Out.SetZero();
        const double* in = &In[0];
        double* out = &Out[0];

        if( partition )
#pragma omp parallel for num_threads( threads )
            for( int t=0 ; t<threads ; t++ )
                for( int i=partition[t] ; i<partition[t+1] ; i++ )
                {
                    const MatrixEntry< T >* temp = A[i];
                    const MatrixEntry< T >* end = temp + A.rowSizes[i];
                    const double& in_i = in[i];
                    double out_i = 0.;
                    for( ; temp!=end ; temp++ )
                    {
                        int j = temp->N;
                        T v = temp->Value;
                        out_i += v * in[j];
                        AtomicIncrement( out+j , v * in_i );
                    }
                    AtomicIncrement( out+i , out_i );
                }
        else
#pragma omp parallel for num_threads( threads )
            for( int i=0 ; i<A.rows ; i++ )
            {
                const MatrixEntry< T >* temp = A[i];
                const MatrixEntry< T >* end = temp + A.rowSizes[i];
                const double& in_i = in[i];
                double out_i = 0.;
                for( ; temp!=end ; temp++ )
                {
                    int j = temp->N;
                    T v = temp->Value;
                    out_i += v * in[j];
                    AtomicIncrement( out+j , v * in_i );
                }
                AtomicIncrement( out+i , out_i );
            }
    }
691 
    // Conjugate-gradient solve of A*x = b (or the normal equations when
    // solveNormal is set) using the atomic multiply above. Returns the number
    // of CG iterations performed.
    template< class T >
    template< class T2 >
    int SparseSymmetricMatrix< T >::SolveAtomic( const SparseSymmetricMatrix< T >& A , const Vector< T2 >& b , int iters , Vector< T2 >& x , T2 eps , int reset , int threads , bool solveNormal )
    {
        eps *= eps; // compare against squared residual norms below
        int dim = b.Dimensions();
        if( reset )
        {
            x.Resize( dim );
            x.SetZero();
        }
        Vector< T2 > r( dim ) , d( dim ) , q( dim );
        Vector< T2 > temp;
        if( solveNormal ) temp.Resize( dim );
        T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
        const T2* _b = &b[0];

        // Build a row partition that balances the entry count across threads.
        std::vector< int > partition( threads+1 );
        {
            int eCount = 0;
            for( int i=0 ; i<A.rows ; i++ ) eCount += A.rowSizes[i];
            partition[0] = 0;
#pragma omp parallel for num_threads( threads )
            for( int t=0 ; t<threads ; t++ )
            {
                int _eCount = 0;
                for( int i=0 ; i<A.rows ; i++ )
                {
                    _eCount += A.rowSizes[i];
                    if( _eCount*threads>=eCount*(t+1) )
                    {
                        partition[t+1] = i;
                        break;
                    }
                }
            }
            partition[threads] = A.rows;
        }
        // Initial residual: r = A^t(b) - A^t A(x) for the normal equations,
        // r = b - A(x) otherwise; d starts equal to r.
        if( solveNormal )
        {
            MultiplyAtomic( A , x , temp , threads , &partition[0] );
            MultiplyAtomic( A , temp , r , threads , &partition[0] );
            MultiplyAtomic( A , b , temp , threads , &partition[0] );
#pragma omp parallel for num_threads( threads ) schedule( static )
            for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i];
        }
        else
        {
            MultiplyAtomic( A , x , r , threads , &partition[0] );
#pragma omp parallel for num_threads( threads ) schedule( static )
            for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i];
        }
        double delta_new = 0 , delta_0;
        // NOTE(review): std::size_t loop index vs. int dim mixes signedness.
        for( std::size_t i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i];
        delta_0 = delta_new;
        if( delta_new<eps )
        {
            fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
            return 0;
        }
        int ii;
        for( ii=0; ii<iters && delta_new>eps*delta_0 ; ii++ )
        {
            if( solveNormal ) MultiplyAtomic( A , d , temp , threads , &partition[0] ) , MultiplyAtomic( A , temp , q , threads , &partition[0] );
            else MultiplyAtomic( A , d , q , threads , &partition[0] );
            double dDotQ = 0;
            for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
            T2 alpha = T2( delta_new / dDotQ );
#pragma omp parallel for num_threads( threads ) schedule( static )
            for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
            if( (ii%50)==(50-1) )
            {
                // Every 50 iterations, recompute the residual from scratch to
                // limit floating-point drift.
                r.Resize( dim );
                if( solveNormal ) MultiplyAtomic( A , x , temp , threads , &partition[0] ) , MultiplyAtomic( A , temp , r , threads , &partition[0] );
                else MultiplyAtomic( A , x , r , threads , &partition[0] );
#pragma omp parallel for num_threads( threads ) schedule( static )
                for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i];
            }
            else
#pragma omp parallel for num_threads( threads ) schedule( static )
                for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha;

            double delta_old = delta_new;
            delta_new = 0;
            for( std::size_t i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i];
            T2 beta = T2( delta_new / delta_old );
#pragma omp parallel for num_threads( threads ) schedule( static )
            for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
        }
        return ii;
    }
783 #endif // _WIN32 && !__MINGW32__
    // Conjugate-gradient solve of A*x = b (optionally via the normal
    // equations) using the map-reduce Multiply. Returns the iteration count.
    template< class T >
    template< class T2 >
    int SparseSymmetricMatrix< T >::Solve( const SparseSymmetricMatrix<T>& A , const Vector<T2>& b , int iters , Vector<T2>& x , MapReduceVector< T2 >& scratch , T2 eps , int reset , bool addDCTerm , bool solveNormal )
    {
        int threads = scratch.threads();
        eps *= eps; // compared against squared residual norms
        int dim = int( b.Dimensions() );
        Vector< T2 > r( dim ) , d( dim ) , q( dim ) , temp;
        if( reset ) x.Resize( dim );
        if( solveNormal ) temp.Resize( dim );
        T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
        const T2* _b = &b[0];

        double delta_new = 0 , delta_0;
        // Initial residual (and its squared norm) — d starts equal to r.
        if( solveNormal )
        {
            A.Multiply( x , temp , scratch , addDCTerm ) , A.Multiply( temp , r , scratch , addDCTerm ) , A.Multiply( b , temp , scratch , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
            for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i];
        }
        else
        {
            A.Multiply( x , r , scratch , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction ( + : delta_new )
            for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
        }
        delta_0 = delta_new;
        if( delta_new<eps )
        {
            fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
            return 0;
        }
        int ii;
        for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
        {
            if( solveNormal ) A.Multiply( d , temp , scratch , addDCTerm ) , A.Multiply( temp , q , scratch , addDCTerm );
            else A.Multiply( d , q , scratch , addDCTerm );
            double dDotQ = 0;
#pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
            for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
            T2 alpha = T2( delta_new / dDotQ );
            double delta_old = delta_new;
            delta_new = 0;
            if( (ii%50)==(50-1) )
            {
                // Every 50 iterations the residual is recomputed from scratch.
                // NOTE(review): _x[i] += _d[i]*alpha is applied here AND again
                // in the loop after the recompute, advancing x by 2*alpha*d on
                // these iterations (and after r was formed from the updated x)
                // — looks suspicious; confirm against the reference solver.
#pragma omp parallel for num_threads( threads )
                for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
                r.Resize( dim );
                if( solveNormal ) A.Multiply( x , temp , scratch , addDCTerm ) , A.Multiply( temp , r , scratch , addDCTerm );
                else A.Multiply( x , r , scratch , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
                for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
            }
            else
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
                for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;

            T2 beta = T2( delta_new / delta_old );
#pragma omp parallel for num_threads( threads )
            for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
        }
        return ii;
    }
    // Conjugate-gradient solve of A*x = b, allocating its own map-reduce
    // scratch when threads>1 and falling back to the serial Multiply when
    // threads<=1. Returns the iteration count.
    template< class T >
    template< class T2 >
    int SparseSymmetricMatrix<T>::Solve( const SparseSymmetricMatrix<T>& A , const Vector<T2>& b , int iters , Vector<T2>& x , T2 eps , int reset , int threads , bool addDCTerm , bool solveNormal )
    {
        eps *= eps; // compared against squared residual norms
        int dim = int( b.Dimensions() );
        MapReduceVector< T2 > outScratch;
        if( threads<1 ) threads = 1;
        if( threads>1 ) outScratch.resize( threads , dim );
        if( reset ) x.Resize( dim );
        Vector< T2 > r( dim ) , d( dim ) , q( dim );
        Vector< T2 > temp;
        if( solveNormal ) temp.Resize( dim );
        T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
        const T2* _b = &b[0];

        double delta_new = 0 , delta_0;

        // Initial residual (and its squared norm) — d starts equal to r.
        if( solveNormal )
        {
            if( threads>1 ) A.Multiply( x , temp , outScratch , addDCTerm ) , A.Multiply( temp , r , outScratch , addDCTerm ) , A.Multiply( b , temp , outScratch , addDCTerm );
            else A.Multiply( x , temp , addDCTerm ) , A.Multiply( temp , r , addDCTerm ) , A.Multiply( b , temp , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
            for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i];
        }
        else
        {
            if( threads>1 ) A.Multiply( x , r , outScratch , addDCTerm );
            else A.Multiply( x , r , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
            for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
        }

        delta_0 = delta_new;
        if( delta_new<eps )
        {
            fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
            return 0;
        }
        int ii;
        for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
        {
            if( solveNormal )
            {
                if( threads>1 ) A.Multiply( d , temp , outScratch , addDCTerm ) , A.Multiply( temp , q , outScratch , addDCTerm );
                else A.Multiply( d , temp , addDCTerm ) , A.Multiply( temp , q , addDCTerm );
            }
            else
            {
                if( threads>1 ) A.Multiply( d , q , outScratch , addDCTerm );
                else A.Multiply( d , q , addDCTerm );
            }
            double dDotQ = 0;
#pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
            for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
            T2 alpha = T2( delta_new / dDotQ );
            double delta_old = delta_new;
            delta_new = 0;

            if( (ii%50)==(50-1) )
            {
                // Every 50 iterations the residual is recomputed from scratch.
                // NOTE(review): _x[i] += _d[i]*alpha is applied here AND again
                // in the loop after the recompute (advancing x by 2*alpha*d on
                // these iterations) — same pattern as the MapReduceVector
                // Solve; confirm against the reference solver.
#pragma omp parallel for num_threads( threads )
                for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
                r.SetZero();
                if( solveNormal )
                {
                    if( threads>1 ) A.Multiply( x , temp , outScratch , addDCTerm ) , A.Multiply( temp , r , outScratch , addDCTerm );
                    else A.Multiply( x , temp , addDCTerm ) , A.Multiply( temp , r , addDCTerm );
                }
                else
                {
                    if( threads>1 ) A.Multiply( x , r , outScratch , addDCTerm );
                    else A.Multiply( x , r , addDCTerm );
                }
#pragma omp parallel for num_threads( threads ) reduction ( + : delta_new )
                for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
            }
            else
            {
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
                for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
            }

            T2 beta = T2( delta_new / delta_old );
#pragma omp parallel for num_threads( threads )
            for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
        }
        return ii;
    }
936 
937  template<class T>
938  template<class T2>
939  int SparseSymmetricMatrix<T>::Solve( const SparseSymmetricMatrix<T>& M , const Vector<T2>& diagonal , const Vector<T2>& b , int iters , Vector<T2>& solution , int reset )
940  {
941  Vector<T2> d,r,Md;
942 
943  if(reset)
944  {
945  solution.Resize(b.Dimensions());
946  solution.SetZero();
947  }
948  Md.Resize(M.rows);
949  for( int i=0 ; i<iters ; i++ )
950  {
951  M.Multiply( solution , Md );
952  r = b-Md;
953  // solution_new[j] * diagonal[j] + ( Md[j] - solution_old[j] * diagonal[j] ) = b[j]
954  // solution_new[j] = ( b[j] - ( Md[j] - solution_old[j] * diagonal[j] ) ) / diagonal[j]
955  // solution_new[j] = ( b[j] - Md[j] ) / diagonal[j] + solution_old[j]
956  for( int j=0 ; j<int(M.rows) ; j++ ) solution[j] += (b[j]-Md[j])/diagonal[j];
957  }
958  return iters;
959  }
960  template< class T >
961  template< class T2 >
963  {
964  diagonal.Resize( SparseMatrix<T>::rows );
965  for( int i=0 ; i<SparseMatrix<T>::rows ; i++ )
966  {
967  diagonal[i] = 0.;
968  for( int j=0 ; j<SparseMatrix<T>::rowSizes[i] ; j++ ) if( SparseMatrix<T>::m_ppElements[i][j].N==i ) diagonal[i] += SparseMatrix<T>::m_ppElements[i][j].Value * 2;
969  }
970  }
971 
972  }
973 }
Vector< T2 > Multiply(const Vector< T2 > &V) const
SparseMatrix< T > Multiply(const SparseMatrix< T > &M) const
SparseMatrix< T > operator*(const T &V) const
SparseMatrix< T > Transpose() const
static Allocator< MatrixEntry< T > > internalAllocator
Definition: sparse_matrix.h:60
bool write(FILE *fp) const
This file defines compatibility wrappers for low level I/O functions.
Definition: convolution.h:45
Definition: sparse_matrix.h:45
SparseMatrix< T > & operator*=(const T &V)
void SetRowSize(int row, int count)
T Dot(const Vector &V) const
Definition: vector.hpp:237
T Value
Definition: sparse_matrix.h:50
int N
Definition: sparse_matrix.h:49
An exception that is thrown when the arguments number or type is wrong/unhandled. ...
MatrixEntry< T > ** m_ppElements
Definition: sparse_matrix.h:66
void resize(int threads, int dim)
std::size_t Dimensions() const
Definition: vector.hpp:89
SparseMatrix< T > & operator=(const SparseMatrix< T > &M)
Vector< T2 > operator*(const Vector< T2 > &V) const
static int Solve(const SparseSymmetricMatrix< T > &M, const Vector< T2 > &b, int iters, Vector< T2 > &solution, T2 eps=1e-8, int reset=1, int threads=0, bool addDCTerm=false, bool solveNormal=false)
static void SetAllocator(int blockSize)
static int UseAllocator(void)
void Resize(std::size_t N)
Definition: vector.hpp:63
static int Solve(const SparseMatrix< T > &M, const Vector< T > &b, int iters, Vector< T > &solution, const T eps=1e-8)
void getDiagonal(Vector< T2 > &diagonal) const
static int SolveSymmetric(const SparseMatrix< T > &M, const Vector< T2 > &b, int iters, Vector< T2 > &solution, const T2 eps=1e-8, int reset=1, int threads=1)