DMRITool  v0.1.1-139-g860d86b4
Diffusion MRI Tool
utlBlas.h
Go to the documentation of this file.
1 
19 #ifndef __utlBlas_h
20 #define __utlBlas_h
21 
25 #ifdef __cplusplus
26 extern "C"
27 {
28 #endif //__cplusplus
29 
30 #ifdef UTL_USE_MKL
31  #include <mkl_cblas.h>
32 #else
33  #include <utl_cblas.h>
34 #endif
35 
36 #ifdef __cplusplus
37 }
38 #endif //__cplusplus
39 
40 #include <vector>
41 #include <cmath>
42 #include <algorithm>
43 #include <complex>
44 
45 namespace utl
46 {
47 
48 #if defined (MKL_INT)
49  typedef MKL_INT INTT;
50 #elif defined (OPENBLAS_CONFIG_H)
51  typedef blasint INTT;
52 #elif defined (INT_64BITS)
53  typedef long long int INTT;
54 #else
55  typedef int INTT;
56 #endif
57 
58 
59 /**************************************************************************************/
61 /**************************************************************************************/
62 
63 template <class T> inline CBLAS_INDEX
64 cblas_iamax(const INTT N, const T *X, const INTT incX);
65 template <class T> inline T
66 cblas_nrm2(const INTT N, const T *X, const INTT incX);
67 template <class T> inline T
68 cblas_asum(const INTT N, const T *X, const INTT incX);
69 
70 // #ifdef UTL_USE_MKL
71 template <class T> inline CBLAS_INDEX
72 cblas_iamin(const INTT N, const T *X, const INTT incX);
73 // #endif
74 
76 template <class T> inline T
77 cblas_dot( const INTT N, const T *X, const INTT incX, const T *Y, const INTT incY);
79 template <class T> inline void
80 cblas_copy( const INTT N, const T *X, const INTT incX, T *Y, const INTT incY);
82 template <class T> inline void
83 cblas_swap( const INTT N, T *X, const INTT incX, T *Y, const INTT incY);
85 template <class T> inline void
86 cblas_scal (const INTT N, const T alpha, T *X, const INTT incX);
87 
88 
89 /**************************************************************************************/
91 /**************************************************************************************/
92 
94 template <class T> inline void
95 cblas_ger( const CBLAS_ORDER order, const INTT M, const INTT N, const T alpha, const T *X, const INTT incX, const T *Y, const INTT incY, T *A, const INTT lda);
96 
98 template <class T> inline void
99 cblas_syr (const CBLAS_ORDER order, const CBLAS_UPLO Uplo, const INTT N, const T alpha, const T *X, const INTT incX, T *A, const INTT lda);
100 
102 template <class T> inline void
103 cblas_gemv( const CBLAS_ORDER order, const CBLAS_TRANSPOSE TransA, const INTT M, const INTT N, const T alpha, const T *A, const INTT lda, const T *X, const INTT incX, const T beta, T *Y, const INTT incY);
104 
105 
106 /**************************************************************************************/
108 /**************************************************************************************/
109 
111 template <class T> inline void
112 cblas_gemm( const CBLAS_ORDER order, const CBLAS_TRANSPOSE TransA, const CBLAS_TRANSPOSE TransB, const INTT M, const INTT N, const INTT K, const T alpha, const T *A, const INTT lda, const T *B, const INTT ldb, const T beta, T *C, const INTT ldc);
113 
115 template <class T> inline void
116 cblas_syrk( CBLAS_ORDER order, CBLAS_UPLO Uplo, CBLAS_TRANSPOSE Trans, INTT N, INTT K, T alpha, T *A, INTT lda, T beta, T*C, INTT ldc);
117 
118 
119 
120 template <class T> inline void
121 cblas_axpby (const INTT N, const T alpha, const T *X, const INTT incX, const T beta, T *Y, const INTT incY);
122 
123 /**************************************************************************************/
126 template <> inline CBLAS_INDEX
127 cblas_iamax<double>(const INTT N, const double *X, const INTT incX)
128 {
129  return cblas_idamax(N,X,incX);
130 }
131 template <> inline CBLAS_INDEX
132 cblas_iamax<float>(const INTT N, const float *X, const INTT incX)
133 {
134  return cblas_isamax(N,X,incX);
135 }
136 template <> inline CBLAS_INDEX
137 cblas_iamax<std::complex<double> >(const INTT N, const std::complex<double> *X, const INTT incX)
138 {
139  return cblas_izamax(N,X,incX);
140 }
141 template <> inline CBLAS_INDEX
142 cblas_iamax<std::complex<float> >(const INTT N, const std::complex<float> *X, const INTT incX)
143 {
144  return cblas_icamax(N,X,incX);
145 }
146 
147 #ifdef UTL_USE_MKL
148 template <> inline CBLAS_INDEX
149 cblas_iamin<double>(const INTT N, const double *X, const INTT incX)
150 {
151  return cblas_idamin(N,X,incX);
152 }
153 template <> inline CBLAS_INDEX
154 cblas_iamin<float>(const INTT N, const float *X, const INTT incX)
155 {
156  return cblas_isamin(N,X,incX);
157 }
158 template <> inline CBLAS_INDEX
159 cblas_iamin<std::complex<double> >(const INTT N, const std::complex<double> *X, const INTT incX)
160 {
161  return cblas_izamin(N,X,incX);
162 }
163 template <> inline CBLAS_INDEX
164 cblas_iamin<std::complex<float> >(const INTT N, const std::complex<float> *X, const INTT incX)
165 {
166  return cblas_icamin(N,X,incX);
167 }
168 #else
169 template <typename T> inline CBLAS_INDEX
170 cblas_iamin(const INTT N, const T *X, const INTT incX)
171 {
172  int iter;
173  std::vector<double> vec(N);
174  for ( int i = 0; i < N; i = i+incX )
175  vec[i]=std::abs(X[i]);
176  iter = std::min_element(vec.begin(), vec.end());
177  int n=0;
178  for ( int ii = vec.begin(); ii!=iter; ++ii )
179  n++;
180  return n;
181 }
182 template <typename T> inline CBLAS_INDEX
183 cblas_iamax(const INTT N, const T *X, const INTT incX)
184 {
185  int iter;
186  std::vector<double> vec(N);
187  for ( int i = 0; i < N; i = i+incX )
188  vec[i]=std::abs(X[i]);
189  iter = std::max_element(vec.begin(), vec.end());
190  int n=0;
191  for ( int ii = vec.begin(); ii!=iter; ++ii )
192  n++;
193  return n;
194 }
195 #endif
196 
197 template <> inline double
198 cblas_nrm2<double>( const INTT N, const double *X, const INTT incX)
199 {
200  return cblas_dnrm2(N,X,incX);
201 }
202 template <> inline float
203 cblas_nrm2<float>( const INTT N, const float *X, const INTT incX)
204 {
205  return cblas_snrm2(N,X,incX);
206 }
207 inline double
208 cblas_nrm2( const INTT N, const std::complex<double> *X, const INTT incX)
209 {
210  return cblas_dznrm2(N,X,incX);
211 }
212 inline float
213 cblas_nrm2( const INTT N, const std::complex<float> *X, const INTT incX)
214 {
215  return cblas_scnrm2(N,X,incX);
216 }
217 
218 template <> inline double
219 cblas_asum<double>( const INTT N, const double *X, const INTT incX)
220 {
221  return cblas_dasum(N,X,incX);
222 }
223 template <> inline float
224 cblas_asum<float>( const INTT N, const float *X, const INTT incX)
225 {
226  return cblas_sasum(N,X,incX);
227 }
228 inline double
229 cblas_asum( const INTT N, const std::complex<double> *X, const INTT incX)
230 {
231  return cblas_dzasum(N,X,incX);
232 }
233 inline float
234 cblas_asum( const INTT N, const std::complex<float> *X, const INTT incX)
235 {
236  return cblas_scasum(N,X,incX);
237 }
238 
239 
240 template <> inline double
241 cblas_dot<double>( const INTT N, const double *X, const INTT incX, const double *Y, const INTT incY)
242 {
243  return cblas_ddot(N,X,incX,Y,incY);
244 }
245 template <> inline float
246 cblas_dot<float>( const INTT N, const float *X, const INTT incX, const float *Y, const INTT incY)
247 {
248  return cblas_sdot(N,X,incX,Y,incY);
249 }
250 template <>
251 inline std::complex<double>
252 cblas_dot<std::complex<double> >( const INTT N, const std::complex<double> *X, const INTT incX, const std::complex<double> *Y, const INTT incY)
253 {
254  std::complex<double> result;
255  cblas_zdotc_sub(N,X,incX,Y,incY, &result);
256  return result;
257 }
258 
259 template <> inline void
260 cblas_copy<double>( const INTT N, const double *X, const INTT incX, double *Y, const INTT incY)
261 {
262  cblas_dcopy(N,X,incX,Y,incY);
263 }
264 template <> inline void
265 cblas_copy<float>( const INTT N, const float *X, const INTT incX, float *Y, const INTT incY)
266 {
267  cblas_scopy(N,X,incX,Y,incY);
268 }
269 template <> inline void
270 cblas_copy<std::complex<double> >( const INTT N, const std::complex<double> *X, const INTT incX, std::complex<double> *Y, const INTT incY)
271 {
272  cblas_zcopy(N,X,incX,Y,incY);
273 }
274 template <> inline void
275 cblas_copy<std::complex<float> >( const INTT N, const std::complex<float> *X, const INTT incX, std::complex<float> *Y, const INTT incY)
276 {
277  cblas_ccopy(N,X,incX,Y,incY);
278 }
279 
280 template <> inline void
281 cblas_swap<double>( const INTT N, double *X, const INTT incX, double *Y, const INTT incY)
282 {
283  cblas_dswap(N,X,incX,Y,incY);
284 }
285 template <> inline void
286 cblas_swap<float>( const INTT N, float *X, const INTT incX, float *Y, const INTT incY)
287 {
288  cblas_sswap(N,X,incX,Y,incY);
289 }
290 
291 template <> inline void
292 cblas_scal<double>(const INTT N, const double alpha, double *X, const INTT incX)
293 {
294  cblas_dscal(N, alpha, X, incX);
295 }
296 template <> inline void
297 cblas_scal<float>(const INTT N, const float alpha, float *X, const INTT incX)
298 {
299  cblas_sscal(N, alpha, X, incX);
300 }
301 template <> inline void
302 cblas_scal<std::complex<double> >(const INTT N, const std::complex<double> alpha, std::complex<double> *X, const INTT incX)
303 {
304  cblas_zscal(N, &alpha, X, incX);
305 }
306 template <> inline void
307 cblas_scal<std::complex<float> >(const INTT N, const std::complex<float> alpha, std::complex<float> *X, const INTT incX)
308 {
309  cblas_cscal(N, &alpha, X, incX);
310 }
311 
312 
313 
314 
315 template <> inline void
316 cblas_ger<float>( const CBLAS_ORDER order, const INTT M, const INTT N, const float alpha, const float *X, const INTT incX, const float *Y, const INTT incY, float *A, const INTT lda)
317 {
318  cblas_sger(order,M,N,alpha,X,incX,Y,incY,A,lda);
319 }
320 template <> inline void
321 cblas_ger<double>( const CBLAS_ORDER order, const INTT M, const INTT N, const double alpha, const double *X, const INTT incX, const double *Y, const INTT incY, double *A, const INTT lda)
322 {
323  cblas_dger(order,M,N,alpha,X,incX,Y,incY,A,lda);
324 }
325 
326 template <> inline void
327 cblas_syr<float>(const CBLAS_ORDER order, const CBLAS_UPLO Uplo, const INTT N, const float alpha, const float *X, const INTT incX, float *A, const INTT lda)
328 {
329  cblas_ssyr(order,Uplo,N,alpha,X,incX,A,lda);
330 }
331 template <> inline void
332 cblas_syr<double>(const CBLAS_ORDER order, const CBLAS_UPLO Uplo, const INTT N, const double alpha, const double *X, const INTT incX, double *A, const INTT lda)
333 {
334  cblas_dsyr(order,Uplo,N,alpha,X,incX,A,lda);
335 }
336 
337 template <> inline void
338 cblas_gemv<double>( const CBLAS_ORDER order, const CBLAS_TRANSPOSE TransA, const INTT M, const INTT N, const double alpha, const double *A, const INTT lda, const double *X, const INTT incX, const double beta, double *Y, const INTT incY)
339 {
340  cblas_dgemv(order,TransA,M,N,alpha,A,lda,X,incX,beta,Y,incY);
341 }
342 template <> inline void
343 cblas_gemv<float>( const CBLAS_ORDER order, const CBLAS_TRANSPOSE TransA, const INTT M, const INTT N, const float alpha, const float *A, const INTT lda, const float *X, const INTT incX, const float beta, float *Y, const INTT incY)
344 {
345  cblas_sgemv(order,TransA,M,N,alpha,A,lda,X,incX,beta,Y,incY);
346 }
347 template <> inline void
348 cblas_gemv<std::complex<double> >( const CBLAS_ORDER order, const CBLAS_TRANSPOSE TransA, const INTT M, const INTT N, const std::complex<double> alpha, const std::complex<double> *A, const INTT lda, const std::complex<double> *X, const INTT incX, const std::complex<double> beta, std::complex<double> *Y, const INTT incY)
349 {
350  cblas_zgemv(order,TransA,M,N,&alpha,A,lda,X,incX,&beta,Y,incY);
351 }
352 template <> inline void
353 cblas_gemv<std::complex<float> >( const CBLAS_ORDER order, const CBLAS_TRANSPOSE TransA, const INTT M, const INTT N, const std::complex<float> alpha, const std::complex<float> *A, const INTT lda, const std::complex<float> *X, const INTT incX, const std::complex<float> beta, std::complex<float> *Y, const INTT incY)
354 {
355  cblas_cgemv(order,TransA,M,N,&alpha,A,lda,X,incX,&beta,Y,incY);
356 }
357 
358 
359 
360 
361 
362 template <> inline void
363 cblas_gemm<double>( const CBLAS_ORDER order, const CBLAS_TRANSPOSE TransA, const CBLAS_TRANSPOSE TransB, const INTT M, const INTT N, const INTT K, const double alpha, const double *A, const INTT lda, const double *B, const INTT ldb, const double beta, double *C, const INTT ldc)
364 {
365  cblas_dgemm(order,TransA,TransB,M,N,K,alpha,A,lda,B,ldb,beta,C,ldc);
366 }
367 template <> inline void
368 cblas_gemm<float>( const CBLAS_ORDER order, const CBLAS_TRANSPOSE TransA, const CBLAS_TRANSPOSE TransB, const INTT M, const INTT N, const INTT K, const float alpha, const float *A, const INTT lda, const float *B, const INTT ldb, const float beta, float *C, const INTT ldc)
369 {
370  cblas_sgemm(order,TransA,TransB,M,N,K,alpha,A,lda,B,ldb,beta,C,ldc);
371 }
372 template <> inline void
373 cblas_gemm<std::complex<double> >( const CBLAS_ORDER order, const CBLAS_TRANSPOSE TransA, const CBLAS_TRANSPOSE TransB, const INTT M, const INTT N, const INTT K, const std::complex<double> alpha, const std::complex<double> *A, const INTT lda, const std::complex<double> *B, const INTT ldb, const std::complex<double> beta, std::complex<double> *C, const INTT ldc)
374 {
375  cblas_zgemm(order,TransA,TransB,M,N,K,&alpha,A,lda,B,ldb,&beta,C,ldc);
376 }
377 
378 template <> inline void
379 cblas_syrk<double>( CBLAS_ORDER order, CBLAS_UPLO Uplo, CBLAS_TRANSPOSE Trans, INTT N, INTT K, double alpha, double *A, INTT lda, double beta, double *C, INTT ldc)
380 {
381  cblas_dsyrk(order,Uplo,Trans,N,K,alpha,A,lda,beta,C,ldc);
382 }
383 template <> inline void
384 cblas_syrk<float>( CBLAS_ORDER order, CBLAS_UPLO Uplo, CBLAS_TRANSPOSE Trans, INTT N, INTT K, float alpha, float *A, INTT lda, float beta, float *C, INTT ldc)
385 {
386  cblas_ssyrk(order,Uplo,Trans,N,K,alpha,A,lda,beta,C,ldc);
387 }
388 
389 
390 #ifdef UTL_USE_MKL
391 template <> inline void
392 cblas_axpby<double>(const INTT N, const double alpha, const double *X, const INTT incX, const double beta, double *Y, const INTT incY)
393 {
394  cblas_daxpby(N,alpha,X,incX,beta,Y,incY);
395 }
396 template <> inline void
397 cblas_axpby<float>(const INTT N, const float alpha, const float *X, const INTT incX, const float beta, float *Y, const INTT incY)
398 {
399  cblas_saxpby(N,alpha,X,incX,beta,Y,incY);
400 }
401 template <> inline void
402 cblas_axpby<std::complex<double> >(const INTT N, const std::complex<double> alpha, const std::complex<double> *X, const INTT incX, const std::complex<double> beta, std::complex<double> *Y, const INTT incY)
403 {
404  cblas_zaxpby(N,&alpha,X,incX,&beta,Y,incY);
405 }
406 template <> inline void
407 cblas_axpby<std::complex<float> >(const INTT N, const std::complex<float> alpha, const std::complex<float> *X, const INTT incX, const std::complex<float> beta, std::complex<float> *Y, const INTT incY)
408 {
409  cblas_zaxpby(N,&alpha,X,incX,&beta,Y,incY);
410 }
411 #else
412 template <typename T> inline void
413 cblas_axpby(const INTT N, const T alpha, const T *X, const INTT incX, const T beta, T *Y, const INTT incY)
414 {
415  for ( int i=0, j=0; i<N, j<N; i+=incX, j+=incY )
416  Y[j] = alpha*X[i] + beta*Y[j];
417 }
418 
419 #endif
420 
421 }
422 
423 
433 #define __utl_gemm_MatrixTimesMatrix(T, FuncName, FuncHelperName, RowMajorMatrixName, GetRowsFuncName, GetColsFuncName, GetDataFuncName, ReSizeFuncName) \
434 template <class T> \
435 inline bool \
436 FuncName(const bool bATrans, const bool bBTrans, const T alpha, const RowMajorMatrixName& A, const RowMajorMatrixName& B, const T beta, RowMajorMatrixName& C) \
437 { \
438  CBLAS_TRANSPOSE transA, transB; \
439  unsigned int M = 0, N = 0, K = 0; \
440  if(!bATrans && !bBTrans) \
441  { \
442  transA = CblasNoTrans; \
443  transB = CblasNoTrans; \
444  M = A.GetRowsFuncName(); \
445  N = B.GetColsFuncName(); \
446  K = A.GetColsFuncName(); \
447  utlSAException(A.GetColsFuncName() != B.GetRowsFuncName())(A.GetColsFuncName())(B.GetRowsFuncName()).msg("matrix dimension mismatch"); \
448  } \
449  else if(bATrans && !bBTrans) \
450  { \
451  transA = CblasTrans; \
452  transB = CblasNoTrans; \
453  M = A.GetColsFuncName(); \
454  N = B.GetColsFuncName(); \
455  K = A.GetRowsFuncName(); \
456  utlSAException(A.GetRowsFuncName() != B.GetRowsFuncName())(A.GetRowsFuncName())(B.GetRowsFuncName()).msg("matrix dimension mismatch"); \
457  } \
458  else if(!bATrans && bBTrans) \
459  { \
460  transA = CblasNoTrans; \
461  transB = CblasTrans; \
462  M = A.GetRowsFuncName(); \
463  N = B.GetRowsFuncName(); \
464  K = A.GetColsFuncName(); \
465  utlSAException(A.GetColsFuncName() != B.GetColsFuncName())(A.GetColsFuncName())(B.GetColsFuncName()).msg("matrix dimension mismatch"); \
466  } \
467  else \
468  { \
469  transA = CblasTrans; \
470  transB = CblasTrans; \
471  M = A.GetColsFuncName(); \
472  N = B.GetRowsFuncName(); \
473  K = A.GetRowsFuncName(); \
474  utlSAException(A.GetRowsFuncName() != B.GetColsFuncName())(A.GetRowsFuncName())(B.GetColsFuncName()).msg("matrix dimension mismatch"); \
475  } \
476  utl::cblas_gemm(CblasRowMajor, transA, transB, M, N, K, alpha, A.GetDataFuncName(), A.GetColsFuncName(), B.GetDataFuncName(), B.GetColsFuncName(), beta, C.GetDataFuncName(), C.GetColsFuncName()); \
477  return true; \
478 } \
479  \
480 template <class T> \
481 inline void \
482 Product##FuncHelperName##MM(const RowMajorMatrixName& A, const RowMajorMatrixName& B, RowMajorMatrixName& C, const double alpha=1.0, const double beta=0.0) \
483 { \
484  C.ReSizeFuncName(A.GetRowsFuncName(), B.GetColsFuncName()); \
485  FuncName<T>(false, false, alpha, A, B, beta, C); \
486 } \
487  \
488 template <class T> \
489 inline void \
490 Product##FuncHelperName##MM(const RowMajorMatrixName& A1, const RowMajorMatrixName& A2, const RowMajorMatrixName& A3, RowMajorMatrixName& C) \
491 { \
492  RowMajorMatrixName tmp; \
493  Product##FuncHelperName##MM<T>(A1, A2, tmp); \
494  Product##FuncHelperName##MM<T>(tmp, A3, C); \
495 } \
496  \
497 template <class T> \
498 inline void \
499 Product##FuncHelperName##MM(const RowMajorMatrixName& A1, const RowMajorMatrixName& A2, const RowMajorMatrixName& A3, const RowMajorMatrixName& A4, RowMajorMatrixName& C) \
500 { \
501  RowMajorMatrixName tmp; \
502  Product##FuncHelperName##MM<T>(A1, A2, A3, tmp); \
503  Product##FuncHelperName##MM<T>(tmp, A4, C); \
504 } \
505  \
506 template <class T> \
507 inline void \
508 Product##FuncHelperName##MM(const RowMajorMatrixName& A1, const RowMajorMatrixName& A2, const RowMajorMatrixName& A3, const RowMajorMatrixName& A4, const RowMajorMatrixName& A5, RowMajorMatrixName& C) \
509 { \
510  RowMajorMatrixName tmp; \
511  Product##FuncHelperName##MM<T>(A1, A2, A3, A4, tmp); \
512  Product##FuncHelperName##MM<T>(tmp, A5, C); \
513 } \
514  \
515 template <class T> \
516 inline void \
517 Product##FuncHelperName##MtM(const RowMajorMatrixName& A, const RowMajorMatrixName& B, RowMajorMatrixName& C, const double alpha=1.0, const double beta=0.0) \
518 { \
519  C.ReSizeFuncName(A.GetColsFuncName(), B.GetColsFuncName()); \
520  FuncName<T>(true, false, alpha, A, B, beta, C); \
521 } \
522  \
523 template <class T> \
524 inline void \
525 Product##FuncHelperName##MMt(const RowMajorMatrixName& A, const RowMajorMatrixName& B, RowMajorMatrixName& C, const double alpha=1.0, const double beta=0.0) \
526 { \
527  C.ReSizeFuncName(A.GetRowsFuncName(), B.GetRowsFuncName()); \
528  FuncName<T>(false, true, alpha, A, B, beta, C); \
529 } \
530  \
531 template <class T> \
532 inline void \
533 Product##FuncHelperName##MtMt(const RowMajorMatrixName& A, const RowMajorMatrixName& B, RowMajorMatrixName& C, const double alpha=1.0, const double beta=0.0) \
534 { \
535  C.ReSizeFuncName(A.GetColsFuncName(), B.GetRowsFuncName()); \
536  FuncName<T>(true, true, alpha, A, B, beta, C); \
537 } \
538 
539 
540 
551 #define __utl_gemv_MatrixTimesVector(T, FuncName, FuncHelperName, RowMajorMatrixName, GetRowsFuncName, GetColsFuncName, MatrixGetDataFuncName, VectorName, GetSizeFuncName, VectorGetDataFuncName, ReSizeFuncName) \
552 template <class T> \
553 inline bool \
554 FuncName(const bool bATrans, const T alpha, const RowMajorMatrixName& A, const VectorName& X, const T beta, VectorName& Y) \
555 { \
556  CBLAS_TRANSPOSE TransA; \
557  if(bATrans) \
558  { \
559  TransA = CblasTrans; \
560  utlSAException(A.GetRowsFuncName() != X.GetSizeFuncName())(A.GetRowsFuncName())(X.GetSizeFuncName()).msg("matrix and vector dimension mismatch"); \
561  } \
562  else \
563  { \
564  TransA = CblasNoTrans; \
565  utlSAException(A.GetColsFuncName() != X.GetSizeFuncName())(A.GetColsFuncName())(X.GetSizeFuncName()).msg("matrix and vector dimension mismatch"); \
566  } \
567  utl::cblas_gemv<T>(CblasRowMajor, TransA, A.GetRowsFuncName(), A.GetColsFuncName(), alpha, A.MatrixGetDataFuncName(), A.GetColsFuncName(), X.VectorGetDataFuncName(), 1, beta, Y.VectorGetDataFuncName(), 1); \
568  return true; \
569 } \
570  \
571 template <class T> \
572 inline void \
573 Product##FuncHelperName##Mv(const RowMajorMatrixName& A, const VectorName& b, VectorName& c, const double alpha=1.0, const double beta=0.0) \
574 { \
575  c.ReSizeFuncName(A.GetRowsFuncName()); \
576  FuncName<T>(false, alpha, A, b, beta, c); \
577 } \
578  \
579 template <class T> \
580 inline void \
581 Product##FuncHelperName##Mtv(const RowMajorMatrixName& A, const VectorName& b, VectorName& c, const double alpha=1.0, const double beta=0.0) \
582 { \
583  c.ReSizeFuncName(A.GetColsFuncName()); \
584  FuncName<T>(true, alpha, A, b, beta, c); \
585 } \
586 
587 
588 
593 #define __utl_gevm_MatrixTimesVector(T, FuncName, FuncHelperName, RowMajorMatrixName, GetRowsFuncName, GetColsFuncName, MatrixGetDataFuncName, VectorName, GetSizeFuncName, VectorGetDataFuncName, ReSizeFuncName) \
594 template <class T> \
595 inline bool \
596 FuncName(const bool bATrans, const T alpha, const VectorName& X, const RowMajorMatrixName& A, const T beta, VectorName& Y) \
597 { \
598  CBLAS_TRANSPOSE transA; \
599  unsigned int M = 1, N = 0, K = 0; \
600  if(bATrans) \
601  { \
602  transA = CblasTrans; \
603  N = A.GetRowsFuncName(); \
604  K = A.GetColsFuncName(); \
605  utlSAException(A.GetColsFuncName() != X.GetSizeFuncName())(A.GetColsFuncName())(X.GetSizeFuncName()).msg("matrix and vector dimension mismatch"); \
606  } \
607  else \
608  { \
609  N = A.GetColsFuncName(); \
610  K = A.GetRowsFuncName(); \
611  transA = CblasNoTrans; \
612  utlSAException(A.GetRowsFuncName() != X.GetSizeFuncName())(A.GetRowsFuncName())(X.GetSizeFuncName()).msg("matrix and vector dimension mismatch"); \
613  } \
614  utl::cblas_gemm<T>(CblasRowMajor, CblasNoTrans, transA, M, N, K, alpha, X.VectorGetDataFuncName(), X.GetSizeFuncName(), A.MatrixGetDataFuncName(), A.GetColsFuncName(), beta, Y.VectorGetDataFuncName(), Y.GetSizeFuncName()); \
615  return true; \
616 } \
617  \
618 template <class T> \
619 inline void \
620 Product##FuncHelperName##vM(const VectorName& b, const RowMajorMatrixName& A, VectorName& c, const double alpha=1.0, const double beta=0.0) \
621 { \
622  c.ReSizeFuncName(A.GetColsFuncName()); \
623  FuncName<T>(false, alpha, b, A, beta, c); \
624 } \
625  \
626 template <class T> \
627 inline void \
628 Product##FuncHelperName##vMt(const VectorName& b, const RowMajorMatrixName& A, VectorName& c, const double alpha=1.0, const double beta=0.0) \
629 { \
630  c.ReSizeFuncName(A.GetRowsFuncName()); \
631  FuncName<T>(true, alpha, b, A, beta, c); \
632 } \
633 
634 
635 
646 #define __utl_syrk_Matrix(T, FuncName, FuncHelperName, RowMajorMatrixName, GetRowsFuncName, GetColsFuncName, GetDataFuncName, ReSizeFuncName) \
647 template <class T> \
648 inline void \
649 FuncName( const bool trans, const T alpha, const RowMajorMatrixName& A, const T beta, RowMajorMatrixName& C ) \
650 { \
651  int size = C.GetRowsFuncName()*C.GetColsFuncName(); \
652  utlSAException(size>0 && C.GetRowsFuncName()!=C.GetColsFuncName())(C.GetRowsFuncName())(C.GetColsFuncName()).msg("C should be symmetric matrix or empty matrix"); \
653  CBLAS_TRANSPOSE transBlas; \
654  int K, N; \
655  if (!trans) \
656  { \
657  utlSAException(size>0 && A.GetRowsFuncName()!=C.GetRowsFuncName())(A.GetRowsFuncName())(C.GetRowsFuncName()).msg("wrong size"); \
658  transBlas=CblasNoTrans; \
659  N = A.GetRowsFuncName(); \
660  K = A.GetColsFuncName(); \
661  } \
662  else \
663  { \
664  utlSAException(size>0 && A.GetColsFuncName()!=C.GetRowsFuncName())(A.GetColsFuncName())(C.GetRowsFuncName()).msg("wrong size"); \
665  transBlas=CblasTrans; \
666  N = A.GetColsFuncName(); \
667  K = A.GetRowsFuncName(); \
668  } \
669  utlException(size>0 && (C.GetRowsFuncName()!=N || C.GetColsFuncName()!=N), "wrong size of C"); \
670  if (size==0) \
671  { \
672  utlSAException(std::fabs(beta)>1e-10)(beta)(C.GetRowsFuncName())(C.GetColsFuncName()).msg("when C is empty matrix, beta should be zero"); \
673  C.ReSizeFuncName(N,N); \
674  } \
675  RowMajorMatrixName B=A; \
676  utl::cblas_syrk<T>(CblasRowMajor, CblasUpper, transBlas, N, K, alpha, B.GetDataFuncName(), B.GetColsFuncName(), beta, C.GetDataFuncName(), N); \
677  \
678  T* data = C.GetDataFuncName(); \
679  for ( int i = 0; i < C.GetRowsFuncName(); ++i ) \
680  for ( int j = 0; j < i; ++j ) \
681  data[i*N+j] = data[j*N+i]; \
682 } \
683  \
684 template <class T> \
685 void \
686 Product##FuncHelperName##XXt ( const RowMajorMatrixName& A, RowMajorMatrixName& C, const double alpha=1.0, const double beta=0.0 ) \
687 { \
688  FuncName<T>(false, alpha, A, beta, C); \
689 } \
690 template <class T> \
691 void \
692 Product##FuncHelperName##XtX ( const RowMajorMatrixName& A, RowMajorMatrixName& C, const double alpha=1.0, const double beta=0.0 ) \
693 { \
694  FuncName<T>(true, alpha, A, beta, C); \
695 } \
696 
697 
699 #endif
700 
void cblas_syrk< double >(CBLAS_ORDER order, CBLAS_UPLO Uplo, CBLAS_TRANSPOSE Trans, INTT N, INTT K, double alpha, double *A, INTT lda, double beta, double *C, INTT ldc)
Definition: utlBlas.h:379
void cblas_ger< float >(const CBLAS_ORDER order, const INTT M, const INTT N, const float alpha, const float *X, const INTT incX, const float *Y, const INTT incY, float *A, const INTT lda)
Definition: utlBlas.h:316
CBLAS_TRANSPOSE
Definition: utl_cblas.h:6
double cblas_asum< double >(const INTT N, const double *X, const INTT incX)
Definition: utlBlas.h:219
T min_element(const std::vector< T > &v)
Definition: utlCore.h:199
void cblas_scal< double >(const INTT N, const double alpha, double *X, const INTT incX)
Definition: utlBlas.h:292
void cblas_copy(const INTT N, const T *X, const INTT incX, T *Y, const INTT incY)
void cblas_syrk< float >(CBLAS_ORDER order, CBLAS_UPLO Uplo, CBLAS_TRANSPOSE Trans, INTT N, INTT K, float alpha, float *A, INTT lda, float beta, float *C, INTT ldc)
Definition: utlBlas.h:384
void cblas_dscal(const int N, const double alpha, double *X, const int incX)
void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc)
int cblas_iamax< float >(const INTT N, const float *X, const INTT incX)
Definition: utlBlas.h:132
void cblas_scal(const INTT N, const T alpha, T *X, const INTT incX)
void cblas_ssyr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, const float *X, const int incX, float *A, const int lda)
void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc)
vcl_size_t cblas_idamin(int n, double *X, int incX)
external functions
void cblas_gemm< float >(const CBLAS_ORDER order, const CBLAS_TRANSPOSE TransA, const CBLAS_TRANSPOSE TransB, const INTT M, const INTT N, const INTT K, const float alpha, const float *A, const INTT lda, const float *B, const INTT ldb, const float beta, float *C, const INTT ldc)
Definition: utlBlas.h:368
int cblas_iamax(const INTT N, const T *X, const INTT incX)
interface to cblas_i*amax
Definition: utlBlas.h:183
void cblas_cgemv(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const void *alpha, const void *A, const int lda, const void *X, const int incX, const void *beta, void *Y, const int incY)
void cblas_cscal(const int N, const void *alpha, void *X, const int incX)
void cblas_ger(const CBLAS_ORDER order, const INTT M, const INTT N, const T alpha, const T *X, const INTT incX, const T *Y, const INTT incY, T *A, const INTT lda)
float cblas_sasum(const int N, const float *X, const int incX)
double cblas_ddot(const int N, const double *X, const int incX, const double *Y, const int incY)
int INTT
Definition: utlBlas.h:55
void cblas_zgemv(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const void *alpha, const void *A, const int lda, const void *X, const int incX, const void *beta, void *Y, const int incY)
double cblas_dot< double >(const INTT N, const double *X, const INTT incX, const double *Y, const INTT incY)
Definition: utlBlas.h:241
CBLAS_ORDER
Definition: utl_cblas.h:5
vcl_size_t cblas_isamin(int n, float *X, int incX)
void cblas_scopy(const int N, const float *X, const int incX, float *Y, const int incY)
void cblas_axpby(const INTT N, const T alpha, const T *X, const INTT incX, const T beta, T *Y, const INTT incY)
Definition: utlBlas.h:413
void cblas_syr(const CBLAS_ORDER order, const CBLAS_UPLO Uplo, const INTT N, const T alpha, const T *X, const INTT incX, T *A, const INTT lda)
void cblas_copy< float >(const INTT N, const float *X, const INTT incX, float *Y, const INTT incY)
Definition: utlBlas.h:265
void cblas_zscal(const int N, const void *alpha, void *X, const int incX)
void cblas_gemm< double >(const CBLAS_ORDER order, const CBLAS_TRANSPOSE TransA, const CBLAS_TRANSPOSE TransB, const INTT M, const INTT N, const INTT K, const double alpha, const double *A, const INTT lda, const double *B, const INTT ldb, const double beta, double *C, const INTT ldc)
Definition: utlBlas.h:363
double cblas_dznrm2(const int N, const void *X, const int incX)
float cblas_snrm2(const int N, const float *X, const int incX)
double cblas_dnrm2(const int N, const double *X, const int incX)
int cblas_idamax(const int N, const double *X, const int incX)
float cblas_nrm2< float >(const INTT N, const float *X, const INTT incX)
Definition: utlBlas.h:203
void cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, const float *A, const int lda, const float beta, float *C, const int ldc)
void cblas_gemv< float >(const CBLAS_ORDER order, const CBLAS_TRANSPOSE TransA, const INTT M, const INTT N, const float alpha, const float *A, const INTT lda, const float *X, const INTT incX, const float beta, float *Y, const INTT incY)
Definition: utlBlas.h:343
void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc)
void cblas_swap(const INTT N, T *X, const INTT incX, T *Y, const INTT incY)
T abs(const T x)
template version of the fabs function
void cblas_sswap(const int N, float *X, const int incX, float *Y, const int incY)
float cblas_sdot(const int N, const float *X, const int incX, const float *Y, const int incY)
double cblas_dzasum(const int N, const void *X, const int incX)
Definition: utl.h:90
void cblas_ger< double >(const CBLAS_ORDER order, const INTT M, const INTT N, const double alpha, const double *X, const INTT incX, const double *Y, const INTT incY, double *A, const INTT lda)
Definition: utlBlas.h:321
void cblas_dger(const enum CBLAS_ORDER Order, const int M, const int N, const double alpha, const double *X, const int incX, const double *Y, const int incY, double *A, const int lda)
float cblas_asum< float >(const INTT N, const float *X, const INTT incX)
Definition: utlBlas.h:224
void cblas_scal< float >(const INTT N, const float alpha, float *X, const INTT incX)
Definition: utlBlas.h:297
void cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const double *A, const int lda, const double beta, double *C, const int ldc)
void cblas_zcopy(const int N, const void *X, const int incX, void *Y, const int incY)
float cblas_scnrm2(const int N, const void *X, const int incX)
int cblas_icamax(const int N, const void *X, const int incX)
T cblas_asum(const INTT N, const T *X, const INTT incX)
int cblas_iamin(const INTT N, const T *X, const INTT incX)
interface to cblas_i*amin
Definition: utlBlas.h:170
void cblas_syrk(CBLAS_ORDER order, CBLAS_UPLO Uplo, CBLAS_TRANSPOSE Trans, INTT N, INTT K, T alpha, T *A, INTT lda, T beta, T *C, INTT ldc)
int cblas_izamax(const int N, const void *X, const int incX)
CBLAS_UPLO
Definition: utl_cblas.h:8
#define CBLAS_INDEX
Definition: utl_cblas.h:15
void cblas_dswap(const int N, double *X, const int incX, double *Y, const int incY)
float cblas_scasum(const int N, const void *X, const int incX)
void cblas_zdotc_sub(const int N, const void *X, const int incX, const void *Y, const int incY, void *dotc)
void cblas_sgemv(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const float alpha, const float *A, const int lda, const float *X, const int incX, const float beta, float *Y, const int incY)
void cblas_copy< double >(const INTT N, const double *X, const INTT incX, double *Y, const INTT incY)
Definition: utlBlas.h:260
double cblas_nrm2< double >(const INTT N, const double *X, const INTT incX)
Definition: utlBlas.h:198
int cblas_isamax(const int N, const float *X, const int incX)
T cblas_dot(const INTT N, const T *X, const INTT incX, const T *Y, const INTT incY)
double cblas_dasum(const int N, const double *X, const int incX)
void cblas_gemm(const CBLAS_ORDER order, const CBLAS_TRANSPOSE TransA, const CBLAS_TRANSPOSE TransB, const INTT M, const INTT N, const INTT K, const T alpha, const T *A, const INTT lda, const T *B, const INTT ldb, const T beta, T *C, const INTT ldc)
void cblas_syr< double >(const CBLAS_ORDER order, const CBLAS_UPLO Uplo, const INTT N, const double alpha, const double *X, const INTT incX, double *A, const INTT lda)
Definition: utlBlas.h:332
void cblas_swap< float >(const INTT N, float *X, const INTT incX, float *Y, const INTT incY)
Definition: utlBlas.h:286
void cblas_syr< float >(const CBLAS_ORDER order, const CBLAS_UPLO Uplo, const INTT N, const float alpha, const float *X, const INTT incX, float *A, const INTT lda)
Definition: utlBlas.h:327
void cblas_gemv(const CBLAS_ORDER order, const CBLAS_TRANSPOSE TransA, const INTT M, const INTT N, const T alpha, const T *A, const INTT lda, const T *X, const INTT incX, const T beta, T *Y, const INTT incY)
float cblas_dot< float >(const INTT N, const float *X, const INTT incX, const float *Y, const INTT incY)
Definition: utlBlas.h:246
void cblas_dcopy(const int N, const double *X, const int incX, double *Y, const int incY)
T cblas_nrm2(const INTT N, const T *X, const INTT incX)
void cblas_dgemv(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const double alpha, const double *A, const int lda, const double *X, const int incX, const double beta, double *Y, const int incY)
T max_element(const std::vector< T > &v)
Definition: utlCore.h:205
void cblas_sger(const enum CBLAS_ORDER Order, const int M, const int N, const float alpha, const float *X, const int incX, const float *Y, const int incY, float *A, const int lda)
void cblas_sscal(const int N, const float alpha, float *X, const int incX)
int cblas_iamax< double >(const INTT N, const double *X, const INTT incX)
Definition: utlBlas.h:127
void cblas_ccopy(const int N, const void *X, const int incX, void *Y, const int incY)
void cblas_gemv< double >(const CBLAS_ORDER order, const CBLAS_TRANSPOSE TransA, const INTT M, const INTT N, const double alpha, const double *A, const INTT lda, const double *X, const INTT incX, const double beta, double *Y, const INTT incY)
Definition: utlBlas.h:338
void cblas_swap< double >(const INTT N, double *X, const INTT incX, double *Y, const INTT incY)
Definition: utlBlas.h:281
void cblas_dsyr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, const double *X, const int incX, double *A, const int lda)