Trainer(const int k, const int batchsize = 256,
      const int NUM_THREADS=-1);
// ...
Trainer(const Matrix<T>& D, const int batchsize = 256,
      const int NUM_THREADS=-1);
// ...
Trainer(const Matrix<T>& A, const Matrix<T>& B, const Matrix<T>& D,
      const int itercount, const int batchsize,
      const int NUM_THREADS);
// ...
      mode, const bool whiten = false, const T* param_C = NULL,
      const int p = 1, const bool pattern = false);
// ...
void cleanDict(const Data<T>& X, Matrix<T>& G, const bool posD = false,
      const constraint_type_D modeD = L2, const T gamma1 = 0,
      const T gamma2 = 0, const T maxCorrel = 0.999999);
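
For orientation, a minimal usage sketch of this class follows. The ParamDictLearn<T> fields shown (lambda, iter) and passing a Matrix<T> where the Data<T> interface is expected are assumptions about the surrounding toolbox, not guarantees of this listing.

// Hypothetical usage sketch (assumes the toolbox headers defining
// Matrix<T>, Trainer<T> and ParamDictLearn<T> are on the include path).
#include "dicts.h"

void example(const Matrix<float>& X) {   // X: n x M matrix of training signals
   Trainer<float> trainer(100, 256);     // 100 atoms, mini-batches of 256
   ParamDictLearn<float> param;
   param.lambda = 0.15f;                 // l1 penalty of the coding step (assumed field)
   param.iter = 1000;                    // iteration budget (assumed field)
   trainer.train(X, param);              // online dictionary learning
   Matrix<float> D;
   trainer.getD(D);                      // retrieve the learned dictionary
}
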
// ...
      _itercount(0), _batchsize(256) {
// ...
template <typename T>
Trainer<T>::Trainer(const int k, const int batchsize,
      const int NUM_THREADS) : _k(k),
// ...
template <typename T>
Trainer<T>::Trainer(const Matrix<T>& D, const int batchsize,
      const int NUM_THREADS) : _k(D.n()),
// ...
   _A.resize(D.n(),D.n());
   _B.resize(D.m(),D.n());
// ...
template <typename T>
Trainer<T>::Trainer(const Matrix<T>& A, const Matrix<T>& B,
      const Matrix<T>& D, const int itercount, const int batchsize,
// ...
template <typename T>
void Trainer<T>::cleanDict(const Data<T>& X, Matrix<T>& G, const bool posD,
      const constraint_type_D modeD, const T gamma1, const T gamma2,
      const T maxCorrel) {
   int sparseD = modeD == L1L2 ? 2 : 6;   // projection mode for sparseProject
   const int k = _D.n();
   const int n = _D.m();
// ...
   T* const pr_G = G.rawX();
// ...
   for (int i = 0; i < k; ++i) {
// ...
      for (int j = i; j < k; ++j) {
         if ((j > i && abs(pr_G[i*k+j])/sqrt(pr_G[i*k+i]*pr_G[j*k+j]) > maxCorrel) ||
               (j == i && abs(pr_G[i*k+j]) < 1e-4)) {
            // atom j is too correlated with atom i (or has near-zero norm):
            // replace it with a randomly drawn training sample
            const int ind = random() % M;
// ...
            aleat.sparseProject(d,T(1.0),sparseD,gamma1,gamma2,T(2.0),posD);
// ...
            for (int l = 0; l < _D.n(); ++l)
               pr_G[l*k+j] = pr_G[j*k+l];
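
The test above reads everything off the Gram matrix: with G = D'*D, the quantity |G[i*k+j]|/sqrt(G[i*k+i]*G[j*k+j]) is the cosine between atoms i and j, and a vanishing diagonal entry flags a dead atom. A standalone sketch of the same criterion (plain C++, column-major storage as in the listing; names and thresholds are illustrative):

#include <cmath>
#include <vector>

// Flags atom j as "useless" if it is nearly collinear with an earlier atom
// or has (almost) zero norm -- the same test cleanDict applies to pr_G.
std::vector<bool> flagUselessAtoms(const std::vector<double>& D, int n, int k,
      double maxCorrel = 0.999999) {
   std::vector<double> G(k*k);                  // G = D'*D, column-major
   for (int i = 0; i < k; ++i)
      for (int j = 0; j < k; ++j) {
         double s = 0;
         for (int r = 0; r < n; ++r) s += D[i*n+r]*D[j*n+r];
         G[i*k+j] = s;
      }
   std::vector<bool> useless(k, false);
   for (int i = 0; i < k; ++i)
      for (int j = i; j < k; ++j)
         if ((j > i && std::abs(G[i*k+j])/std::sqrt(G[i*k+i]*G[j*k+j]) > maxCorrel) ||
               (j == i && std::abs(G[i*k+j]) < 1e-4))
            useless[j] = true;
   return useless;
}
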
template <typename T>
// ...
   const int k = _D.n();
   const int n = _D.m();
   T* const pr_G = G.rawX();
   for (int i = 0; i < k; ++i) {
      pr_G[i*k+i] += 1e-10;   // tiny ridge keeps the Gram matrix safely positive definite
template <typename T>
void Trainer<T>::train(const Data<T>& X, const ParamDictLearn<T>& param) {
// ...
   int sparseD = param.modeD == L1L2 ? 2 : param.modeD == L1L2MU ? 7 : 6;
// ...
   cout << "num param iterD: " << param.iter_updateD << endl;
// ...
   cout << "Batch Mode" << endl;
// ...
   cout << "Stochastic Gradient. rho : " << rho << ", t0 : " << t0 << endl;
// ...
   cout << "Online Dictionary Learning with no parameter " << endl;
// ...
   cout << "Online Dictionary Learning with parameters: " << t0 << " rho: " << rho << endl;
// ...
   cout << "Online Dictionary Learning with exponential decay t0: " << t0 << " rho: " << rho << endl;
// ...
   cout << "Positivity constraints on D activated" << endl;
// ...
   cout << "Positivity constraints on alpha activated" << endl;
   if (param.modeD != L2) cout << "Sparse dictionaries, mode: " << param.modeD
         << ", gamma1: " << param.gamma1 << ", gamma2: " << param.gamma2 << endl;
   cout << "mode Alpha " << param.mode << endl;
   if (param.clean) cout << "Cleaning activated " << endl;
// ...
   cout << "log activated " << endl;
// ...
   cout << "L2 solver is used" << endl;
// ...
   cout << "Retraining from iteration " << _itercount << endl;
// ...
   cout << "batch size: " << batchsize << endl;
   cout << "L: " << L << endl;
   cout << "lambda: " << param.lambda << endl;
   cout << "mode: " << param.mode << endl;
// ...
   if (_D.m() != n || _D.n() != K)
// ...
   for (int i = 0; i < K; ++i) {
      const int ind = random() % M;
// ...
   cout << "*****Online Dictionary Learning*****" << endl;
// ...
   for (int i = 0; i < K; ++i) {
// ...
   if (param.posD) _D.thrsPos();
// ...
   T scalt0 = abs<T>(t0);
// ...
   int JJ = param.iter < 0 ? 100000000 : param.iter;
// ...
   int last_written = -40;
// ...
   for (i = 0; i < JJ; ++i) {
      if (param.verbose && i % 100 == 0) {
         cout << "Iteration: " << i << endl;
// ...
      if (param.iter < 0 &&
            time.getElapsed() > T(-param.iter)) break;
// ...
      int seconds = static_cast<int>(floor(log(time.getElapsed())*5));
      if (seconds > last_written) {
// ...
         sprintf(buffer_string,"%s_%d.log",param.logName,
// ...
         fprintf(stderr,"\r%d",i);
#pragma omp parallel for private(j)
      for (j = 0; j < batchsize; ++j) {
// ...
         int numT = omp_get_thread_num();
// ...
         const int index = perm[(j+i*batchsize) % M];
// ...
         _D.multTrans(Xj,DtRj);   // DtRj = D'*x_j
// ...
         spcoeffj.refVal(coeffs_sparse);
// ...
         for (int k = 0; k < K; ++k) {
// ...
            coeffs_sparse[k] = u[k];
// ...
         coreLARS2(DtRj,G,Gs,Ga,invGs,u,coeffs_sparse,ind,work,normX,
               param.mode,param.lambda,param.posAlpha);
// ...
         coreORMPB(DtRj,G,ind,coeffs_sparse,normX,L,T(0.0),T(0.0));
// ...
         for (int k = 0; k < L; ++k)
// ...
         spcoeffj.setL(count2);
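
Both solvers invoked above work from D'*x_j and the Gram matrix G = D'*D rather than from D itself, so once G is precomputed the per-sample cost no longer depends on the signal dimension n. A standalone sketch of the same Gram trick, using plain coordinate descent for the lasso in place of the toolbox's LARS/OMP routines (illustrative, not the code called here):

#include <vector>

// Coordinate-descent lasso on 0.5*||x - D*a||^2 + lambda*||a||_1 that touches
// only DtR and the Gram matrix G = D'*D (k x k, column-major, symmetric).
// The caller initializes DtR = D'*x - G*a (simply D'*x when a starts at zero).
void lassoCD(std::vector<double>& DtR, const std::vector<double>& G,
      std::vector<double>& a, int k, double lambda, int iters = 100) {
   for (int it = 0; it < iters; ++it)
      for (int j = 0; j < k; ++j) {
         const double Gjj = G[j*k+j];
         if (Gjj <= 0) continue;
         const double u = a[j] + DtR[j]/Gjj;     // unconstrained coordinate minimizer
         const double t = lambda/Gjj;
         const double aj = u > t ? u - t : (u < -t ? u + t : 0);   // soft-threshold
         const double delta = aj - a[j];
         if (delta != 0) {
            for (int l = 0; l < k; ++l)          // keep DtR = D'*x - G*a in sync
               DtR[l] -= delta*G[j*k+l];
            a[j] = aj;
         }
      }
}
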
// ...
         for (int k = 0; k < K; ++k) {
            if (_A[k*K+k] > 1e-6) {
// ...
               _D.mult(ai,newd,T(-1.0));   // newd = -D*a_k
// ...
               newd.scal(T(1.0)/_A[k*K+k]);
// ...
            } else if (param.clean) {
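
The branch above is the block coordinate descent dictionary update from Mairal et al.'s online learning algorithm: with A accumulating alpha*alpha' and B accumulating x*alpha', each used atom is moved along (b_k - D*a_k)/A[k*K+k] and projected back onto the constraint set, while atoms with A[k*K+k] <= 1e-6 are left to the cleaning step. A standalone sketch of one such pass with a unit l2-ball projection (illustrative; the listing's projection depends on modeD and posD):

#include <cmath>
#include <vector>

// One block-coordinate-descent pass over the atoms of D (n x K, column-major),
// given A = sum(alpha*alpha') (K x K) and B = sum(x*alpha') (n x K).
void updateDict(std::vector<double>& D, const std::vector<double>& A,
      const std::vector<double>& B, int n, int K) {
   std::vector<double> newd(n);
   for (int k = 0; k < K; ++k) {
      if (A[k*K+k] <= 1e-6) continue;           // unused atom: left to cleaning
      for (int r = 0; r < n; ++r) newd[r] = B[k*n+r];
      for (int j = 0; j < K; ++j)               // newd = b_k - D*a_k
         for (int r = 0; r < n; ++r) newd[r] -= D[j*n+r]*A[k*K+j];
      double nrm = 0;
      for (int r = 0; r < n; ++r) {
         D[k*n+r] += newd[r]/A[k*K+k];          // d_k <- d_k + (b_k - D*a_k)/A_kk
         nrm += D[k*n+r]*D[k*n+r];
      }
      nrm = std::sqrt(nrm);
      if (nrm > 1.0)                             // project onto the unit l2 ball
         for (int r = 0; r < n; ++r) D[k*n+r] /= nrm;
   }
}
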
// ...
      _D.mult(_A,_B,false,false,T(-1.0),T(1.0));   // _B <- _B - D*_A (negative gradient)
      T step_grad = rho/T(t0+batchsize*(i+1));     // decaying step size
      _D.add(_B,step_grad);                        // D <- D + step_grad*(B - D*A)
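
In this branch the dictionary is instead refined by a single gradient step: after the mult call, _B holds B - D*A, the negative gradient of the surrogate (1/2)Tr(D'*D*A) - Tr(D'*B), and the step size decays like rho/(t0 + batchsize*(i+1)). The same arithmetic in standalone form (illustrative names; the listing additionally re-projects the columns afterwards):

#include <cstddef>
#include <vector>

// One stochastic-gradient step on D (n x K, column-major):
// D <- D + eta*(B - D*A), with eta = rho/(t0 + batchsize*(i+1)).
void sgdStep(std::vector<double>& D, const std::vector<double>& A,
      const std::vector<double>& B, int n, int K,
      double rho, double t0, int batchsize, int i) {
   const double eta = rho/(t0 + batchsize*(double)(i+1));
   std::vector<double> grad((size_t)n*K);        // full gradient first, then one step
   for (int k = 0; k < K; ++k)
      for (int r = 0; r < n; ++r) {
         double g = B[k*n+r];                    // (B - D*A)[r,k]
         for (int j = 0; j < K; ++j) g -= D[j*n+r]*A[k*K+j];
         grad[(size_t)k*n+r] = g;
      }
   for (size_t t = 0; t < grad.size(); ++t) D[t] += eta*grad[t];
}
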
// ...
      for (j = 0; j < K; ++j) {
// ...
      for (j = 0; j < K; ++j) {
// ...
      int epoch = (((i+1) % M)*batchsize) / M;
      if ((even && ((epoch % 2) == 1)) || (!even && ((epoch % 2) == 0))) {
// ...
      int num_elem = MIN(2*M, ii < batchsize ? ii*batchsize :
            batchsize*batchsize+ii-batchsize);
      T scal2 = T(T(1.0)/batchsize);
// ...
      scal = MAX(0.95,pow(T(totaliter)/T(totaliter+1),-rho));
// ...
            i*batchsize < 10000)) {
         Aeven.add(AT[j],scal2);
         Beven.add(BT[j],scal2);
// ...
         for (int k = 0; k < K; ++k) {
            if (_A[k*K+k] > 1e-6) {
// ...
               _D.mult(ai,newd,T(-1.0));
// ...
               newd.scal(T(1.0)/_A[k*K+k]);
// ...
            } else if (param.clean &&
template <typename T>
void writeLog(const Matrix<T>& D, const T time, int iter, char* name) {
   std::ofstream f;
   f.flags(std::ios_base::scientific);
   f.open(name, ofstream::trunc);
   f << time << " " << iter << std::endl;
   for (int i = 0; i < D.n(); ++i) {
      for (int j = 0; j < D.m(); ++j) {
         f << D[i*D.m()+j] << " ";
template <typename T>
void Trainer<T>::trainOffline(const Data<T>& X, const ParamDictLearn<T>& param) {
// ...
   int sparseD = param.modeD == L1L2 ? 2 : 6;
// ...
   cout << "*****Offline Dictionary Learning*****" << endl;
   fprintf(stderr,"num param iterD: %d\n",param.iter_updateD);
// ...
   cout << "lambda: " << param.lambda << endl;
   cout << "X: " << n << " x " << M << endl;
   cout << "D: " << n << " x " << K << endl;
// ...
   for (int i = 0; i < K; ++i) {
      const int ind = random() % M;
// ...
   for (int i = 0; i < K; ++i) {
// ...
   if (param.posD) _D.thrsPos();
// ...
   for (int i = 0; i < NUM_THREADS; ++i) {
// ...
   int JJ = J < 0 ? 100000000 : J;
// ...
   for (int i = 0; i < JJ; ++i) {
      if (J < 0 && time.getElapsed() > T(-J))
         break;
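
As in train(), a negative J is interpreted as a time budget of -J seconds: the loop bound becomes effectively infinite and the elapsed-time check breaks out early. The pattern in isolation, with std::chrono standing in for the toolbox's Timer (illustrative):

#include <chrono>

// Run either J iterations (J >= 0) or for -J seconds (J < 0).
void runBudget(int J) {
   const auto start = std::chrono::steady_clock::now();
   const int JJ = J < 0 ? 100000000 : J;
   for (int i = 0; i < JJ; ++i) {
      const double elapsed = std::chrono::duration<double>(
            std::chrono::steady_clock::now() - start).count();
      if (J < 0 && elapsed > (double)(-J)) break;
      // ... one training iteration ...
   }
}
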
#pragma omp parallel for private(j)
      for (j = 0; j < batch_size; ++j) {
// ...
         int numT = omp_get_thread_num();
// ...
         const int ind = perm[(j+i*batch_size) % M];
// ...
         coeffs.refCol(ind,coeffj);               // reference column ind of the coefficients
         oldcoeffj.copy(coeffj);
         _D.multTrans(Xj,DtRj);                   // DtRj = D'*x_j
         coeffj.toSparse(spcoeffj);
         G.mult(spcoeffj,DtRj,T(-1.0),T(1.0));    // DtRj = D'*x_j - G*alpha_j
// ...
         T normX = Xj.nrm2sq();
// ...
         coeffj.toSparse(spcoeffj);
// ...
         oldcoeffj.scal(weights[ind]);
         oldcoeffj.sub(coeffj);
// ...
      int num_elem = MIN(2*M, ii < batchsize ? ii*batchsize :
            batchsize*batchsize+ii-batchsize);
// ...
      scal = MAX(0.95,pow(T(totaliter)/T(totaliter+1),-param.rho));
// ...
            batchsize)/T(num_elem+1);
// ...
      for (j = 0; j < NUM_THREADS; ++j) {
// ...
         for (int k = 0; k < K; ++k) {
            if (A[k*K+k] > 1e-6) {
// ...
               _D.mult(ai,newd,T(-1.0));
// ...
               newd.scal(T(1.0)/A[k*K+k]);
// ...
            } else if (param.clean) {
// ...
      for (int iii = 0; iii < l1norms.n(); iii += 1)
         sumL1 += l1norms[iii];
      std::cout << "i = " << i << ", coeffs sumL1 = " << sumL1
            << std::endl << std::flush;
// ...
   delete[](coeffsoldT);