template <typename T>
void omp(const Matrix<T>& X, const Matrix<T>& D, SpMatrix<T>& spalpha,
    const int* L, const T* eps, const T* lambda, const bool vecL = false,
    const bool vecEps = false, const bool Lambda = false, const int numThreads = -1,
    Matrix<T>* path = NULL);
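// Usage sketch (illustrative, not part of the original header): decompose each
// column of X over the dictionary D with OMP, stopping at L atoms or when the
// squared residual falls below eps. example_omp_usage is a hypothetical helper;
// only the prototype above and the Matrix/SpMatrix containers are assumed.
template <typename T>
static void example_omp_usage(const Matrix<T>& X, const Matrix<T>& D) {
   SpMatrix<T> alpha;        // output sparse coefficients, one column per signal
   const int L = 10;         // at most 10 atoms per column of X
   const T eps = T(1e-6);    // or stop when the squared residual drops below eps
   const T lambda = T(0);    // no additional penalty
   omp(X, D, alpha, &L, &eps, &lambda);
}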
template <typename T>
void omp_mask(const Matrix<T>& X, const Matrix<T>& D, SpMatrix<T>& spalpha,
    const Matrix<bool>& mask, const int* L, const T* eps, const T* lambda,
    const bool vecL = false, const bool vecEps = false, const bool Lambda = false,
    const int numThreads = -1, Matrix<T>* path = NULL);
template <typename T>
void coreORMP(Vector<T>& scores, Vector<T>& norm, Vector<T>& tmp,
    Matrix<T>& Un, Matrix<T>& Undn, Matrix<T>& Unds, Matrix<T>& Gs,
    Vector<T>& Rdn, const AbstractMatrix<T>& G, Vector<int>& ind,
    Vector<T>& RUn, T& normX, const T* eps, const int* L, const T* lambda,
    T* path = NULL);
template <typename T>
void coreORMPB(Vector<T>& RtD, const AbstractMatrix<T>& G, Vector<int>& ind,
    Vector<T>& coeffs, T& normX, const int L, const T eps, const T lambda = 0);
template <typename T>
void lasso(const Matrix<T>& X, const Matrix<T>& D, SpMatrix<T>& spalpha,
    int L, const T constraint, const T lambda2 = 0, constraint_type mode = PENALTY,
    const bool pos = false, const bool ols = false, const int numThreads = -1,
    Matrix<T>* path = NULL, const int length_path = -1);

template <typename T>
void lasso(/* ... overload taking a precomputed Gram matrix G and correlations DtX ... */
    const bool pos = false, const bool ols = false, const int numThreads = -1,
    Matrix<T>* path = NULL, const int length_path = -1);
template <typename T>
    const int numThreads = -1, Matrix<T>* path = NULL, const int length_path = -1);

template <typename T>
    const int numThreads = -1, Matrix<T>* path = NULL, const int length_path = -1);
template <typename T>
    const int numThreads = -1);

template <typename T>
    const int numThreads = -1);
template <typename T>
void coreLARS(Vector<T>& Rdn, Vector<T>& Xdn, Vector<T>& A, Vector<T>& u,
    Vector<T>& sig, Vector<T>& av, Vector<T>& RUn, Matrix<T>& Un,
    Matrix<T>& Unds, Matrix<T>& Gs, Matrix<T>& Gsa, Matrix<T>& workT,
    Matrix<T>& R, const AbstractMatrix<T>& G, T& normX, Vector<int>& ind,
    Vector<T>& coeffs, const T constraint, const bool ols = false,
    const bool pos = false, constraint_type mode = L1COEFFS,
    T* path = NULL, int length_path = -1);
template <typename T>
void coreLARS2(Vector<T>& DtR, const AbstractMatrix<T>& G, Matrix<T>& Gs,
    Matrix<T>& Ga, Matrix<T>& invGs, Vector<T>& u, Vector<T>& coeffs,
    Vector<int>& ind, Matrix<T>& work, T& normX, const constraint_type mode,
    const T constraint, const bool pos = false,
    T* pr_path = NULL, int length_path = -1);

template <typename T>
void coreLARS2W(Vector<T>& DtR, AbstractMatrix<T>& G, Matrix<T>& Gs,
    Matrix<T>& Ga, Matrix<T>& invGs, Vector<T>& u, Vector<T>& coeffs,
    const Vector<T>& weights, Vector<int>& ind, Matrix<T>& work, T& normX,
    const constraint_type mode, const T constraint, const bool pos = false);
template <typename T>
void downDateLasso(int& j, int& minBasis, T& normX, const bool ols,
    const bool pos, Vector<T>& Rdn, int* ind, T* coeffs, Vector<T>& sig,
    Vector<T>& av, Vector<T>& Xdn, Vector<T>& RUn, Matrix<T>& Unm,
    Matrix<T>& Gsm, Matrix<T>& Gsam, Matrix<T>& Undsm, Matrix<T>& Rm);
template <typename T>
    const int itermax = 500, const T tol = 0.5, const int numThreads = -1);

template <typename T>
    const int itermax = 500, const T tol = 0.5, const int numThreads = -1);

template <typename T>
    const T thrs, const int itermax = 500,

template <typename T>
    const T thrs, const int itermax = 500,

template <typename T>
    const int itermax = 500, const T tol = 0.5, const int numThreads = -1);

template <typename T>
    const int itermax = 500,

template <typename T>
    const int itermax = 500,

template <typename T>

template <typename T>
template <typename T>
void somp(const Matrix<T>* X, const Matrix<T>& D, SpMatrix<T>* spalpha,
    const int Ngroups, const int L, const T* pr_eps, const bool adapt = false,
    const int numThreads = -1);

template <typename T>
void somp(const Matrix<T>* X, const Matrix<T>& D, SpMatrix<T>* spalpha,
    const int Ngroups, const int L, const T eps, const int numThreads = -1);

template <typename T>
template <typename T>
void omp(const Matrix<T>& X, const Matrix<T>& D, SpMatrix<T>& spalpha,
      const int* pL, const T* peps, const T* pLambda,
      const bool vecL, const bool vecEps,
      const bool vecLambda, const int numThreads, Matrix<T>* path) {
   int NUM_THREADS = init_omp(numThreads);
   for (int i = 0; i < NUM_THREADS; ++i) {
#pragma omp parallel for private(i)
   for (i = 0; i < M; ++i) {
      int numT = omp_get_thread_num();
      coreORMP(scoresT[numT], normT[numT], tmpT[numT], UnT[numT], UndnT[numT], UndsT[numT],
            GsT[numT], Rdn, G, ind, RUn, normX, vecEps ? peps + i : peps,
            vecL ? pL + i : pL, vecLambda ? pLambda + i : pLambda,
            path && i == 0 ? path->rawX() : NULL);
template <typename T>
void omp_mask(const Matrix<T>& X, const Matrix<T>& D, SpMatrix<T>& spalpha,
      const Matrix<bool>& mask, const int* pL, const T* peps, const T* pLambda,
      const bool vecL, const bool vecEps, const bool vecLambda, const int numThreads,
      Matrix<T>* path) {
   int NUM_THREADS = init_omp(numThreads);
   for (int i = 0; i < NUM_THREADS; ++i) {
#pragma omp parallel for private(i)
   for (i = 0; i < M; ++i) {
      int numT = omp_get_thread_num();
      coreORMP(scoresT[numT], normT[numT], tmpT[numT], UnT[numT], UndnT[numT], UndsT[numT],
            GsT[numT], Rdn, G, ind, RUn, normX, vecEps ? peps + i : peps,
            vecL ? pL + i : pL, vecLambda ? pLambda + i : pLambda,
            path && i == 0 ? path->rawX() : NULL);
      T normX = XmaskT[numT].nrm2sq();
      DmaskT[numT].multTrans(XmaskT[numT], Rdn);
      T eps_mask = (vecEps ? *(peps + i) : *peps)*XmaskT[numT].n()/Xi.n();
      coreORMP(scoresT[numT], normT[numT], tmpT[numT],
            UnT[numT], UndnT[numT], UndsT[numT],
            GsT[numT], Rdn, GT[numT], ind, RUn,
            normX, &eps_mask, vecL ? pL + i : pL,
            vecLambda ? pLambda + i : pLambda,
            path && i == 0 ? path->rawX() : NULL);
      DmaskT[numT].setm(D.m());
      DmaskT[numT].setn(D.n());
      XmaskT[numT].setn(X.m());
template <typename T>
void coreORMPB(Vector<T>& RtD, const AbstractMatrix<T>& G, Vector<int>& ind,
      Vector<T>& coeffs, T& normX, const int L, const T eps, const T lambda) {
   coreORMP(scores, norm, tmp, Un, Undn, Unds, Gs, RtD, G, ind, coeffs, normX, &eps, &L, &lambda);
template <typename T>
void coreORMP(Vector<T>& scores, Vector<T>& norm, Vector<T>& tmp, Matrix<T>& Un,
      Matrix<T>& Undn, Matrix<T>& Unds, Matrix<T>& Gs, Vector<T>& Rdn,
      const AbstractMatrix<T>& G, Vector<int>& ind, Vector<T>& RUn,
      T& normX, const T* peps, const int* pL, const T* plambda, T* path) {
   const T eps = abs<T>(*peps);
   const int L = MIN(*pL, Gs.n());
   const T lambda = *plambda;
   if ((normX <= eps) || L == 0) return;
   const int K = scores.n();
   T* const prUn = Un.rawX();
   T* const prUnds = Unds.rawX();
   T* const prUndn = Undn.rawX();
   T* const prGs = Gs.rawX();
   T* const prRUn = RUn.rawX();
   memset(path, 0, K*L*sizeof(T));
   for (j = 0; j < L; ++j) {
      const int currentInd = scores.fmax();
      if (norm[currentInd] < 1e-8) {
      const T invNorm = T(1.0)/sqrt(norm[currentInd]);
      const T RU = Rdn[currentInd]*invNorm;
      const T delta = RU*RU;
      if (delta < 2*lambda) {
      cblas_copy<T>(j, prUndn + currentInd, K, prUn + j*L, 1);
      cblas_scal<T>(j+1, -invNorm, prUn + j*L, 1);
      if (j == L-1 || (normX <= eps)) {
         T* last_path = path + (L-1)*K;
         cblas_copy<T>(j+1, prRUn, 1, last_path, 1);
               j+1, prUn, L, last_path, 1);
         for (int k = 0; k <= j; ++k) {
            path[j*K + ind[k]] = last_path[k];
            T(0.0), prUndn + j*K, 1);
      Rdn.add(Undnj, -RUn[j]);
      for (int k = 0; k <= j; ++k) scores[ind[k]] = T();
      memset(path + (L-1)*K, 0, L*sizeof(T));
      for (int k = 0; k < j; ++k) {
         path[(j-1)*K + ind[k]] = prRUn[k];
template <typename T>
      const bool pos, const bool ols, const int numThreads,
      Matrix<T>* path, const int length_path) {
   lasso(X, G, DtX, spalpha, L, lambda, mode, pos, ols, numThreads, path, length_path);
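// Usage sketch (illustrative, not part of the original header): solve the lasso
// in PENALTY mode, i.e. min_alpha 0.5*||x - D*alpha||_2^2 + lambda*||alpha||_1,
// for every column x of X. example_lasso_usage is a hypothetical helper; the
// public lasso() prototype declared above is assumed.
template <typename T>
static void example_lasso_usage(const Matrix<T>& X, const Matrix<T>& D) {
   SpMatrix<T> alpha;           // sparse solution, one column per signal
   const int L = D.n();         // no cap on the number of active atoms
   const T lambda = T(0.1);     // regularization parameter (the constraint argument)
   lasso(X, D, alpha, L, lambda, T(0), PENALTY);
}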
template <typename T>
      const bool pos, const bool ols, const int numThreads,
      Matrix<T>* path, const int length_path) {
   int NUM_THREADS = init_omp(numThreads);
   for (int i = 0; i < NUM_THREADS; ++i) {
      if (ols) XdnT[i].resize(K);
      if (ols) RUnT[i].resize(L);
#pragma omp parallel for private(i)
   for (i = 0; i < M; ++i) {
      int numT = omp_get_thread_num();
      coreLARS(Rdn, XdnT[numT], AT[numT], uT[numT], sigT[numT], avT[numT],
            RUnT[numT], UnT[numT], UndsT[numT], GsT[numT], GsaT[numT],
            workT[numT], RT[numT], G, normX, ind, coeffs, lambda, ols, pos,
            mode, path && i == 0 ? path->rawX() : NULL, length_path);
template <typename T>
      T* path, int length_path) {
   if (mode == L2ERROR && normX < constraint) return;
   const int LL = Gsm.n();
   const int K = Gsm.m();
   const int L = MIN(LL, K);
   if (length_path <= 1) length_path = 4*L;
   T* const Rdn = Rdnv.rawX();
   T* const Xdn = Xdnv.rawX();
   T* const A = Av.rawX();
   T* const u = uv.rawX();
   T* const sig = sigv.rawX();
   T* const av = avv.rawX();
   T* const RUn = RUnv.rawX();
   T* const Un = Unm.rawX();
   T* const Unds = Undsm.rawX();
   T* const Gs = Gsm.rawX();
   T* const Gsa = Gsam.rawX();
   T* const work = workm.rawX();
   T* const R = Rm.rawX();
   int* ind = indv.rawX();
   T* coeffs = coeffsv.rawX();
   if (ols) Xdnv.copy(Rdnv);
   int currentInd = pos ? Rdnv.max() : Rdnv.fmax();
   int* const ind_orig = ind;
   T* const coeffs_orig = coeffs;
   for (j = 0; j < L; ++j) {
      Cmax = Rdn[currentInd];
      Cmax = abs<T>(Rdn[currentInd]);
      sig[j] = SIGN(Rdn[currentInd]);
      for (int k = 0; k <= j; ++k) Un[j*L + k] = 0.0;
      for (int k = 0; k < j; ++k) Gs[K*j + ind[k]] *= sig[k];
      Rdn[currentInd] = -Rdn[currentInd];
      if (ols) Xdn[currentInd] = -Xdn[currentInd];
      cblas_scal<T>(K, sig[j], Gs + K*j, 1);
      cblas_scal<T>(j+1, sig[j], Gs + currentInd, K);
      cblas_copy<T>(j+1, Gs + currentInd, K, Gsa + j*L, 1);
      for (int k = 0; k < j; ++k) Gsa[k*L + j] = Gsa[j*L + k];
      cblas_copy<T>(j, Gsa + j*L, 1, Unds + j, L);
      for (int k = 0; k < j; ++k) norm2 -= Unds[k*L + j]*Unds[k*L + j];
      cblas_copy<T>(j, Unds + j, L, Un + j*L, 1);
      T invNorm = 1.0/sqrt(norm2);
      cblas_scal<T>(j+1, -invNorm, Un + j*L, 1);
      Unds[j*L + j] = cblas_dot<T>(j+1, Un + j*L, 1, Gsa + j*L, 1);
      for (int k = 0; k <= j; ++k) u[k] = T(1.0);
      T a = T(1.0)/cblas_nrm2<T>(j+1, u, 1);
      cblas_scal<T>(j+1, a, u, 1);
      cblas_gemv<T>(CblasColMajor, CblasNoTrans, K, j+1, T(1.0), Gs, K, u, 1, T(0.0), A, 1);
      for (int k = 0; k <= j; ++k) potentNorm += Rdn[ind[k]]*u[k];
      for (int k = 0; k < K; ++k) {
         work[k] = diff <= 0 ? INFINITY : (Cmax - Rdn[k])/diff;
      for (int k = 0; k <= j; ++k) {
      for (int k = 0; k < K; ++k)
      currentInd = cblas_iamin<T>(K, work, 1);
      memset(work, 0, 2*K*sizeof(T));
      for (int k = 0; k <= j; ++k) {
         const int index = 2*ind[k];
      for (int k = 0; k < K; ++k) {
         const T diff1 = a - A[k];
         work[index] = diff1 <= 0 ? INFINITY : (Cmax - Rdn[k])/diff1;
         const T diff2 = a + A[k];
         work[index+1] = diff2 <= 0 ? INFINITY : (Cmax + Rdn[k])/diff2;
      currentInd = cblas_iamin<T>(2*K, work, 1);
      T gamma = work[currentInd];
      gamma = MIN(gamma, (Cmax - constraint)/a);
      vDiv<T>(j+1, coeffs, u, work);
      cblas_scal<T>(j+1, -T(1.0), work, 1);
      for (int k = 0; k <= j; ++k)
         if (coeffs[k] == 0 || work[k] <= 0) work[k] = INFINITY;
      minBasis = cblas_iamin<T>(j+1, work, 1);
      gammaMin = work[minBasis];
      if (gammaMin < gamma) gamma = gammaMin;
      for (int k = 0; k <= j; ++k) Tu += u[k];
      gamma = MIN(gamma, (constraint - thrs)/Tu);
      const T t = gamma*gamma - 2*gamma*potentNorm;
      if (t > 0 || std::isnan(t) || std::isinf(t)) {
      for (int k = 0; k <= j; ++k) RUn[j] += Xdn[ind[k]]*
      normX -= RUn[j]*RUn[j];
      cblas_axpy<T>(j+1, gamma, u, 1, coeffs, 1);
      for (int k = 0; k < j+1; ++k)
         if (coeffs[k] < 0) coeffs[k] = 0;
      cblas_axpy<T>(K, -gamma, A, 1, Rdn, 1);
      if (!pos) currentInd /= 2;
      for (int k = 0; k <= j; ++k)
         path[iter*K + ind[k]] = coeffs[k]*sig[k];
      if (gamma == gammaMin) {
         downDateLasso<T>(j, minBasis, normX, ols, pos, Rdnv, ind, coeffs, sigv,
               avv, Xdnv, RUnv, Unm, Gsm, Gsam, Undsm, Rm);
         Cmax = abs<T>(Rdn[ind[0]]);
      thrs = abs<T>(Rdn[ind[0]]);
      (mode == PENALTY && (thrs - constraint < 1e-15)) ||
            (mode == L1COEFFS && (thrs - constraint > -1e-15)) ||
            (newAtom && mode == L2ERROR && (normX - constraint < 1e-15)) ||
            (iter >= length_path)) {
   cblas_copy<T>(j+1, RUn, 1, coeffs, 1);
   vMul<T>(j+1, coeffs, sig, coeffs);
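// Reading aid (interpretation, not taken verbatim from the source): the stopping
// tests above compare thrs (the running l1 quantity), normX (the squared
// residual) and constraint, which corresponds to the usual three LARS/lasso
// formulations selected by constraint_type:
//   L1COEFFS : min_alpha 0.5*||x - D*alpha||_2^2    s.t. ||alpha||_1 <= constraint
//   L2ERROR  : min_alpha ||alpha||_1                s.t. ||x - D*alpha||_2^2 <= constraint
//   PENALTY  : min_alpha 0.5*||x - D*alpha||_2^2 + constraint*||alpha||_1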
template <typename T>
   const int L = Gsm.n();
   const int K = Gsm.m();
   T* const Rdn = Rdnv.rawX();
   T* const Xdn = Xdnv.rawX();
   T* const sig = sigv.rawX();
   T* const av = avv.rawX();
   T* const RUn = RUnv.rawX();
   T* const Un = Unm.rawX();
   T* const Unds = Undsm.rawX();
   T* const Gs = Gsm.rawX();
   T* const Gsa = Gsam.rawX();
   T* const R = Rm.rawX();
   int indB = ind[minBasis];
   if (!pos && sig[minBasis] < 0) {
      Rdn[indB] = -Rdn[indB];
      if (ols) Xdn[indB] = -Xdn[indB];
   for (int k = 0; k < num*num; ++k) R[k] = 0.0;
   for (int k = 0; k < num; ++k) R[k*num + k] = 1.0;
   for (int k = minBasis+1; k <= j; ++k) {
      T a = -Un[k*L + minBasis]/Un[minBasis*L + minBasis];
      av[k-minBasis-1] = a;
      cblas_axpy<T>(minBasis, a, Un + minBasis*L, 1, Un + k*L, 1);
   for (int k = minBasis+1; k <= j; ++k) {
      cblas_copy<T>(minBasis, Un + k*L, 1, Un + (k-1)*L, 1);
      cblas_copy<T>(num, Un + k*L + minBasis + 1, 1, Un + (k-1)*L + minBasis, 1);
   T alphab, gamma, lambda;
   for (int k = 0; k < num; ++k) {
      alphab = alpha + av[k]*av[k];
      R[k*num + k] = sqrt(alphab/alpha);
      gamma = av[k]*R[k*num + k]/alphab;
      cblas_copy<T>(num-k-1, av + k + 1, 1, R + k*num + k + 1, 1);
      cblas_scal<T>(num-k-1, gamma, R + k*num + k + 1, 1);
         j, num, T(1.0), R, num, Un + minBasis*L, L);
   for (int k = minBasis+1; k <= j; ++k)
      cblas_axpy<T>(j-minBasis, av[k-minBasis-1], Unds + minBasis*L + minBasis + 1, 1,
            Unds + k*L + minBasis + 1, 1);
   for (int k = 0; k < minBasis; ++k)
      for (int l = minBasis+1; l <= j; ++l)
         Unds[k*L + l - 1] = Unds[k*L + l];
   for (int k = minBasis+1; k <= j; ++k)
      cblas_copy<T>(j-minBasis, Unds + k*L + minBasis + 1, 1, Unds + (k-1)*L + minBasis, 1);
         j-minBasis, num, T(1.0), R, num, Unds + minBasis*L + minBasis, L);
   for (int k = minBasis+1; k <= j; ++k)
      for (int l = 0; l < k; ++l) Unds[k*L + l] = 0.0;
   for (int k = minBasis+1; k <= j; ++k) {
      cblas_copy<T>(K, Gs + k*K, 1, Gs + (k-1)*K, 1);
   if (!pos && sig[minBasis] < T(0.0)) cblas_scal<T>(j, T(-1.0), Gs + indB, K);
   for (int k = minBasis+1; k <= j; ++k) {
      cblas_copy<T>(minBasis, Gsa + k*L, 1, Gsa + (k-1)*L, 1);
      cblas_copy<T>(j-minBasis, Gsa + k*L + minBasis + 1, 1, Gsa + (k-1)*L + minBasis, 1);
   for (int k = 0; k < minBasis; ++k) {
      for (int l = minBasis+1; l <= j; ++l) Gsa[k*L + l - 1] = Gsa[k*L + l];
   for (int k = minBasis+1; k <= j && !pos; ++k) sig[k-1] = sig[k];
   for (int k = minBasis+1; k <= j; ++k) ind[k-1] = ind[k];
   for (int k = minBasis+1; k <= j; ++k) coeffs[k-1] = coeffs[k];
   for (int k = minBasis; k <= j; ++k)
      normX += RUn[k]*RUn[k];
   for (int k = minBasis; k < j; ++k) {
      for (int l = 0; l <= k; ++l) RUn[k] += Xdn[ind[l]]*
      normX -= RUn[k]*RUn[k];
template <typename T>
      const int numThreads) {
   const int M = X.n();
   const int K = D.n();
   const int iterR = 30;
   int NUM_THREADS = init_omp(numThreads);
   for (int i = 0; i < NUM_THREADS; ++i) {
#pragma omp parallel for private(i)
   for (i = 0; i < M; ++i) {
      int numT = omp_get_thread_num();
      coreLARS2(DtRR, G, GsT[numT], GaT[numT], invGsT[numT], uT[numT], coeffs,
            ind, workT[numT], normX, mode, constraint, pos);
      for (int j = 0; j < iterR; ++j) {
         const T sig = sigma*pow(0.7, iterR-1-j);
         for (int k = 0; k < K; ++k) {
            weights[ind[k]] = MAX(1e-4, sig*exp(-sig*abs<T>(coeffs[k])));
         coreLARS2W(DtRR, G, GsT[numT], GaT[numT], invGsT[numT], uT[numT], coeffs, weights,
               ind, workT[numT], normX, mode, constraint, pos);
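// Reading aid (interpretation of the loop above, not taken verbatim from the
// source): each reweighting pass j uses sig = sigma*0.7^(iterR-1-j) and sets
//    weights[k] = max(1e-4, sig*exp(-sig*|coeffs[k]|))
// for the currently selected atoms, so atoms with large coefficients receive a
// smaller l1 weight on the next coreLARS2W pass; this is the usual
// reweighted-l1 heuristic.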
template <typename T>
      const int numThreads) {
   const int M = X.n();
   const int K = D.n();
   int NUM_THREADS = init_omp(numThreads);
   for (int i = 0; i < NUM_THREADS; ++i) {
#pragma omp parallel for private(i)
   for (i = 0; i < M; ++i) {
      int numT = omp_get_thread_num();
      coreLARS2W(DtR, G, GsT[numT], GaT[numT], invGsT[numT], uT[numT], coeffs, we,
            ind, workT[numT], normX, mode, constraint, pos);
template <typename T>
      const int numThreads) {
   const int M = X.n();
   const int K = G.n();
   int NUM_THREADS = init_omp(numThreads);
   for (int i = 0; i < NUM_THREADS; ++i) {
#pragma omp parallel for private(i)
   for (i = 0; i < M; ++i) {
      int numT = omp_get_thread_num();
      coreLARS2W(DtRi, G, GsT[numT], GaT[numT], invGsT[numT], uT[numT], coeffs, we,
            ind, workT[numT], normX, mode, constraint, pos);
template <typename T>
      int L, const T constraint, const T lambda2, constraint_type mode, const bool pos,
      const int numThreads) {
   const int M = X.n();
   const int K = D.n();
   int NUM_THREADS = init_omp(numThreads);
   for (int i = 0; i < NUM_THREADS; ++i) {
#pragma omp parallel for private(i)
   for (i = 0; i < M; ++i) {
      int numT = omp_get_thread_num();
      coreLARS2(DtR, G, GsT[numT], GaT[numT], invGsT[numT], uT[numT], coeffs,
            ind, workT[numT], normX, mode, constraint, pos);
      T constraint_mask = mode == PENALTY || mode == L2ERROR ?
            constraint*XmaskT[numT].n()/Xi.n() : constraint;
      T normX = XmaskT[numT].nrm2sq();
      DmaskT[numT].multTrans(XmaskT[numT], DtR);
            GsT[numT], GaT[numT], invGsT[numT], uT[numT], coeffs,
            ind, workT[numT], normX, mode, constraint_mask, pos);
      DmaskT[numT].setm(D.m());
      DmaskT[numT].setn(D.n());
      XmaskT[numT].setn(X.m());
template <typename T>
      int L, const T constraint, const T lambda2, constraint_type mode, const bool pos,
      const int numThreads, Matrix<T>* path, int length_path) {
   lasso2(X, G, DtX, spalpha, L, constraint, mode, pos, numThreads, path, length_path);
template <typename T>
      const int numThreads, Matrix<T>* path, int length_path) {
   const int M = X.n();
   const int K = G.n();
   int NUM_THREADS = init_omp(numThreads);
   for (int i = 0; i < NUM_THREADS; ++i) {
#pragma omp parallel for private(i)
   for (i = 0; i < M; ++i) {
      int numT = omp_get_thread_num();
      coreLARS2(DtR, G, GsT[numT], GaT[numT], invGsT[numT],
            ind, workT[numT], normX, mode, constraint, pos,
            path && i == 0 ? path->rawX() : NULL, length_path);
template <typename T>
      T* path, int length_path) {
   const int LL = Gs.n();
   const int K = G.n();
   const int L = MIN(LL, K);
   if (length_path <= 1) length_path = 4*L;
   T* const pr_Gs = Gs.rawX();
   T* const pr_invGs = invGs.rawX();
   T* const pr_Ga = Ga.rawX();
   T* const pr_work = work.rawX();
   T* const pr_u = u.rawX();
   T* const pr_DtR = DtR.rawX();
   T* const pr_coeffs = coeffs.rawX();
   int* const pr_ind = ind.rawX();
   int currentInd = pos ? DtR.max() : DtR.fmax();
   if (mode == PENALTY && abs(DtR[currentInd]) < constraint) return;
   if (mode == L2ERROR && normX < constraint) return;
   for (i = 0; i < L; ++i) {
      pr_ind[i] = currentInd;
      for (int j = 0; j <= i; ++j)
         pr_Gs[i*LL + j] = pr_Ga[i*K + pr_ind[j]];
      pr_invGs[0] = T(1.0)/pr_Gs[0];
            pr_invGs, LL, pr_Gs + i*LL, 1, T(0.0), pr_u, 1);
            T(1.0)/(pr_Gs[i*LL + i] - cblas_dot<T>(i, pr_u, 1, pr_Gs + i*LL, 1));
      pr_invGs[i*LL + i] = schur;
      cblas_copy<T>(i, pr_u, 1, pr_invGs + i*LL, 1);
      cblas_scal<T>(i, -schur, pr_invGs + i*LL, 1);
      for (int j = 0; j <= i; ++j)
         pr_work[j] = pr_DtR[pr_ind[j]] > 0 ? T(1.0) : T(-1.0);
            pr_work, 1, T(0.0), pr_u, 1);
      int first_zero = -1;
      for (int j = 0; j <= i; ++j) {
         T ratio = -pr_coeffs[j]/pr_u[j];
         if (ratio > 0 && ratio <= step_max) {
      T current_correlation = abs<T>(pr_DtR[pr_ind[0]]);
            K, pr_u, 1, T(0.0), pr_work + 2*K, 1);
      cblas_copy<T>(K, pr_work + 2*K, 1, pr_work + K, 1);
      cblas_copy<T>(K, pr_work + 2*K, 1, pr_work, 1);
      for (int j = 0; j <= i; ++j) {
      for (int j = 0; j < K; ++j) {
         pr_work[j] = ((pr_work[j] < INFINITY) && (pr_work[j] > T(-1.0))) ?
               (pr_DtR[j] + current_correlation)/(T(1.0) + pr_work[j]) : INFINITY;
      for (int j = 0; j < K; ++j) {
         pr_work[j + K] = ((pr_work[j + K] < INFINITY) && (pr_work[j + K] < T(1.0))) ?
               (current_correlation - pr_DtR[j])/(T(1.0) - pr_work[j + K]) : INFINITY;
      for (int j = 0; j < K; ++j) {
      int index = cblas_iamin<T>(2*K, pr_work, 1);
      T step = pr_work[index];
      currentInd = index % K;
      for (int j = 0; j <= i; ++j)
         coeff1 += pr_DtR[pr_ind[j]] > 0 ? pr_u[j] : -pr_u[j];
      for (int j = 0; j <= i; ++j)
         coeff2 += pr_DtR[pr_ind[j]]*pr_u[j];
      T coeff3 = normX - constraint;
      step_max2 = current_correlation - constraint;
      const T delta = coeff2*coeff2 - coeff1*coeff3;
      step_max2 = delta < 0 ? INFINITY : (coeff2 - sqrt(delta))/coeff1;
      step_max2 = MIN(current_correlation, step_max2);
      step_max2 = coeff1 < 0 ? INFINITY : (constraint - thrs)/coeff1;
      step_max2 = MIN(current_correlation, step_max2);
      step = MIN(MIN(step, step_max2), step_max);
      cblas_axpy<T>(i+1, step, pr_u, 1, pr_coeffs, 1);
      for (int j = 0; j < i+1; ++j)
         if (pr_coeffs[j] < 0) pr_coeffs[j] = 0;
      cblas_axpy<T>(K, -step, pr_work + 2*K, 1, pr_DtR, 1);
      normX += coeff1*step*step - 2*coeff2*step;
      thrs += step*coeff1;
      for (int k = 0; k <= i; ++k)
         path[iter*K + ind[k]] = pr_coeffs[k];
      if (step == step_max) {
         for (int j = first_zero; j < i; ++j) {
            cblas_copy<T>(K, pr_Ga + (j+1)*K, 1, pr_Ga + j*K, 1);
            pr_ind[j] = pr_ind[j+1];
            pr_coeffs[j] = pr_coeffs[j+1];
         for (int j = first_zero; j < i; ++j) {
            cblas_copy<T>(first_zero, pr_Gs + (j+1)*LL, 1, pr_Gs + j*LL, 1);
            cblas_copy<T>(i - first_zero, pr_Gs + (j+1)*LL + first_zero + 1, 1,
                  pr_Gs + j*LL + first_zero, 1);
         const T schur = pr_invGs[first_zero*LL + first_zero];
         cblas_copy<T>(first_zero, pr_invGs + first_zero*LL, 1, pr_u, 1);
         cblas_copy<T>(i - first_zero, pr_invGs + (first_zero+1)*LL + first_zero, LL,
         for (int j = first_zero; j < i; ++j) {
            cblas_copy<T>(first_zero, pr_invGs + (j+1)*LL, 1, pr_invGs + j*LL, 1);
            cblas_copy<T>(i - first_zero, pr_invGs + (j+1)*LL + first_zero + 1, 1,
                  pr_invGs + j*LL + first_zero, 1);
               pr_u, 1, pr_invGs, LL);
      if ((iter >= length_path-1) || abs(step) < 1e-15 ||
            step == step_max2 || (normX < 1e-15) ||
            (mode == L2ERROR && normX - constraint < 1e-15) ||
            (mode == L1COEFFS && (constraint - thrs < 1e-15))) {
template <typename T>
   const int LL = Gs.n();
   const int K = G.n();
   const int L = MIN(LL, K);
   T* const pr_Gs = Gs.rawX();
   T* const pr_invGs = invGs.rawX();
   T* const pr_Ga = Ga.rawX();
   T* const pr_work = work.rawX();
   T* const pr_u = u.rawX();
   T* const pr_DtR = DtR.rawX();
   T* const pr_coeffs = coeffs.rawX();
   T* const pr_weights = weights.rawX();
   int* const pr_ind = ind.rawX();
   int currentInd = pos ? DtR.max() : DtR.fmax();
   if (mode == PENALTY && abs(DtR[currentInd]) < constraint) return;
   if (mode == L2ERROR && normX < constraint) return;
   for (i = 0; i < L; ++i) {
      pr_ind[i] = currentInd;
      for (int j = 0; j <= i; ++j)
         pr_Gs[i*LL + j] = pr_Ga[i*K + pr_ind[j]];
      pr_invGs[0] = T(1.0)/pr_Gs[0];
            pr_invGs, LL, pr_Gs + i*LL, 1, T(0.0), pr_u, 1);
            T(1.0)/(pr_Gs[i*LL + i] - cblas_dot<T>(i, pr_u, 1, pr_Gs + i*LL, 1));
      pr_invGs[i*LL + i] = schur;
      cblas_copy<T>(i, pr_u, 1, pr_invGs + i*LL, 1);
      cblas_scal<T>(i, -schur, pr_invGs + i*LL, 1);
      for (int j = 0; j <= i; ++j)
         pr_work[j] = pr_DtR[pr_ind[j]] > 0 ? weights[pr_ind[j]] : -weights[pr_ind[j]];
            pr_work, 1, T(0.0), pr_u, 1);
      int first_zero = -1;
      for (int j = 0; j <= i; ++j) {
         T ratio = -pr_coeffs[j]/pr_u[j];
         if (ratio > 0 && ratio <= step_max) {
      T current_correlation = abs<T>(pr_DtR[pr_ind[0]]);
            K, pr_u, 1, T(0.0), pr_work + 2*K, 1);
      vDiv<T>(K, pr_work + 2*K, pr_weights, pr_work + 2*K);
      cblas_copy<T>(K, pr_work + 2*K, 1, pr_work + K, 1);
      cblas_copy<T>(K, pr_work + 2*K, 1, pr_work, 1);
      for (int j = 0; j <= i; ++j) {
      for (int j = 0; j < K; ++j) {
         pr_work[j] = ((pr_work[j] < INFINITY) && (pr_work[j] > T(-1.0))) ?
               (pr_DtR[j] + current_correlation)/(T(1.0) + pr_work[j]) : INFINITY;
      for (int j = 0; j < K; ++j) {
         pr_work[j + K] = ((pr_work[j + K] < INFINITY) && (pr_work[j + K] < T(1.0))) ?
               (current_correlation - pr_DtR[j])/(T(1.0) - pr_work[j + K]) : INFINITY;
      for (int j = 0; j < K; ++j) {
      int index = cblas_iamin<T>(2*K, pr_work, 1);
      T step = pr_work[index];
      currentInd = index % K;
      for (int j = 0; j <= i; ++j)
         coeff1 += pr_DtR[pr_ind[j]] > 0 ? pr_weights[pr_ind[j]]*pr_u[j] :
               -pr_weights[pr_ind[j]]*pr_u[j];
      for (int j = 0; j <= i; ++j)
         coeff2 += pr_DtR[pr_ind[j]]*pr_u[j]*pr_weights[pr_ind[j]];
      T coeff3 = normX - constraint;
      step_max2 = current_correlation - constraint;
      const T delta = coeff2*coeff2 - coeff1*coeff3;
      step_max2 = delta < 0 ? INFINITY : (coeff2 - sqrt(delta))/coeff1;
      step_max2 = coeff1 < 0 ? INFINITY : (constraint - thrs)/coeff1;
      step = MIN(MIN(step, step_max2), step_max);
      cblas_axpy<T>(i+1, step, pr_u, 1, pr_coeffs, 1);
      cblas_axpy<T>(K, -step, pr_work + 2*K, 1, pr_DtR, 1);
      normX += coeff1*step*step - 2*coeff2*step;
      thrs += step*coeff1;
      if (step == step_max) {
         for (int j = first_zero; j < i; ++j) {
            cblas_copy<T>(K, pr_Ga + (j+1)*K, 1, pr_Ga + j*K, 1);
            pr_ind[j] = pr_ind[j+1];
            pr_coeffs[j] = pr_coeffs[j+1];
         for (int j = first_zero; j < i; ++j) {
            cblas_copy<T>(first_zero, pr_Gs + (j+1)*LL, 1, pr_Gs + j*LL, 1);
            cblas_copy<T>(i - first_zero, pr_Gs + (j+1)*LL + first_zero + 1, 1,
                  pr_Gs + j*LL + first_zero, 1);
         const T schur = pr_invGs[first_zero*LL + first_zero];
         cblas_copy<T>(first_zero, pr_invGs + first_zero*LL, 1, pr_u, 1);
         cblas_copy<T>(i - first_zero, pr_invGs + (first_zero+1)*LL + first_zero, LL,
         for (int j = first_zero; j < i; ++j) {
            cblas_copy<T>(first_zero, pr_invGs + (j+1)*LL, 1, pr_invGs + j*LL, 1);
            cblas_copy<T>(i - first_zero, pr_invGs + (j+1)*LL + first_zero + 1, 1,
                  pr_invGs + j*LL + first_zero, 1);
               pr_u, 1, pr_invGs, LL);
      if (iter > 4*L || abs(step) < 1e-10 ||
            step == step_max2 || (normX < 1e-10) ||
            (mode == L2ERROR && normX - constraint < 1e-10) ||
            (mode == L1COEFFS && (constraint - thrs < 1e-10))) {
template <typename T>
      const int numThreads) {
   ist(X, D, alpha, lambda, mode, itermax, tol, numThreads);
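// Usage sketch (illustrative, not part of the original header): iterative
// soft-thresholding with a fixed penalty lambda in PENALTY mode.
// example_ist_usage is a hypothetical helper; itermax, tol and numThreads keep
// their default values from the ist() prototype assumed above.
template <typename T>
static void example_ist_usage(const Matrix<T>& X, const Matrix<T>& D) {
   SpMatrix<T> alpha;                  // warm-start / output coefficients
   ist(X, D, alpha, T(0.1), PENALTY);  // lambda = 0.1, remaining arguments defaulted
}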
template <typename T>
      const T tol, const int numThreads) {
   std::cerr << "Mode not implemented" << std::endl;
   cerr << "Current implementation of IST does not support non-normalized dictionaries" << endl;
   int NUM_THREADS = init_omp(numThreads);
   for (int i = 0; i < NUM_THREADS; ++i) {
#pragma omp parallel for private(i)
   for (i = 0; i < M; ++i) {
      int numT = omp_get_thread_num();
      T norm1 = coeffs.asum();
      G.mult(spAlpha, DtR, -1.0, 1.0);
      coreIST(G, DtR, coeffs, lambda, itermax, tol);
template <typename T>
      const T thrs, const int itermax,
   const int K = G.n();
   T* const coeffs = coeffsv.rawX();
   T* const DtR = DtRv.rawX();
   const T lambda_init = thrs;
   T norm1 = coeffsv.asum();
   T lambda = lambda_init;
   vAdd(K, DtR, coeffs, DtR);
   for (int iter = 0; iter < itermax; ++iter) {
      for (int j = 0; j < K; ++j) {
         if (DtR[j] > lambda) {
            coeffs[j] = DtR[j] - lambda;
         } else if (DtR[j] < -lambda) {
            coeffs[j] = DtR[j] + lambda;
         } else if (coeffs[j]) {
      if (iter % 5 == 1) {
         vSub(K, DtR, coeffs, DtR);
         for (int j = 0; j < K; ++j) {
            norm1 += abs(coeffs[j]);
            DtRa += DtR[j]*coeffs[j];
         vAdd(K, DtR, coeffs, DtR);
         const T kappa = -DtRa + norm1*maxDtR;
         if (abs(lambda - maxDtR) < tol && kappa <= tol)
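// Reading aid (interpretation, not taken verbatim from the source): after the
// vAdd above folds the current coefficient into DtR[j], the inner loop of
// coreIST applies the scalar soft-thresholding operator to DtR[j]. A
// stand-alone version of that operator (hypothetical helper):
template <typename T>
static T example_soft_threshold(const T x, const T lambda) {
   if (x > lambda)  return x - lambda;   // shrink positive values toward zero
   if (x < -lambda) return x + lambda;   // shrink negative values toward zero
   return T(0);                          // zero out everything inside [-lambda, lambda]
}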
template <typename T>
      coeffsv, const T normX2, const T eps, const int itermax, const T tol) {
   const int K = G.n();
   T* const coeffs = coeffsv.rawX();
   T* const DtR = DtRv.rawX();
   T norm1 = coeffsv.asum();
   if (!norm1 && err <= eps) return;
   T current_tol = 10.0*tol;
   T lambdasq = lambda*lambda;
   lambdasq *= eps/err;
   lambda = sqrt(lambdasq);
   int* const pr_indices = indices.rawX();
   for (int iter = 0; iter < itermax; ++iter) {
      for (int j = 0; j < K; ++j) {
         T old_coeff = coeffs[j];
         T diff = DtR[j] + old_coeff;
         if (diff > lambda) {
            coeffs[j] = diff - lambda;
            err += lambdasq - DtR[j]*DtR[j];
            pr_indices[count++] = j;
         } else if (diff < -lambda) {
            coeffs[j] = diff + lambda;
            err += lambdasq - DtR[j]*DtR[j];
            pr_indices[count++] = j;
         err += diff*diff - DtR[j]*DtR[j];
         diff = old_coeff - coeffs[j];
      for (int j = 0; j < count; ++j) {
         const int ind = pr_indices[j];
         norm1 += abs(coeffs[ind]);
         DtRa += DtR[ind]*coeffs[ind];
      if (norm1 - DtRa/maxDtR <= current_tol) {
         const bool change = ((old_err > eps) && err < eps + current_tol) ||
               (old_err < eps && err > eps - current_tol);
         if (current_tol == tol) {
         current_tol = MAX(current_tol*0.5, tol);
         lambdasq *= eps/err;
         lambda = sqrt(lambdasq);
template <typename T>
      const T tol, const int numThreads) {
   cerr << "Current implementation of block coordinate descent does not support non-normalized dictionaries" << endl;
   std::cerr << "Mode not implemented" << std::endl;
   int NUM_THREADS = init_omp(numThreads);
#pragma omp parallel for private(i)
   for (i = 0; i < Ngroups; ++i) {
      int numT = omp_get_thread_num();
      X.mult(D, RtD, true, false);
      T norm1 = alphat.asum();
      coreIST(G, DtR_mean, coeffs_mean, lambda/T(2.0), itermax, tol);
            lambda, itermax, tol);
      normX2 -= computeError(normX2, G, DtR_mean, coeffs_mean, spalpha);
      for (int j = 0; j < K; ++j) {
         const T nrm = col.nrm2sq();
      coreGroupIST(G, RtD, alphat, sqr<T>(M)*lambda/T(2.0), itermax, sqr<T>(M)*tol);
template <typename T>
   const int K = G.n();
   const int M = RtDm.m();
   T* const prG = G.rawX();
   T* const RtD = RtDm.rawX();
   T* const coeffs = coeffsm.rawX();
   const T lambda_init = thrs;
   T lambda = lambda_init;
   T* const old_coeff = old_coeffv.rawX();
   T* const norms = normsv.rawX();
   int* const activate = activatev.rawX();
   for (int iter = 0; iter < itermax; ++iter) {
      for (int j = 0; j < K; ++j) {
         if (activate[j] >= 0) {
            vAdd(M, coeffs + j*M, RtD + j*M, coeffs + j*M);
            norms[j] = nrm - lambda;
            vSub(M, old_coeff, coeffs + j*M, old_coeff);
            memset(coeffs + j*M, 0, M*sizeof(T));
            norms[j] = nrm - lambda;
            activate[j] = (activate[j] == 0) ? -10 : activate[j] - 1;
      if (iter % 5 == 4) {
         T norm1 = normsv.asum();
         for (int j = 0; j < K; ++j) {
            DtRa += cblas_dot(M, coeffs + j*M, 1, RtD + j*M, 1);
         if ((maxDtR - lambda) < (tol*maxDtR/norm1) && norm1 - DtRa/maxDtR < tol) break;
template <typename T>
   const int K = G.n();
   const int M = RtDm.m();
   T* const prG = G.rawX();
   T* const RtD = RtDm.rawX();
   T* const coeffs = coeffsm.rawX();
   T* const old_coeff = old_coeffv.rawX();
   T* const norms = normsv.rawX();
   int* const activate = activatev.rawX();
   T norm1 = normsv.sum();
   if (!norm1 && err <= eps) return;
   T current_tol = 10.0*tol;
   T lambdasq = lambda*lambda;
   lambdasq *= eps/err;
   lambda = sqrt(lambdasq);
   for (int iter = 0; iter < itermax; ++iter) {
      for (int j = 0; j < K; ++j) {
         if (activate[j] >= 0) {
            vAdd(M, coeffs + j*M, RtD + j*M, coeffs + j*M);
            norms[j] = nrm - lambda;
            vSub(M, old_coeff, coeffs + j*M, old_coeff);
            err += cblas_dot(M, old_coeff, 1, old_coeff, 1)
            memset(coeffs + j*M, 0, M*sizeof(T));
            err += cblas_dot(M, old_coeff, 1, old_coeff, 1)
            norms[j] = nrm - lambda;
            err += cblas_dot(M, coeffs + j*M, 1, coeffs + j*M, 1)
            activate[j] = (activate[j] == 0) ? -3 : activate[j] - 1;
      norm1 = normsv.sum();
      for (int j = 0; j < K; ++j) {
         DtRa += cblas_dot(M, coeffs + j*M, 1, RtD + j*M, 1);
      if (norm1 - DtRa/maxDtR <= current_tol) {
         const T tol_bis = current_tol*maxDtR;
         const bool change = ((old_err > eps) && err < eps + tol_bis) ||
               (old_err < eps && err > eps - tol_bis);
         if (current_tol == tol) {
         current_tol = MAX(current_tol*0.5, tol);
         lambdasq *= eps/err;
         lambda = sqrt(lambdasq);
template <typename T>
   for (int j = 0; j < G.n(); ++j) {
      err2 -= 2*col.dot(col2);
      for (int k = 0; k < j; ++k) {
         add -= G(j,k)*col.dot(col2);
      add += add - G(j,j)*col.nrm2sq();
template <typename T>
   return normX2 - G.quad(spAlpha) - 2*DtR.dot(spAlpha);
template <typename T>
      const int Ngroups, const int L, const T eps, const int numThreads) {
   somp(X, D, spalpha, Ngroups, L, &eps, false, numThreads);
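// Usage sketch (illustrative, not part of the original header): simultaneous OMP
// over Ngroups groups of signals sharing the dictionary D; X and alpha point to
// arrays of Ngroups matrices. example_somp_usage is a hypothetical helper and the
// somp() prototype reconstructed above is assumed; the scalar-eps overload scales
// the tolerance by the group size internally.
template <typename T>
static void example_somp_usage(const Matrix<T>* X, const Matrix<T>& D,
      SpMatrix<T>* alpha, const int Ngroups) {
   const int L = 10;        // at most 10 atoms shared within each group
   const T eps = T(1e-6);   // per-signal residual tolerance
   somp(X, D, alpha, Ngroups, L, eps);
}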
template <typename T>
      const int Ngroups, const int LL, const T* eps, const bool adapt,
      const int numThreads) {
   if (LL <= 0) return;
   const int K = D.n();
   const int L = MIN(D.m(), MIN(LL, K));
   cerr << "Current implementation of OMP does not support non-normalized dictionaries" << endl;
   int NUM_THREADS = init_omp(numThreads);
#pragma omp parallel for private(i)
   for (i = 0; i < Ngroups; ++i) {
      const int M = X.n();
      T thrs = adapt ? eps[i] : M*(*eps);
template <typename T>
   const int K = G.n();
   const int n = D.m();
   const int M = X.n();
   const bool big_mode = M*K*(n+L) > 2*(M*n*n + K*n*(n+L));
   T normX = Xt.nrm2sq();
   coreORMP(scores, norm, tmp, Un, Undn, Unds, Gs, Rdn, G, r, RUn, normX, &eps, &L, &lambda);
   for (int i = 0; i < L; ++i) {
      if (r[i] == -1) break;
   if (E < eps) return;
   if (E < eps) return;
   for (int i = 0; i < K; ++i) {
   T* const prAs = As.rawX();
   T* const prA = A.rawX();
   T* const prS = S.rawX();
   T* const prGs = Gs.rawX();
   T* const prFs = Fs.rawX();
   T* const prB = B.rawX();
   T* const pr_c = c.rawX();
   T* const pr_tmp = tmp.rawX();
   for (j = 0; j < L; ++j) {
      for (int k = 0; k < j; ++k) scores[r[k]] = -1.0;
      const int currentInd = scores.max();
      const T invNorm = T(1.0)/sqrt(e[currentInd]);
      if (invNorm > 1e3) {
      E -= scores[currentInd];
      for (int k = 0; k < j; ++k) prS[j*L + k] = T();
      for (int k = 0; k < j; ++k) prAs[k*L + j] = prA[k*K + currentInd];
      int iter = invNorm > 1.41 ? 2 : 1;
      for (int k = 0; k < iter; ++k) {
         for (int l = 0; l < j; ++l) {
            T scal = -cblas_dot<T>(j-l+1, prAs + l*L + l, 1, prS + j*L + l, 1);
            cblas_axpy<T>(l+1, scal, prS + l*L, 1, prS + j*L, 1);
      cblas_scal<T>(j+1, invNorm, prS + j*L, 1);
      if (j == L-1 || E <= eps) {
      prAs[j*L + j] = prA[j*K + currentInd];
      XtD.refCol(currentInd, di);
      for (int k = 0; k < j; ++k) pr_c[k] = T();
      for (int k = 0; k <= j; ++k)
         cblas_axpy<T>(j, prS[j*L + k], prB + r[k]*L, 1, pr_c, 1);
      f.add(tmp, f[currentInd]*invNorm*invNorm);
      cblas_axpy<T>(K, T(-1.0), prB + j, L, pr_tmp, 1);
      for (int i = 0; i < j; ++i) {
      SSt.mult(Dg, SStDt, false, true);