enum regul_t { L0, L1, RIDGE, L2, LINF, L1CONSTRAINT, ELASTICNET, FUSEDLASSO,
   GROUPLASSO_L2, GROUPLASSO_LINF, GROUPLASSO_L2_L1, GROUPLASSO_LINF_L1,
   L1L2, L1LINF, L1L2_L1, L1LINF_L1, TREE_L0, TREE_L2, TREE_LINF,
   GRAPH, GRAPH_RIDGE, GRAPH_L2, TREEMULT, GRAPHMULT, L1LINFCR, NONE,
   TRACE_NORM, TRACE_NORM_VEC, RANK, RANK_VEC, INCORRECT_REG,
   GRAPH_PATH_L0, GRAPH_PATH_CONV };
regul_t regul_from_string(char* regul) {
   if (strcmp(regul,"l0")==0) return L0;
   if (strcmp(regul,"l1")==0) return L1;
   if (strcmp(regul,"l2")==0) return RIDGE;
   if (strcmp(regul,"linf")==0) return LINF;
   if (strcmp(regul,"l2-not-squared")==0) return L2;
   if (strcmp(regul,"l1-constraint")==0) return L1CONSTRAINT;
   if (strcmp(regul,"elastic-net")==0) return ELASTICNET;
   if (strcmp(regul,"fused-lasso")==0) return FUSEDLASSO;
   // ... (group-lasso variants elided in this listing)
   if (strcmp(regul,"l1l2")==0) return L1L2;
   if (strcmp(regul,"l1linf")==0) return L1LINF;
   if (strcmp(regul,"l1l2+l1")==0) return L1L2_L1;
   if (strcmp(regul,"l1linf+l1")==0) return L1LINF_L1;
   if (strcmp(regul,"tree-l0")==0) return TREE_L0;
   if (strcmp(regul,"tree-l2")==0) return TREE_L2;
   if (strcmp(regul,"tree-linf")==0) return TREE_LINF;
   if (strcmp(regul,"graph")==0) return GRAPH;
   if (strcmp(regul,"graph-ridge")==0) return GRAPH_RIDGE;
   if (strcmp(regul,"graph-l2")==0) return GRAPH_L2;
   if (strcmp(regul,"multi-task-tree")==0) return TREEMULT;
   if (strcmp(regul,"multi-task-graph")==0) return GRAPHMULT;
   if (strcmp(regul,"l1linf-row-column")==0) return L1LINFCR;
   if (strcmp(regul,"trace-norm")==0) return TRACE_NORM;
   // ...
   if (strcmp(regul,"rank")==0) return RANK;
   if (strcmp(regul,"rank-vec")==0) return RANK_VEC;
   // ... (graph-path entries elided in this listing)
   if (strcmp(regul,"none")==0) return NONE;
   return INCORRECT_REG; // assumed fallback for unrecognized names
}
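// Example (illustrative, not part of the original file): mapping a
// user-supplied string to the enum and printing it back:
//    regul_t r = regul_from_string(const_cast<char*>("elastic-net"));
//    print_regul(r);   // prints "Elastic-net regularization"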
loss_t loss_from_string(char* loss) {
   if (strcmp(loss,"square")==0) return SQUARE;
   // ... (square loss with missing data elided in this listing)
   if (strcmp(loss,"logistic")==0) return LOG;
   if (strcmp(loss,"weighted-logistic")==0) return LOGWEIGHT;
   if (strcmp(loss,"hinge")==0) return HINGE;
   if (strcmp(loss,"multi-logistic")==0) return MULTILOG;
   if (strcmp(loss,"cur")==0) return CUR;
   return INCORRECT_LOSS; // assumed fallback, mirroring INCORRECT_REG above
}
void print_loss(const loss_t& loss) {
   switch (loss) {
      case SQUARE: cout << "Square loss" << endl; break;
      case SQUARE_MISSING: cout << "Square loss with missing data" << endl; break;
      case LOG: cout << "Logistic loss" << endl; break;
      case LOGWEIGHT: cout << "Weighted logistic loss" << endl; break;
      case HINGE: cout << "Hinge loss" << endl; break;
      case MULTILOG: cout << "Multiclass logistic loss" << endl; break;
      case CUR: cout << "CUR decomposition" << endl; break;
      default: cerr << "Not implemented" << endl;
   }
}
void print_regul(const regul_t& regul) {
   switch (regul) {
      case L0: cout << "L0 regularization" << endl; break;
      case L1: cout << "L1 regularization" << endl; break;
      case RIDGE: cout << "L2-squared regularization" << endl; break;
      case L2: cout << "L2-not-squared regularization" << endl; break;
      case L1CONSTRAINT: cout << "L1 constraint regularization" << endl; break;
      case LINF: cout << "Linf regularization" << endl; break;
      case ELASTICNET: cout << "Elastic-net regularization" << endl; break;
      case FUSEDLASSO: cout << "Fused lasso (total variation) regularization" << endl; break;
      // ... (group-lasso cases elided in this listing)
      case L1L2: cout << "L1L2 regularization" << endl; break;
      case L1LINF: cout << "L1Linf regularization" << endl; break;
      case TRACE_NORM: cout << "Trace-norm regularization" << endl; break;
      case TRACE_NORM_VEC: cout << "Trace-norm regularization for vectors" << endl; break;
      case RANK: cout << "Rank regularization" << endl; break;
      case RANK_VEC: cout << "Rank regularization for vectors" << endl; break;
      case L1L2_L1: cout << "L1L2 regularization + L1" << endl; break;
      case L1LINF_L1: cout << "L1Linf regularization + L1" << endl; break;
      case TREE_L0: cout << "Tree-L0 regularization" << endl; break;
      case TREE_L2: cout << "Tree-L2 regularization" << endl; break;
      case TREE_LINF: cout << "Tree-Linf regularization" << endl; break;
      case GRAPH: cout << "Graph regularization" << endl; break;
      case GRAPH_RIDGE: cout << "Graph + ridge regularization" << endl; break;
      case GRAPH_L2: cout << "Graph regularization with l2" << endl; break;
      case TREEMULT: cout << "Multitask tree regularization" << endl; break;
      case GRAPHMULT: cout << "Multitask graph regularization" << endl; break;
      case L1LINFCR: cout << "L1Linf regularization on rows and columns" << endl; break;
      case GRAPH_PATH_L0: cout << "Graph-path non-convex regularization" << endl; break;
      case GRAPH_PATH_CONV: cout << "Graph-path convex regularization" << endl; break;
      case NONE: cout << "No regularization" << endl; break;
      default: cerr << "Not implemented" << endl;
   }
}
template <typename T>
// ... (the declaration this header introduces is elided in this listing)

// abstract interface for losses/regularizers used by the ADMM splitting solvers
template <typename T, typename F = Matrix<T>, typename D = Vector<T>,
          typename E = Vector<T> >
class SplittingFunction {
   public:
      // ...
      virtual T eval(const D& input) const = 0;
      virtual T eval_split(const F& input) const = 0;
      virtual T eval_weighted(const D& input, const F& input_struct,
            const T* weights) const { return this->eval(input); };
      virtual int num_components() const = 0;
      virtual void prox_split(F& splitted_w, const T lambda) const = 0;
      virtual void init_split_variables(F& splitted_w) const = 0;
      // ...
};
// abstract interface for smooth losses used by the ISTA/FISTA/subgradient solvers
template <typename T, typename D = Vector<T>, typename E = Vector<T> >
class Loss {
   public:
      // ...
      virtual void init(const E& input) = 0;
      virtual T eval(const D& input) const = 0;
      virtual void grad(const D& input, D& output) const = 0;
      virtual inline bool test_backtracking(const D& y, const D& grad,
            const D& prox, const T L) const {
         // (elided lines compute tmp = prox - y)
         return (this->eval(prox) <= this->eval(y) + grad.dot(tmp)
               + 0.5*L*tmp.nrm2sq());
      };
      // ...
      virtual T fenchel(const D& input) const = 0;
      virtual void var_fenchel(const D& x, D& grad1, D& grad2,
            const bool intercept = false) const = 0;
      // ...
};
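// The backtracking test above is the standard sufficient-decrease condition of
// proximal-gradient methods: accept the step 1/L when
//    f(prox) <= f(y) + <grad f(y), prox - y> + (L/2)*||prox - y||_2^2,
// i.e. when the quadratic model at y with curvature L upper-bounds f at prox;
// otherwise the solvers below increase L and retry.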
// fragments of the square loss with missing data (SqLossMissing)
template <typename T>
// ... init: record the indices of missing entries
         _missingvalues.clear();
         for (int i = 0; i<_x.n(); ++i) {
            // ...
            _missingvalues.push_back(i);
         }
// ... eval: residual of the sparse code, ignoring missing entries
         _D->mult(spalpha,residual,T(-1.0),T(1.0));
         for (/* ... */; it != _missingvalues.end(); ++it)
            // ...
         return 0.5*residual.nrm2sq();
// ... grad:
         _D->mult(spalpha,residual,T(-1.0),T(1.0));
         for (/* ... */; it != _missingvalues.end(); ++it)
            // ...
         _D->multTrans(residual,grad,T(-1.0),T(0.0));
// ... var_fenchel:
         _D->mult(spalpha,grad1,T(1.0),T(-1.0));
         for (/* ... */; it != _missingvalues.end(); ++it)
            // ...
         _D->multTrans(grad1,grad2,T(1.0),T(0.0));
// fragments of the square loss f(alpha) = 0.5*||x - D*alpha||_2^2 (SqLoss)
template <typename T>
// ... init:
         _D->multTrans(x,_DtX);
// ... eval: use the sparse code when it is sparse enough
         if (spalpha.L() < alpha.n()/2) {
            _D->mult(spalpha,residual,T(-1.0),T(1.0));
         } else {
            _D->mult(alpha,residual,T(-1.0),T(1.0));
         }
         return 0.5*residual.nrm2sq();
// ... grad, using the precomputed Gram matrix _G when available:
         _G->mult(spalpha,grad,T(1.0),-T(1.0));
         // ... otherwise:
         _D->mult(spalpha,residual,T(-1.0),T(1.0));
         _D->multTrans(residual,grad,T(-1.0),T(0.0));
// ... test_backtracking:
         return (_G->quad(sptmp) <= L*sptmp.nrm2sq());
         // ... without the Gram matrix:
         _D->mult(sptmp,tmp2);
         return (tmp2.nrm2sq() <= L*sptmp.nrm2sq());
// ... var_fenchel:
         _D->mult(spalpha,grad1,T(1.0),T(-1.0));
         // ...
         _D->multTrans(grad1,grad2,T(1.0),T(0.0));
// ... prox_split (ADMM): coordinate-wise ridge update per component
         const int n = this->num_components();
         for (int i = 0; i<n; ++i) {
            // ...
            const T xtw=row.dot(wi);
            const T xtx=row.dot(row);
            wi.add(row,-lambda*(xtw-_x[i])/(T(1.0)+lambda*xtx));
         }
// ... eval_split:
         const int n = this->num_components();
         for (int i = 0; i<n; ++i) {
            // ...
            const T xtw=row.dot(wi);
            sum += 0.5*(_x[i]-xtw)*(_x[i]-xtw);
         }
// ... init_split_variables:
         splitted_w.resize(_D->n(),_D->m());
// ... prox_prim_var:
         _D->mult(prim_var,out,T(1.0),T(1.0));
         out.scal(T(1.0)/(T(1.0)+gamma));
// ... compute_new_prim:
         tmp.add(dual_var,gamma);
         _D->multTrans(tmp,prim,T(1.0),delta);
// ... add_mult_design_matrix:
         _D->mult(prim,out,fact,T(1.0));
// fragments of the hinge loss (HingeLoss)
template <typename T>
// ... eval:
         return tmp.sum()/tmp.n();
// ... eval_split:
         for (int i = 0; i<_X->n(); ++i) {
            // ...
            sum += MAX(0,T(1.0)-_y[i]*row.dot(wi));
         }
// ... init_split_variables:
         splitted_w.resize(_X->n(),_X->m());
// ... prox_prim_var:
         _X->mult(prim_var,out,T(1.0),T(1.0));
         const T thrs=T(1.0)-gamma;
         for (int i = 0; i<out.n(); ++i) {
            const T y = _y[i]*out[i];
            // ...
            } else if (y < T(1.0)) {
            // ...
         }
// ... compute_new_prim:
         tmp.add(dual_var,gamma);
         _X->multTrans(tmp,prim,T(1.0),delta);
// ... add_mult_design_matrix:
      void add_mult_design_matrix(/* ... */ const T fact) const {
         _X->mult(prim,out,fact,T(1.0));
      }
// ... prox_split: closed-form coordinate update of the hinge prox
         const int n = this->num_components();
         for (int i = 0; i<n; ++i) {
            // ...
            const T xtw=row.dot(wi);
            const T xtx=row.dot(row);
            const T diff=1-_y[i]*xtw;
            if (diff > lambda*xtx) {
               wi.add(row,lambda*_y[i]);
            } else if (diff > 0) {
               wi.add(row,_y[i]*diff/xtx);
            }
         }
// fragments of the (optionally class-balanced) logistic loss (LogLoss)
template <typename T, bool weighted = false>
// ... init (weighted case): balance the two classes
         for (int i = 0; i<y.n(); ++i)
            if (y[i]>0) countpos++;
         _weightpos=T(1.0)/countpos;
         _weightneg=T(1.0)/MAX(1e-3,(y.n()-countpos));
// ... eval (weighted):
         for (int i = 0; i<tmp.n(); ++i)
            sum+= _y[i]>0 ? _weightpos*tmp[i] : _weightneg*tmp[i];
         // ... unweighted:
         return tmp.sum()/tmp.n();
// ... grad (weighted):
         for (int i = 0; i<tmp.n(); ++i)
            tmp[i] *= _y[i] > 0 ? _weightpos : _weightneg;
         _X->multTrans(tmp,grad);
         // ... unweighted:
         _X->multTrans(tmp,grad);
         grad.scal(T(1.0)/_X->m());
// ... fenchel (weighted), using the xlogx helper:
         for (int i = 0; i<input.n(); ++i) {
            T prod = _y[i]>0 ? input[i]/_weightpos : -input[i]/_weightneg;
            sum += _y[i] >0 ? _weightpos*(xlogx(1.0+prod)+xlogx(-prod))
                            : _weightneg*(xlogx(1.0+prod)+xlogx(-prod));
         }
         // ... unweighted:
         for (int i = 0; i<input.n(); ++i) {
            T prod = _y[i]*input[i]*_X->m();
            // ...
         }
// ... var_fenchel:
         grad1.mult(_y,grad1);
         // ...
         grad1.mult(_y,grad1);
         grad1.scal(T(1.0)/_X->m());
         _X->multTrans(grad1,grad2);
// fragments of the multiclass logistic loss (MultiLogLoss)
template <typename T>
// ... init: store integer class labels
         for (int i = 0; i<y.n(); ++i)
            _y[i] = static_cast<int>(y[i]);
// ... eval:
         for (int i = 0; i<tmp.n(); ++i) {
            // ...
         }
// ... grad: per-column softmax minus the indicator of the true class
         for (int i = 0; i<tmp.n(); ++i) {
            // ...
            col.add(-col[_y[i]]);
            bool overweight=false;
            for (int j = 0; j<col.n(); ++j)
               // ...
            const int ind =col.fmax();
            // ...
            col[_y[i]] = col[_y[i]]-T(1.0);
         }
         _X->mult(tmp,grad,true,true);
         grad.scal(T(1.0)/_X->m());
// ... fenchel:
         for (int i = 0; i<input.n(); ++i) {
            const int clas = _y[i];
            for (int j = 0; j<input.m(); ++j) {
               // ...
               sum += xlogx(_X->m()*input[i*input.m()+j]+1.0);
               // ...
               sum += xlogx(_X->m()*input[i*input.m()+j]);
            }
         }
// ... var_fenchel:
         _X->multSwitch(W,grad1,true,true);
         // ...
         for (int i = 0; i<grad1.n(); ++i) {
            // ...
            col.add(-col[_y[i]]);
            bool overweight=false;
            for (int j = 0; j<col.n(); ++j)
               // ...
            const int ind =col.fmax();
            // ...
            col[_y[i]] = col[_y[i]]-T(1.0);
         }
         for (int i = 0; i<grad1.m(); ++i) {
            // ...
         }
         grad1.scal(T(1.0)/_X->m());
         _X->mult(grad1,grad2,true,true);
// fragments of the loss used for CUR-like matrix decompositions (LossCur)
template <typename T>
// ... grad:
         _X->multSwitch(tmp,tmp2,false,false,T(-1.0),T(1.0));
         // ...
         _X->multSwitch(tmp,tmp2,false,false,T(-1.0),T(1.0));
         _X->multSwitch(tmp2,tmp,true,false,T(-1.0),T(0.0));
         _X->mult(tmp,grad,true,false);
// ... fenchel:
         return 0.5*input.normFsq()+_X->dot(input);
// ... var_fenchel:
         _X->multSwitch(tmp,grad1,false,false,T(1.0),T(-1.0));
         _X->multSwitch(grad1,tmp,true,false,T(1.0),T(0.0));
         _X->mult(tmp,grad2,true,false);
// fragments of the matrix-valued square loss (SqLossMat)
template <typename T>
// ... constructor with precomputed Gram matrix:
         _compute_gram = true; };
// ... init:
         _D->mult(x,_DtX,true,false);
// ... eval:
         _D->mult(spalpha,residual,false,false,T(-1.0),T(1.0));
// ... grad:
         _G->mult(spalpha,grad,false,false,T(1.0),-T(1.0));
         // ... without the Gram matrix:
         _D->mult(spalpha,residual,false,false,T(-1.0),T(1.0));
         _D->mult(residual,grad,true,false,T(-1.0),T(0.0));
// ... test_backtracking:
         for (int i = 0; i<sptmp.n(); ++i) {
            // ...
            sum += _G->quad(col);
         }
         return (sum <= L*sptmp.normFsq());
         // ... without the Gram matrix:
         _D->mult(sptmp,tmp2);
// ... var_fenchel:
         _D->mult(spalpha,grad1,false,false,T(1.0),T(-1.0));
         // ...
         _D->mult(grad1,grad2,true,false,T(1.0),T(0.0));
// fragments of the wrapper that applies N independent losses, one per column
// (LossMat)
template <typename T, typename L>
// ... constructor:
         for (int i = 0; i<_N; ++i) {
            // ...
         }
// ... init:
         for (int i = 0; i<_N; ++i) {
            // ...
            _losses[i]->init(col);
         }
// ... eval:
         for (int i = 0; i<_N; ++i) {
            // ...
            sum+=_losses[i]->eval(col);
         }
// ... grad:
         for (int i = 0; i<_N; ++i) {
            // ...
            _losses[i]->grad(col,col2);
         }
// ... fenchel:
         for (int i = 0; i<_N; ++i) {
            // ...
            sum += _losses[i]->fenchel(col);
         }
// ... var_fenchel:
         for (int i = 0; i<_N; ++i) {
            // ...
            _losses[i]->var_fenchel(col,col2,col3,intercept);
         }
// ... is_fenchel:
         for (int i = 0; i<_N; ++i)
            ok = ok && _losses[i]->is_fenchel();
// ...
      virtual void dummy() = 0;
// partial specializations of LossMat (definitions largely elided)
template <typename T, typename L>
// ...
template <typename T, bool weighted>
// ... (specialization looping over the per-column losses):
         for (int i = 0; i<this->_N; ++i)
            // ...
template <typename T>
// ... (second specialization):
         for (int i = 0; i<this->_N; ++i)
            // ...
// abstract interface for regularizers (proximal operators)
template <typename T, typename D = Vector<T> >
class Regularizer {
   public:
      // ...
      virtual void prox(const D& input, D& output, const T lambda) = 0;
      virtual T eval(const D& input) const = 0;
      // ...
      virtual void fenchel(const D& input, T& val, T& scal) const = 0;
      // ...
      virtual void sub_grad(const D& input, D& output) const { };
      // ...
};
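// Contract implied by the interface above: prox(x, y, lambda) computes
//    y = argmin_u  0.5*||x - u||_2^2 + lambda*psi(u),
// eval returns psi(x), and fenchel(input, val, scal) returns in 'val' the
// conjugate psi* evaluated after rescaling input by 'scal' into its domain
// (see the recurring "scal = mm > 1.0 ? 1/mm : 1" pattern below).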
// the L1 (Lasso) regularizer
template <typename T>
// ... prox: soft-thresholding, keeping the intercept untouched
         if (this->_intercept) y[y.n()-1] = x[y.n()-1];
// ... eval:
         return (this->_intercept ? x.asum() - abs(x[x.n()-1]) : x.asum());
// ... fenchel: the conjugate of the l1 norm is the indicator of the linf ball
         if (this->_pos) output.thrsPos();
         // ...
         scal= mm > 1.0 ? T(1.0)/mm : 1.0;
         // ...
         if (this->_intercept && abs<T>(output[output.n()-1]) > EPSILON) val=INFINITY;
// ... sub_grad: sign pattern of the input
         for (int i = 0; i<input.n(); ++i) {
            output[i] = input[i] > 0 ? T(1.0) : input[i] < 0 ? -T(1.0) : 0;
         }
         // ... with positivity constraints:
         for (int i = 0; i<input.n(); ++i) {
            output[i] = input[i] > 0 ? T(1.0) : 0;
         }
         if (this->_intercept) output[output.n()-1]=0;
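// Minimal sketch (not part of the original file) of the entry-wise rule the
// Lasso prox implements; 'soft_threshold' is an illustrative helper name.
template <typename U>
inline U soft_threshold(const U x, const U lambda) {
   // y = sign(x) * max(|x| - lambda, 0)
   return x > lambda ? x - lambda : (x < -lambda ? x + lambda : U(0));
}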
// fragments of the L1-ball constraint regularizer (LassoConstraint)
template <typename T>
// ... prox: projection onto the l1 ball, with the intercept kept aside
         if (this->_intercept) {
            // ...
            y[y.n()-1] = x[y.n()-1];
         }
// ... sub_grad:
         if (this->_intercept) output[output.n()-1]=0;

// fragments of the L0 pseudo-norm regularizer (Lzero)
template <typename T>
// ... prox: hard-thresholding, keeping the intercept untouched
         if (this->_intercept) y[y.n()-1] = x[y.n()-1];
// ... eval:
         return (this->_intercept ? x.lzero() - 1 : x.lzero());

template <typename T>
// ... (regularizer definition elided in this listing)
// the squared-l2 (Ridge) regularizer
template <typename T>
// ... prox: uniform shrinkage
         y.scal(T(1.0/(1.0+lambda)));
         if (this->_intercept) y[y.n()-1] = x[y.n()-1];
// ... eval:
         return (this->_intercept ? 0.5*x.nrm2sq() - 0.5*x[x.n()-1]*x[x.n()-1]
                                  : 0.5*x.nrm2sq());
// ... fenchel:
         if (this->_pos) tmp.thrsPos();
         val=this->eval(tmp);
// ... sub_grad:
         for (int i = 0; i<input.n(); ++i) {
            output[i] = input[i] > 0 ? 0.5*input[i] : 0;
         }
         // ...
         if (this->_intercept) output[output.n()-1]=0;
// fragments of the (not squared) l2-norm regularizer (normL2)
template <typename T>
// ... prox: block soft-thresholding of the whole vector
         const T nrm=xref.nrm2();
         // ...
         y.scal(T(1.0) - lambda/nrm);
         // ...
         if (this->_intercept) y[y.n()-1] = x[y.n()-1];
// ... fenchel: indicator of the l2 ball
         if (this->_pos) output.thrsPos();
         T mm = output.nrm2();
         scal= mm > 1.0 ? T(1.0)/mm : 1.0;
         // ...
         if (this->_intercept && abs<T>(output[output.n()-1]) > EPSILON) val=INFINITY;
// fragments of the linf-norm regularizer (normLINF)
template <typename T>
// ... prox: subtract the projection onto the l1 ball of radius lambda
         for (int j = 0; j<xref.n(); ++j)
            // ...
         if (this->_intercept) y[y.n()-1] = x[y.n()-1];
// ... fenchel: indicator of the l1 ball
         if (this->_pos) output.thrsPos();
         T mm = output.asum();
         scal= mm > 1.0 ? T(1.0)/mm : 1.0;
         // ...
         if (this->_intercept && abs<T>(output[output.n()-1]) > EPSILON) val=INFINITY;
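// Moreau decomposition used by normLINF::prox above: for a norm ||.|| with
// dual norm ||.||*,
//    prox_{lambda*||.||}(x) = x - Proj_{ {u : ||u||* <= lambda} }(x),
// so the linf prox reduces to an l1-ball projection (l1project), just as the
// l2 prox in normL2 reduces to the scaling by (1 - lambda/||x||_2).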
// composition of two regularizers whose proximal operators can be applied in
// sequence (ComposeProx), e.g. elastic-net = Lasso composed with Ridge
template <typename T, typename D, typename RegA, typename RegB,
          bool order = true, bool scale_lambda = false>
// ... constructor:
         _regA=new RegA(param);
         _regB=new RegB(param);
// ... prox, with lambda rescaling (branching reconstructed from the fragments):
         if (order) {
            _regA->prox(x,tmp,lambda);
            _regB->prox(tmp,y,lambda*_lambda2d1/(T(1.0)+lambda));
         } else {
            _regB->prox(x,tmp,lambda*_lambda2d1);
            _regA->prox(tmp,y,lambda/(T(1.0)+lambda*_lambda2d1));
         }
// ... without rescaling:
         if (order) {
            _regA->prox(x,tmp,lambda);
            _regB->prox(tmp,y,lambda*_lambda2d1);
         } else {
            _regB->prox(x,tmp,lambda*_lambda2d1);
            _regA->prox(tmp,y,lambda);
         }
// ... eval:
         return _regA->eval(x) + _lambda2d1*_regB->eval(x);
// ...
      void inline fenchel(const D& input, T& val, T& scal) const { };
      virtual bool is_subgrad() const {
         return _regA->is_subgrad() && _regB->is_subgrad(); };
      virtual void sub_grad(const D& input, D& output) const {
         _regA->sub_grad(input,output);
         // ...
         _regB->sub_grad(input,tmp);
         output.add(tmp,_lambda2d1);
      };
template <typename T>
// ... (definition elided in this listing)

// fragments of the fused-lasso / total-variation regularizer (FusedLasso)
template <typename T>
// ... eval: TV term plus l1 and ridge terms
         const int maxn = this->_intercept ? x.n()-1 : x.n();
         for (int i = 0; i<maxn-1; ++i)
            sum += abs(x[i+1]-x[i]) + _lambda2d1*abs(x[i]) + 0.5*_lambda3d1*x[i]*x[i];
         sum += _lambda2d1*abs(x[maxn-1])+0.5*_lambda3d1*x[maxn-1]*x[maxn-1];
// the graph-structured group-lasso regularizer, implemented with network
// flows (GraphLasso)
template <typename T>
// ... constructor:
         const bool linf = param.linf;
         // ...
         _graph.create_graph(graph_st.Nv,graph_st.Ng,graph_st.weights,
               /* ... */);
         // ...
         _graph.save_capacities();
         _work.resize(graph_st.Nv+graph_st.Ng+2);
         _weights.resize(graph_st.Ng);
         for (int i = 0; i<graph_st.Ng; ++i) _weights[i] = graph_st.weights[i];
// ...
      void inline reset() { _old_lambda = -1.0; };
// ... (unsupported variant):
         cerr << "Not implemented" << endl;
// ... prox: warm-started max-flow computation
         _graph.restore_capacities();
         _graph.set_weights(_weights.rawX(),lambda);
         if (_old_lambda < 0 || _resetflow) {
            _graph.reset_flow();
         } else {
            // ...
            if (lambda != _old_lambda)
               _graph.scale_flow(lambda/_old_lambda);
         }
         // ...
         _graph.proximal_operator(xc.rawX(),y.rawX(),_clever);
         // ...
         _graph.proximal_operator(x.rawX(),y.rawX(),_clever);
         // ...
         cerr << "duality_gap2 " << duality_gap2 << endl;
// ... eval, via the network-flow norm routine:
         return gr->norm(x.rawX(),_work.rawX(),_weights.rawX(),_linf);
// ... fenchel:
         if (this->_pos) output.thrsPos();
         // ...
         scal= mm > 1.0 ? T(1.0)/mm : 1.0;
// ... init_split_variables:
         for (int i = 0; i<splitted_w.n(); ++i) {
            splitted_w.refCol(i,col);
            // ...
         }
// ... prox_split: group-wise l2 soft-thresholding
         for (int i = 0; i<splitted_w.n(); ++i) {
            splitted_w.refCol(i,col);
            // ...
            const T nrm = tmp.nrm2();
            if (nrm > lambda*_weights[i]) {
               tmp.scal(T(1.0)-lambda*_weights[i]/nrm);
            }
         }
// ... eval (split form): weighted sum of group norms
         for (int i = 0; i<input.n(); ++i) {
            // ...
            sum += _linf ? _weights[i]*col.fmaxval() : _weights[i]*col.nrm2();
         }
// ... eval_weighted:
      T eval_weighted(const Vector<T>& input,
            const SpMatrix<T>& input_struct, const T* inner_weight) const {
         // ...
         for (int i = 0; i<input_struct.n(); ++i) {
            input_struct.refCol(i,col);
            // ...
            for (int j = 0; j<col.L(); ++j)
               tmp[j]=inner_weight[j]*input[col.r(j)];
            sum += _linf ? _weights[i]*tmp.fmaxval() : _weights[i]*tmp.nrm2();
         }
         // ...
      }
template <typename T>
// ... (definition elided in this listing)

// fragments of the hierarchical (tree-structured) group-lasso (TreeLasso)
template <typename T>
// ... constructor:
         const bool linf = param.linf;
// ... prox, via the sequential projections of the tree norm:
         if (this->_intercept) {
            // ...
         }
         _tree.proj(yp,_linf,lambda);
// ... fenchel:
         if (this->_intercept) {
            // ...
         }
         if (this->_pos) yp2.thrsPos();
         T mm = const_cast<Tree_Seq<T>* >(&_tree)->dual_norm_inf(yp2);
         scal= mm > 1.0 ? T(1.0)/mm : 1.0;
// ... sub_grad:
         const_cast<Tree_Seq<T>* >(&_tree)->sub_grad(input,output,_linf);
         if (this->_intercept) output[output.n()-1]=0;
// fragments of the tree-structured L0 regularizer (TreeLzero)
template <typename T>
// ... prox:
         if (this->_intercept) {
            // ...
         }
         _tree.proj_zero(yp,lambda);
// adapter that applies a matrix regularizer to a vector reshaped as a matrix
// (ProxMatToVec)
template <typename T, typename ProxMat>
// ... constructor:
         _proxy = new ProxMat(param2);
// ... prox:
         int size_vec=this->_intercept ? x.n()-1 : x.n();
         // ...
         _proxy->prox(mX,mY,lambda);
         if (this->_intercept) y[y.n()-1]=x[x.n()-1];
// ... eval:
         int size_vec=this->_intercept ? x.n()-1 : x.n();
         // ...
         return _proxy->eval(mX);
// ...
      virtual bool is_fenchel() const { return (_proxy->is_fenchel()); };
// ... fenchel:
         int size_vec=this->_intercept ? x.n()-1 : x.n();
         // ...
         _proxy->fenchel(mX,val,scal);
// applies a base regularizer group by group (GroupProx); groups are either
// given explicitly or taken as contiguous blocks of size _size_group
template <typename T, typename Reg>
// ... constructor:
         for (int i = 0; i<param.ngroups; ++i)
            num_groups=MAX(num_groups,param.groups[i]);
         _groups.resize(num_groups);
         for (int i = 0; i<num_groups; ++i) _groups[i]=new list_int();
         // ...
         _prox = new Reg(param2);
// ... destructor:
         for (int i = 0; i<_groups.size(); ++i) delete(_groups[i]);
// ... prox: gather each group, apply the base prox, scatter back
         const int maxn= this->_intercept ? x.n()-1 : x.n();
         if (!_groups.empty()) {
            for (int i = 0; i<_groups.size(); ++i) {
               // ...
               tmp[count++]=x[*it];
               // ...
               _prox->prox(tmp,tmp2,lambda);
               // ...
               y[*it]=tmp2[count++];
            }
         } else {
            const int p = _size_group;
            for (int i = 0; i+p-1<maxn; i+=p) {
               // ...
               _prox->prox(tmp,tmp2,lambda);
            }
         }
// ... eval:
         const int maxn= this->_intercept ? x.n()-1 : x.n();
         if (!_groups.empty()) {
            for (int i = 0; i<_groups.size(); ++i) {
               // ...
               tmp[count++]=x[*it];
               // ...
               sum+=_prox->eval(tmp);
            }
         } else {
            const int p = _size_group;
            for (int i = 0; i+p-1<maxn; i+=p) {
               // ...
               sum+=_prox->eval(tmp);
            }
         }
// ... fenchel:
         const int maxn= this->_intercept ? x.n()-1 : x.n();
         // ...
         if (!_groups.empty()) {
            for (int i = 0; i<_groups.size(); ++i) {
               // ...
               tmp[count++]=x[*it];
               // ...
               _prox->fenchel(tmp,val2,scal2);
               // ...
               scal=MIN(scal,scal2);
            }
         } else {
            const int p = _size_group;
            for (int i = 0; i+p-1<maxn; i+=p) {
               // ...
               _prox->fenchel(tmp,val2,scal2);
               // ...
               scal=MIN(scal,scal2);
            }
         }
// (four short wrapper definitions elided in this listing: the group-lasso
// combinations built on GroupProx)
template <typename T> /* ... */
template <typename T> /* ... */
template <typename T> /* ... */
template <typename T> /* ... */
// the row-wise mixed l1/l2 norm for matrices (MixedL1L2)
template <typename T>
// ... prox: l2 soft-thresholding of each row
         const int m = x.m();
         const int n = x.n();
         for (int i = 0; i<m; ++i) {
            if (norm[i] > lambda) {
               T scal = (norm[i]-lambda)/norm[i];
               for (int j = 0; j<n; ++j)
                  y[j*m+i] = x[j*m+i]*scal;
            }
         }
         // ...
         if (this->_intercept)
            for (int j = 0; j<n; ++j)
               y[j*m+m-1]=x[j*m+m-1];
// ... eval: sum of the row l2 norms
         return this->_intercept ? norm.asum() - norm[norm.n()-1] : norm.asum();
// ... fenchel:
         for (int i = 0; i<norm.n(); ++i) {
            if (norm[i] < 1e-20) norm[i]=T(1.0);
         }
         // ...
         if (this->_intercept) norm[norm.n()-1]=0;
         // ...
         scal= mm > 1.0 ? T(1.0)/mm : 1.0;
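// The row-wise shrinkage above is group soft-thresholding,
//    y_i = x_i * max(1 - lambda/||x_i||_2, 0)  for each row x_i,
// i.e. the proximal operator of the mixed norm sum_i ||x_i||_2.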
// the row-wise mixed l1/linf norm for matrices (MixedL1LINF)
template <typename T>
// ... prox: Moreau decomposition, row by row
         const int maxn= this->_intercept ? x.m()-1 : x.m();
         for (int i = 0; i< maxn; ++i) {
            for (int j = 0; j<x.n(); ++j)
               // ... copy row i into row
            row.l1project(row2,lambda);
            for (int j = 0; j<x.n(); ++j)
               y(i,j) = row[j]-row2[j];
         }
// ... eval: sum of the row linf norms
         return this->_intercept ? norm.asum() - norm[norm.n()-1] : norm.asum();
// ... fenchel:
         if (this->_intercept) norm[norm.n()-1]=0;
         // ...
         scal= mm > 1.0 ? T(1.0)/mm : 1.0;
// ... sub_grad: spread the subgradient over all entries attaining the max
         output.resize(input.m(),input.n());
         // ...
         const int maxm= this->_intercept ? input.m()-1 : input.m();
         for (int i = 0; i<maxm; ++i) {
            // ...
            T max=row.fmaxval();
            // ... count the entries attaining the maximum magnitude
            for (int j = 0; j<row.n(); ++j) {
               if (abs<T>(max-abs<T>(row[j])) < 1e-15)
                  // ...
            }
            T add = T(1.0)/num_max;
            for (int j = 0; j<row.n(); ++j) {
               if (abs<T>(max-abs<T>(row[j])) < 1e-15)
                  row[j] = row[j] > 0 ? add : -add;
            }
         }
// fragments of the trace-norm regularizer for matrices (TraceNorm)
template <typename T>
// ... constructor:
         cerr << "Trace-norm implementation is not compatible with intercepts; "
            "intercept deactivated" << endl;
         // ...
         cerr << "Trace-norm implementation is not compatible with "
            "non-negativity constraints" << endl;
// ... fenchel:
         scal= mm > 1.0 ? T(1.0)/mm : 1.0;
// fragments of the rank penalty for matrices (Rank)
template <typename T>
// ... constructor:
         cerr << "Rank implementation is not compatible with intercepts; "
            "intercept deactivated" << endl;
         // ...
         cerr << "Rank implementation is not compatible with non-negativity "
            "constraints" << endl;
// ... prox: keep leading singular values while their square exceeds lambda
         for (int i = 0; i<MIN(x.m(),x.n()); ++i) {
            // ...
            if (val*val < lambda) break;
            // ...
         }
// ... eval: count singular values above a tolerance
         if (x.m() > x.n()) {
            // ...
         }
         for (int i = 0; i<XtX.m(); ++i) {
            // ...
            if (val <= 1e-10) break;
            // ...
         }
// packs a list of graph paths into a sparse matrix, one path per column
template <typename T>
// ...
         nzmax+=it->nodes.size();
         // ...
         paths_mat.resize(n,paths.size(),nzmax);
         int* pB =paths_mat.pB();
         int* pE =paths_mat.pE();
         int* r =paths_mat.r();
         T* v =paths_mat.v();
         // ...
         for (/* ... */; it_path != paths.end(); ++it_path) {
            for (/* ... */; it != it_path->nodes.end(); ++it) {
               // ...
               v[count++]= it_path->flow;
            }
            pB[++count_col]=count;
         }
         // ...
         for (int i = 0; i<paths_mat.n(); ++i)
            sort(r,v,pB[i],pE[i]-1);
// fragments of the non-convex graph-path regularizer (GraphPathL0)
template <typename T>
// ... constructor:
         _graph.init_graph(graph);
// ... eval_paths:
         convert_paths_to_mat<T>(paths,paths_mat,_graph.n());
         // ...
         for (/* ... */; it_path != paths.end(); ++it_path) delete(*it_path);
// fragments of the convex graph-path regularizer (GraphPathConv)
template <typename T>
// ... constructor:
         _graph.init_graph(graph);
// ... eval_paths:
         convert_paths_to_mat<T>(paths,paths_mat,_graph.n());
         // ...
         for (/* ... */; it_path != paths.end(); ++it_path) delete(*it_path);
// ... eval_dual_norm_paths:
         paths.push_back(&path);
         // ...
         path.flow=double(1.0);
         convert_paths_to_mat<T>(paths,paths_mat,_graph.n());
// ... fenchel:
         scal= mm > 1.0 ? T(1.0)/mm : 1.0;
// applies a vector regularizer to each column of a matrix, in parallel (RegMat)
template <typename T, typename Reg>
// ... constructor:
         for (int i = 0; i<N; ++i)
            _regs[i]=new Reg(param);
// ... destructor:
         for (int i = 0; i<_N; ++i) {
            // ...
         }
// ... reset:
         for (int i = 0; i<_N; ++i) _regs[i]->reset();
// ... prox, column by column:
         int i;
#pragma omp parallel for private(i)
         for (i = 0; i<_N; ++i) {
            // ...
            _regs[i]->prox(colx,coly,lambda);
         }
         // ... (second variant):
#pragma omp parallel for private(i)
         for (i = 0; i<_N; ++i) {
            // ...
            _regs[i]->prox(colx,coly,lambda);
         }
// ... is_subgrad:
         for (int i = 0; i<_N; ++i)
            ok=ok && _regs[i]->is_subgrad();
// ... sub_grad:
         for (int i = 0; i<_N; ++i) {
            // ...
            _regs[i]->sub_grad(colx,coly);
         }
         // ...
         for (int i = 0; i<_N; ++i) {
            // ...
            _regs[i]->sub_grad(colx,coly);
         }
// ... eval:
#pragma omp parallel for private(i)
         for (i = 0; i<_N; ++i) {
            // ...
#pragma omp critical
            sum += _regs[i]->eval(col);
         }
// ... fenchel:
         for (int i = 0; i<_N; ++i) {
            // ...
            _regs[i]->fenchel(col,val2,scal2);
            scal=MIN(scal,scal2);
         }
// ... is_fenchel:
         for (int i = 0; i<_N; ++i)
            ok = ok && _regs[i]->is_fenchel();
template <typename T>
// ... (three short definitions elided in this listing)
template <typename T>
// ...
template <typename T>
// ...

// base class for matrix regularizers built on a GraphLasso applied to the
// vectorized matrix (SpecGraphMat)
      virtual void dummy() = 0;
// ...
      void inline reset() { _graphlasso->reset(); };
// ... prox:
         _graphlasso->prox(xv,yv,lambda);
// ... eval:
         return _graphlasso->eval(xv);
// ... fenchel:
         _graphlasso->fenchel(inv,val,scal);
// ... is_fenchel:
         return _graphlasso->is_fenchel();
// the l1/linf norm on both rows and columns (MixedL1LINFCR), encoded as a
// graph regularizer whose groups are the rows and the columns
template <typename T>
// ... constructor: build the group structure
         T* weights = new T[graph_st.Ng];
         for (int i = 0; i<n; ++i) weights[i]=T(1.0);
         for (int i = 0; i<m; ++i) weights[i+n]=l2dl1;
         // ...
         for (int i = 0; i<n; ++i) {
            // ...
            for (int j = 0; j<m; ++j)
               // ...
         }
         for (int i = 0; i<m; ++i) {
            // ...
            for (int j = 0; j<n; ++j)
               gv_ir[i*n+n*m+j]=j*m+i;
         }
         // ...
         graph_st.gv_jc=gv_jc;
         graph_st.gv_ir=gv_ir;
         // ...
         for (int i = 0; i< graph_st.Ng+1; ++i) gg_jc[i]=0;
         graph_st.gg_jc=gg_jc;
         graph_st.gg_ir=gg_ir;
// multitask tree regularization (TreeMult): replicates a tree structure over
// N tasks plus one shared copy whose weights are scaled by l1dl2
template <typename T>
// ... constructor:
         graph_st.Ng=Ng*(N+1);
         T* weights=new T[graph_st.Ng];
         for (int i = 0; i<N+1; ++i)
            for (int j = 0; j<Ng; ++j)
               weights[i*Ng+j]=tree_st.weights[j];
         for (int j = 0; j<Ng; ++j)
            weights[N*Ng+j]*=l1dl2;
         // ...
         for (int i = 0; i<Ng; ++i)
            // ...
         int nzmax_v=nzmax_tree*N;
         // ...
         for (int i = 0; i<N; ++i) {
            for (int j = 0; j<Ng; ++j) {
               gv_jc[i*Ng+j]=count;
               // ...
            }
         }
         for (int i = 0; i<Ng+1; ++i) {
            gv_jc[N*Ng+i]=count;
            // ...
         }
         graph_st.gv_jc=gv_jc;
         graph_st.gv_ir=gv_ir;
         // ...
         int nzmax2=nzmax_tree2*(N+1)+Ng*N;
         // ...
         for (int i = 0; i<N; ++i) {
            for (int j = 0; j<Ng; ++j) {
               gg_jc[i*Ng+j] = count;
               for (int k = tree_st.groups_jc[j];
                     k<static_cast<int>(tree_st.groups_jc[j+1]); ++k) {
                  gg_ir[count++] = i*Ng+tree_st.groups_ir[k];
               }
            }
         }
         for (int i = 0; i<Ng; ++i) {
            gg_jc[N*Ng+i] = count;
            for (int j = tree_st.groups_jc[i];
                  j<static_cast<int>(tree_st.groups_jc[i+1]); ++j) {
               gg_ir[count++] = N*Ng+tree_st.groups_ir[j];
            }
            for (int j = 0; j<N; ++j) {
               gg_ir[count++] = j*Ng+i;
            }
         }
         gg_jc[(N+1)*Ng]=nzmax2;
         // ...
         graph_st.gg_jc=gg_jc;
         graph_st.gg_ir=gg_ir;
// multitask graph regularization (GraphMult): same replication scheme as
// TreeMult, starting from a graph structure
template <typename T>
// ... constructor:
         T* weights=new T[g_st.Ng];
         for (int i = 0; i<N+1; ++i)
            for (int j = 0; j<Ng; ++j)
               weights[i*Ng+j]=graph_st.weights[j];
         for (int j = 0; j<Ng; ++j)
            weights[N*Ng+j]*=l1dl2;
         // ...
         int nzmax_graph=graph_st.gv_jc[Ng];
         int nzmax_v=nzmax_graph*N;
         // ...
         for (int i = 0; i<N; ++i) {
            for (int j = 0; j<Ng; ++j) {
               gv_jc[i*Ng+j]=count;
               for (int k = graph_st.gv_jc[j]; k<graph_st.gv_jc[j+1]; ++k) {
                  gv_ir[count++] =Nv*i+graph_st.gv_ir[k];
               }
            }
         }
         for (int i = 0; i<Ng+1; ++i) {
            gv_jc[N*Ng+i]=count;
            // ...
         }
         // ...
         int nzmax_tree2=graph_st.gg_jc[Ng];
         int nzmax2=nzmax_tree2*(N+1)+Ng*N;
         // ...
         for (int i = 0; i<N; ++i) {
            for (int j = 0; j<Ng; ++j) {
               gg_jc[i*Ng+j] = count;
               for (int k = graph_st.gg_jc[j]; k<graph_st.gg_jc[j+1]; ++k) {
                  gg_ir[count++] = i*Ng+graph_st.gg_ir[k];
               }
            }
         }
         for (int i = 0; i<Ng; ++i) {
            gg_jc[N*Ng+i] = count;
            for (int j = graph_st.gg_jc[i];
                  j<static_cast<int>(graph_st.gg_jc[i+1]); ++j) {
               gg_ir[count++] = N*Ng+graph_st.gg_ir[j];
            }
            for (int j = 0; j<N; ++j) {
               gg_ir[count++] = j*Ng+i;
            }
         }
         gg_jc[(N+1)*Ng]=nzmax2;
// computes the relative duality gap via Fenchel duality
template <typename T, typename D, typename E>
// ...
         cerr << "Error: no duality gap available" << endl;
         // ...
         T primal= loss.eval(x)+lambda*regularizer.eval(x);
         // ...
         grad2.scal(-T(1.0)/lambda);
         // ...
         regularizer.fenchel(grad2,val,scal);
         T dual = -lambda*val;
         // ...
         dual = MAX(dual,best_dual);
         T delta= primal == 0 ? 0 : (primal-dual)/abs<T>(primal);
         // ...
         cout << "Relative duality gap: " << delta << endl;
template <typename T, typename D, typename E>
// ... (overload of duality_gap elided in this listing)
// evaluates the duality gap for the graph regularization of one column
template <typename T>
// ...
         switch (param.loss) {
            // ...
            default: cerr << "Not implemented"; exit(1);
         }
         // ...
         alpha0.refCol(0,alpha0i);
         regularizer->reset();
         res[0]=loss->eval(alpha0i)+param.lambda*regularizer->eval(alpha0i);
         // ...
         delete(regularizer);
// appends one line of (iteration, primal, dual, time) to a log file
template <typename T>
void writeLog(const int iter, const T time, const T primal, const T dual,
      char* name) {
   std::ofstream f;
   f.flags(std::ios_base::scientific);
   f.open(name, ofstream::app);
   f << iter << " " << primal << " " << dual << " " << time << std::endl;
   f.close();
}
// subgradient descent with decreasing step sizes
template <typename T, typename D, typename E>
// ...
         for (it = 1; it<=param.max_it; ++it) {
            // log the objective every it0 iterations
            if (param.verbose && ((it % it0) == 0)) {
               T los=loss.eval(x) + lambda*regularizer.eval(x);
               // ...
               cout << "Iter: " << it << ", loss: " << los << ", time: "
                  << sec << " ";
               // ...
            }
            // ... step size a/(b+sqrt(it)) or a/(b+it)
            T step = param.sqrt_step ? param.a/(param.b+sqrt(static_cast<T>(it)))
                                     : param.a/(param.b+(static_cast<T>(it)));
            // ...
            x.add(sub_grad,-lambda*step);
            if (duality && ((it % it0) == 0)) {
               // ...
               optim_info[1]=best_dual;
               optim_info[2]=rel_duality_gap;
               if (rel_duality_gap < param.tol) break;
            }
         }
         // ...
         if ((it % it0) != 0 || !param.verbose) {
            T los=loss.eval(x) + lambda*regularizer.eval(x);
            // ...
         }
         optim_info[1]=best_dual;
         optim_info[2]=rel_duality_gap;
// ISTA: proximal-gradient descent with backtracking line search on L
template <typename T, typename D, typename E>
// ...
         D grad, tmp, prox, old;
         // ...
         for (it = 1; it<=param.max_it; ++it) {
            if (param.verbose && ((it % it0) == 0)) {
               T los=loss.eval(x) + lambda*regularizer.eval(x);
               // ...
               cout << "Iter: " << it << ", loss: " << los << ", time: "
                  << sec << ", L: " << L;
               // ...
            }
            // ... gradient step followed by the proximal operator
            prox.add(grad,-T(1.0)/L);
            regularizer.prox(prox,tmp,lambda/L);
            // ... backtracking: increase L until test_backtracking passes
            if (param.verbose && ((it % it0) == 0))
               // ...
            if (param.verbose && ((it % it0) == 0))
               // ...
            if ((it % it0) == 0) {
               // ...
               optim_info[1]=best_dual;
               optim_info[2]=rel_duality_gap;
               if (rel_duality_gap < param.tol) break;
            }
            // ... relative-change stopping criterion
            if (sqrt(old.nrm2sq()/MAX(EPSILON,x.nrm2sq())) < param.tol) break;
         }
         // ...
         T los=loss.eval(x) + lambda*regularizer.eval(x);
         // ...
         cout << "Iter: " << it << ", loss: " << los << ", time: " << sec
            << ", L: " << L << endl;
         // ...
         optim_info[1]=best_dual;
         optim_info[2]=rel_duality_gap;
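// One ISTA iteration above is the proximal-gradient update
//    x_{k+1} = prox_{(lambda/L)*psi}( x_k - (1/L)*grad f(x_k) ),
// with the step 1/L validated (and L increased) by test_backtracking.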
// FISTA: accelerated proximal-gradient descent
template <typename T, typename D, typename E>
// ...
         D y, grad, prox, tmp;
         // ...
         for (it = 1; it<=param.max_it; ++it) {
            if (param.verbose && ((it % it0) == 0)) {
               T los=loss.eval(x) + lambda*regularizer.eval(x);
               // ...
               cout << "Iter: " << it << ", loss: " << los << ", time: "
                  << sec << ", L: " << L;
               // ...
            }
            // ... proximal-gradient step at the extrapolated point y
            prox.add(grad,-T(1.0)/L);
            regularizer.prox(prox,tmp,lambda/L);
            // ... backtracking on L
            if (param.verbose && ((it % it0) == 0))
               // ...
            if (param.verbose && ((it % it0) == 0))
               // ...
            // momentum update of t and y
            t=(1.0+sqrt(1+4*t*t))/2;
            // ...
            y.add(prox,(1-old_t)/t);
            if ((it % it0) == 0) {
               // ...
               optim_info[1]=best_dual;
               optim_info[2]=rel_duality_gap;
               if (rel_duality_gap < param.tol) break;
            }
            // ...
            if (sqrt(prox.nrm2sq()/MAX(EPSILON,x.nrm2sq())) < param.tol) break;
         }
         // ...
         T los=loss.eval(x) + lambda*regularizer.eval(x);
         // ...
         cout << "Iter: " << it << ", loss: " << los << ", time: " << sec
            << ", L: " << L << endl;
         // ...
         optim_info[1]=best_dual;
         optim_info[2]=rel_duality_gap;
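// Hedged usage sketch (not part of the original file): how the components
// above compose for one Lasso problem. Constructor and parameter names follow
// the declarations in this file; the data set-up and values are illustrative
// assumptions.
template <typename T>
void example_lasso_fista(const Matrix<T>& D, const Vector<T>& b,
      const Vector<T>& x0, Vector<T>& x) {
   SqLoss<T> loss(D);            // f(x) = 0.5*||b - D*x||_2^2
   loss.init(b);
   ParamReg<T> param_reg;
   Lasso<T> reg(param_reg);      // psi(x) = ||x||_1
   ParamFISTA<T> param;
   param.lambda=T(0.1); param.max_it=200; param.tol=T(1e-4);
   Vector<T> optim_info;
   optim_info.resize(3);         // loss, best dual, relative gap (as used above)
   FISTA_Generic(loss, reg, x0, x, optim_info, param);
}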
// evaluates the augmented Lagrangian of the ADMM splitting
template <typename T>
// ...
         const int n_reg=reg.num_components();
         // ...
         T lagrangian = loss.eval_split(splitted_loss)
            + lambda*reg.eval_split(splitted_reg);
         // ...
         tmp.copy(splitted_loss);
         // ...
         T add =0.5*gamma*tmp.normFsq();
         // ...
         stmp.copy(splitted_reg);
         // ...
         lagrangian -= multi_loss.dot(tmp);
// w-update of ADMM: average the split variables and multipliers
template <typename T>
// ...
         w.add(mean,-T(1.0)/gamma);
         // ...
         number_occurences.set(splitted_w_loss.n());
         const int n_reg=splitted_w_reg.n();
         // ...
         for (int i = 0; i<n_reg; ++i) {
            splitted_w_reg.refCol(i,col);
            // ...
            for (int j = 0; j<col.L(); ++j)
               number_occurences[col.r(j)]++;
         }
         // ...
         for (int i = 0; i<n_reg; ++i) {
            multipliers_w_reg.refCol(i,col);
            // ...
         }
         w.add(mean,-T(1.0)/gamma);
         // ...
         w.div(number_occurences);
// weighted variant of the ADMM w-update
template <typename T>
// ...
         w.add(mean,-T(1.0)/gamma);
         // ...
         number_occurences.set(splitted_w_loss.n());
         const int n_reg=splitted_w_reg.n();
         // ...
         for (int i = 0; i<n_reg; ++i) {
            splitted_w_reg.refCol(i,col);
            for (int j = 0; j<col.L(); ++j) {
               mean[col.r(j)]+=inner_weights[j]*col.v(j);
               number_occurences[col.r(j)]+=inner_weights[j]*inner_weights[j];
            }
         }
         // ...
         for (int i = 0; i<n_reg; ++i) {
            multipliers_w_reg.refCol(i,col);
            for (int j = 0; j<col.L(); ++j)
               mean[col.r(j)]+=inner_weights[j]*col.v(j);
         }
         w.add(mean,-T(1.0)/gamma);
         // ...
         w.div(number_occurences);
// ADMM solver on the (loss, regularizer) splitting
template <typename T>
// ...
         const int n_reg=reg.num_components();
         // ...
         loss.init_split_variables(multipliers_w_loss);
         reg.init_split_variables(multipliers_w_reg);
         splitted_w_loss.copy(multipliers_w_loss);
         // ...
         splitted_w_reg.copy(multipliers_w_reg);
         // ...
         for (it = 0; it<param.max_it; ++it) {
            if (((it % it0) == 0)) {
               // ...
               los= loss.eval(w)+lambda*reg.eval_weighted(w,splitted_w_reg,
                     /* ... */);
               // ...
               los= loss.eval(w)+lambda*reg.eval(w);
               // ...
               cout << "Iter: " << it << ", loss: " << los << ", time: "
                  << sec << endl;
            }
            // ... prox step on the loss split variables
            splitted_w_loss.copy(multipliers_w_loss);
            splitted_w_loss.scal((1.0)/gamma);
            // ...
            loss.prox_split(splitted_w_loss,T(1.0)/gamma);
            // ... prox step on the regularizer split variables
            splitted_w_reg.copy(multipliers_w_reg);
            splitted_w_reg.scal((1.0)/gamma);
            // ...
            reg.prox_split(splitted_w_reg,lambda/gamma);
            // ... dual updates
            multipliers_w_loss.add(splitted_w_loss,-gamma);
            // ...
            multipliers_w_reg.add_direct(splitted_w_reg,-gamma);
            // ... (second half-iteration repeats the same updates)
            splitted_w_loss.copy(multipliers_w_loss);
            splitted_w_loss.scal((1.0)/gamma);
            loss.prox_split(splitted_w_loss,T(1.0)/gamma);
            splitted_w_reg.copy(multipliers_w_reg);
            splitted_w_reg.scal((1.0)/gamma);
            reg.prox_split(splitted_w_reg,lambda/gamma);
            multipliers_w_loss.add(splitted_w_loss,-gamma);
            multipliers_w_reg.add_direct(splitted_w_reg,-gamma);
            // ... stopping criterion on the relative objective decrease
            if ((it % it0) == 0) {
               if (it > 0 && (old_los-los)/old_los < param.tol) break;
               // ...
            }
         }
         // ...
         los= loss.eval(w)+lambda*reg.eval_weighted(w,splitted_w_reg,
               /* ... */);
         // ...
         los= loss.eval(w)+lambda*reg.eval(w);
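// Pattern of the ADMM loop above: each half-iteration takes a proximal step
// on the loss and regularizer split variables (prox_split, with steps 1/gamma
// and lambda/gamma), then moves the multipliers in the opposite direction
// (add with factor -gamma); w itself is refreshed by the averaging routines
// update_multipliers_ADMM / update_multipliers_weighted_ADMM above. (Sketch
// of the scheme as it appears in the fragments; details are elided.)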
// w-update of linearized ADMM
template <typename T>
// ...
         number_occurences.set(delta);
         const int n_reg=splitted_w_reg.n();
         // ...
         for (int i = 0; i<n_reg; ++i) {
            splitted_w_reg.refCol(i,col);
            // ...
            for (int j = 0; j<col.L(); ++j)
               number_occurences[col.r(j)]+=gamma;
         }
         // ...
         for (int i = 0; i<n_reg; ++i) {
            multipliers_w_reg.refCol(i,col);
            // ...
         }
         w.div(number_occurences);
// linearized ADMM solver
template <typename T>
// ...
         const int n_reg=reg.num_components();
         // ...
         reg.init_split_variables(dual_reg);
         primal_reg.copy(dual_reg);
         // ...
         loss.init_prim_var(prim_loss);
         dual_loss.copy(prim_loss);
         // ...
         for (it = 0; it<param.max_it; ++it) {
            if (((it % it0) == 0)) {
               los= loss.eval(w)+lambda*reg.eval(w);
               // ...
               cout << "Iter: " << it << ", loss: " << los << ", time: "
                  << sec << endl;
            }
            // ... primal updates
            loss.prox_prim_var(prim_loss,dual_loss,w,gamma);
            // ...
            primal_reg.copy(dual_reg);
            primal_reg.scal(-(1.0)/gamma);
            // ...
            reg.prox_split(primal_reg,lambda/gamma);
            // ...
            loss.compute_new_prim(w,prim_loss,dual_loss,gamma,param.delta);
            // ... dual updates
            loss.add_mult_design_matrix(w,dual_loss,-gamma);
            dual_loss.add(prim_loss,gamma);
            // ... stopping criterion
            if ((it % it0) == 0) {
               if (it > 0 && (old_los-los)/old_los < param.tol) break;
               // ...
            }
         }
         // ...
         los= loss.eval(w)+lambda*reg.eval(w);
// factory: regularizer for the ADMM solvers
template <typename T>
// ...
         switch (param.regul) {
            // ...
            default: cerr << "Not implemented"; exit(1);
         }

// factory: regularizer acting on vectors
template <typename T>
// ...
         switch (param.regul) {
            // ...
            default: cerr << "Not implemented"; exit(1);
         }

// factory: regularizer acting on matrices
template <typename T>
// ... (takes const int m, const int n, ...)
         switch (param.regul) {
            // ...
            default: cerr << "Not implemented"; exit(1);
         }
// prints which algorithm and options are active
template <typename T>
// ...
         cout << "Linearized ADMM algorithm" << endl;
         // ...
         cout << "ADMM algorithm" << endl;
         // ...
         cout << "ISTA algorithm" << endl;
         // ...
         cout << "Subgradient descent" << endl;
         // ...
         cout << "FISTA algorithm" << endl;
         // ...
         cout << "Projections with arc capacities" << endl;
         if (param.intercept) cout << "with intercept" << endl;
         // ...
         cout << "log activated " << endl;
         cout << param.logName << endl;
// runs (Lin)ADMM on each column of X in parallel
template <typename T>
// ...
         const int M = X.n();
         // ...
         int i1;
#pragma omp parallel for private(i1)
         for (i1 = 0; i1< M; ++i1) {
            // ...
            int numT=omp_get_thread_num();
            // ...
            losses[numT]->init(Xi);
            // ...
            alpha0.refCol(i1,alpha0i);
            // ...
            regularizers[numT]->reset();
            // ...
            optim_info.refCol(i1,optim_infoi);
            // ...
            LinADMM(*(losses[numT]),*(regularizers[numT]),alpha0i,alphai,
                  optim_infoi,param);
            // ...
            ADMM(*(losses[numT]),*(regularizers[numT]),alpha0i,alphai,
                  optim_infoi,param);
         }
// runs ISTA/FISTA/subgradient descent on each column of X (vector problems)
template <typename T>
// ...
         const int M = X.n();
         // ...
         const bool duality = losses[0]->is_fenchel()
            && regularizers[0]->is_fenchel();
         if (duality) cout << "Duality gap via Fenchel duality" << endl;
         if (!param.ista && param.subgrad && !regularizers[0]->is_subgrad()) {
            cerr << "Subgradient algorithm is not implemented for this "
               "combination of loss/regularization" << endl;
            // ...
         }
         cout << "Timings reported do not include loss and Fenchel "
            "evaluations" << endl;
         // ...
         int i1;
#pragma omp parallel for private(i1)
         for (i1 = 0; i1< M; ++i1) {
            // ...
            int numT=omp_get_thread_num();
            // ...
            losses[numT]->init(Xi);
            // ...
            alpha0.refCol(i1,alpha0i);
            // ...
            regularizers[numT]->reset();
            // ...
            optim_info.refCol(i1,optim_infoi);
            // ...
            ISTA_Generic(*(losses[numT]),*(regularizers[numT]),alpha0i,alphai,
                  optim_infoi,param);
            // ...
            FISTA_Generic(*(losses[numT]),*(regularizers[numT]),alpha0i,alphai,
                  optim_infoi,param);
         }
// same as solver_aux1, for matrix-valued problems
template <typename T>
// ...
         const int M = X.n();
         // ...
         const bool duality = losses[0]->is_fenchel()
            && regularizers[0]->is_fenchel();
         if (duality) cout << "Duality gap via Fenchel duality" << endl;
         // ...
         int i2;
#pragma omp parallel for private(i2)
         for (i2 = 0; i2< M; ++i2) {
            // ...
            int numT=omp_get_thread_num();
            // ...
            losses[numT]->init(Xi);
            const int N = alpha0.n()/X.n();
            // ...
            regularizers[numT]->reset();
            // ...
            optim_info.refCol(i2,optim_infoi);
            // ...
            ISTA_Generic(*(losses[numT]),*(regularizers[numT]),alpha0i,alphai,
                  optim_infoi,param);
            // ...
            FISTA_Generic(*(losses[numT]),*(regularizers[numT]),alpha0i,alphai,
                  optim_infoi,param);
         }
// main entry point: dispatches on the solver family, then on loss/regularizer
template <typename T>
// ... ADMM family:
         if (num_threads > 1) param.verbose=false;
         // ...
         switch (param.loss) {
            // ...
            default: cerr << "Not implemented" << endl; exit(1);
         }
         // ...
         solver_admm(X, alpha0, alpha, optim_info, regularizers,losses,param);
         // ...
         delete(regularizers[i]);
         // ...
         delete[](regularizers);
         // ...
         cerr << "Loss only implemented for ADMM" << endl;
// ... vector problems:
         if (num_threads > 1) param.verbose=false;
         // ...
         switch (param.loss) {
            // ...
            default: cerr << "Not implemented"; exit(1);
         }
         // ...
         solver_aux1(X, alpha0, alpha, optim_info, regularizers,losses,param);
         // ...
         delete(regularizers[i]);
         regularizers[i]=NULL;
         // ...
         delete[](regularizers);
// ... matrix problems:
         if (num_threads > 1) param.verbose=false;
         // ...
         const int N = alpha0.n()/X.n();
         // ...
         switch (param.loss) {
            // ...
            default: cerr << "Not implemented"; exit(1);
         }
         // ...
         solver_aux2(X, alpha0, alpha, optim_info, regularizers,losses,param);
         // ...
         delete(regularizers[i]);
         regularizers[i]=NULL;
         // ...
         delete[](regularizers);
// ... single-problem fallback:
         switch (param.loss) {
            // ...
            default: cerr << "Not implemented"; exit(1);
         }
         // ...
         if (duality) cout << "Duality gap via Fenchel duality" << endl;
         // ...
         optim_info.refCol(0,optim_infoi);
         // ...
         ISTA_Generic(*loss,*regularizer,alpha0,alpha,optim_infoi,param);
         // ...
         FISTA_Generic(*loss,*regularizer,alpha0,alpha,optim_infoi,param);
         // ...
         delete(regularizer);
// applies the proximal operator of the chosen regularizer to each column of
// alpha0 (no loss term)
template <typename T>
// ...
         cout << "Projections with arc capacities" << endl;
         if (param.intercept) cout << "with intercept" << endl;
         // ...
         const int M = alpha.n();
         // ...
         cerr << "Graph structure should be provided" << endl;
         // ...
         int i;
#pragma omp parallel for private(i)
         for (i = 0; i< M; ++i) {
            // ...
            int numT=omp_get_thread_num();
            // ...
            alpha0.refCol(i,alpha0i);
            // ...
            regularizers[numT]->reset();
            regularizers[numT]->prox(alpha0i,alphai,param.lambda);
            // ...
            val_loss[i]=regularizers[numT]->eval(alphai);
         }
         // ...
         delete(regularizers[i]);
         regularizers[i]=NULL;
         // ...
         delete[](regularizers);
// ... single-threaded fallback:
         regularizer->prox(alpha0,alpha,param.lambda);
         // ...
         val_loss[0]=regularizer->eval(alpha);
         delete(regularizer);
// evaluates the graph-path regularizer (and optionally returns the selected
// paths) for each column of alpha0
template <typename T>
// ...
         if (param.intercept) cout << "with intercept" << endl;
         if (param.eval_dual_norm) cout << "Evaluate the dual norm only" << endl;
         // ...
         const int M = alpha0.n();
         // ...
         regularizers[i]=setRegularizerVectors<T>(param,NULL,NULL,graph_path_st);
         // ...
         int i;
#pragma omp parallel for private(i)
         for (i = 0; i< M; ++i) {
            // ...
            int numT=omp_get_thread_num();
            // ...
            regularizers[numT]->reset();
            if (i==0 && paths) {
               // ...
               val_loss[i]=regularizers[numT]->eval_paths(alphai,*paths);
               // ...
            } else {
               val_loss[i]=regularizers[numT]->eval(alphai);
            }
         }
         // ...
         delete(regularizers[i]);
         regularizers[i]=NULL;
         // ...
         delete[](regularizers);
         // ...
         cerr << "Not implemented" << endl;