fista.h

/* Software SPAMS v2.1 - Copyright 2009-2011 Julien Mairal
 *
 * This file is part of SPAMS.
 *
 * SPAMS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * SPAMS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with SPAMS. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef FISTA_H
#define FISTA_H

#include <linalg.h>
#include <project.h>

namespace spams
{

namespace FISTA {

// The two enum definitions are hidden in the generated listing; they are
// reconstructed here from the string/print helpers below (enumerator order
// in the original header may differ).
enum loss_t { SQUARE, SQUARE_MISSING, LOG, LOGWEIGHT, HINGE, MULTILOG, CUR, INCORRECT_LOSS };
enum regul_t { L0, L1, RIDGE, L2, LINF, L1CONSTRAINT, ELASTICNET, FUSEDLASSO,
               GROUPLASSO_L2, GROUPLASSO_LINF, GROUPLASSO_L2_L1, GROUPLASSO_LINF_L1,
               L1L2, L1LINF, L1L2_L1, L1LINF_L1, TREE_L0, TREE_L2, TREE_LINF,
               GRAPH, GRAPH_RIDGE, GRAPH_L2, TREEMULT, GRAPHMULT, L1LINFCR,
               TRACE_NORM, TRACE_NORM_VEC, RANK, RANK_VEC,
               GRAPH_PATH_L0, GRAPH_PATH_CONV, NONE, INCORRECT_REG };

regul_t regul_from_string(char* regul) {
   if (strcmp(regul,"l0")==0) return L0;
   if (strcmp(regul,"l1")==0) return L1;
   if (strcmp(regul,"l2")==0) return RIDGE;
   if (strcmp(regul,"linf")==0) return LINF;
   if (strcmp(regul,"l2-not-squared")==0) return L2;
   if (strcmp(regul,"l1-constraint")==0) return L1CONSTRAINT;
   if (strcmp(regul,"elastic-net")==0) return ELASTICNET;
   if (strcmp(regul,"fused-lasso")==0) return FUSEDLASSO;
   if (strcmp(regul,"group-lasso-l2")==0) return GROUPLASSO_L2;
   if (strcmp(regul,"group-lasso-linf")==0) return GROUPLASSO_LINF;
   if (strcmp(regul,"sparse-group-lasso-l2")==0) return GROUPLASSO_L2_L1;
   if (strcmp(regul,"sparse-group-lasso-linf")==0) return GROUPLASSO_LINF_L1;
   if (strcmp(regul,"l1l2")==0) return L1L2;
   if (strcmp(regul,"l1linf")==0) return L1LINF;
   if (strcmp(regul,"l1l2+l1")==0) return L1L2_L1;
   if (strcmp(regul,"l1linf+l1")==0) return L1LINF_L1;
   if (strcmp(regul,"tree-l0")==0) return TREE_L0;
   if (strcmp(regul,"tree-l2")==0) return TREE_L2;
   if (strcmp(regul,"tree-linf")==0) return TREE_LINF;
   if (strcmp(regul,"graph")==0) return GRAPH;
   if (strcmp(regul,"graph-ridge")==0) return GRAPH_RIDGE;
   if (strcmp(regul,"graph-l2")==0) return GRAPH_L2;
   if (strcmp(regul,"multi-task-tree")==0) return TREEMULT;
   if (strcmp(regul,"multi-task-graph")==0) return GRAPHMULT;
   if (strcmp(regul,"l1linf-row-column")==0) return L1LINFCR;
   if (strcmp(regul,"trace-norm")==0) return TRACE_NORM;
   if (strcmp(regul,"trace-norm-vec")==0) return TRACE_NORM_VEC;
   if (strcmp(regul,"rank")==0) return RANK;
   if (strcmp(regul,"rank-vec")==0) return RANK_VEC;
   if (strcmp(regul,"graph-path-l0")==0) return GRAPH_PATH_L0;
   if (strcmp(regul,"graph-path-conv")==0) return GRAPH_PATH_CONV;
   if (strcmp(regul,"none")==0) return NONE;
   return INCORRECT_REG;
}

loss_t loss_from_string(char* loss) {
   if (strcmp(loss,"square")==0) return SQUARE;
   if (strcmp(loss,"square-missing")==0) return SQUARE_MISSING;
   if (strcmp(loss,"logistic")==0) return LOG;
   if (strcmp(loss,"weighted-logistic")==0) return LOGWEIGHT;
   if (strcmp(loss,"hinge")==0) return HINGE;
   if (strcmp(loss,"multi-logistic")==0) return MULTILOG;
   if (strcmp(loss,"cur")==0) return CUR;
   return INCORRECT_LOSS;
}
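
// Usage sketch (not part of the original header): both helpers map the
// user-facing option strings to enums and flag unknown strings; the cast is
// needed because they take a non-const char*.
//   regul_t r = regul_from_string(const_cast<char*>("elastic-net")); // ELASTICNET
//   loss_t  l = loss_from_string(const_cast<char*>("logistic"));     // LOG
//   if (r == INCORRECT_REG || l == INCORRECT_LOSS) { /* reject the option */ }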

void print_loss(const loss_t& loss) {
   switch (loss) {
      case SQUARE: cout << "Square loss" << endl; break;
      case SQUARE_MISSING: cout << "Square loss with missing data" << endl; break;
      case LOG: cout << "Logistic loss" << endl; break;
      case LOGWEIGHT: cout << "Weighted Logistic loss" << endl; break;
      case HINGE: cout << "Hinge loss" << endl; break;
      case MULTILOG: cout << "Multiclass logistic Loss" << endl; break;
      case CUR: cout << "CUR decomposition" << endl; break;
      default: cerr << "Not implemented" << endl;
   }
};

bool loss_for_matrices(const loss_t& loss) {
   return loss==MULTILOG || loss==CUR;
}

void print_regul(const regul_t& regul) {
   switch (regul) {
      case L0: cout << "L0 regularization" << endl; break;
      case L1: cout << "L1 regularization" << endl; break;
      case RIDGE: cout << "L2-squared regularization" << endl; break;
      case L2: cout << "L2-not-squared regularization" << endl; break;
      case L1CONSTRAINT: cout << "L1 constraint regularization" << endl; break;
      case LINF: cout << "Linf regularization" << endl; break;
      case ELASTICNET: cout << "Elastic-net regularization" << endl; break;
      case FUSEDLASSO: cout << "Fused Lasso or total variation regularization" << endl; break;
      case GROUPLASSO_L2: cout << "Group Lasso L2" << endl; break;
      case GROUPLASSO_LINF: cout << "Group Lasso LINF" << endl; break;
      case GROUPLASSO_L2_L1: cout << "Group Lasso L2 + L1" << endl; break;
      case GROUPLASSO_LINF_L1: cout << "Group Lasso LINF + L1" << endl; break;
      case L1L2: cout << "L1L2 regularization" << endl; break;
      case L1LINF: cout << "L1LINF regularization" << endl; break;
      case TRACE_NORM: cout << "Trace Norm regularization" << endl; break;
      case TRACE_NORM_VEC: cout << "Trace Norm regularization for vectors" << endl; break;
      case RANK: cout << "Rank regularization" << endl; break;
      case RANK_VEC: cout << "Rank regularization for vectors" << endl; break;
      case L1L2_L1: cout << "L1L2 regularization + L1" << endl; break;
      case L1LINF_L1: cout << "L1LINF regularization + L1" << endl; break;
      case TREE_L0: cout << "Tree-L0 regularization" << endl; break;
      case TREE_L2: cout << "Tree-L2 regularization" << endl; break;
      case TREE_LINF: cout << "Tree-Linf regularization" << endl; break;
      case GRAPH: cout << "Graph regularization" << endl; break;
      case GRAPH_RIDGE: cout << "Graph+ridge regularization" << endl; break;
      case GRAPH_L2: cout << "Graph regularization with l2" << endl; break;
      case TREEMULT: cout << "multitask tree regularization" << endl; break;
      case GRAPHMULT: cout << "multitask graph regularization" << endl; break;
      case L1LINFCR: cout << "L1LINF regularization on rows and columns" << endl; break;
      case GRAPH_PATH_L0: cout << "Graph path non-convex regularization" << endl; break;
      case GRAPH_PATH_CONV: cout << "Graph path convex regularization" << endl; break;
      case NONE: cout << "No regularization" << endl; break;
      default: cerr << "Not implemented" << endl;
   }
};

bool regul_for_matrices(const regul_t& regul) {
   return regul==L1L2 || regul==L1LINF || regul==L1L2_L1 || regul==L1LINF_L1
      || regul==TREEMULT || regul==GRAPHMULT || regul==L1LINFCR ||
      regul==TRACE_NORM || regul==RANK;
}

template <typename T> struct ParamFISTA {
   ParamFISTA() { num_threads=1; max_it=100; L0=0.1; gamma=1.5; tol=1e-10;
      it0=10; max_iter_backtracking=1000; loss=SQUARE; compute_gram=false; admm=false; lin_admm=false;
      intercept=false; regul=RIDGE; resetflow=false; delta=0; lambda2=0; lambda3=0; verbose=false;
      pos=false; clever=true; a=1.0; b=0.0; c=1.0;
      log=false; logName=NULL; ista=false; subgrad=false;
      length_names=30;
      name_regul=new char[length_names];
      name_loss=new char[length_names];
      is_inner_weights=false;
      inner_weights=NULL;
      eval=false;
      size_group=1;
      sqrt_step=true;
      transpose=false;
      fixed_step=false;
      copied=false;
      eval_dual_norm=false;
      groups=NULL;
      ngroups=0;
   }
   ~ParamFISTA() {   // destructor header hidden in the generated listing; restored
      if (!copied) {
         delete[](name_regul);
         delete[](name_loss);
      }
   };
   // Several member declarations are hidden in the generated listing; they
   // are restored below from the constructor and from how the struct is used.
   int num_threads;
   int max_it;
   T L0;
   T gamma;
   T lambda;
   T delta;
   T lambda2;
   T lambda3;
   T a;
   T b;
   T c;
   T tol;
   int it0;
   int max_iter_backtracking;
   loss_t loss;
   bool compute_gram;
   bool lin_admm;
   bool admm;
   bool intercept;
   bool resetflow;
   regul_t regul;
   char* name_regul;
   char* name_loss;
   int length_names;
   bool verbose;
   bool pos;
   bool clever;
   bool log;
   bool ista;
   bool copied;
   bool subgrad;
   char* logName;
   bool is_inner_weights;
   T* inner_weights;
   bool eval;
   int size_group;
   bool sqrt_step;
   bool transpose;
   bool fixed_step;
   bool eval_dual_norm;
   int* groups;
   int ngroups;
};
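
// Example (sketch): a typical configuration for an l1-regularized square
// loss; every field not set here keeps the default from the constructor.
//   ParamFISTA<double> param;
//   param.loss   = SQUARE;
//   param.regul  = L1;
//   param.lambda = 0.1;    // regularization weight
//   param.max_it = 200;
//   param.tol    = 1e-6;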

template <typename T> struct ParamReg {
   ParamReg() { size_group=1; lambda2d1=0; lambda=0; lambda3d1=0; pos=false; intercept=false; num_cols=1; graph_st=NULL; tree_st=NULL;
      graph_path_st=NULL; resetflow=false; clever=false; linf=true; transpose=false; ngroups=0;
      groups=NULL; };
   // Member declarations hidden in the generated listing are restored from
   // the constructor; the type of graph_path_st is assumed by analogy with
   // the two struct pointers dereferenced further down in this file.
   T lambda;
   T lambda2d1;
   T lambda3d1;
   int size_group;
   GraphStruct<T>* graph_st;
   TreeStruct<T>* tree_st;
   GraphPathStruct<T>* graph_path_st;
   bool pos;
   bool intercept;
   int num_cols;
   bool resetflow;
   bool clever;
   bool linf;
   bool transpose;
   int ngroups;
   int* groups;
};

template <typename T>
bool param_for_admm(const ParamFISTA<T>& param) {
   return (param.admm) && (param.loss==SQUARE || param.loss == HINGE)
      && (param.regul==GRAPH_L2 || param.regul==GRAPH || param.regul == NONE);
};


template <typename T, typename F = Matrix<T>, typename D = Vector<T> ,
         typename E = Vector<T> >
class SplittingFunction {   // class header and default constructor hidden in the listing; restored
   public:
      SplittingFunction() { };
      virtual ~SplittingFunction() { };

      virtual void init(const E& y) { };
      virtual T eval(const D& input) const = 0;
      virtual void reset() { };
      virtual T eval_split(const F& input) const = 0;
      virtual T eval_weighted(const D& input,const F& input_struct, const T* weights) const { return this->eval(input);};
      virtual int num_components() const = 0;
      virtual void prox_split(F& splitted_w, const T lambda) const = 0;
      virtual void init_split_variables(F& splitted_w) const = 0;
      virtual void init_prim_var(E& prim_var) const { };
      virtual void prox_prim_var(E& out,const E& dual_var, const E& prim_var, const T gamma) const { };
      virtual void compute_new_prim(E& prim, const E& prim_var, const E& dual_var, const T gamma, const T delta) const { };
      virtual void add_mult_design_matrix(const E& prim, E& out, const T fact) const { };

   private:
      // copy construction/assignment disabled, as in the other classes below
      explicit SplittingFunction<T,F,D,E>(const SplittingFunction<T,F,D,E>& dict);
      SplittingFunction<T,F,D,E>& operator=(const SplittingFunction<T,F,D,E>& dict);
};

template <typename T, typename D = Vector<T> , typename E = Vector<T> >
class Loss {
   public:
      Loss() { };
      virtual ~Loss() { };

      virtual void init(const E& input) = 0;
      virtual T eval(const D& input) const = 0;
      virtual void grad(const D& input, D& output) const = 0;
      virtual inline bool test_backtracking(const D& y, const D& grad, const D& prox, const T L) const {
         D tmp;
         tmp.copy(prox);
         tmp.sub(y);
         return (this->eval(prox) <= this->eval(y) + grad.dot(tmp) + 0.5*L*tmp.nrm2sq());
      };
      virtual T fenchel(const D& input) const = 0;
      virtual bool is_fenchel() const { return true; };
      virtual void var_fenchel(const D& x, D& grad1, D& grad2,
            const bool intercept = false) const = 0;

   private:
      explicit Loss<T,D,E>(const Loss<T,D,E>& dict);
      Loss<T,D,E>& operator=(const Loss<T,D,E>& dict);
};
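
// How Loss and the Regularizer class below fit together (sketch; the actual
// solvers appear later in this file): one proximal-gradient step with the
// backtracking test defined above, for a current Lipschitz estimate L.
//   loss.grad(y, grad);                  // gradient at the extrapolated point y
//   w = y - grad/L; reg.prox(w, w, lambda/L);
//   while (!loss.test_backtracking(y, grad, w, L)) {
//      L *= gamma;                       // increase L and redo the prox step
//      w = y - grad/L; reg.prox(w, w, lambda/L);
//   }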

template <typename T>
class SqLossMissing : public Loss<T> {
   public:
      SqLossMissing(const AbstractMatrixB<T>& D) : _D(&D) { };
      virtual ~SqLossMissing() { };

      inline void init(const Vector<T>& x) {
         _x.copy(x);
         _missingvalues.clear();
         for (int i = 0; i<_x.n(); ++i) {
            if (isnan(_x[i])) {
               _x[i]=0;
               _missingvalues.push_back(i);
            }
         }
      };

      inline T eval(const Vector<T>& alpha) const {
         Vector<T> residual;
         residual.copy(_x);
         SpVector<T> spalpha(alpha.n());
         alpha.toSparse(spalpha);
         _D->mult(spalpha,residual,T(-1.0),T(1.0));
         for (ListIterator<int> it = _missingvalues.begin();
               it != _missingvalues.end(); ++it)
            residual[*it]=0;
         return 0.5*residual.nrm2sq();
      }

      inline void grad(const Vector<T>& alpha, Vector<T>& grad) const {
         Vector<T> residual;
         residual.copy(_x);
         SpVector<T> spalpha(alpha.n());
         alpha.toSparse(spalpha);
         _D->mult(spalpha,residual,T(-1.0),T(1.0));
         for (ListIterator<int> it = _missingvalues.begin();
               it != _missingvalues.end(); ++it)
            residual[*it]=0;
         _D->multTrans(residual,grad,T(-1.0),T(0.0));
      };
      virtual T fenchel(const Vector<T>& input) const {
         return 0.5*input.nrm2sq()+input.dot(_x);
      };
      virtual void var_fenchel(const Vector<T>& x,
            Vector<T>& grad1, Vector<T>& grad2,
            const bool intercept) const {
         grad1.copy(_x);
         SpVector<T> spalpha(x.n());
         x.toSparse(spalpha);
         _D->mult(spalpha,grad1,T(1.0),T(-1.0));
         for (ListIterator<int> it = _missingvalues.begin();
               it != _missingvalues.end(); ++it)
            grad1[*it]=0;
         if (intercept)
            grad1.whiten(1); // remove the mean of grad1
         _D->multTrans(grad1,grad2,T(1.0),T(0.0));
      };

   private:
      explicit SqLossMissing<T>(const SqLossMissing<T>& dict);
      SqLossMissing<T>& operator=(const SqLossMissing<T>& dict);
      // data members hidden in the listing, restored from their uses above:
      const AbstractMatrixB<T>* _D;
      Vector<T> _x;
      List<int> _missingvalues;
};

template <typename T>
class SqLoss : public Loss<T>, public SplittingFunction<T> {
   public:
      SqLoss(const AbstractMatrixB<T>& D) : _D(&D) { _compute_gram = false; };
      SqLoss(const AbstractMatrixB<T>& D, const Matrix<T>& G) : _D(&D), _G(&G) { _compute_gram = true; };
      virtual ~SqLoss() { };

      inline void init(const Vector<T>& x) {
         _x.copy(x);
         if (_compute_gram) {
            _D->multTrans(x,_DtX);
         }
      };

      inline T eval(const Vector<T>& alpha) const {
         Vector<T> residual;
         residual.copy(_x);
         SpVector<T> spalpha(alpha.n());
         alpha.toSparse(spalpha);
         if (spalpha.L() < alpha.n()/2) {
            _D->mult(spalpha,residual,T(-1.0),T(1.0));
         } else {
            _D->mult(alpha,residual,T(-1.0),T(1.0));
         }
         return 0.5*residual.nrm2sq();
      }
      inline void grad(const Vector<T>& alpha, Vector<T>& grad) const {
         if (_compute_gram) {
            grad.copy(_DtX);
            SpVector<T> spalpha(alpha.n());
            alpha.toSparse(spalpha);
            _G->mult(spalpha,grad,T(1.0),-T(1.0));
         } else {
            Vector<T> residual;
            residual.copy(_x);
            SpVector<T> spalpha(alpha.n());
            alpha.toSparse(spalpha);
            _D->mult(spalpha,residual,T(-1.0),T(1.0));
            _D->multTrans(residual,grad,T(-1.0),T(0.0));
         }
      };
      virtual inline bool test_backtracking(const Vector<T>& y, const Vector<T>& grad, const Vector<T>& prox, const T L) const {
         Vector<T> tmp;
         tmp.copy(y);
         tmp.sub(prox);
         SpVector<T> sptmp(tmp.n());
         tmp.toSparse(sptmp);
         if (_compute_gram) {
            return (_G->quad(sptmp) <= L*sptmp.nrm2sq());
         } else {
            Vector<T> tmp2(_D->m());
            _D->mult(sptmp,tmp2);
            return (tmp2.nrm2sq() <= L*sptmp.nrm2sq());
         }
      };
      virtual T fenchel(const Vector<T>& input) const {
         return 0.5*input.nrm2sq()+input.dot(_x);
      };
      virtual void var_fenchel(const Vector<T>& x,
            Vector<T>& grad1, Vector<T>& grad2,
            const bool intercept) const {
         grad1.copy(_x);
         SpVector<T> spalpha(x.n());
         x.toSparse(spalpha);
         _D->mult(spalpha,grad1,T(1.0),T(-1.0));
         if (intercept)
            grad1.whiten(1); // remove the mean of grad1
         _D->multTrans(grad1,grad2,T(1.0),T(0.0));
      };
      inline int num_components() const { return _D->m();};

      inline void prox_split(Matrix<T>& splitted_w, const T lambda) const {
         const int n = this->num_components();
         Vector<T> row(_D->n());
         Vector<T> wi;
         for (int i = 0; i<n; ++i) {
            _D->copyRow(i,row);
            splitted_w.refCol(i,wi);
            const T xtw=row.dot(wi);
            const T xtx=row.dot(row);
            wi.add(row,-lambda*(xtw-_x[i])/(T(1.0)+lambda*xtx));
         }
      };
      inline T eval_split(const Matrix<T>& input) const {
         const int n = this->num_components();
         Vector<T> row(_D->n());
         Vector<T> wi;
         T sum = 0;
         for (int i = 0; i<n; ++i) {
            _D->copyRow(i,row);
            input.refCol(i,wi);
            const T xtw=row.dot(wi);
            sum += 0.5*(_x[i]-xtw)*(_x[i]-xtw);
         }
         return sum;
      };
      inline void init_split_variables(Matrix<T>& splitted_w) const {
         splitted_w.resize(_D->n(),_D->m());
         splitted_w.setZeros();
      };
      inline void init_prim_var(Vector<T>& prim_var) const {
         prim_var.resize(_D->m());
         prim_var.setZeros();
      }
      virtual void prox_prim_var(Vector<T>& out,const Vector<T>& dual_var,
            const Vector<T>& prim_var, const T c) const {
         const T gamma=T(1.0)/c;
         out.copy(dual_var);
         out.scal(-gamma);
         _D->mult(prim_var,out,T(1.0),T(1.0));
         out.add(_x,gamma);
         out.scal(T(1.0)/(T(1.0)+gamma));
      };
      inline void compute_new_prim(Vector<T>& prim, const Vector<T>& prim_var,
            const Vector<T>& dual_var, const T gamma, const T delta) const {
         Vector<T> tmp;
         _D->mult(prim,tmp);
         tmp.scal(-gamma);
         tmp.add(prim_var);
         tmp.add(dual_var,gamma);
         _D->multTrans(tmp,prim,T(1.0),delta);
      };
      inline void add_mult_design_matrix(const Vector<T>& prim,
            Vector<T>& out, const T fact) const {
         _D->mult(prim,out,fact,T(1.0));
      };

   private:
      explicit SqLoss<T>(const SqLoss<T>& dict);
      SqLoss<T>& operator=(const SqLoss<T>& dict);
      // data members hidden in the listing, restored from their uses above:
      const AbstractMatrixB<T>* _D;
      Vector<T> _x;
      Vector<T> _DtX;
      const Matrix<T>* _G;
      bool _compute_gram;
};
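
// Note on the _compute_gram branch of SqLoss::grad above: with G = D'D and
// DtX = D'x precomputed, the gradient of 0.5*||x - D*alpha||^2 is
//   grad = G*alpha - D'x,
// which is exactly what grad.copy(_DtX); _G->mult(spalpha,grad,1,-1)
// evaluates; this avoids touching D and the data x at every iteration.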

template <typename T>
class HingeLoss : public SplittingFunction<T > {
   public:
      HingeLoss(const AbstractMatrixB<T>& X) : _X(&X) { };
      virtual ~HingeLoss() { };

      inline void init(const Vector<T>& y) {
         _y.copy(y);
      };
      inline T eval(const Vector<T>& w) const {
         Vector<T> tmp(_X->m());
         SpVector<T> spw(w.n());
         w.toSparse(spw);
         _X->mult(spw,tmp);
         tmp.mult(_y,tmp);
         tmp.neg();
         tmp.add(T(1.0));
         tmp.thrsPos();
         return tmp.sum()/tmp.n();
      };
      virtual T eval_split(const Matrix<T>& input) const {
         Vector<T> row(_X->n());
         Vector<T> wi;
         T sum = 0;
         for (int i = 0; i<_X->n(); ++i) {
            _X->copyRow(i,row);
            input.refCol(i,wi);
            sum += MAX(0,T(1.0)-_y[i]*row.dot(wi));
         }
         return sum/_X->m();
      };
      virtual int num_components() const { return _X->m(); };
      inline void init_split_variables(Matrix<T>& splitted_w) const {
         splitted_w.resize(_X->n(),_X->m());
         splitted_w.setZeros();
      };
      inline void init_prim_var(Vector<T>& prim_var) const {
         prim_var.resize(_X->m());
         prim_var.setZeros();
      }
      inline void prox_prim_var(Vector<T>& out,const Vector<T>& dual_var,
            const Vector<T>& prim_var, const T lambda, const T c) const {
         const T gamma=T(1.0)/c;
         out.copy(dual_var);
         out.scal(-gamma);
         _X->mult(prim_var,out,T(1.0),T(1.0));
         const T thrs=T(1.0)-gamma;
         for (int i = 0; i<out.n(); ++i) {
            const T y = _y[i]*out[i];
            if (y < thrs) {
               out[i]+=_y[i]*gamma;
            } else if (y < T(1.0)) {
               out[i]=_y[i];
            }
         }
      }
      inline void compute_new_prim(Vector<T>& prim, const Vector<T>& prim_var,
            const Vector<T>& dual_var, const T gamma, const T delta) const {
         Vector<T> tmp;
         _X->mult(prim,tmp);
         tmp.scal(-gamma);
         tmp.add(prim_var);
         tmp.add(dual_var,gamma);
         _X->multTrans(tmp,prim,T(1.0),delta);
      };
      inline void add_mult_design_matrix(const Vector<T>& prim, Vector<T>& out,
            const T fact) const {
         _X->mult(prim,out,fact,T(1.0));
      };
      inline void prox_split(Matrix<T>& splitted_w, const T lambda) const {
         const int n = this->num_components();
         Vector<T> row(_X->n());
         Vector<T> wi;
         for (int i = 0; i<n; ++i) {
            _X->copyRow(i,row);
            splitted_w.refCol(i,wi);
            const T xtw=row.dot(wi);
            const T xtx=row.dot(row);
            const T diff=1-_y[i]*xtw;
            if (diff > lambda*xtx) {
               wi.add(row,lambda*_y[i]);
            } else if (diff > 0) {
               wi.add(row,_y[i]*diff/xtx);
            }
         }
      };

   private:
      explicit HingeLoss<T>(const HingeLoss<T>& dict);
      HingeLoss<T>& operator=(const HingeLoss<T>& dict);
      // data members hidden in the listing, restored from their uses above:
      const AbstractMatrixB<T>* _X;
      Vector<T> _y;
};

template <typename T, bool weighted = false>
class LogLoss : public Loss<T> {
   public:
      LogLoss(const AbstractMatrixB<T>& X) : _X(&X) { };
      virtual ~LogLoss() { };

      inline void init(const Vector<T>& y) {
         _y.copy(y);
         if (weighted) {
            int countpos=0;
            for (int i = 0; i<y.n(); ++i)
               if (y[i]>0) countpos++;
            _weightpos=T(1.0)/countpos;
            _weightneg=T(1.0)/MAX(1e-3,(y.n()-countpos));
         }
      };

      inline T eval(const Vector<T>& w) const {
         Vector<T> tmp(_X->m());
         SpVector<T> spw(w.n());
         w.toSparse(spw);
         _X->mult(spw,tmp);
         tmp.mult(_y,tmp);
         tmp.neg();
         tmp.logexp();
         if (weighted) {
            T sum=0;
            for (int i = 0; i<tmp.n(); ++i)
               sum+= _y[i]>0 ? _weightpos*tmp[i] : _weightneg*tmp[i];
            return sum;
         } else {
            return tmp.sum()/tmp.n();
         }
      };
      inline void grad(const Vector<T>& w, Vector<T>& grad) const {
         Vector<T> tmp(_X->m());
         SpVector<T> spw(w.n());
         w.toSparse(spw);
         _X->mult(spw,tmp);
         tmp.mult(_y,tmp);
         tmp.exp();
         tmp.add(T(1.0));
         tmp.inv();
         tmp.mult(_y,tmp);
         tmp.neg();
         if (weighted) {
            for (int i = 0; i<tmp.n(); ++i)
               tmp[i] *= _y[i] > 0 ? _weightpos : _weightneg;
            _X->multTrans(tmp,grad);
         } else {
            _X->multTrans(tmp,grad);
            grad.scal(T(1.0)/_X->m());
         }
      };
      virtual bool is_fenchel() const { return !weighted; };
      virtual T fenchel(const Vector<T>& input) const {
         T sum = 0;
         if (weighted) {
            // TODO : check that
            for (int i = 0; i<input.n(); ++i) {
               T prod = _y[i]>0 ? input[i]/_weightpos : -input[i]/_weightneg;
               sum += _y[i] >0 ? _weightpos*(xlogx(1.0+prod)+xlogx(-prod)) : _weightneg*(xlogx(1.0+prod)+xlogx(-prod));
            }
            return sum;
         } else {
            for (int i = 0; i<input.n(); ++i) {
               T prod = _y[i]*input[i]*_X->m();
               sum += xlogx(1.0+prod)+xlogx(-prod);
            }
            return sum/_X->m();
         }
      };
      virtual void var_fenchel(const Vector<T>& w, Vector<T>& grad1, Vector<T>& grad2, const bool intercept) const {
         grad1.resize(_X->m());
         SpVector<T> spw(w.n());
         w.toSparse(spw);
         _X->mult(spw,grad1);
         grad1.mult(_y,grad1);
         grad1.exp();
         grad1.add(T(1.0));
         grad1.inv();
         grad1.mult(_y,grad1);
         grad1.neg(); // -gradient (no normalization)
         if (intercept)
            grad1.project_sft_binary(_y);
         grad1.scal(T(1.0)/_X->m());
         _X->multTrans(grad1,grad2);
      };
   private:
      explicit LogLoss<T,weighted>(const LogLoss<T,weighted>& dict);
      LogLoss<T,weighted>& operator=(const LogLoss<T,weighted>& dict);
      // data members hidden in the listing, restored from their uses above:
      const AbstractMatrixB<T>* _X;
      Vector<T> _y;
      T _weightpos;
      T _weightneg;
};
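
// The loss implemented by LogLoss::eval above, for labels y_i in {-1,+1}:
//   f(w) = (1/m) * sum_i log(1 + exp(-y_i * x_i'w));
// in the weighted variant the mean is replaced by the per-class weights
// _weightpos and _weightneg computed in init().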

template <typename T>
class MultiLogLoss : public Loss<T, Matrix<T> > {
   public:
      MultiLogLoss(const AbstractMatrixB<T>& X) : _X(&X) { };

      virtual ~MultiLogLoss() { };

      inline void init(const Vector<T>& y) {
         _y.resize(y.n());
         for (int i = 0; i<y.n(); ++i)
            _y[i] = static_cast<int>(y[i]);
      };
      inline T eval(const Matrix<T>& W) const {
         Matrix<T> tmp;
         _X->multSwitch(W,tmp,true,true);
         //W.mult(*_X,tmp,true,true);
         Vector<T> col;
         T sum=0;
         for (int i = 0; i<tmp.n(); ++i) {
            tmp.refCol(i,col);
            sum+=col.softmax(_y[i]);
         }
         return sum/tmp.n();
      };
      inline void grad(const Matrix<T>& W, Matrix<T>& grad) const {
         Matrix<T> tmp;
         _X->multSwitch(W,tmp,true,true);
         //W.mult(*_X,tmp,true,true);
         Vector<T> col;
         grad.resize(W.m(),W.n());
         for (int i = 0; i<tmp.n(); ++i) {
            tmp.refCol(i,col);
            col.add(-col[_y[i]]);
            bool overweight=false;
            for (int j = 0; j<col.n(); ++j)
               if (col[j] > 1e2)
                  overweight=true;
            if (overweight) {
               const int ind = col.fmax();
               col.setZeros();
               col[ind]=1;
            } else {
               col.exp();
               col.scal(T(1.0)/col.sum());
               col.scal(T(1.0)/col.sum());
            }
            col[_y[i]] = col[_y[i]]-T(1.0);
         }
         _X->mult(tmp,grad,true,true);
         grad.scal(T(1.0)/_X->m());
      };
      virtual T fenchel(const Matrix<T>& input) const {
         T sum = 0;
         Vector<T> col;
         for (int i = 0; i<input.n(); ++i) {
            const int clas = _y[i];
            input.refCol(i,col);
            for (int j = 0; j<input.m(); ++j) {
               if (j == clas) {
                  sum += xlogx(_X->m()*input[i*input.m()+j]+1.0);
               } else {
                  sum += xlogx(_X->m()*input[i*input.m()+j]);
               }
            }
         }
         return sum/_X->m();
      };
      virtual void var_fenchel(const Matrix<T>& W, Matrix<T>& grad1, Matrix<T>& grad2, const bool intercept) const {
         _X->multSwitch(W,grad1,true,true);
         //W.mult(*_X,grad1,true,true);
         Vector<T> col;
         for (int i = 0; i<grad1.n(); ++i) {
            grad1.refCol(i,col);
            col.add(-col[_y[i]]);
            bool overweight=false;
            for (int j = 0; j<col.n(); ++j)
               if (col[j] > 1e2)
                  overweight=true;
            if (overweight) {
               const int ind = col.fmax();
               col.setZeros();
               col[ind]=1;
            } else {
               col.exp();
               col.scal(T(1.0)/col.sum());
               col.scal(T(1.0)/col.sum());
            }
            col[_y[i]] = col[_y[i]]-T(1.0);
         }
         if (intercept) {
            Vector<T> row;
            for (int i = 0; i<grad1.m(); ++i) {
               grad1.extractRow(i,row);
               row.project_sft(_y,i);
               grad1.setRow(i,row);
            }
         }
         grad1.scal(T(1.0)/_X->m());
         grad2.resize(W.m(),W.n());
         _X->mult(grad1,grad2,true,true);
      };
   private:
      explicit MultiLogLoss<T>(const MultiLogLoss<T>& dict);
      MultiLogLoss<T>& operator=(const MultiLogLoss<T>& dict);
      // data members hidden in the listing, restored from their uses above:
      const AbstractMatrixB<T>* _X;
      Vector<int> _y;
};

template <typename T>
class LossCur: public Loss<T, Matrix<T>, Matrix<T> > {
   public:
      LossCur(const AbstractMatrixB<T>& X) : _X(&X) { };

      virtual ~LossCur() { };

      inline void init(const Matrix<T>& y) { };

      inline T eval(const Matrix<T>& A) const {
         Matrix<T> tmp(_X->m(),A.n());
         _X->mult(A,tmp);
         Matrix<T> tmp2;
         //tmp2.copy(*_X);
         _X->copyTo(tmp2);
         //tmp.mult(*_X,tmp2,false,false,T(-1.0),T(1.0));
         _X->multSwitch(tmp,tmp2,false,false,T(-1.0),T(1.0));
         return 0.5*tmp2.normFsq();
      };
      inline void grad(const Matrix<T>& A, Matrix<T>& grad) const {
         Matrix<T> tmp(_X->m(),A.n());
         _X->mult(A,tmp);
         Matrix<T> tmp2;
         //tmp2.copy(*_X);
         _X->copyTo(tmp2);
         //tmp.mult(*_X,tmp2,false,false,T(-1.0),T(1.0));
         _X->multSwitch(tmp,tmp2,false,false,T(-1.0),T(1.0));
         //tmp2.mult(*_X,tmp,false,true,T(-1.0),T(0.0));
         _X->multSwitch(tmp2,tmp,true,false,T(-1.0),T(0.0));
         grad.resize(A.m(),A.n());
         _X->mult(tmp,grad,true,false);
      };
      virtual T fenchel(const Matrix<T>& input) const {
         return 0.5*input.normFsq()+_X->dot(input);
      }
      virtual void var_fenchel(const Matrix<T>& A, Matrix<T>& grad1, Matrix<T>& grad2, const bool intercept) const {
         Matrix<T> tmp(_X->m(),A.n());
         _X->mult(A,tmp);
         //grad1.copy(*_X);
         _X->copyTo(grad1);
         //tmp.mult(*_X,grad1,false,false,T(1.0),T(-1.0));
         _X->multSwitch(tmp,grad1,false,false,T(1.0),T(-1.0));
         //grad1.mult(*_X,tmp,false,true,T(1.0),T(0.0));
         _X->multSwitch(grad1,tmp,true,false,T(1.0),T(0.0));
         grad2.resize(A.m(),A.n());
         _X->mult(tmp,grad2,true,false);
      };
   private:
      explicit LossCur<T>(const LossCur<T>& dict);
      LossCur<T>& operator=(const LossCur<T>& dict);
      // data member hidden in the listing, restored from its uses above:
      const AbstractMatrixB<T>* _X;
};

template <typename T>
class SqLossMat : public Loss<T, Matrix<T> , Matrix<T> > {
   public:
      SqLossMat(const AbstractMatrixB<T>& D) : _D(&D) { _compute_gram = false; };
      SqLossMat(const AbstractMatrixB<T>& D, const Matrix<T>& G) : _D(&D), _G(&G) {
         _compute_gram = true; };
      virtual ~SqLossMat() { };

      virtual inline void init(const Matrix<T>& x) {
         _x.copy(x);
         if (_compute_gram) {
            _D->mult(x,_DtX,true,false);
         }
      };

      inline T eval(const Matrix<T>& alpha) const {
         Matrix<T> residual;
         residual.copy(_x);
         SpMatrix<T> spalpha;
         alpha.toSparse(spalpha);
         _D->mult(spalpha,residual,false,false,T(-1.0),T(1.0));
         return 0.5*residual.normFsq();
      }
      inline void grad(const Matrix<T>& alpha, Matrix<T>& grad) const {
         SpMatrix<T> spalpha;
         alpha.toSparse(spalpha);
         if (_compute_gram) {
            grad.copy(_DtX);
            _G->mult(spalpha,grad,false,false,T(1.0),-T(1.0));
         } else {
            Matrix<T> residual;
            residual.copy(_x);
            _D->mult(spalpha,residual,false,false,T(-1.0),T(1.0));
            _D->mult(residual,grad,true,false,T(-1.0),T(0.0));
         }
      };
      virtual inline bool test_backtracking(const Matrix<T>& y, const Matrix<T>& grad, const Matrix<T>& prox, const T L) const {
         Matrix<T> tmp;
         tmp.copy(y);
         tmp.sub(prox);
         SpMatrix<T> sptmp;
         tmp.toSparse(sptmp);
         if (_compute_gram) {
            SpVector<T> col;
            T sum=0;
            for (int i = 0; i<sptmp.n(); ++i) {
               sptmp.refCol(i,col);
               sum += _G->quad(col);
            }
            return (sum <= L*sptmp.normFsq());
         } else {
            Matrix<T> tmp2;
            _D->mult(sptmp,tmp2);
            return (tmp2.normFsq() <= L*sptmp.normFsq());
         }
      };
      virtual T fenchel(const Matrix<T>& input) const {
         return 0.5*input.normFsq()+input.dot(_x);
      };
      virtual void var_fenchel(const Matrix<T>& x, Matrix<T>& grad1, Matrix<T>& grad2, const bool intercept) const {
         grad1.copy(_x);
         SpMatrix<T> spalpha;
         x.toSparse(spalpha);
         _D->mult(spalpha,grad1,false,false,T(1.0),T(-1.0));
         if (intercept)
            grad1.center();
         _D->mult(grad1,grad2,true,false,T(1.0),T(0.0));
      };

   private:
      explicit SqLossMat<T>(const SqLossMat<T>& dict);
      SqLossMat<T>& operator=(const SqLossMat<T>& dict);
      // data members hidden in the listing, restored from their uses above:
      const AbstractMatrixB<T>* _D;
      Matrix<T> _x;
      Matrix<T> _DtX;
      const Matrix<T>* _G;
      bool _compute_gram;
};

template <typename T, typename L>
class LossMatSup : public Loss<T,Matrix<T>, Matrix<T> > {
   public:
      LossMatSup() { };

      virtual ~LossMatSup() {
         for (int i = 0; i<_N; ++i) {
            delete(_losses[i]);
            _losses[i]=NULL;
         }
         delete[](_losses);
      };

      virtual void init(const Matrix<T>& input) {
         Vector<T> col;
         _m=input.m();
         for (int i = 0; i<_N; ++i) {
            input.refCol(i,col);
            _losses[i]->init(col);
         }
      };

      inline T eval(const Matrix<T>& w) const {
         Vector<T> col;
         T sum = 0;
         for (int i = 0; i<_N; ++i) {
            w.refCol(i,col);
            sum+=_losses[i]->eval(col);
         }
         return sum;
      }
      inline void grad(const Matrix<T>& w, Matrix<T>& grad) const {
         Vector<T> col, col2;
         grad.resize(w.m(),w.n());
         for (int i = 0; i<_N; ++i) {
            w.refCol(i,col);
            grad.refCol(i,col2);
            _losses[i]->grad(col,col2);
         }
      };
      virtual T fenchel(const Matrix<T>& input) const {
         Vector<T> col;
         T sum = 0;
         for (int i = 0; i<_N; ++i) {
            input.refCol(i,col);
            sum += _losses[i]->fenchel(col);
         }
         return sum;
      }
      virtual void var_fenchel(const Matrix<T>& x, Matrix<T>& grad1, Matrix<T>& grad2, const bool intercept) const {
         grad1.resize(_m,x.n());
         grad2.resize(x.m(),x.n());
         Vector<T> col, col2, col3;
         for (int i = 0; i<_N; ++i) {
            x.refCol(i,col);
            grad1.refCol(i,col2);
            grad2.refCol(i,col3);
            _losses[i]->var_fenchel(col,col2,col3,intercept);
         }
      };
      virtual bool is_fenchel() const {
         bool ok=true;
         for (int i = 0; i<_N; ++i)
            ok = ok && _losses[i]->is_fenchel();
         return ok;
      };
      virtual void dummy() = 0;

   private:
      explicit LossMatSup<T,L>(const LossMatSup<T,L>& dict);
      LossMatSup<T,L>& operator=(const LossMatSup<T,L>& dict);
      int _m;

   protected:
      int _N;
      L** _losses;
};

template <typename T, typename L>
class LossMat : public LossMatSup<T,L> { };

template <typename T, bool weighted>
class LossMat<T, LogLoss<T,weighted> > : public LossMatSup<T, LogLoss<T,weighted> > {
   public:
      LossMat(const int N, const AbstractMatrixB<T>& X) {
         this->_N=N;
         this->_losses=new LogLoss<T,weighted>*[this->_N];
         for (int i = 0; i<this->_N; ++i)
            this->_losses[i]=new LogLoss<T,weighted>(X);
      }
      virtual void dummy() { };
      virtual ~LossMat() { };
};

template <typename T>
class LossMat<T, SqLossMissing<T> > : public LossMatSup<T, SqLossMissing<T> > {
   public:
      LossMat(const int N, const AbstractMatrixB<T>& X) {
         this->_N=N;
         this->_losses=new SqLossMissing<T>*[this->_N];
         for (int i = 0; i<this->_N; ++i)
            this->_losses[i]=new SqLossMissing<T>(X);
      }
      virtual void dummy() { };
      virtual ~LossMat() { };
};

template <typename T, typename D = Vector<T> >
class Regularizer {
   public:
      Regularizer() { };   // default constructor hidden in the listing; restored
      Regularizer(const ParamReg<T>& param) {
         _intercept=param.intercept;
         _pos=param.pos;
      }
      virtual ~Regularizer() { };

      virtual void reset() { };
      virtual void prox(const D& input, D& output, const T lambda) = 0;
      virtual T eval(const D& input) const = 0;
      virtual void fenchel(const D& input, T& val, T& scal) const = 0;
      virtual bool is_fenchel() const { return true; };
      virtual bool is_intercept() const { return _intercept; };
      virtual bool is_subgrad() const { return false; };
      virtual void sub_grad(const D& input, D& output) const { };
      virtual T eval_paths(const D& x, SpMatrix<T>& paths_mat) const { return this->eval(x); };
      virtual T eval_dual_norm(const D& x) const { return 0; };
      // TODO complete for all norms
      virtual T eval_dual_norm_paths(const D& x, SpMatrix<T>& path) const { return this->eval_dual_norm(x); };

   protected:
      bool _pos;
      bool _intercept;   // hidden line restored (set in the constructor)

   private:
      explicit Regularizer<T,D>(const Regularizer<T,D>& reg);
      Regularizer<T,D>& operator=(const Regularizer<T,D>& reg);
};

template <typename T>
class Lasso : public Regularizer<T> {
   public:
      Lasso(const ParamReg<T>& param) : Regularizer<T>(param) { };
      virtual ~Lasso() { };

      void inline prox(const Vector<T>& x, Vector<T>& y, const T lambda) {
         y.copy(x);
         if (this->_pos) y.thrsPos();
         y.softThrshold(lambda);
         if (this->_intercept) y[y.n()-1] = x[y.n()-1];
      };
      T inline eval(const Vector<T>& x) const {
         return (this->_intercept ? x.asum() - abs(x[x.n()-1]) : x.asum());
      };
      void inline fenchel(const Vector<T>& input, T& val, T& scal) const {
         Vector<T> output;
         output.copy(input);
         if (this->_pos) output.thrsPos();
         T mm = output.fmaxval();
         scal= mm > 1.0 ? T(1.0)/mm : 1.0;
         val=0;
         if (this->_intercept && abs<T>(output[output.n()-1]) > EPSILON) val=INFINITY;
      };
      virtual bool is_subgrad() const { return true; };
      virtual void sub_grad(const Vector<T>& input, Vector<T>& output) const {
         output.resize(input.n());
         if (!this->_pos) {
            for (int i = 0; i<input.n(); ++i) {
               output[i] = input[i] > 0 ? T(1.0) : input[i] < 0 ? -T(1.0) : 0;
            }
         } else {
            for (int i = 0; i<input.n(); ++i) {
               output[i] = input[i] > 0 ? T(1.0) : 0;
            }
         }
         if (this->_intercept) output[output.n()-1]=0;
      }
};
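
// Lasso::prox above is entrywise soft-thresholding,
//   prox(v)_i = sign(v_i) * max(|v_i| - lambda, 0);
// e.g. lambda = 0.5 maps (1.2, -0.3, 0.7) to (0.7, 0, 0.2).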

template <typename T>
class LassoConstraint : public Regularizer<T> {
   public:
      LassoConstraint(const ParamReg<T>& param) : Regularizer<T>(param) { _thrs=param.lambda; };
      virtual ~LassoConstraint() { };

      void inline prox(const Vector<T>& x, Vector<T>& y, const T lambda) {
         Vector<T> tmp;
         tmp.copy(x);
         if (this->_intercept) {
            tmp[tmp.n()-1]=0;
            tmp.sparseProject(y,_thrs,1,0,0,0,this->_pos);
            y[y.n()-1] = x[y.n()-1];
         } else {
            tmp.sparseProject(y,_thrs,1,0,0,0,this->_pos);
         }
      };
      T inline eval(const Vector<T>& x) const {
         return 0;
      };
      void inline fenchel(const Vector<T>& input, T& val, T& scal) const {
         scal=1.0;
         Vector<T> output;
         output.copy(input);
         if (this->_intercept) output[output.n()-1]=0;
         val = _thrs*(this->_pos ? MAX(output.maxval(),0) : output.fmaxval());
      };
      virtual bool is_subgrad() const { return false; };
   private:
      T _thrs;
};

template <typename T>
class Lzero : public Regularizer<T> {
   public:
      Lzero(const ParamReg<T>& param) : Regularizer<T>(param) { };
      virtual ~Lzero() { };

      virtual bool is_fenchel() const { return false; };
      void inline prox(const Vector<T>& x, Vector<T>& y, const T lambda) {
         y.copy(x);
         if (this->_pos) y.thrsPos();
         y.hardThrshold(sqrt(2*lambda));
         if (this->_intercept) y[y.n()-1] = x[y.n()-1];
      };
      T inline eval(const Vector<T>& x) const {
         return (this->_intercept ? x.lzero() - 1 : x.lzero());
      };
      void inline fenchel(const Vector<T>& input, T& val, T& scal) const { };
};

template <typename T>
class None: public Regularizer<T>, public SplittingFunction<T, SpMatrix<T> > {
   public:
      None() { };
      None(const ParamReg<T>& param) { };
      virtual ~None() { };

      void inline prox(const Vector<T>& x, Vector<T>& y, const T lambda) {
         y.copy(x);
      };
      T inline eval(const Vector<T>& x) const { return 0; };
      void inline fenchel(const Vector<T>& input, T& val, T& scal) const { };
      virtual bool is_fenchel() const { return false; };
      virtual bool is_subgrad() const { return true; };
      virtual void sub_grad(const Vector<T>& input, Vector<T>& output) const {
         output.setZeros();
      }
      virtual void reset() { };
      virtual T eval_split(const SpMatrix<T>& input) const { return 0; };
      virtual int num_components() const { return 0; };
      virtual void prox_split(SpMatrix<T>& splitted_w, const T lambda) const { };
      virtual void init_split_variables(SpMatrix<T>& splitted_w) const { };
      virtual void init(const Vector<T>& y) { };
};

template <typename T>
class Ridge: public Regularizer<T> {
   public:
      Ridge(const ParamReg<T>& param) : Regularizer<T>(param) { };
      virtual ~Ridge() { };

      void inline prox(const Vector<T>& x, Vector<T>& y, const T lambda) {
         y.copy(x);
         if (this->_pos) y.thrsPos();
         y.scal(T(1.0/(1.0+lambda)));
         if (this->_intercept) y[y.n()-1] = x[y.n()-1];
      };
      T inline eval(const Vector<T>& x) const {
         return (this->_intercept ? 0.5*x.nrm2sq() - 0.5*x[x.n()-1]*x[x.n()-1] : 0.5*x.nrm2sq());
      };
      void inline fenchel(const Vector<T>& input, T& val, T& scal) const {
         Vector<T> tmp;
         tmp.copy(input);
         if (this->_pos) tmp.thrsPos();
         val=this->eval(tmp);
         scal=T(1.0);
         if (this->_intercept && abs<T>(tmp[tmp.n()-1]) > EPSILON) val=INFINITY;
      };
      virtual bool is_subgrad() const { return true; };
      virtual void sub_grad(const Vector<T>& input, Vector<T>& output) const {
         output.resize(input.n());
         if (!this->_pos) {
            for (int i = 0; i<input.n(); ++i) {
               output[i] = input[i] > 0 ? 0.5*input[i] : 0;
            }
         } else {
            output.copy(input);
            output.scal(0.5);
         }
         if (this->_intercept) output[output.n()-1]=0;
      }
};
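
// Ridge::prox above is the closed form of
//   argmin_y 0.5*||y - x||^2 + lambda*0.5*||y||^2 = x / (1 + lambda).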

template <typename T>
class normL2: public Regularizer<T> {
   public:
      normL2(const ParamReg<T>& param) : Regularizer<T>(param) { };
      virtual ~normL2() { };

      void inline prox(const Vector<T>& x, Vector<T>& y, const T lambda) {
         y.copy(x);
         if (this->_pos) y.thrsPos();
         Vector<T> xref(x.rawX(),this->_intercept ? x.n()-1 : x.n());
         const T nrm=xref.nrm2();
         if (nrm < lambda) {
            y.setZeros();
         } else {
            y.scal(T(1.0) - lambda/nrm);
         }
         if (this->_intercept) y[y.n()-1] = x[y.n()-1];
      };
      T inline eval(const Vector<T>& x) const {
         Vector<T> xref(x.rawX(),this->_intercept ? x.n()-1 : x.n());
         return xref.nrm2();
      };
      void inline fenchel(const Vector<T>& input, T& val, T& scal) const {
         Vector<T> output;
         output.copy(input);
         if (this->_pos) output.thrsPos();
         T mm = output.nrm2();
         scal= mm > 1.0 ? T(1.0)/mm : 1.0;
         val=0;
         if (this->_intercept && abs<T>(output[output.n()-1]) > EPSILON) val=INFINITY;
      };
};
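
// normL2::prox above is block soft-thresholding: the vector is scaled by
// (1 - lambda/||x||_2) when ||x||_2 > lambda and set to zero otherwise,
// the group analogue of the scalar shrinkage used by Lasso.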

template <typename T>
class normLINF: public Regularizer<T> {
   public:
      normLINF(const ParamReg<T>& param) : Regularizer<T>(param) { };
      virtual ~normLINF() { };

      void inline prox(const Vector<T>& x, Vector<T>& y, const T lambda) {
         y.copy(x);
         if (this->_pos) y.thrsPos();
         Vector<T> xref(y.rawX(),this->_intercept ? x.n()-1 : x.n());
         Vector<T> row(xref.n());
         xref.l1project(row,lambda);
         for (int j = 0; j<xref.n(); ++j)
            y[j]=y[j]-row[j];
         if (this->_intercept) y[y.n()-1] = x[y.n()-1];
      };
      T inline eval(const Vector<T>& x) const {
         Vector<T> xref(x.rawX(),this->_intercept ? x.n()-1 : x.n());
         return xref.fmaxval();
      };
      void inline fenchel(const Vector<T>& input, T& val, T& scal) const {
         Vector<T> output;
         output.copy(input);
         if (this->_pos) output.thrsPos();
         T mm = output.asum();
         scal= mm > 1.0 ? T(1.0)/mm : 1.0;
         val=0;
         if (this->_intercept && abs<T>(output[output.n()-1]) > EPSILON) val=INFINITY;
      };
};
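
// normLINF::prox above uses the Moreau decomposition: the prox of
// lambda*||.||_inf is x minus the projection of x onto the l1-ball of
// radius lambda, which is what the l1project call computes.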

template <typename T, typename D, typename RegA, typename RegB, bool order = true, bool scale_lambda = false>
class ComposeProx: public Regularizer<T,D> {
   public:
      ComposeProx(const ParamReg<T>& param) : Regularizer<T,D>(param) {
         _lambda2d1=param.lambda2d1;
         _regA=new RegA(param);
         _regB=new RegB(param);
      }
      virtual ~ComposeProx() { delete(_regA); delete(_regB); };

      void inline prox(const D& x, D& y, const T lambda) {
         D tmp;
         if (scale_lambda) {
            if (order) {
               _regA->prox(x,tmp,lambda);
               _regB->prox(tmp,y,lambda*_lambda2d1/(T(1.0)+lambda));
            } else {
               _regB->prox(x,tmp,lambda*_lambda2d1);
               _regA->prox(tmp,y,lambda/(T(1.0)+lambda*_lambda2d1));
            }
         } else {
            if (order) {
               _regA->prox(x,tmp,lambda);
               _regB->prox(tmp,y,lambda*_lambda2d1);
            } else {
               _regB->prox(x,tmp,lambda*_lambda2d1);
               _regA->prox(tmp,y,lambda);
            }
         }
      };
      T inline eval(const D& x) const {
         return _regA->eval(x) + _lambda2d1*_regB->eval(x);
      };
      virtual bool is_fenchel() const { return false; };
      void inline fenchel(const D& input, T& val, T& scal) const { };
      virtual bool is_subgrad() const { return _regA->is_subgrad() && _regB->is_subgrad(); };
      virtual void sub_grad(const D& input, D& output) const {
         _regA->sub_grad(input,output);
         D tmp;
         _regB->sub_grad(input,tmp);
         output.add(tmp,_lambda2d1);
      };
   private:
      RegA* _regA;
      RegB* _regB;
      T _lambda2d1;   // hidden line restored (set in the constructor)
};
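
// ComposeProx evaluates reg_A + lambda2d1*reg_B but computes its prox by
// chaining the two individual prox operators. Chaining is not exact for
// arbitrary pairs, but it is for the combinations used here; e.g. for the
// elastic net with mu = lambda2d1,
//   prox of lambda*(||.||_1 + 0.5*mu*||.||_2^2) = softthreshold(x,lambda)/(1+lambda*mu),
// i.e. the Ridge prox applied after the Lasso prox.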

template <typename T>
struct ElasticNet {
   // body hidden in the generated listing; in SPAMS the elastic net is
   // obtained by composing Lasso and Ridge through ComposeProx above
};

template <typename T>
class FusedLasso: public Regularizer<T> {
   public:
      FusedLasso(const ParamReg<T>& param) : Regularizer<T>(param) {
         _lambda2d1=param.lambda2d1;
         _lambda3d1=param.lambda3d1;
      };
      virtual ~FusedLasso() { };

      void inline prox(const Vector<T>& x, Vector<T>& y, const T lambda) {
         y.resize(x.n());
         Vector<T> copyx;
         copyx.copy(x);
         copyx.fusedProjectHomotopy(y,_lambda2d1*lambda,lambda,_lambda3d1*lambda,true);
      };
      T inline eval(const Vector<T>& x) const {
         T sum = T();
         const int maxn = this->_intercept ? x.n()-1 : x.n();
         for (int i = 0; i<maxn-1; ++i)
            sum += abs(x[i+1]-x[i]) + _lambda2d1*abs(x[i]) + 0.5*_lambda3d1*x[i]*x[i];
         sum += _lambda2d1*abs(x[maxn-1])+0.5*_lambda3d1*x[maxn-1]*x[maxn-1];
         return sum;
      };
      virtual bool is_fenchel() const { return false; };
      void inline fenchel(const Vector<T>& input, T& val, T& scal) const { };

   private:
      T _lambda2d1;
      T _lambda3d1;   // hidden line restored (set in the constructor)
};
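
// The penalty evaluated by FusedLasso::eval above is the total-variation
// term plus optional l1 and squared-l2 terms:
//   sum_i |x_{i+1} - x_i| + lambda2d1 * sum_i |x_i| + 0.5*lambda3d1 * sum_i x_i^2.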

template <typename T>
class GraphLasso : public Regularizer<T>, public SplittingFunction<T, SpMatrix<T> > {
   public:
      GraphLasso(const ParamReg<T>& param) : Regularizer<T>(param) {
         const bool resetflow = param.resetflow;
         const bool linf = param.linf;
         const bool clever = param.clever;
         const GraphStruct<T>& graph_st=*(param.graph_st);
         _clever=clever;
         _resetflow=resetflow;
         _graph.create_graph(graph_st.Nv,graph_st.Ng,graph_st.weights,
               graph_st.gv_ir,graph_st.gv_jc,graph_st.gg_ir,graph_st.gg_jc);
         _graph.save_capacities();
         _work.resize(graph_st.Nv+graph_st.Ng+2);
         _weights.resize(graph_st.Ng);
         for (int i = 0; i<graph_st.Ng; ++i) _weights[i] = graph_st.weights[i];
         _old_lambda=-1.0;
         _linf=linf;
      };
      virtual ~GraphLasso() { };

      void inline reset() { _old_lambda = -1.0; };

      void inline prox(const Vector<T>& x, Vector<T>& y, const T lambda) {
         if (!_linf) {
            cerr << "Not implemented" << endl;
            exit(1);
         }
         y.copy(x);
         _graph.restore_capacities();
         _graph.set_weights(_weights.rawX(),lambda);
         if (_old_lambda < 0 || _resetflow) {
            _graph.reset_flow();
         } else {
            if (lambda != _old_lambda)
               _graph.scale_flow(lambda/_old_lambda);
         }
         if (this->_pos) {
            Vector<T> xc;
            xc.copy(x);
            xc.thrsPos();
            _graph.proximal_operator(xc.rawX(),y.rawX(),_clever);
         } else {
            _graph.proximal_operator(x.rawX(),y.rawX(),_clever);
         }
#ifdef VERB2
         T duality_gap2 = y.nrm2sq()-y.dot(x)+lambda*this->eval(y);
         cerr << "duality_gap2 " << duality_gap2 << endl;
#endif
         _old_lambda=lambda;
      };

      T inline eval(const Vector<T>& x) const {
         Graph<T>* gr = const_cast<Graph<T>* >(&_graph);
         gr->restore_capacities();
         return gr->norm(x.rawX(),_work.rawX(),_weights.rawX(),_linf);
      };
      virtual bool is_fenchel() const {
         return _linf;
      };
      void inline fenchel(const Vector<T>& input, T& val, T& scal) const {
         Graph<T>* gr = const_cast<Graph<T>* >(&_graph);
         if (!_resetflow) {
            gr->save_flow();
         }
         gr->reset_flow();
         gr->restore_capacities();
         Vector<T> output;
         output.copy(input);
         if (this->_pos) output.thrsPos();
         T mm = gr->dual_norm_inf(output,_weights);
         if (!_resetflow)
            gr->restore_flow();
         scal= mm > 1.0 ? T(1.0)/mm : 1.0;
         val=0;
         if (this->_intercept && abs<T>(input[input.n()-1]) > EPSILON) val=INFINITY;
      };

      virtual void init(const Vector<T>& y) { };
      inline int num_components() const { return _weights.n(); };
      inline void prox_split(SpMatrix<T>& splitted_w, const T lambda) const {
         Vector<T> tmp;
         SpVector<T> col;
         if (_linf) {
            for (int i = 0; i<splitted_w.n(); ++i) {
               splitted_w.refCol(i,col);
               tmp.setData(col.rawX(),col.nzmax());
               Vector<T> res;
               res.copy(tmp);
               vAbs<T>(res.n(),res.rawX(),res.rawX());
               T thrs=project_tree_l1(res.rawX(),res.n(),lambda);
               tmp.thrsabsmin(thrs);
            }
         } else {
            for (int i = 0; i<splitted_w.n(); ++i) {
               splitted_w.refCol(i,col);
               tmp.setData(col.rawX(),col.nzmax());
               const T nrm = tmp.nrm2();
               if (nrm > lambda*_weights[i]) {
                  tmp.scal(T(1.0)-lambda*_weights[i]/nrm);
               } else {
                  tmp.setZeros();
               }
            }
         }
      };
      inline void init_split_variables(SpMatrix<T>& splitted_w) const {
         Graph<T>* gr = const_cast<Graph<T>* >(&_graph);
         gr->init_split_variables(splitted_w);
      };
      inline T eval_split(const SpMatrix<T>& input) const {
         SpVector<T> col;
         T sum = 0;
         for (int i = 0; i<input.n(); ++i) {
            input.refCol(i,col);
            sum += _linf ? _weights[i]*col.fmaxval() : _weights[i]*col.nrm2();
         }
         return sum;
      }
      inline T eval_weighted(const Vector<T>& input,
            const SpMatrix<T>& input_struct, const T* inner_weight) const {
         SpVector<T> col;
         T sum = 0;
         Vector<T> tmp(input_struct.m());
         for (int i = 0; i<input_struct.n(); ++i) {
            input_struct.refCol(i,col);
            tmp.setn(col.L());
            for (int j = 0; j<col.L(); ++j)
               tmp[j]=inner_weight[j]*input[col.r(j)];
            sum += _linf ? _weights[i]*tmp.fmaxval() : _weights[i]*tmp.nrm2();
         }
         return sum;
      }

   private:
      bool _clever;
      // data members hidden in the listing, restored from their uses above:
      Graph<T> _graph;
      Vector<T> _work;
      Vector<T> _weights;
      T _old_lambda;
      bool _resetflow;
      bool _linf;
};

// (a template class defined here in the source is hidden in the generated
// listing; only its closing brace survived and has been dropped)

template <typename T>
class TreeLasso : public Regularizer<T> {
   public:
      TreeLasso(const ParamReg<T>& param) : Regularizer<T>(param) {
         const TreeStruct<T>& tree_st=*(param.tree_st);
         const bool linf = param.linf;
         _tree.create_tree(tree_st.Nv,tree_st.own_variables,
               tree_st.N_own_variables,tree_st.weights,
               tree_st.groups_ir,tree_st.groups_jc,
               tree_st.Ng,0);
         _linf=linf;
      };
      virtual ~TreeLasso() { };

      void inline prox(const Vector<T>& x, Vector<T>& y, const T lambda) {
         y.copy(x);
         if (this->_pos) y.thrsPos();
         Vector<T> yp;
         if (this->_intercept) {
            yp.setData(y.rawX(),y.n()-1);
         } else {
            yp.setData(y.rawX(),y.n());
         }
         _tree.proj(yp,_linf,lambda);
      };
      T inline eval(const Vector<T>& x) const {
         return const_cast<Tree_Seq<T>* >(&_tree)->val_norm(x.rawX(),0,_linf);
      };
      void inline fenchel(const Vector<T>& y, T& val, T& scal) const {
         if (_linf) {
            Vector<T> yp;
            if (this->_intercept) {
               yp.setData(y.rawX(),y.n()-1);
            } else {
               yp.setData(y.rawX(),y.n());
            }
            Vector<T> yp2;
            yp2.copy(yp);
            if (this->_pos) yp2.thrsPos();
            T mm = const_cast<Tree_Seq<T>* >(&_tree)->dual_norm_inf(yp2);
            scal= mm > 1.0 ? T(1.0)/mm : 1.0;
            val=0;
            if (this->_intercept && abs<T>(y[y.n()-1]) > EPSILON) val=INFINITY;
         }
      };
      virtual bool is_fenchel() const {
         return _linf;
      };
      virtual bool is_subgrad() const { return true; };
      virtual void sub_grad(const Vector<T>& input, Vector<T>& output) const {
         output.resize(input.n());
         const_cast<Tree_Seq<T>*>(&_tree)->sub_grad(input,output,_linf);
         if (this->_intercept) output[output.n()-1]=0;
      }
   private:
      Tree_Seq<T> _tree;   // hidden line restored (same member as TreeLzero below)
      bool _linf;
};

template <typename T>
class TreeLzero : public Regularizer<T> {
   public:
      TreeLzero(const ParamReg<T>& param) : Regularizer<T>(param) {
         const TreeStruct<T>& tree_st=*(param.tree_st);
         _tree.create_tree(tree_st.Nv,tree_st.own_variables,
               tree_st.N_own_variables,tree_st.weights,
               tree_st.groups_ir,tree_st.groups_jc,
               tree_st.Ng,0);
      };
      virtual ~TreeLzero() { };

      void inline prox(const Vector<T>& x, Vector<T>& y, const T lambda) {
         y.copy(x);
         if (this->_pos) y.thrsPos();
         Vector<T> yp;
         if (this->_intercept) {
            yp.setData(y.rawX(),y.n()-1);
         } else {
            yp.setData(y.rawX(),y.n());
         }
         _tree.proj_zero(yp,lambda);
      };
      T inline eval(const Vector<T>& x) const {
         return const_cast<Tree_Seq<T>* >(&_tree)->val_zero(x.rawX(),0);
      };
      virtual bool is_fenchel() const { return false; };
      void inline fenchel(const Vector<T>& y, T& val, T& scal) const { };

   private:
      Tree_Seq<T> _tree;
};

template <typename T, typename ProxMat>
class ProxMatToVec : public Regularizer<T> {
   public:
      ProxMatToVec(const ParamReg<T>& param) : Regularizer<T>(param) {
         _size_group=param.size_group;
         ParamReg<T> param2=param;
         param2.intercept=false;
         _proxy = new ProxMat(param2);
      };
      virtual ~ProxMatToVec() { delete(_proxy); };

      void inline prox(const Vector<T>& x, Vector<T>& y, const T lambda) {
         y.resize(x.n());
         int size_vec=this->_intercept ? x.n()-1 : x.n();
         Matrix<T> mX(x.rawX(),_size_group,size_vec/_size_group);
         Matrix<T> mY(y.rawX(),_size_group,size_vec/_size_group);
         _proxy->prox(mX,mY,lambda);
         if (this->_intercept) y[y.n()-1]=x[x.n()-1];
      }
      T inline eval(const Vector<T>& x) const {
         int size_vec=this->_intercept ? x.n()-1 : x.n();
         Matrix<T> mX(x.rawX(),_size_group,size_vec/_size_group);
         return _proxy->eval(mX);
      }
      virtual bool is_fenchel() const { return (_proxy->is_fenchel()); };
      void inline fenchel(const Vector<T>& x, T& val, T& scal) const {
         int size_vec=this->_intercept ? x.n()-1 : x.n();
         Matrix<T> mX(x.rawX(),_size_group,size_vec/_size_group);
         _proxy->fenchel(mX,val,scal);
      };

   private:
      int _size_group;
      ProxMat* _proxy;
};

template <typename T, typename Reg>
class GroupProx : public Regularizer<T> {
   public:
      GroupProx(const ParamReg<T>& param) : Regularizer<T>(param) {
         ParamReg<T> param2=param;
         param2.intercept=false;
         _size_group=param.size_group;
         if (param.groups) {
            int num_groups=0;
            for (int i = 0; i<param.ngroups; ++i) num_groups=MAX(num_groups,param.groups[i]);
            _groups.resize(num_groups);
            for (int i = 0; i<num_groups; ++i) _groups[i]=new list_int();
            for (int i = 0; i<param.ngroups; ++i) _groups[param.groups[i]-1]->push_back(i);
         }
         _prox = new Reg(param2);
      }
      virtual ~GroupProx() {
         delete(_prox);
         for (int i = 0; i<_groups.size(); ++i) delete(_groups[i]);
      };

      void inline prox(const Vector<T>& x, Vector<T>& y, const T lambda) {
         y.copy(x);
         const int maxn= this->_intercept ? x.n()-1 : x.n();
         if (!_groups.empty()) {
            for (int i = 0; i<_groups.size(); ++i) {
               list_int* group=_groups[i];
               Vector<T> tmp(group->size());
               Vector<T> tmp2(group->size());
               int count=0;
               for (const_iterator_int it = group->begin(); it != group->end(); ++it) {
                  tmp[count++]=x[*it];
               }
               _prox->prox(tmp,tmp2,lambda);
               count=0;
               for (const_iterator_int it = group->begin(); it != group->end(); ++it) {
                  y[*it]=tmp2[count++];
               }
            }
         } else {
            Vector<T> tmp;
            Vector<T> tmp2;
            const int p = _size_group;
            for (int i = 0; i+p-1<maxn; i+=p) {
               tmp.setPointer(x.rawX()+i,p);
               tmp2.setPointer(y.rawX()+i,p);
               _prox->prox(tmp,tmp2,lambda);
            }
         }
      }
      T inline eval(const Vector<T>& x) const {
         const int maxn= this->_intercept ? x.n()-1 : x.n();
         T sum=0;
         if (!_groups.empty()) {
            for (int i = 0; i<_groups.size(); ++i) {
               list_int* group=_groups[i];
               Vector<T> tmp(group->size());
               int count=0;
               for (const_iterator_int it = group->begin(); it != group->end(); ++it) {
                  tmp[count++]=x[*it];
               }
               sum+=_prox->eval(tmp);
            }
         } else {
            Vector<T> tmp;
            const int p = _size_group;
            for (int i = 0; i+p-1<maxn; i+=p) {
               tmp.setPointer(x.rawX()+i,p);
               sum+=_prox->eval(tmp);
            }
         }
         return sum;
      }
      virtual bool is_fenchel() const { return _prox->is_fenchel(); };
      void inline fenchel(const Vector<T>& x, T& val, T& scal) const {
         const int maxn= this->_intercept ? x.n()-1 : x.n();
         T val2;
         T scal2;
         scal=T(1.0);
         val=0;
         if (!_groups.empty()) {
            for (int i = 0; i<_groups.size(); ++i) {
               list_int* group=_groups[i];
               Vector<T> tmp(group->size());
               int count=0;
               for (const_iterator_int it = group->begin(); it != group->end(); ++it) {
                  tmp[count++]=x[*it];
               }
               _prox->fenchel(tmp,val2,scal2);
               val+=val2;
               scal=MIN(scal,scal2);
            }
         } else {
            const int p = _size_group;
            Vector<T> tmp;
            for (int i = 0; i+p-1<maxn; i+=p) {
               tmp.setPointer(x.rawX()+i,p);
               _prox->fenchel(tmp,val2,scal2);
               val+=val2;
               scal=MIN(scal,scal2);
            }
         }
      };
   protected:
      int _size_group;
      std::vector<list_int*> _groups;
      Reg* _prox;
};
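
// Example (sketch): with size_group = 2 and no explicit 'groups' array,
// GroupProx::prox treats x as consecutive blocks (x_1,x_2), (x_3,x_4), ...
// and applies the inner regularizer to each block in place, so GroupProx
// with normL2 as Reg implements the non-overlapping group Lasso.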

template <typename T>
struct GroupLassoL2 {
   // body hidden in the generated listing; presumably a GroupProx
   // instantiation with an l2 inner regularizer (see GroupProx above)
};

// Three further wrapper structs follow here in the source (the linf and the
// two sparse group-Lasso variants); their definitions are hidden in the
// generated listing.

1785  template <typename T>
1786  class MixedL1L2 : public Regularizer<T,Matrix<T> > {
1787  public:
1788  MixedL1L2(const ParamReg<T>& param) : Regularizer<T,Matrix<T> >(param) { };
1789  virtual ~MixedL1L2() { };
1790 
1791  void inline prox(const Matrix<T>& x, Matrix<T>& y, const T lambda) {
1792  Vector<T> norm;
1793  y.copy(x);
1794  if (this->_pos) y.thrsPos();
1795  y.norm_2_rows(norm);
1796  y.setZeros();
1797  const int m = x.m();
1798  const int n = x.n();
1799  for (int i = 0; i<m; ++i) {
1800  if (norm[i] > lambda) {
1801  T scal = (norm[i]-lambda)/norm[i];
1802  for (int j = 0; j<n; ++j)
1803  y[j*m+i] = x[j*m+i]*scal;
1804  }
1805  }
1806  if (this->_pos) y.thrsPos();
1807  if (this->_intercept)
1808  for (int j = 0; j<n; ++j)
1809  y[j*m+m-1]=x[j*m+m-1];
1810  }
1811  T inline eval(const Matrix<T>& x) const {
1812  Vector<T> norm;
1813  x.norm_2_rows(norm);
1814  return this->_intercept ? norm.asum() - norm[norm.n() -1] : norm.asum();
1815  }
1816  virtual bool is_subgrad() const { return true; };
1817  virtual void sub_grad(const Matrix<T>& input, Matrix<T>& output) const {
1818  Vector<T> norm;
1819  input.norm_2_rows(norm);
1820  for (int i = 0; i<norm.n(); ++i) {
1821  if (norm[i] < 1e-20) norm[i]=T(1.0);
1822  }
1823  norm.inv();
1824  if (this->_intercept) norm[norm.n()-1]=0;
1825  output.copy(input);
1826  output.multDiagLeft(norm);
1827  };
1828  void inline fenchel(const Matrix<T>& input, T& val, T& scal) const {
1829  Vector<T> norm;
1830  if (this->_pos) {
1831  Matrix<T> output;
1832  output.copy(input);
1833  output.thrsPos();
1834  output.norm_2_rows(norm);
1835  } else {
1836  input.norm_2_rows(norm);
1837  }
1838  T mm = norm.fmaxval();
1839  scal= mm > 1.0 ? T(1.0)/mm : 1.0;
1840  val=0;
 1841  if (this->_intercept && abs<T>(norm[norm.n()-1]) > EPSILON) val=INFINITY;
1842  };
1843  };
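// The prox above is row-wise group soft-thresholding: each row x_i with
// ||x_i||_2 > lambda is scaled by (||x_i||_2 - lambda)/||x_i||_2, and rows
// whose l2 norm is at most lambda are set to zero, which is what makes the
// mixed l1/l2 norm select entire rows.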
1844 
1845 
1846 
1847  template <typename T>
1848  class MixedL1LINF : public Regularizer<T,Matrix<T> > {
1849  public:
1850  MixedL1LINF(const ParamReg<T>& param) : Regularizer<T,Matrix<T> >(param) { };
1851  virtual ~MixedL1LINF() { };
1852 
1853  void inline prox(const Matrix<T>& x, Matrix<T>& y, const T lambda) {
1854  y.copy(x);
1855  if (this->_pos) y.thrsPos();
1856  Vector<T> row(x.n());
1857  Vector<T> row2(x.n());
1858  const int maxn= this->_intercept ? x.m()-1 : x.m();
1859  for (int i = 0; i< maxn; ++i) {
1860  for (int j = 0; j<x.n(); ++j)
1861  row[j]=y(i,j);
1862  row.l1project(row2,lambda);
1863  for (int j = 0; j<x.n(); ++j)
1864  y(i,j) = row[j]-row2[j];
1865  }
1866  }
1867  T inline eval(const Matrix<T>& x) const {
1868  Vector<T> norm;
1869  x.norm_inf_rows(norm);
1870  return this->_intercept ? norm.asum() - norm[norm.n() -1] : norm.asum();
1871  }
1872  void inline fenchel(const Matrix<T>& input, T& val, T& scal) const {
1873  Vector<T> norm;
1874  if (this->_pos) {
1875  Matrix<T> output;
1876  output.copy(input);
1877  output.thrsPos();
1878  output.norm_l1_rows(norm);
1879  } else {
1880  input.norm_l1_rows(norm);
1881  }
1882  if (this->_intercept) norm[norm.n()-1]=0;
1883  T mm = norm.fmaxval();
1884  scal= mm > 1.0 ? T(1.0)/mm : 1.0;
1885  val=0;
 1886  if (this->_intercept && abs<T>(norm[norm.n()-1]) > EPSILON) val=INFINITY;
1887  };
1888  virtual bool is_subgrad() const { return true; };
1889  virtual void sub_grad(const Matrix<T>& input, Matrix<T>& output) const {
1890  output.resize(input.m(),input.n());
1891  output.setZeros();
 1892  const int maxm= this->_intercept ? input.m()-1 : input.m();
1893  Vector<T> row(input.n());
1894  for (int i = 0; i<maxm; ++i) {
1895  input.copyRow(i,row);
1896  T max=row.fmaxval();
1897  if (max > 1e-15) {
1898  int num_max=0;
1899  for (int j = 0; j<row.n(); ++j) {
1900  if (abs<T>(max-abs<T>(row[j])) < 1e-15)
1901  num_max++;
1902  }
1903  T add = T(1.0)/num_max;
1904  for (int j = 0; j<row.n(); ++j) {
1905  if (abs<T>(max-abs<T>(row[j])) < 1e-15)
1906  row[j] = row[j] > 0 ? add : -add;
1907  }
1908  output.setRow(i,row);
1909  }
1910  }
1911  };
1912  };
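// The prox above uses the Moreau decomposition: the prox of lambda*||.||_inf
// applied to a row equals the row minus its projection onto the l1-ball of
// radius lambda, hence the call to row.l1project(row2,lambda) followed by
// the subtraction row - row2.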
1913 
1914  template <typename T>
1915  class TraceNorm : public Regularizer<T,Matrix<T> > {
1916  public:
1917  TraceNorm(const ParamReg<T>& param) : Regularizer<T,Matrix<T> >(param) {
1918  if (param.intercept) {
 1919  cerr << "Trace norm implementation is not compatible with an intercept; the intercept is deactivated" << endl;
1920  }
1921  if (param.pos) {
1922  cerr << "Trace norm implementation is not compatible with non-negativity constraints" << endl;
1923  }
1924 
1925  };
1926  virtual ~TraceNorm() { };
1927 
1928  void inline prox(const Matrix<T>& x, Matrix<T>& y, const T lambda) {
1929  //Matrix<T> tmp;
1930  //tmp.copy(x);
1931  Matrix<T> U;
1932  Matrix<T> V;
1933  Vector<T> S;
1934  x.svd(U,S,V);
1935  S.softThrshold(lambda);
1936  U.multDiagRight(S);
1937  U.mult(V,y);
1938  /* Vector<T> u0(x.m());
1939  u0.setZeros();
1940  Vector<T> u, v;
1941  for (int i = 0; i<MIN(x.m(),x.n()); ++i) {
1942  tmp.svdRankOne(u0,u,v);
1943  T val=v.nrm2();
1944  if (val < lambda) break;
1945  y.rank1Update(u,v,(val-lambda)/val);
1946  tmp.rank1Update(u,v,-T(1.0));
1947  }*/
1948  }
1949  T inline eval(const Matrix<T>& x) const {
1950  Vector<T> tmp;
1951  x.singularValues(tmp);
1952  return tmp.sum();
1953  /* Matrix<T> XtX;
1954  if (x.m() > x.n()) {
1955  x.XtX(XtX);
1956  } else {
1957  x.XXt(XtX);
1958  }
1959  T sum=0;
1960  Vector<T> u0(XtX.m());
1961  u0.setAleat();
1962  for (int i = 0; i<XtX.m(); ++i) {
1963  T val=XtX.eigLargestMagnSym(u0,u0); // uses power method
1964  XtX.rank1Update(u0,u0,-val);
1965  sum+=sqrt(val);
1966  if (val <= 1e-10) break;
1967  }
1968  return sum;
1969  */
1970  }
1971  void inline fenchel(const Matrix<T>& input, T& val, T& scal) const {
1972  //Vector<T> u0(input.m());
1973  //u0.setZeros();
1974  //Vector<T> u, v;
1975  //input.svdRankOne(u0,u,v);
1976  //T mm = v.nrm2();
1977  Vector<T> tmp;
1978  input.singularValues(tmp);
1979  T mm = tmp.fmaxval();
1980  scal= mm > 1.0 ? T(1.0)/mm : 1.0;
1981  val=0;
1982  };
1983  };
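// The trace-norm prox above is singular-value soft-thresholding: compute the
// SVD, shrink every singular value by lambda (truncating at zero), and
// recompose from the SVD factors; singular values below lambda vanish, so the
// prox reduces the rank of the result.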
1984 
1985 
1986  template <typename T>
1987  class Rank : public Regularizer<T,Matrix<T> > {
1988  public:
1989  Rank(const ParamReg<T>& param) : Regularizer<T,Matrix<T> >(param) {
1990  if (param.intercept) {
 1991  cerr << "Rank implementation is not compatible with an intercept; the intercept is deactivated" << endl;
1992  }
1993  if (param.pos) {
1994  cerr << "Rank implementation is not compatible with non-negativity constraints" << endl;
1995  }
1996 
1997  };
1998  virtual ~Rank() { };
1999 
2000  void inline prox(const Matrix<T>& x, Matrix<T>& y, const T lambda) {
2001  Matrix<T> tmp;
2002  tmp.copy(x);
2003  y.resize(x.m(),x.n());
2004  y.setZeros();
2005  Vector<T> u0(x.m());
2006  u0.setZeros();
2007  Vector<T> u, v;
2008  for (int i = 0; i<MIN(x.m(),x.n()); ++i) {
2009  tmp.svdRankOne(u0,u,v);
2010  T val=v.nrm2();
2011  if (val*val < lambda) break;
2012  y.rank1Update(u,v);
2013  tmp.rank1Update(u,v,-T(1.0));
2014  }
2015  }
2016  T inline eval(const Matrix<T>& x) const {
2017  Matrix<T> XtX;
2018  if (x.m() > x.n()) {
2019  x.XtX(XtX);
2020  } else {
2021  x.XXt(XtX);
2022  }
2023  T sum=0;
2024  Vector<T> u0(XtX.m());
2025  u0.setAleat();
2026  for (int i = 0; i<XtX.m(); ++i) {
2027  T val=XtX.eigLargestMagnSym(u0,u0); // uses power method
2028  XtX.rank1Update(u0,u0,-val);
2029  sum++;
2030  if (val <= 1e-10) break;
2031  }
2032  return sum;
2033  }
2034  virtual bool is_fenchel() const { return false; };
2035  void inline fenchel(const Matrix<T>& input, T& val, T& scal) const { };
2036  };
2037 
2038  template <typename T>
2039  inline void convert_paths_to_mat(const List<Path<long long>*>& paths,SpMatrix<T>& paths_mat, const int n) {
2040  int nzmax=0;
2041  for (ListIterator<Path<long long>*> it=paths.begin(); it != paths.end(); ++it)
2042  nzmax+=it->nodes.size();
2043  paths_mat.resize(n,paths.size(),nzmax);
2044  int* pB =paths_mat.pB();
2045  int* pE =paths_mat.pE();
2046  int* r =paths_mat.r();
2047  T* v =paths_mat.v();
2048  int count_col=0;
2049  int count=0;
2050  pB[0]=0;
2051  for (ListIterator<Path<long long>*> it_path=paths.begin();
2052  it_path != paths.end(); ++it_path) {
2053  for (const_iterator_int it = it_path->nodes.begin();
2054  it != it_path->nodes.end(); ++it) {
2055  r[count]= *it;
2056  v[count++]= it_path->flow;
2057  }
2058  pB[++count_col]=count;
2059  }
2060  for (int i = 0; i<paths_mat.n(); ++i) sort(r,v,pB[i],pE[i]-1);
2061  };
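// convert_paths_to_mat stores each extracted path as one column of an
// n x paths.size() sparse matrix in CSC form: the row indices of a column are
// the node ids of the path, and every stored value in that column is the
// path's flow.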
2062 
2063  template <typename T>
2064  class GraphPathL0 : public Regularizer<T> {
2065  public:
2066  GraphPathL0(const ParamReg<T>& param) : Regularizer<T>(param) {
2067  const GraphPathStruct<T>& graph=*(param.graph_path_st);
2068  _graph.init_graph(graph);
2069  }
2070  virtual ~GraphPathL0() { };
2071 
2072  void inline prox(const Vector<T>& x, Vector<T>& y, const T lambda) {
2074  y.copy(x);
2075  if (this->_pos) y.thrsPos();
2076  _graph.proximal_l0(y.rawX(),lambda);
2077  };
2078  T inline eval(const Vector<T>& x) const {
2079  return const_cast<GraphPath<T>* >(&_graph)->eval_l0(x.rawX());
2080  };
2081  T inline eval_paths(const Vector<T>& x, SpMatrix<T>& paths_mat) const {
2082  List<Path<long long>*> paths;
2083  T val=const_cast<GraphPath<T>* >(&_graph)->eval_l0(x.rawX(),&paths);
2084  convert_paths_to_mat<T>(paths,paths_mat,_graph.n());
 2085  for (ListIterator<Path<long long>*> it_path=paths.begin();
2086  it_path != paths.end(); ++it_path) delete(*it_path);
2087  return val;
2088  };
2089 
2090  virtual bool is_fenchel() const { return false; };
2091  void inline fenchel(const Vector<T>& input, T& val, T& scal) const { };
2092 
2093  private:
2094  GraphPath<T> _graph;
2095  };
2096 
2097  template <typename T>
2098  class GraphPathConv : public Regularizer<T> {
2099  public:
2100  GraphPathConv(const ParamReg<T>& param) : Regularizer<T>(param) {
2101  const GraphPathStruct<T>& graph=*(param.graph_path_st);
2102  _graph.init_graph(graph);
2103  }
2104  virtual ~GraphPathConv() { };
2105 
2106  void inline prox(const Vector<T>& x, Vector<T>& y, const T lambda) {
2107  y.copy(x);
2108  if (this->_pos) y.thrsPos();
2109  _graph.proximal_conv(y.rawX(),lambda);
2110  };
2111  T inline eval(const Vector<T>& x) const {
2112  return const_cast<GraphPath<T>* >(&_graph)->eval_conv(x.rawX());
2113  };
2114  T inline eval_dual_norm(const Vector<T>& x) const {
2115  return const_cast<GraphPath<T>* >(&_graph)->eval_dual_norm(x.rawX(),NULL);
2116  };
2117  T inline eval_paths(const Vector<T>& x, SpMatrix<T>& paths_mat) const {
2118  List<Path<long long>*> paths;
2119  T val=const_cast<GraphPath<T>* >(&_graph)->eval_conv(x.rawX(),&paths);
2120  convert_paths_to_mat<T>(paths,paths_mat,_graph.n());
2121  for (ListIterator<Path<long long>*> it_path=paths.begin();
2122  it_path != paths.end(); ++it_path) delete(*it_path);
2123  return val;
2124  };
2125  T inline eval_dual_norm_paths(const Vector<T>& x, SpMatrix<T>& paths_mat) const {
2126  Path<long long> path;
2127  T val=const_cast<GraphPath<T>* >(&_graph)->eval_dual_norm(x.rawX(),&path.nodes);
2128  List<Path<long long>*> paths;
2129  paths.push_back(&path);
2130  path.flow_int=1;
2131  path.flow=double(1.0);
2132  convert_paths_to_mat<T>(paths,paths_mat,_graph.n());
2133  return val;
2134  };
2135  virtual bool is_fenchel() const { return true; };
2136 
2137  void inline fenchel(const Vector<T>& input, T& val, T& scal) const {
2138  T mm;
2139  if (this->_pos) {
2140  Vector<T> output;
2141  output.copy(input);
2142  output.thrsPos();
2143  mm = const_cast<GraphPath<T>* >(&_graph)->eval_dual_norm(output.rawX(),NULL);
2144  } else {
2145  mm = const_cast<GraphPath<T>* >(&_graph)->eval_dual_norm(input.rawX(),NULL);
2146  }
2147  scal= mm > 1.0 ? T(1.0)/mm : 1.0;
2148  val=0;
 2149  if (this->_intercept && abs<T>(input[input.n()-1]) > EPSILON) val=INFINITY;
2150  };
 2151  private:
 2152  GraphPath<T> _graph;
 2153  };
2154 
2155 
2156  template <typename T,typename Reg>
2157  class RegMat : public Regularizer<T,Matrix<T> > {
2158  public:
2159  RegMat(const ParamReg<T>& param) : Regularizer<T,Matrix<T> >(param) {
2160  _transpose=param.transpose;
2161  const int N = param.num_cols;
2162  _regs=new Reg*[N];
2163  _N=N;
2164  for (int i = 0; i<N; ++i)
2165  _regs[i]=new Reg(param);
2166  };
2167  virtual ~RegMat() {
2168  for (int i = 0; i<_N; ++i) {
2169  delete(_regs[i]);
2170  _regs[i]=NULL;
2171  }
2172  delete[](_regs);
2173  };
2174  void inline reset() {
2175  for (int i = 0; i<_N; ++i) _regs[i]->reset();
2176  };
2177  void inline prox(const Matrix<T>& x, Matrix<T>& y, const T lambda) {
2178  y.copy(x);
2179  int i;
2180  if (_transpose) {
2181 #pragma omp parallel for private(i)
2182  for (i = 0; i<_N; ++i) {
2183  Vector<T> colx, coly;
2184  x.copyRow(i,colx);
2185  _regs[i]->prox(colx,coly,lambda);
2186  y.setRow(i,coly);
2187  }
2188  } else {
2189 #pragma omp parallel for private(i)
2190  for (i = 0; i<_N; ++i) {
2191  Vector<T> colx, coly;
2192  x.refCol(i,colx);
2193  y.refCol(i,coly);
2194  _regs[i]->prox(colx,coly,lambda);
2195  }
2196  }
2197  };
2198  virtual bool is_subgrad() const {
2199  bool ok=true;
2200  for (int i = 0; i<_N; ++i)
2201  ok=ok && _regs[i]->is_subgrad();
2202  return ok;
2203  };
2204  void inline sub_grad(const Matrix<T>& x, Matrix<T>& y) const {
2205  y.resize(x.m(),x.n());
2206  Vector<T> colx, coly, cold;
2207  if (_transpose) {
2208  for (int i = 0; i<_N; ++i) {
2209  x.copyRow(i,colx);
2210  _regs[i]->sub_grad(colx,coly);
2211  y.setRow(i,coly);
2212  }
2213  } else {
2214  for (int i = 0; i<_N; ++i) {
2215  x.refCol(i,colx);
2216  y.refCol(i,coly);
2217  _regs[i]->sub_grad(colx,coly);
2218  }
2219  }
2220  };
2221  T inline eval(const Matrix<T>& x) const {
2222  T sum = 0;
2223  int i;
2224 #pragma omp parallel for private(i)
2225  for (i = 0; i<_N; ++i) {
2226  Vector<T> col;
2227  if (_transpose) {
2228  x.copyRow(i,col);
2229  } else {
2230  x.refCol(i,col);
2231  }
2232 #pragma omp critical
2233  sum += _regs[i]->eval(col);
2234  }
2235  return sum;
2236  };
2237  void inline fenchel(const Matrix<T>& input, T& val, T& scal) const {
2238  Vector<T> col;
2239  val = 0;
2240  scal = 1.0;
2241  for (int i = 0; i<_N; ++i) {
2242  if (_transpose) {
2243  input.copyRow(i,col);
2244  } else {
2245  input.refCol(i,col);
2246  }
2247  T val2 = 0;
2248  T scal2 = 1.0;
2249  _regs[i]->fenchel(col,val2,scal2);
2250  scal=MIN(scal,scal2);
2251  val += val2;
2252  }
2253  };
2254  virtual bool is_fenchel() const {
2255  bool ok=true;
2256  for (int i = 0; i<_N; ++i)
2257  ok = ok && _regs[i]->is_fenchel();
2258  return ok;
2259  };
2260 
2261  protected:
2262  int _N;
 2263  Reg** _regs;
 2264  bool _transpose;
 2265  };
2266 
 2267  template <typename T>
 2268  struct MixedL1L2_L1 {
 2269  // typedef defining ::type (row-wise l1/l2 prox combined with an l1 penalty) elided in this listing
 2270  };
 2271 
 2272  template <typename T>
 2273  struct MixedL1LINF_L1 {
 2274  // typedef defining ::type (row-wise l1/linf prox combined with an l1 penalty) elided in this listing
 2275  };
2276 
2277  template <typename T>
2278  class SpecGraphMat : public Regularizer<T,Matrix<T> > {
2279  public:
2280  SpecGraphMat(const ParamReg<T>& param) : Regularizer<T,Matrix<T> >(param) { };
2281  virtual ~SpecGraphMat() { delete(_graphlasso); };
2282 
2283  virtual void dummy() = 0;
2284 
2285  void inline reset() { _graphlasso->reset(); };
2286 
2287  void inline prox(const Matrix<T>& x, Matrix<T>& y, const T lambda) {
2288  Vector<T> xv, yv;
2289  x.toVect(xv);
2290  y.resize(x.m(),x.n());
2291  y.toVect(yv);
2292  _graphlasso->prox(xv,yv,lambda);
2293  }
2294  T inline eval(const Matrix<T>& X) const {
2295  Vector<T> xv;
2296  X.toVect(xv);
2297  return _graphlasso->eval(xv);
2298  }
2299 
2300  void inline fenchel(const Matrix<T>& input, T& val, T& scal) const {
2301  Vector<T> inv;
2302  input.toVect(inv);
2303  _graphlasso->fenchel(inv,val,scal);
2304  };
2305  virtual bool is_fenchel() const {
2306  return _graphlasso->is_fenchel();
2307  };
2308 
 2309  protected:
 2310  GraphLasso<T>* _graphlasso;
 2311  };
2312 
2313  template <typename T>
2314  class MixedL1LINFCR : public SpecGraphMat<T> {
2315  public:
2316  MixedL1LINFCR(const int m, const ParamReg<T>& param) : SpecGraphMat<T>(param) {
2317  const int n = param.num_cols;
2318  const T l2dl1 = param.lambda2d1;
2319  GraphStruct<T> graph_st;
2320  graph_st.Nv=m*n;
2321  graph_st.Ng=m+n;
2322  T* weights = new T[graph_st.Ng];
2323  for (int i = 0; i<n; ++i) weights[i]=T(1.0);
2324  for (int i = 0; i<m; ++i) weights[i+n]=l2dl1;
2325  graph_st.weights=weights;
2326 
2327  mwSize* gv_jc = new mwSize[graph_st.Ng+1];
2328  mwSize* gv_ir = new mwSize[m*n*2];
2329  for (int i = 0; i<n; ++i) {
2330  gv_jc[i]=i*m;
2331  for (int j = 0; j<m; ++j)
2332  gv_ir[i*m+j]=i*m+j;
2333  }
2334  for (int i = 0; i<m; ++i) {
2335  gv_jc[i+n]=i*n+n*m;
2336  for (int j = 0; j<n; ++j)
2337  gv_ir[i*n+n*m+j]=j*m+i;
2338  }
2339  gv_jc[m+n]=2*m*n;
2340  graph_st.gv_jc=gv_jc;
2341  graph_st.gv_ir=gv_ir;
2342 
2343  mwSize* gg_jc = new mwSize[graph_st.Ng+1];
2344  mwSize* gg_ir = new mwSize[1];
2345  for (int i = 0; i< graph_st.Ng+1; ++i) gg_jc[i]=0;
2346  graph_st.gg_jc=gg_jc;
2347  graph_st.gg_ir=gg_ir;
2348 
2349  ParamReg<T> param_lasso = param;
2350  param_lasso.graph_st = &graph_st;
2351  this->_graphlasso = new GraphLasso<T>(param_lasso);
2352 
2353  delete[](weights);
2354  delete[](gv_jc);
2355  delete[](gv_ir);
2356  delete[](gg_jc);
2357  delete[](gg_ir);
2358  };
2359  virtual ~MixedL1LINFCR() { };
2360  virtual void dummy() { };
2361  };
2362 
2363 
2364  template <typename T>
2365  class TreeMult : public SpecGraphMat<T> {
2366  public:
2367  TreeMult(const ParamReg<T>& param) : SpecGraphMat<T>(param) {
2368  const TreeStruct<T>& tree_st=*(param.tree_st);
2369  const int N = param.num_cols;
2370  const T l1dl2 = param.lambda2d1;
2371  GraphStruct<T> graph_st;
2372  int Nv=tree_st.Nv;
2373  if (param.intercept) ++Nv;
2374  int Ng=tree_st.Ng;
2375  graph_st.Nv=Nv*N;
2376  graph_st.Ng=Ng*(N+1);
2377  T* weights=new T[graph_st.Ng];
2378  for (int i = 0; i<N+1; ++i)
2379  for (int j = 0; j<Ng; ++j)
2380  weights[i*Ng+j]=tree_st.weights[j];
2381  for (int j = 0; j<Ng; ++j)
2382  weights[N*Ng+j]*=l1dl2;
2383  graph_st.weights=weights;
2384 
2385  int nzmax_tree=0;
2386  for (int i = 0; i<Ng; ++i)
2387  nzmax_tree += tree_st.N_own_variables[i];
2388  int nzmax_v=nzmax_tree*N;
2389  mwSize* gv_jc = new mwSize[graph_st.Ng+1];
2390  mwSize* gv_ir = new mwSize[nzmax_v];
2391  int count=0;
2392  for (int i = 0; i<N; ++i) {
2393  for (int j = 0; j<Ng; ++j) {
2394  gv_jc[i*Ng+j]=count;
2395  for (int k = 0; k<tree_st.N_own_variables[j]; ++k) {
2396  gv_ir[gv_jc[i*Ng+j] + k] =Nv*i+tree_st.own_variables[j]+k;
2397  ++count;
2398  }
2399  }
2400  }
2401  for (int i = 0; i<Ng+1; ++i) {
2402  gv_jc[N*Ng+i]=count;
2403  }
2404  graph_st.gv_jc=gv_jc;
2405  graph_st.gv_ir=gv_ir;
2406 
2407  mwSize* gg_jc = new mwSize[graph_st.Ng+1];
2408  int nzmax_tree2=tree_st.groups_jc[Ng];
2409  int nzmax2=nzmax_tree2*(N+1)+Ng*N;
2410  mwSize* gg_ir = new mwSize[nzmax2];
2411  count=0;
2412  for (int i = 0; i<N; ++i) {
2413  for (int j = 0; j<Ng; ++j) {
2414  gg_jc[i*Ng+j] = count;
2415  for (int k = tree_st.groups_jc[j]; k<static_cast<int>(tree_st.groups_jc[j+1]); ++k) {
2416  gg_ir[count++] = i*Ng+tree_st.groups_ir[k];
2417  }
2418  }
2419  }
2420  for (int i = 0; i<Ng; ++i) {
2421  gg_jc[N*Ng+i] = count;
2422  for (int j = tree_st.groups_jc[i]; j<static_cast<int>(tree_st.groups_jc[i+1]); ++j) {
2423  gg_ir[count++] = N*Ng+tree_st.groups_ir[j];
2424  }
2425  for (int j = 0; j<N; ++j) {
2426  gg_ir[count++] = j*Ng+i;
2427  }
2428  }
2429  gg_jc[(N+1)*Ng]=nzmax2;
2430 
2431  graph_st.gg_jc=gg_jc;
2432  graph_st.gg_ir=gg_ir;
2433  // param.graph_st=&graph_st;
2434  ParamReg<T> param_lasso = param;
2435  param_lasso.graph_st=&graph_st;
2436  this->_graphlasso = new GraphLasso<T>(param_lasso);
2437 
2438  delete[](weights);
2439  delete[](gv_ir);
2440  delete[](gv_jc);
2441  delete[](gg_ir);
2442  delete[](gg_jc);
2443  };
2444  virtual void dummy() { };
2445  virtual ~TreeMult() { };
2446  };
2447 
2448  template <typename T>
2449  class GraphMult : public SpecGraphMat<T> {
2450  public:
2451  GraphMult(const ParamReg<T>& param) : SpecGraphMat<T>(param) {
2452  const GraphStruct<T>& graph_st=*(param.graph_st);
2453  const int N = param.num_cols;
2454  const T l1dl2 = param.lambda2d1;
2455  GraphStruct<T> g_st;
2456  int Nv=graph_st.Nv;
2457  int Ng=graph_st.Ng;
2458  g_st.Nv=Nv*N;
2459  g_st.Ng=Ng*(N+1);
2460  T* weights=new T[g_st.Ng];
2461  for (int i = 0; i<N+1; ++i)
2462  for (int j = 0; j<Ng; ++j)
2463  weights[i*Ng+j]=graph_st.weights[j];
2464  for (int j = 0; j<Ng; ++j)
2465  weights[N*Ng+j]*=l1dl2;
2466  g_st.weights=weights;
 2467  int nzmax_graph=graph_st.gv_jc[Ng]; // nonzeros in the group-to-variable structure (gv_jc)
2468  int nzmax_v=nzmax_graph*N;
2469  mwSize* gv_jc = new mwSize[g_st.Ng+1];
2470  mwSize* gv_ir = new mwSize[nzmax_v];
2471  int count=0;
2472  for (int i = 0; i<N; ++i) {
2473  for (int j = 0; j<Ng; ++j) {
2474  gv_jc[i*Ng+j]=count;
2475  for (int k = graph_st.gv_jc[j]; k<graph_st.gv_jc[j+1]; ++k) {
2476  gv_ir[count++] =Nv*i+graph_st.gv_ir[k];
2477  }
2478  }
2479  }
2480  for (int i = 0; i<Ng+1; ++i) {
2481  gv_jc[N*Ng+i]=count;
2482  }
2483  g_st.gv_jc=gv_jc;
2484  g_st.gv_ir=gv_ir;
2485 
2486  mwSize* gg_jc = new mwSize[g_st.Ng+1];
2487  int nzmax_tree2=graph_st.gg_jc[Ng];
2488  int nzmax2=nzmax_tree2*(N+1)+Ng*N;
2489  mwSize* gg_ir = new mwSize[nzmax2];
2490  count=0;
2491  for (int i = 0; i<N; ++i) {
2492  for (int j = 0; j<Ng; ++j) {
2493  gg_jc[i*Ng+j] = count;
2494  for (int k = graph_st.gg_jc[j]; k<graph_st.gg_jc[j+1]; ++k) {
2495  gg_ir[count++] = i*Ng+graph_st.gg_ir[k];
2496  }
2497  }
2498  }
2499  for (int i = 0; i<Ng; ++i) {
2500  gg_jc[N*Ng+i] = count;
2501  for (int j = graph_st.gg_jc[i]; j<static_cast<int>(graph_st.gg_jc[i+1]); ++j) {
2502  gg_ir[count++] = N*Ng+graph_st.gg_ir[j];
2503  }
2504  for (int j = 0; j<N; ++j) {
2505  gg_ir[count++] = j*Ng+i;
2506  }
2507  }
2508  gg_jc[(N+1)*Ng]=nzmax2;
2509 
2510  g_st.gg_jc=gg_jc;
2511  g_st.gg_ir=gg_ir;
2512  ParamReg<T> param_lasso = param;
2513  param_lasso.graph_st = &g_st;
2514  this->_graphlasso = new GraphLasso<T>(param_lasso);
2515 
2516  delete[](weights);
2517  delete[](gv_ir);
2518  delete[](gv_jc);
2519  delete[](gg_ir);
2520  delete[](gg_jc);
2521  };
2522  virtual void dummy() { };
2523  virtual ~GraphMult() { };
2524  };
2525 
2526  template <typename T, typename D, typename E>
2527  T duality_gap(Loss<T,D,E>& loss, Regularizer<T,D>& regularizer, const D& x,
2528  const T lambda, T& best_dual, const bool verbose = false) {
2529  if (!regularizer.is_fenchel() || !loss.is_fenchel()) {
2530  cerr << "Error: no duality gap available" << endl;
2531  exit(1);
2532  }
2533  T primal= loss.eval(x)+lambda*regularizer.eval(x);
2534  bool intercept=regularizer.is_intercept();
2535  D grad1, grad2;
2536  loss.var_fenchel(x,grad1,grad2,intercept);
2537  grad2.scal(-T(1.0)/lambda);
2538  T val=0;
2539  T scal=1.0;
2540  regularizer.fenchel(grad2,val,scal);
2541  T dual = -lambda*val;
2542  grad1.scal(scal);
2543  dual -= loss.fenchel(grad1);
2544  dual = MAX(dual,best_dual);
2545  T delta= primal == 0 ? 0 : (primal-dual)/abs<T>(primal);
2546  if (verbose) {
2547  cout << "Relative duality gap: " << delta << endl;
2548  flush(cout);
2549  }
2550  best_dual=dual;
2551  return delta;
2552  }
2553 
2554  template <typename T, typename D, typename E>
2555  T duality_gap(Loss<T,D,E>& loss, Regularizer<T,D>& regularizer, const D& x,
2556  const T lambda, const bool verbose = false) {
2557  T best_dual=-INFINITY;
2558  return duality_gap(loss,regularizer,x,lambda,best_dual,verbose);
2559  }
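// duality_gap returns the relative gap (primal - dual)/|primal| obtained from
// the Fenchel conjugates of the loss and the regularizer; the solvers below
// evaluate it every it0 iterations and stop once it falls below param.tol.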
2560 
2561  template <typename T>
2562  void dualityGraph(const Matrix<T>& X, const Matrix<T>& D, const Matrix<T>& alpha0,
2563  Vector<T>& res, const ParamFISTA<T>& param,
2564  const GraphStruct<T>* graph_st) {
2565  Regularizer<T>* regularizer=new GraphLasso<T>(*graph_st,
2566  param.intercept,param.resetflow,param.pos,param.clever);
2567  Loss<T>* loss;
2568  switch (param.loss) {
2569  case SQUARE: loss=new SqLoss<T>(D); break;
2570  case LOG: loss = new LogLoss<T>(D); break;
2571  case LOGWEIGHT: loss = new LogLoss<T,true>(D); break;
2572  default: cerr << "Not implemented"; exit(1);
2573  }
2574  Vector<T> Xi;
2575  X.refCol(0,Xi);
2576  loss->init(Xi);
2577  Vector<T> alpha0i;
2578  alpha0.refCol(0,alpha0i);
2579  regularizer->reset();
2580  res[0]=loss->eval(alpha0i)+param.lambda*regularizer->eval(alpha0i);
2581  res[1]=duality_gap(*loss,*regularizer,alpha0i,param.lambda);
2582  delete(loss);
2583  delete(regularizer);
2584  }
2585 
2586  template <typename T>
2587  void writeLog(const int iter, const T time, const T primal, const T dual,
2588  char* name) {
2589  std::ofstream f;
2590  f.precision(12);
2591  f.flags(std::ios_base::scientific);
2592  f.open(name, ofstream::app);
2593  f << iter << " " << primal << " " << dual << " " << time << std::endl;
2594  f.close();
2595  };
2596 
2597 
2598  template <typename T, typename D, typename E>
2599  void subGradientDescent_Generic(Loss<T,D,E>& loss, Regularizer<T,D>& regularizer, const D& x0, D& x,
2600  Vector<T>& optim_info,
2601  const ParamFISTA<T>& param) {
2602  D grad;
2603  D sub_grad;
2604  const T lambda=param.lambda;
2605  const int it0 = MAX(1,param.it0);
2606  const bool duality = loss.is_fenchel() && regularizer.is_fenchel();
2607  optim_info.set(-1);
2608  T best_dual=-INFINITY;
2609  T rel_duality_gap=-INFINITY;
2610  Timer time;
2611  time.start();
2612  int it;
2613  for (it = 1; it<=param.max_it; ++it) {
2615  if (param.verbose && ((it % it0) == 0)) {
2616  time.stop();
2617  T los=loss.eval(x) + lambda*regularizer.eval(x);
2618  optim_info[0]=los;
2619  T sec=time.getElapsed();
2620  cout << "Iter: " << it << ", loss: " << los << ", time: " << sec << " ";
2621  if (param.log)
2622  writeLog(it,sec,los,best_dual,param.logName);
 2623  cout << endl;
2625  flush(cout);
2626  time.start();
2627  }
2628 
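// subgradient step: x <- x - step*(grad(loss) + lambda*subgrad(regularizer)),
// with step = a/(b+it) or a/(b+sqrt(it)) depending on param.sqrt_step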
2630  loss.grad(x,grad);
2631  regularizer.sub_grad(x,sub_grad);
2632  T step = param.sqrt_step ? param.a/(param.b+sqrt(static_cast<T>(it))) : param.a/(param.b+(static_cast<T>(it)));
2633  x.add(grad,-step);
2634  x.add(sub_grad,-lambda*step);
2635  if (duality && ((it % it0) == 0)) {
2636  time.stop();
2637  rel_duality_gap=duality_gap(loss,regularizer,x,lambda,best_dual,param.verbose);
2638  optim_info[1]=best_dual;
2639  optim_info[2]=rel_duality_gap;
2640  if (rel_duality_gap < param.tol) break;
2641  time.start();
2642  }
2643  }
2644  if ((it % it0) != 0 || !param.verbose) {
2645  T los=loss.eval(x) + lambda*regularizer.eval(x);
2646  optim_info[0]=los;
2647  if (duality) {
2648  rel_duality_gap=duality_gap(loss,regularizer,x,lambda,best_dual,param.verbose);
2649  optim_info[1]=best_dual;
2650  optim_info[2]=rel_duality_gap;
2651  }
2652  }
2653  optim_info[3]=it;
2654  }
2655 
2656  template <typename T, typename D, typename E>
2657  void ISTA_Generic(Loss<T,D,E>& loss, Regularizer<T,D>& regularizer, const D& x0, D& x, Vector<T>& optim_info,
2658  const ParamFISTA<T>& param) {
2659  const int it0 = MAX(1,param.it0);
2660  const T lambda=param.lambda;
2661  T L=param.L0;
2662  T Lold=L;
2663  x.copy(x0);
2664  D grad, tmp, prox, old;
2665 
2666  const bool duality = loss.is_fenchel() && regularizer.is_fenchel();
2667  optim_info.set(-1);
2668  Timer time;
2669  time.start();
2670  T rel_duality_gap=-INFINITY;
2671 
2672  int it;
2673  T best_dual=-INFINITY;
2674  for (it = 1; it<=param.max_it; ++it) {
2676  if (param.verbose && ((it % it0) == 0)) {
2677  time.stop();
2678  T los=loss.eval(x) + lambda*regularizer.eval(x);
2679  optim_info[0]=los;
2680  T sec=time.getElapsed();
2681  cout << "Iter: " << it << ", loss: " << los << ", time: " << sec << ", L: " << L;
2682  flush(cout);
2683  if (param.log)
2684  writeLog(it,sec,los,best_dual,param.logName);
2685  time.start();
2686  }
2687 
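// ISTA iteration: forward (gradient) step, backtracking on L until the
// quadratic upper bound of the loss holds, then the proximal (backward) step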
2689  loss.grad(x,grad);
2690  int iter=1;
2691  while (iter < param.max_iter_backtracking) {
2692  prox.copy(x);
2693  prox.add(grad,-T(1.0)/L);
2694  regularizer.prox(prox,tmp,lambda/L);
2695 
2696  Lold=L;
2697  if (loss.test_backtracking(x,grad,tmp,L)) {
2698  break;
2699  }
2700  L *= param.gamma;
2701  if (param.verbose && ((it % it0) == 0))
2702  cout << " " << L;
2703  ++iter;
2704  }
2705  if (param.verbose && ((it % it0) == 0))
2706  cout << endl;
2707  old.copy(x);
2708  x.copy(tmp);
2709  if (duality) {
2710  if ((it % it0) == 0) {
2711  time.stop();
2712  rel_duality_gap=duality_gap(loss,regularizer,x,lambda,best_dual,param.verbose);
2713  optim_info[1]=best_dual;
2714  optim_info[2]=rel_duality_gap;
2715  if (rel_duality_gap < param.tol) break;
2716  time.start();
2717  }
2718  } else {
2719  old.sub(x);
2720  if (sqrt(old.nrm2sq()/MAX(EPSILON,x.nrm2sq())) < param.tol) break;
2721  }
2722  }
2723  T los=loss.eval(x) + lambda*regularizer.eval(x);
2724  optim_info[0]=los;
2725  T sec=time.getElapsed();
2726  if (param.verbose) {
2727  cout << "Iter: " << it << ", loss: " << los << ", time: " << sec << ", L: " << L << endl;
2728  flush(cout);
2729  }
2730  if (duality) {
2731  rel_duality_gap=duality_gap(loss,regularizer,x,lambda,best_dual,param.verbose);
2732  optim_info[1]=best_dual;
2733  optim_info[2]=rel_duality_gap;
2734  }
2735  optim_info[3]=it;
2736  }
2737 
2738  template <typename T, typename D, typename E>
2739  void FISTA_Generic(Loss<T,D,E>& loss, Regularizer<T,D>& regularizer, const D& x0, D& x, Vector<T>& optim_info,
2740  const ParamFISTA<T>& param) {
2741  const int it0 = MAX(1,param.it0);
2742  const T lambda=param.lambda;
2743  T L=param.L0;
2744  T t = 1.0;
2745  T Lold=L;
2746  T old_t;
2747  D y, grad, prox, tmp;
2748  y.copy(x0);
2749  x.copy(x0);
2750 
2751  const bool duality = loss.is_fenchel() && regularizer.is_fenchel();
2752  T rel_duality_gap=-INFINITY;
2753  optim_info.set(-1);
2754 
2755  Timer time;
2756  time.start();
2757 
2758  int it;
2759  T best_dual=-INFINITY;
2760  for (it = 1; it<=param.max_it; ++it) {
2762  if (param.verbose && ((it % it0) == 0)) {
2763  time.stop();
2764  T los=loss.eval(x) + lambda*regularizer.eval(x);
2765  optim_info[0]=los;
2766  T sec=time.getElapsed();
2767  cout << "Iter: " << it << ", loss: " << los << ", time: " << sec << ", L: " << L;
2768  flush(cout);
2769  if (param.log)
2770  writeLog(it,sec,los,best_dual,param.logName);
2771  time.start();
2772  }
2773 
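// same forward-backward step as ISTA, but taken at the extrapolated point y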
2775  loss.grad(y,grad);
2776 
2777  int iter=1;
2778 
2779  while (iter < param.max_iter_backtracking) {
2780  prox.copy(y);
2781  prox.add(grad,-T(1.0)/L);
2782  regularizer.prox(prox,tmp,lambda/L);
2783  Lold=L;
2784  if (param.fixed_step || loss.test_backtracking(y,grad,tmp,L)) break;
2785  L *= param.gamma;
2786  if (param.verbose && ((it % it0) == 0))
2787  cout << " " << L;
2788  ++iter;
2789  }
2790  if (param.verbose && ((it % it0) == 0))
2791  cout << endl;
2792 
2793  prox.copy(x);
2794  prox.sub(tmp);
2795  x.copy(tmp);
2796  old_t=t;
2797  t=(1.0+sqrt(1+4*t*t))/2;
2798  y.copy(x);
2799  y.add(prox,(1-old_t)/t);
2800  if (duality) {
2801  if ((it % it0) == 0) {
2802  time.stop();
2803  rel_duality_gap=duality_gap(loss,regularizer,x,lambda,best_dual,param.verbose);
2804  optim_info[1]=best_dual;
2805  optim_info[2]=rel_duality_gap;
2806  if (rel_duality_gap < param.tol) break;
2807  time.start();
2808  }
2809  } else {
2810  if (sqrt(prox.nrm2sq()/MAX(EPSILON,x.nrm2sq())) < param.tol) break;
2811  }
2812  }
2813  T los=loss.eval(x) + lambda*regularizer.eval(x);
2814  optim_info[0]=los;
2815  T sec=time.getElapsed();
2816  if (param.verbose) {
2817  cout << "Iter: " << it << ", loss: " << los << ", time: " << sec << ", L: " << L << endl;
2818  flush(cout);
2819  }
2820  if (duality) {
2821  rel_duality_gap=duality_gap(loss,regularizer,x,lambda,best_dual,param.verbose);
2822  optim_info[1]=best_dual;
2823  optim_info[2]=rel_duality_gap;
2824  }
2825  optim_info[3]=it;
2826  };
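// The momentum bookkeeping above is the standard FISTA recursion:
//   t_{k+1} = (1 + sqrt(1 + 4*t_k^2))/2,
//   y_{k+1} = x_{k+1} + ((t_k - 1)/t_{k+1})*(x_{k+1} - x_k);
// since 'prox' holds x_k - x_{k+1} at that point, y.add(prox,(1-old_t)/t)
// is the same update with the sign folded into the coefficient.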
2827 
 2828  template <typename T>
 2829  // signature line elided in this listing: the helper returns the augmented-Lagrangian value and takes the splitting-function loss and regularizer before the arguments below
 2830  const T lambda, const T gamma, const Vector<T>& w, const Matrix<T>& splitted_loss, const SpMatrix<T>& splitted_reg,
2831  const Matrix<T>& multi_loss, const SpMatrix<T>& multi_reg, T& los, const T* weights = NULL) {
2832  const int n_reg=reg.num_components();
2833  //T loss_val = loss.eval(w) + lambda*reg.eval(w);
2834  T lagrangian = loss.eval_split(splitted_loss) + lambda*reg.eval_split(splitted_reg);
2835  Matrix<T> tmp;
2836  tmp.copy(splitted_loss);
2837  tmp.addVecToCols(w,-T(1.0));
2838  T add =0.5*gamma*tmp.normFsq();
2839  lagrangian += add;
2840  los+=add;
2841  if (n_reg > 0) {
2842  SpMatrix<T> stmp;
2843  stmp.copy(splitted_reg);
2844  stmp.addVecToCols(w,-T(1.0));
2845  add=0.5*gamma*stmp.normFsq();
2846  lagrangian += add;
2847  los+=add;
2848  lagrangian -= multi_reg.dot_direct(stmp);
2849  }
2850  lagrangian -= multi_loss.dot(tmp);
2851  return lagrangian;
2852  };
2853 
2854 
 2855  template <typename T>
 2856  void inline update_multipliers_ADMM(Vector<T>& w,
 2857  const Matrix<T>& splitted_w_loss,
2858  const Matrix<T>& multipliers_w_loss,
2859  const SpMatrix<T>& splitted_w_reg,
2860  const SpMatrix<T>& multipliers_w_reg,
2861  const T gamma) {
2862  Vector<T> mean(w.n());
2863  splitted_w_loss.sum_cols(mean);
2864  w.copy(mean);
2865  multipliers_w_loss.sum_cols(mean);
2866  w.add(mean,-T(1.0)/gamma);
2867  Vector<T> number_occurences(w.n());
2868  number_occurences.set(splitted_w_loss.n());
2869  const int n_reg=splitted_w_reg.n();
2870  if (n_reg > 0) {
2871  SpVector<T> col;
2872  mean.setZeros();
2873  for (int i = 0; i<n_reg; ++i) {
2874  splitted_w_reg.refCol(i,col);
2875  mean.add(col);
2876  for (int j = 0; j<col.L(); ++j)
2877  number_occurences[col.r(j)]++;
2878  }
2879  w.add(mean);
2880  mean.setZeros();
2881  for (int i = 0; i<n_reg; ++i) {
2882  multipliers_w_reg.refCol(i,col);
2883  mean.add(col);
2884  }
2885  w.add(mean,-T(1.0)/gamma);
2886  };
2887  w.div(number_occurences);
2888  };
2889 
2890 
 2891  template <typename T>
 2892  void inline update_multipliers_weighted_ADMM(Vector<T>& w,
 2893  const Matrix<T>& splitted_w_loss,
2894  const Matrix<T>& multipliers_w_loss,
2895  const SpMatrix<T>& splitted_w_reg,
2896  const SpMatrix<T>& multipliers_w_reg,
2897  const T gamma, const T* inner_weights) {
2898  Vector<T> mean(w.n());
2899  splitted_w_loss.sum_cols(mean);
2900  w.copy(mean);
2901  multipliers_w_loss.sum_cols(mean);
2902  w.add(mean,-T(1.0)/gamma);
2903  Vector<T> number_occurences(w.n());
2904  number_occurences.set(splitted_w_loss.n());
2905  const int n_reg=splitted_w_reg.n();
2906  if (n_reg > 0) {
2907  SpVector<T> col;
2908  mean.setZeros();
2909  for (int i = 0; i<n_reg; ++i) {
2910  splitted_w_reg.refCol(i,col);
2911  for (int j = 0; j<col.L(); ++j) {
2912  mean[col.r(j)]+=inner_weights[j]*col.v(j);
2913  number_occurences[col.r(j)]+=inner_weights[j]*inner_weights[j];
2914  }
2915  }
2916  w.add(mean);
2917  mean.setZeros();
2918  for (int i = 0; i<n_reg; ++i) {
2919  multipliers_w_reg.refCol(i,col);
2920  for (int j = 0; j<col.L(); ++j)
2921  mean[col.r(j)]+=inner_weights[j]*col.v(j);
2922  }
2923  w.add(mean,-T(1.0)/gamma);
2924  };
2925  w.div(number_occurences);
2926  };
2927 
 2928  template <typename T>
 2929  void ADMM(const SplittingFunction<T, Matrix<T> >& loss, const SplittingFunction<T, SpMatrix<T> >& reg,
 2930  const Vector<T>& w0, Vector<T>& w, Vector<T>& optim_info,
2931  const ParamFISTA<T>& param) {
2932  const T gamma = param.c;
2933  const int n_reg=reg.num_components();
2934  const int it0 = MAX(1,param.it0);
2935  const T lambda=param.lambda;
2936 
2937  w.copy(w0);
2938  Matrix<T> splitted_w_loss;
2939  SpMatrix<T> splitted_w_reg;
2940  Matrix<T> multipliers_w_loss;
2941  SpMatrix<T> multipliers_w_reg;
2942  loss.init_split_variables(multipliers_w_loss);
2943  reg.init_split_variables(multipliers_w_reg);
2944  splitted_w_loss.copy(multipliers_w_loss);
2945  splitted_w_loss.addVecToCols(w);
2946  if (n_reg > 0) {
2947  splitted_w_reg.copy(multipliers_w_reg);
2948  splitted_w_reg.addVecToCols(w);
2949  }
2950 
2951  Timer time;
2952  time.start();
2953  int it=0;
2954  T los;
2955  T old_los=INFINITY;
2956 
2957  for (it = 0; it<param.max_it; ++it) {
2958 
2959  if (((it % it0) == 0)) {
2960  time.stop();
2961  if (param.is_inner_weights) {
2962  los= loss.eval(w)+lambda*reg.eval_weighted(w,splitted_w_reg,
2963  param.inner_weights);
2964  } else {
2965  los= loss.eval(w)+lambda*reg.eval(w);
2966  }
2967  optim_info[0]=los;
2968  T sec=time.getElapsed();
2969  optim_info[2]=sec;
2970  if (param.verbose) {
2971  cout << "Iter: " << it << ", loss: " << los << ", time: " << sec << endl;
2972  flush(cout);
2973  if (param.log)
2974  writeLog(it,sec,los,T(0),param.logName);
2975  }
2976  time.start();
2977  }
2978  if (param.is_inner_weights) {
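// update the shared primal variable w (weighted variant)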
2980  update_multipliers_weighted_ADMM(w,splitted_w_loss,multipliers_w_loss,splitted_w_reg,multipliers_w_reg,gamma,param.inner_weights);
2981 
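// proximal step on the splitting variables of the loss and the regularizer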
2983  splitted_w_loss.copy(multipliers_w_loss);
2984  splitted_w_loss.scal((1.0)/gamma);
2985  splitted_w_loss.addVecToCols(w);
2986  loss.prox_split(splitted_w_loss,T(1.0)/gamma);
2987  if (n_reg > 0) {
2988  splitted_w_reg.copy(multipliers_w_reg);
2989  splitted_w_reg.scal((1.0)/gamma);
2990  splitted_w_reg.addVecToColsWeighted(w,param.inner_weights);
2991  reg.prox_split(splitted_w_reg,lambda/gamma);
2992  }
2993 
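// gradient ascent on the Lagrange multipliers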
2995  multipliers_w_loss.addVecToCols(w,gamma);
2996  multipliers_w_loss.add(splitted_w_loss,-gamma);
2997  if (n_reg > 0) {
2998  multipliers_w_reg.addVecToColsWeighted(w,param.inner_weights,
2999  gamma);
3000  multipliers_w_reg.add_direct(splitted_w_reg,-gamma);
3001  }
3002  } else {
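// update the shared primal variable w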
3004  update_multipliers_ADMM(w,splitted_w_loss,multipliers_w_loss,splitted_w_reg,multipliers_w_reg,gamma);
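// proximal step on the splitting variables of the loss and the regularizer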
3005 
3007  splitted_w_loss.copy(multipliers_w_loss);
3008  splitted_w_loss.scal((1.0)/gamma);
3009  splitted_w_loss.addVecToCols(w);
3010  loss.prox_split(splitted_w_loss,T(1.0)/gamma);
3011  if (n_reg > 0) {
3012  splitted_w_reg.copy(multipliers_w_reg);
3013  splitted_w_reg.scal((1.0)/gamma);
3014  splitted_w_reg.addVecToCols(w);
3015  reg.prox_split(splitted_w_reg,lambda/gamma);
3016  }
3017 
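// gradient ascent on the Lagrange multipliers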
3019  multipliers_w_loss.addVecToCols(w,gamma);
3020  multipliers_w_loss.add(splitted_w_loss,-gamma);
3021  if (n_reg > 0) {
3022  multipliers_w_reg.addVecToCols(w,gamma);
3023  multipliers_w_reg.add_direct(splitted_w_reg,-gamma);
3024  }
3025  }
3026 
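// stop when the relative decrease of the objective falls below param.tol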
3028  if ((it % it0) == 0) {
3029  if (it > 0 && (old_los-los)/old_los < param.tol) break;
3030  old_los=los;
3031  }
3032  }
3033  if (param.is_inner_weights) {
3034  los= loss.eval(w)+lambda*reg.eval_weighted(w,splitted_w_reg,
3035  param.inner_weights);
3036  } else {
3037  los= loss.eval(w)+lambda*reg.eval(w);
3038  }
3039  optim_info[0]=los;
3040  optim_info[3]=it;
3041  };
3042 
 3043  template <typename T>
 3044  void inline update_multipliers_LinADMM(Vector<T>& w,
 3045  const SpMatrix<T>& splitted_w_reg,
3046  const SpMatrix<T>& multipliers_w_reg,
3047  const T gamma, const T delta) {
3048  Vector<T> mean(w.n());
3049  Vector<T> number_occurences(w.n());
3050  number_occurences.set(delta);
3051  const int n_reg=splitted_w_reg.n();
3052  if (n_reg > 0) {
3053  SpVector<T> col;
3054  mean.setZeros();
3055  for (int i = 0; i<n_reg; ++i) {
3056  splitted_w_reg.refCol(i,col);
3057  mean.add(col);
3058  for (int j = 0; j<col.L(); ++j)
3059  number_occurences[col.r(j)]+=gamma;
3060  }
3061  mean.scal(gamma);
3062  for (int i = 0; i<n_reg; ++i) {
3063  multipliers_w_reg.refCol(i,col);
3064  mean.add(col);
3065  }
3066  w.add(mean);
3067  };
3068  w.div(number_occurences);
3069  };
3070 
3071 
 3072  template <typename T>
 3073  void LinADMM(const SplittingFunction<T, Matrix<T> >& loss, const SplittingFunction<T, SpMatrix<T> >& reg,
 3074  const Vector<T>& w0, Vector<T>& w, Vector<T>& optim_info,
3075  const ParamFISTA<T>& param) {
3076  const T gamma = param.c;
3077  const int n_reg=reg.num_components();
3078  const int it0 = MAX(1,param.it0);
3079  const T lambda=param.lambda;
3080 
3081  w.copy(w0);
3082  SpMatrix<T> primal_reg;
3083  SpMatrix<T> dual_reg;
3084  reg.init_split_variables(dual_reg);
3085  if (n_reg > 0) {
3086  primal_reg.copy(dual_reg);
3087  primal_reg.addVecToCols(w);
3088  }
3089  Vector<T> prim_loss;
3090  loss.init_prim_var(prim_loss);
3091  Vector<T> dual_loss;
3092  dual_loss.copy(prim_loss);
3093 
3094  Timer time;
3095  time.start();
3096  int it=0;
3097  T los;
3098  T old_los=INFINITY;
3099 
3100  for (it = 0; it<param.max_it; ++it) {
3101  /*w.print("w");
3102  prim_loss.print("z");
3103  dual_loss.print("nu");
3104  primal_reg.print("zg");
3105  dual_reg.print("nug");*/
3106 
3107  if (((it % it0) == 0)) {
3108  time.stop();
3109  los= loss.eval(w)+lambda*reg.eval(w);
3110  optim_info[0]=los;
3111  T sec=time.getElapsed();
3112  optim_info[2]=sec;
3113  if (param.verbose) {
3114  cout << "Iter: " << it << ", loss: " << los << ", time: " << sec << endl;
3115  flush(cout);
3116  if (param.log)
3117  writeLog(it,sec,los,T(0),param.logName);
3118  }
3119  time.start();
3120  }
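// proximal step on the primal variable of the loss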
3122  loss.prox_prim_var(prim_loss,dual_loss,w,gamma);
3123 
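// proximal step on the splitting variables of the regularizer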
3125  if (n_reg > 0) {
3126  primal_reg.copy(dual_reg);
3127  primal_reg.scal(-(1.0)/gamma);
3128  primal_reg.addVecToCols(w);
3129  reg.prox_split(primal_reg,lambda/gamma);
3130  }
3131 
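// linearized updates of the loss primal variable and of w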
3133  loss.compute_new_prim(w,prim_loss,dual_loss,gamma,param.delta);
3134  update_multipliers_LinADMM(w,primal_reg,dual_reg,gamma,param.delta);
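// gradient ascent on the Lagrange multipliers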
3135 
3137  if (n_reg > 0) {
3138  dual_reg.addVecToCols(w,-gamma);
3139  dual_reg.add_direct(primal_reg,gamma);
3140  }
3141  loss.add_mult_design_matrix(w,dual_loss,-gamma);
3142  dual_loss.add(prim_loss,gamma);
3143 
3145  if ((it % it0) == 0) {
3146  if (it > 0 && (old_los-los)/old_los < param.tol) break;
3147  old_los=los;
3148  }
3149  }
3150  los= loss.eval(w)+lambda*reg.eval(w);
3151  optim_info[0]=los;
3152  optim_info[3]=it;
3153  };
3154 
 3155  template <typename T>
 3156  SplittingFunction<T, SpMatrix<T> >* setRegularizerADMM(const ParamFISTA<T>& param,
 3157  const GraphStruct<T>* graph_st = NULL,
 3158  const TreeStruct<T>* tree_st = NULL) {
 3159  SplittingFunction<T, SpMatrix<T> >* reg;
 3160  ParamReg<T> param_reg;
3161  param_reg.pos=param.pos;
3162  param_reg.intercept=param.intercept;
3163  param_reg.tree_st=const_cast<TreeStruct<T>* >(tree_st);
3164  param_reg.graph_st=const_cast<GraphStruct<T>* >(graph_st);
3165  param_reg.resetflow=param.resetflow;
3166  param_reg.clever=param.clever;
3167  switch (param.regul) {
3168  case GRAPH: param_reg.linf=true; reg=new GraphLasso<T>(param_reg); break;
3169  case GRAPH_L2: param_reg.linf=false; reg=new GraphLasso<T>(param_reg); break;
3170  case NONE: reg=new None<T>(); break;
3171  default: cerr << "Not implemented"; exit(1);
3172  }
3173  return reg;
3174  };
3175 
 3176  template <typename T>
 3177  Regularizer<T>* setRegularizerVectors(const ParamFISTA<T>& param,
 3178  const GraphStruct<T>* graph_st = NULL,
3179  const TreeStruct<T>* tree_st = NULL,
3180  const GraphPathStruct<T>* graph_path_st=NULL) {
3181  ParamReg<T> param_reg;
3182  param_reg.pos=param.pos;
3183  param_reg.intercept=param.intercept;
3184  param_reg.lambda=param.lambda;
3185  param_reg.lambda2d1=param.lambda2/param.lambda;
3186  param_reg.lambda3d1=param.lambda3/param.lambda;
3187  param_reg.size_group=param.size_group;
3188  param_reg.tree_st=const_cast<TreeStruct<T>* >(tree_st);
3189  param_reg.graph_st=const_cast<GraphStruct<T>* >(graph_st);
3190  param_reg.graph_path_st=const_cast<GraphPathStruct<T>* >(graph_path_st);
3191  param_reg.resetflow=param.resetflow;
3192  param_reg.clever=param.clever;
3193  param_reg.ngroups=param.ngroups;
3194  param_reg.groups=param.groups;
3195  Regularizer<T>* reg;
3196  switch (param.regul) {
3197  case L0: reg=new Lzero<T>(param_reg); break;
3198  case L1: reg=new Lasso<T>(param_reg); break;
3199  case L1CONSTRAINT: reg=new LassoConstraint<T>(param_reg); break;
3200  case L2: reg=new normL2<T>(param_reg); break;
3201  case LINF: reg=new normLINF<T>(param_reg); break;
3202  case RIDGE: reg=new Ridge<T>(param_reg); break;
3203  case ELASTICNET: reg=new typename ElasticNet<T>::type(param_reg); break;
3204  case FUSEDLASSO: reg=new FusedLasso<T>(param_reg); break;
3205  case TREE_L0: reg=new TreeLzero<T>(param_reg); break;
3206  case TREE_L2: param_reg.linf=false; reg=new TreeLasso<T>(param_reg); break;
3207  case TREE_LINF: param_reg.linf=true; reg=new TreeLasso<T>(param_reg); break;
3208  case GRAPH: param_reg.linf=true; reg=new GraphLasso<T>(param_reg); break;
3209  case GRAPH_RIDGE: param_reg.linf=true; reg=new typename GraphLassoRidge<T>::type(param_reg); break;
3210  case GRAPH_L2: param_reg.linf=false; reg=new GraphLasso<T>(param_reg); break;
3211  case TRACE_NORM_VEC: reg=new ProxMatToVec<T, TraceNorm<T> >(param_reg); break;
3212  case RANK_VEC: reg=new ProxMatToVec<T, Rank<T> >(param_reg); break;
3213  case GROUPLASSO_L2: reg=new typename GroupLassoL2<T>::type(param_reg); break;
3214  case GROUPLASSO_LINF: reg=new typename GroupLassoLINF<T>::type(param_reg); break;
3215  case GROUPLASSO_L2_L1: reg=new typename GroupLassoL2_L1<T>::type(param_reg); break;
3216  case GROUPLASSO_LINF_L1: reg=new typename GroupLassoLINF_L1<T>::type(param_reg); break;
3217  case GRAPH_PATH_L0: reg = new GraphPathL0<T>(param_reg); break;
3218  case GRAPH_PATH_CONV: reg = new GraphPathConv<T>(param_reg); break;
3219  case NONE: reg=new None<T>(); break;
3220  default: cerr << "Not implemented"; exit(1);
3221  }
3222  return reg;
3223  };
3224 
 3225  template <typename T>
 3226  Regularizer<T, Matrix<T> >* setRegularizerMatrices(const ParamFISTA<T>& param,
 3227  const int m, const int n,
3228  const GraphStruct<T>* graph_st = NULL,
3229  const TreeStruct<T>* tree_st = NULL,
3230  const GraphPathStruct<T>* graph_path_st=NULL) {
3231  ParamReg<T> param_reg;
3232  param_reg.transpose=param.transpose;
3233  param_reg.pos=param.pos;
3234  param_reg.intercept=param.intercept;
3235  param_reg.lambda2d1=param.lambda2/param.lambda;
3236  param_reg.lambda3d1=param.lambda3/param.lambda;
3237  param_reg.size_group=param.size_group;
3238  param_reg.num_cols=param.transpose ? m : n;
3239  param_reg.tree_st=const_cast<TreeStruct<T>* >(tree_st);
3240  param_reg.graph_st=const_cast<GraphStruct<T>* >(graph_st);
3241  param_reg.resetflow=param.resetflow;
3242  param_reg.clever=param.clever;
3243  Regularizer<T, Matrix<T> >* reg;
3244  switch (param.regul) {
3245  case L0: reg=new RegMat<T, Lzero<T> >(param_reg); break;
3246  case L1: reg=new RegMat<T, Lasso<T> >(param_reg); break;
3247  case L1CONSTRAINT: reg=new RegMat<T, LassoConstraint<T> >(param_reg); break;
3248  case L2: reg=new RegMat<T, normL2<T> >(param_reg); break;
3249  case LINF: reg=new RegMat<T, normLINF<T> >(param_reg); break;
3250  case RIDGE: reg=new RegMat<T, Ridge<T> >(param_reg); break;
3251  case ELASTICNET: reg=new RegMat<T, typename ElasticNet<T>::type >(param_reg); break;
3252  case FUSEDLASSO: reg=new RegMat<T, FusedLasso<T> >(param_reg); break;
3253  case L1L2: reg=new MixedL1L2<T>(param_reg); break;
3254  case L1LINF: reg=new MixedL1LINF<T>(param_reg); break;
3255  case TRACE_NORM: reg=new TraceNorm<T>(param_reg); break;
3256  case RANK: reg=new Rank<T>(param_reg); break;
3257  case L1L2_L1: reg=new typename MixedL1L2_L1<T>::type(param_reg); break;
3258  case L1LINF_L1: reg=new typename MixedL1LINF_L1<T>::type(param_reg); break;
3259  case TREE_L0: reg=new RegMat<T, TreeLzero<T> >(param_reg); break;
3260  case TREE_L2: param_reg.linf=false; reg=new RegMat<T, TreeLasso<T> >(param_reg); break;
3261  case TREE_LINF: param_reg.linf=true; reg=new RegMat<T, TreeLasso<T> >(param_reg); break;
3262  case GRAPH: reg=new RegMat<T, GraphLasso<T> >(param_reg); break;
3263  case TREEMULT: reg = new TreeMult<T>(param_reg); break;
3264  case GRAPHMULT: reg=new GraphMult<T>(param_reg); break;
3265  case L1LINFCR: reg = new MixedL1LINFCR<T>(m,param_reg); break;
3266  case GRAPH_PATH_L0: reg = new RegMat<T, GraphPathL0<T> >(param_reg); break;
3267  case GRAPH_PATH_CONV: reg = new RegMat<T, GraphPathConv<T> >(param_reg); break;
3268  case NONE: reg=new RegMat<T, None<T> >(param_reg); break;
 3269  default: cerr << "Not implemented"; exit(1);
3270  }
3271  return reg;
3272  }
3273 
3274  template <typename T>
3275  void print_info_solver(const ParamFISTA<T>& param) {
3276  if (param.verbose) {
3277  print_loss(param.loss);
3278  print_regul(param.regul);
3279  if (param_for_admm(param)) {
3280  if (param.admm || param.lin_admm) {
3281  if (param.lin_admm) {
3282  cout << "Linearized ADMM algorithm" << endl;
3283  } else {
3284  cout << "ADMM algorithm" << endl;
3285  }
3286  }
3287  } else {
3288  if (param.ista) {
3289  cout << "ISTA algorithm" << endl;
3290  } else if (param.subgrad) {
3291  cout << "Subgradient descent" << endl;
3292  } else {
3293  cout << "FISTA algorithm" << endl;
3294  }
3295  if ((param.regul == GRAPH || param.regul == TREEMULT ||
3296  param.regul == GRAPHMULT || param.regul==L1LINFCR) &&
3297  param.clever)
3298  cout << "Projections with arc capacities" << endl;
3299  if (param.intercept) cout << "with intercept" << endl;
3300  if (param.log && param.logName) {
3301  cout << "log activated " << endl;
3302  cout << param.logName << endl;
3303  cout << endl;
3304  }
3305  }
3306  flush(cout);
3307  }
3308  };
3309 
3310 
3311  template <typename T>
3312  void solver_admm(const Matrix<T>& X, const Matrix<T>& alpha0,
3313  Matrix<T>& alpha, Matrix<T>& optim_info, SplittingFunction<T, SpMatrix<T> >** regularizers,
3314  SplittingFunction<T, Matrix<T> >** losses, const ParamFISTA<T>& param) {
3315  const int M = X.n();
3316  optim_info.resize(4,M);
3317 
3318  int i1;
3319 #pragma omp parallel for private(i1)
3320  for (i1 = 0; i1< M; ++i1) {
3321 #ifdef _OPENMP
3322  int numT=omp_get_thread_num();
3323 #else
3324  int numT=0;
3325 #endif
3326  Vector<T> Xi;
3327  X.refCol(i1,Xi);
3328  losses[numT]->init(Xi);
3329  Vector<T> alpha0i;
3330  alpha0.refCol(i1,alpha0i);
3331  Vector<T> alphai;
3332  alpha.refCol(i1,alphai);
3333  regularizers[numT]->reset();
3334  Vector<T> optim_infoi;
3335  optim_info.refCol(i1,optim_infoi);
3336  if (param.admm || param.lin_admm) {
3337  if (param.lin_admm) {
3338  LinADMM(*(losses[numT]),*(regularizers[numT]),alpha0i,alphai,optim_infoi,param);
3339  } else {
3340  ADMM(*(losses[numT]),*(regularizers[numT]),alpha0i,alphai,optim_infoi,param);
3341  }
3342  }
3343  }
3344  }
3345 
3346 
3347  template <typename T>
3348  void solver_aux1(const Matrix<T>& X, const Matrix<T>& alpha0,
3349  Matrix<T>& alpha, Matrix<T>& optim_info, Regularizer<T, Vector<T> >** regularizers,
3350  Loss<T, Vector<T> >** losses, const ParamFISTA<T>& param) {
3351  const int M = X.n();
3352  if (param.verbose) {
3353  const bool duality = losses[0]->is_fenchel() && regularizers[0]->is_fenchel();
3354  if (duality) cout << "Duality gap via Fenchel duality" << endl;
3355  if (!param.ista && param.subgrad && !regularizers[0]->is_subgrad()) {
3356  cerr << "Subgradient algorithm is not implemented for this combination of loss/regularization" << endl;
3357  exit(1);
3358  }
 3359  cout << "Timings reported do not include loss and Fenchel evaluation" << endl;
3360  flush(cout);
3361  }
3362  optim_info.resize(4,M);
3363 
3364  int i1;
3365 #pragma omp parallel for private(i1)
3366  for (i1 = 0; i1< M; ++i1) {
3367 #ifdef _OPENMP
3368  int numT=omp_get_thread_num();
3369 #else
3370  int numT=0;
3371 #endif
3372  Vector<T> Xi;
3373  X.refCol(i1,Xi);
3374  losses[numT]->init(Xi);
3375  Vector<T> alpha0i;
3376  alpha0.refCol(i1,alpha0i);
3377  Vector<T> alphai;
3378  alpha.refCol(i1,alphai);
3379  regularizers[numT]->reset();
3380  Vector<T> optim_infoi;
3381  optim_info.refCol(i1,optim_infoi);
3382  if (param.ista) {
3383  ISTA_Generic(*(losses[numT]),*(regularizers[numT]),alpha0i,alphai,optim_infoi,param);
3384  } else if (param.subgrad) {
3385  subGradientDescent_Generic(*(losses[numT]),*(regularizers[numT]),alpha0i,alphai,optim_infoi,param);
3386  } else {
3387  FISTA_Generic(*(losses[numT]),*(regularizers[numT]),alpha0i,alphai,optim_infoi,param);
3388  }
3389  }
3390  }
3391 
3392  template <typename T>
3393  void solver_aux2(const Matrix<T>& X, const Matrix<T>& alpha0,
3394  Matrix<T>& alpha, Matrix<T>& optim_info, Regularizer<T, Matrix<T> >** regularizers,
3395  Loss<T, Matrix<T> >** losses, const ParamFISTA<T>& param) {
3396  const int M = X.n();
3397  if (param.verbose) {
3398  const bool duality = losses[0]->is_fenchel() && regularizers[0]->is_fenchel();
3399  if (duality) cout << "Duality gap via Fenchel duality" << endl;
3400  flush(cout);
3401  }
3402 
3403  optim_info.resize(4,M);
3404 
3405  int i2;
3406 #pragma omp parallel for private(i2)
3407  for (i2 = 0; i2< M; ++i2) {
3408 #ifdef _OPENMP
3409  int numT=omp_get_thread_num();
3410 #else
3411  int numT=0;
3412 #endif
3413  Vector<T> Xi;
3414  X.refCol(i2,Xi);
3415  losses[numT]->init(Xi);
3416  const int N = alpha0.n()/X.n();
3417  Matrix<T> alpha0i;
3418  alpha0.refSubMat(i2*N,N,alpha0i);
3419  Matrix<T> alphai;
3420  alpha.refSubMat(i2*N,N,alphai);
3421  regularizers[numT]->reset();
3422  Vector<T> optim_infoi;
3423  optim_info.refCol(i2,optim_infoi);
3424  if (param.ista) {
3425  ISTA_Generic(*(losses[numT]),*(regularizers[numT]),alpha0i,alphai,optim_infoi,param);
3426  } else if (param.subgrad) {
3427  subGradientDescent_Generic(*(losses[numT]),*(regularizers[numT]),alpha0i,alphai,optim_infoi,param);
3428  } else {
3429  FISTA_Generic(*(losses[numT]),*(regularizers[numT]),alpha0i,alphai,optim_infoi,param);
3430  }
3431  }
3432  }
3433 
3435  template <typename T>
3436  void solver(const Matrix<T>& X, const AbstractMatrixB<T>& D, const Matrix<T>& alpha0,
3437  Matrix<T>& alpha, const ParamFISTA<T>& param1, Matrix<T>& optim_info,
3438  const GraphStruct<T>* graph_st = NULL,
3439  const TreeStruct<T>* tree_st = NULL,
3440  const GraphPathStruct<T>* graph_path_st=NULL) {
3441  print_info_solver(param1);
3442 
3443  int num_threads=MIN(X.n(),param1.num_threads);
3444  num_threads=init_omp(num_threads);
3445  ParamFISTA<T> param=param1;
3446  param.copied=true;
3447  if (param_for_admm(param)) {
 3448  if (num_threads > 1) param.verbose=false;
 3449  SplittingFunction<T, SpMatrix<T> >** regularizers = new SplittingFunction<T, SpMatrix<T> >*[num_threads];
 3450  SplittingFunction<T, Matrix<T> >** losses = new SplittingFunction<T, Matrix<T> >*[num_threads];
 3451  for (int i = 0; i<num_threads; ++i) {
3452  regularizers[i]=setRegularizerADMM(param,graph_st,tree_st);
3453  switch (param.loss) {
3454  case SQUARE: losses[i]=new SqLoss<T>(D); break;
3455  case HINGE: losses[i] = new HingeLoss<T>(D); break;
3456  default: cerr << "Not implemented" << endl; exit(1);
3457  }
3458  }
3459  solver_admm(X, alpha0, alpha, optim_info, regularizers,losses,param);
3460  for (int i = 0; i<num_threads; ++i) {
3461  delete(losses[i]);
3462  delete(regularizers[i]);
3463  }
3464  delete[](losses);
3465  delete[](regularizers);
3466 
3467  } else {
3468  Matrix<T> G;
3469  if (param.loss==HINGE) {
 3470  cerr << "Hinge loss is only implemented with the ADMM solvers" << endl;
3471  return;
3472  }
3473  if (param.compute_gram && (param.loss==SQUARE)) D.XtX(G);
3474  if (!loss_for_matrices(param.loss) && !(param.transpose || regul_for_matrices(param.regul))) {
3475  if (num_threads > 1) param.verbose=false;
3476  Loss<T>** losses = new Loss<T>*[num_threads];
3477  Regularizer<T>** regularizers= new Regularizer<T>*[num_threads];
3478  for (int i = 0; i<num_threads; ++i) {
3479  regularizers[i]=setRegularizerVectors(param,graph_st,tree_st,graph_path_st);
3480  switch (param.loss) {
3481  case SQUARE: if (param.compute_gram) {
3482  losses[i]=new SqLoss<T>(D,G);
3483  } else {
3484  losses[i]=new SqLoss<T>(D);
3485  }
3486  break;
3487  case SQUARE_MISSING: losses[i]=new SqLossMissing<T>(D); break;
3488  case LOG: losses[i] = new LogLoss<T>(D); break;
3489  case LOGWEIGHT: losses[i] = new LogLoss<T,true>(D); break;
3490  default: cerr << "Not implemented"; exit(1);
3491  }
3492  }
3493 
3494  solver_aux1(X, alpha0, alpha, optim_info, regularizers,losses,param);
3495  for (int i = 0; i<num_threads; ++i) {
3496  delete(losses[i]);
3497  losses[i]=NULL;
3498  delete(regularizers[i]);
3499  regularizers[i]=NULL;
3500  }
3501  delete[](losses);
3502  delete[](regularizers);
3503 
3504  } else if (loss_for_matrices(param.loss) && param.loss != CUR) {
3505  if (num_threads > 1) param.verbose=false;
 3506  Loss<T, Matrix<T> >** losses = new Loss<T, Matrix<T> >*[num_threads];
 3507  Regularizer<T, Matrix<T> >** regularizers = new Regularizer<T, Matrix<T> >*[num_threads];
 3508  const int N = alpha0.n()/X.n();
3509  for (int i = 0; i<num_threads; ++i) {
3510  regularizers[i]=setRegularizerMatrices(param,alpha0.m(),N,graph_st,tree_st,graph_path_st);
3511  switch (param.loss) {
3512  case MULTILOG: losses[i] = new MultiLogLoss<T>(D); break;
3513  default: cerr << "Not implemented"; exit(1);
3514  }
3515  }
3516  solver_aux2(X, alpha0, alpha, optim_info, regularizers,losses,param);
3517 
3518  for (int i = 0; i<num_threads; ++i) {
3519  delete(losses[i]);
3520  losses[i]=NULL;
3521  delete(regularizers[i]);
3522  regularizers[i]=NULL;
3523  }
3524 
3525  delete[](losses);
3526  delete[](regularizers);
 3527  } else {
 3529  Loss<T, Matrix<T> >* loss;
 3530  Regularizer<T, Matrix<T> >* regularizer;
3531  switch (param.loss) {
3532  case SQUARE: if (param.compute_gram) {
3533  loss=new SqLossMat<T>(D,G);
3534  } else {
3535  loss=new SqLossMat<T>(D);
3536  }
3537  break;
3538  case SQUARE_MISSING: loss=new LossMat<T, SqLossMissing<T> >(X.n(),D); break;
3539  case LOG: loss = new LossMat<T, LogLoss<T,false> >(X.n(),D); break;
3540  case LOGWEIGHT: loss = new LossMat<T, LogLoss<T,true> >(X.n(),D); break;
3541  case CUR: loss = new LossCur<T>(D); break;
3542  default: cerr << "Not implemented"; exit(1);
3543  }
3544  regularizer=setRegularizerMatrices(param,alpha0.m(),alpha0.n(),graph_st,tree_st,graph_path_st);
3545  if (param.verbose) {
3546  const bool duality = loss->is_fenchel() && regularizer->is_fenchel();
3547  if (duality) cout << "Duality gap via Fenchel duality" << endl;
3548  }
3549  loss->init(X);
3550  optim_info.resize(4,1);
3551  Vector<T> optim_infoi;
3552  optim_info.refCol(0,optim_infoi);
3553  if (param.ista) {
3554  ISTA_Generic(*loss,*regularizer,alpha0,alpha,optim_infoi,param);
3555  } else if (param.subgrad) {
3556  subGradientDescent_Generic(*loss,*regularizer,alpha0,alpha,optim_infoi,param);
3557  } else {
3558  FISTA_Generic(*loss,*regularizer,alpha0,alpha,optim_infoi,param);
3559  }
3560  delete(regularizer);
3561  delete(loss);
3562  }
3563  }
3564  };
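// Usage sketch (illustrative, not part of the library): solving
//   min_alpha 0.5*||x - D*alpha||_2^2 + lambda*||alpha||_1
// for every column x of X with FISTA through the solver() entry point above.
// Assumes, as elsewhere in SPAMS, that Matrix<T> is an AbstractMatrixB<T> and
// that ParamFISTA's default constructor fills the remaining fields with
// sensible defaults; the function name and numeric values are illustrative.
template <typename T>
void example_fista_lasso(const Matrix<T>& X, const Matrix<T>& D,
      const Matrix<T>& alpha0, Matrix<T>& alpha) {
   ParamFISTA<T> param;
   param.loss=SQUARE;          // quadratic data-fitting term (SqLoss)
   param.regul=L1;             // Lasso regularizer
   param.lambda=T(0.1);        // regularization weight
   param.max_it=200;           // iteration budget
   param.tol=T(1e-4);          // relative duality-gap tolerance
   alpha.copy(alpha0);         // allocate the output with the right shape
   Matrix<T> optim_info;       // per column: loss, dual value, gap, iterations
   solver<T>(X,D,alpha0,alpha,param,optim_info);
}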
3565 
3566  template <typename T>
3567  void PROX(const Matrix<T>& alpha0,
3568  Matrix<T>& alpha, const ParamFISTA<T>& param,
3569  Vector<T>& val_loss,
3570  const GraphStruct<T>* graph_st = NULL,
3571  const TreeStruct<T>* tree_st = NULL,
3572  const GraphPathStruct<T>* graph_path_st = NULL) {
3573  if (param.verbose) {
3574  print_regul(param.regul);
3575  if ((param.regul == GRAPH || param.regul == TREEMULT ||
3576  param.regul == GRAPHMULT || param.regul==L1LINFCR) &&
3577  param.clever)
3578  cout << "Projections with arc capacities" << endl;
3579  if (param.intercept) cout << "with intercept" << endl;
3580  flush(cout);
3581  }
3582  int num_threads=MIN(alpha.n(),param.num_threads);
3583  num_threads=init_omp(num_threads);
3584  const int M = alpha.n();
3585  if (!graph_st && param.regul==GRAPH) {
3586  cerr << "Graph structure should be provided" << endl;
3587  return;
3588  }
3589 
3590  if (!regul_for_matrices(param.regul)) {
3591  Regularizer<T>** regularizers= new Regularizer<T>*[num_threads];
3592  for (int i = 0; i<num_threads; ++i)
3593  regularizers[i]=setRegularizerVectors(param,graph_st,tree_st,graph_path_st);
3594 
3595  int i;
3596  if (param.eval)
3597  val_loss.resize(M);
3598 #pragma omp parallel for private(i)
3599  for (i = 0; i< M; ++i) {
3600 #ifdef _OPENMP
3601  int numT=omp_get_thread_num();
3602 #else
3603  int numT=0;
3604 #endif
3605  Vector<T> alpha0i;
3606  alpha0.refCol(i,alpha0i);
3607  Vector<T> alphai;
3608  alpha.refCol(i,alphai);
3609  regularizers[numT]->reset();
3610  regularizers[numT]->prox(alpha0i,alphai,param.lambda);
3611  if (param.eval)
3612  val_loss[i]=regularizers[numT]->eval(alphai);
3613  }
3614  for (i = 0; i<num_threads; ++i) {
3615  delete(regularizers[i]);
3616  regularizers[i]=NULL;
3617  }
3618  delete[](regularizers);
3619 
3620  } else {
3622  if (param.eval)
3623  val_loss.resize(1);
3624  Regularizer<T, Matrix<T> >* regularizer;
3625  regularizer=setRegularizerMatrices(param,alpha0.m(),alpha0.n(),graph_st,tree_st,graph_path_st);
3626  regularizer->prox(alpha0,alpha,param.lambda);
3627  if (param.eval)
3628  val_loss[0]=regularizer->eval(alpha);
3629  delete(regularizer);
3630  }
3631  };
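 /* Illustrative usage sketch, not part of the original SPAMS sources: it
  * applies PROX() above as a stand-alone proximal operator (column-wise l1
  * soft-thresholding); the other ParamFISTA fields are assumed to receive
  * sensible defaults from its constructor. */
 inline void example_prox_l1(const Matrix<double>& alpha0, Matrix<double>& alpha) {
    ParamFISTA<double> param;
    param.regul = L1;         // vector regularizer: parallel per-column branch
    param.lambda = 0.05;      // threshold passed to prox()
    param.eval = true;        // also report the penalty value per column
    Vector<double> val_loss;  // resized to alpha.n() inside PROX
    alpha.resize(alpha0.m(),alpha0.n());
    PROX<double>(alpha0,alpha,param,val_loss);
 }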
3632 
3633  template <typename T>
3634  void EvalGraphPath(const Matrix<T>& alpha0,
3635  const ParamFISTA<T>& param,
3636  Vector<T>& val_loss,
3637  const GraphPathStruct<T>* graph_path_st,
3638  SpMatrix<T>* paths = NULL) {
3639  if (param.verbose) {
3640  print_regul(param.regul);
3641  if (param.intercept) cout << "with intercept" << endl;
3642  if (param.eval_dual_norm) cout << "Evaluate the dual norm only" << endl;
3643  flush(cout);
3644  }
3645  int num_threads=MIN(alpha0.n(),param.num_threads);
3646  num_threads=init_omp(num_threads);
3647  const int M = alpha0.n();
3648 
3649  if (!regul_for_matrices(param.regul)) {
3650  Regularizer<T>** regularizers= new Regularizer<T>*[num_threads];
3651  for (int i = 0; i<num_threads; ++i)
3652  regularizers[i]=setRegularizerVectors<T>(param,NULL,NULL,graph_path_st);
3653 
3654  int i;
3655  val_loss.resize(M);
3656 #pragma omp parallel for private(i)
3657  for (i = 0; i< M; ++i) {
3658 #ifdef _OPENMP
3659  int numT=omp_get_thread_num();
3660 #else
3661  int numT=0;
3662 #endif
3663  Vector<T> alphai;
3664  alpha0.refCol(i,alphai);
3665  regularizers[numT]->reset();
3666  if (i==0 && paths) {
3667  if (param.eval_dual_norm) {
3668  val_loss[i]=regularizers[numT]->eval_dual_norm_paths(alphai,*paths);
3669  } else {
3670  val_loss[i]=regularizers[numT]->eval_paths(alphai,*paths);
3671  }
3672  } else {
3673  if (param.eval_dual_norm) {
3674  val_loss[i]=regularizers[numT]->eval_dual_norm(alphai);
3675  } else {
3676  val_loss[i]=regularizers[numT]->eval(alphai);
3677  }
3678  }
3679  }
3680  for (i = 0; i<num_threads; ++i) {
3681  delete(regularizers[i]);
3682  regularizers[i]=NULL;
3683  }
3684  delete[](regularizers);
3685 
3686  } else {
3687  cerr << "Not implemented" << endl;
3688  return;
3689  }
3690  };
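 /* Illustrative usage sketch, not part of the original SPAMS sources: it
  * evaluates the graph-path-l0 penalty per column with EvalGraphPath()
  * above; graph_path_st is assumed to point to a fully initialized
  * GraphPathStruct describing the underlying DAG. */
 inline void example_eval_graph_path(const Matrix<double>& alpha0,
       const GraphPathStruct<double>* graph_path_st) {
    ParamFISTA<double> param;
    param.regul = GRAPH_PATH_L0;   // path-coding penalty
    param.eval_dual_norm = false;  // evaluate the penalty, not its dual norm
    Vector<double> val_loss;       // one value per column of alpha0
    EvalGraphPath<double>(alpha0,param,val_loss,graph_path_st);
 }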
3691 }
3692 
3693 }
3694 
3695 #endif