//+------------------------------------------------------------------+
//|                                            CoordinateDescent.mqh |
//|                        Copyright 2023, MetaQuotes Software Corp. |
//|                                             https://www.mql5.com |
//+------------------------------------------------------------------+
#property copyright "Copyright 2023, MetaQuotes Software Corp."
#property link      "https://www.mql5.com"

#define CONVERGE 1.e-9

//+------------------------------------------------------------------+
//| Coordinate Descent optimization class                            |
//+------------------------------------------------------------------+
class CCoordinateDescent
  {

private:
   //--- object status and fitted model
   bool              m_initialized;          // Set by the constructor: arguments legal and all allocations succeeded
   double            m_beta[];               // Current coefficients, one per predictor (m_nvars entries)
   double            m_explained;            // Fraction of target variance explained by the model; written by Train()
   double            m_xmeans[];             // Mean of each predictor column, computed by SetData()
   double            m_xscales[];            // Standard deviation of each predictor column, computed by SetData()
   double            m_ymean;                // Mean of the target (the model intercept), computed by SetData()
   double            m_yscale;               // Standard deviation of the target, computed by SetData()

   //--- problem dimensions and options fixed at construction
   int               m_nvars ;               // Number of predictor variables
   int               m_observs ;             // Number of training cases
   bool              m_covarupdates ;        // Use covariance updates (inner products) instead of naive residual updates
   int               m_nlambda ;             // Number of coefficient sets TrainLambda() may save (may be zero)

   //--- working storage (flat, row-major where two-dimensional)
   double            m_lambdabeta_matrix[];  // Saved coefficients: m_nlambda rows of m_nvars values
   double            m_lambdas[];            // Lambda used for each row of m_lambdabeta_matrix
   double            m_x_matrix[];           // Standardized predictors: m_observs rows by m_nvars columns
   double            m_y[];                  // Standardized target, m_observs values
   double            m_resid[];              // Residuals of the current model, m_observs values
   double            m_xinner_matrix[];      // m_nvars x m_nvars predictor inner products (only if m_covarupdates)
   double            m_yinner[];             // Predictor/target inner products, m_nvars values (only if m_covarupdates)

public:
   //--- construction / destruction
                     CCoordinateDescent(const int num_predictors, const int num_observations, const bool use_covariance_updates, const int num_lambdas_to_trial) ;
                    ~CCoordinateDescent(void) ;

   //--- scalar accessors
   double            GetYmean(void)                     { return m_ymean; }
   double            GetYscale(void)                    { return m_yscale; }
   double            GetExplainedVariance(void)         { return m_explained; }

   //--- element accessors; an index outside the array yields 0 instead of a runtime error
   double            GetXmeansAt(const int index)       { return (index>=0 && index<ArraySize(m_xmeans)) ? m_xmeans[index] : 0.0; }
   double            GetXscalesAt(const int index)      { return (index>=0 && index<ArraySize(m_xscales)) ? m_xscales[index] : 0.0; }
   double            GetBetaAt(const int index)         { return (index>=0 && index<ArraySize(m_beta)) ? m_beta[index] : 0.0; }
   double            GetLambdaAt(const int index)       { return (index>=0 && index<ArraySize(m_lambdas)) ? m_lambdas[index] : 0.0; }
   //--- index is the flat position: (lambda number * number of predictors) + predictor number
   double            GetLambdaBetaAt(const int index)   { return (index>=0 && index<ArraySize(m_lambdabeta_matrix)) ? m_lambdabeta_matrix[index] : 0.0; }

   //--- smallest lambda for which every coefficient stays at zero
   double            GetLambdaThreshold(const double alpha) ;

   //--- load and standardize the raw training data
   bool              SetData(const int begin, const int num_observations, double &xx_matrix[], double &yy[]) ;

   //--- training: single lambda, and a descending path of lambdas
   void              Train(const double alpha, const double lambda, const int maxits, const double convergence_criterion, const bool fast_test, const bool warm_start) ;
   void              TrainLambda(const double alpha, const int maxits, const double convergence_criterion, const bool fast_test, const double maxlambda, const bool print_steps) ;
  } ;
//+------------------------------------------------------------------+
//|Constructor                                                       |
//+------------------------------------------------------------------+
//--- Records the problem dimensions and allocates every working array.
//--- On any failure m_initialized stays false and all other methods
//--- return immediately without touching the data.
CCoordinateDescent::CCoordinateDescent(
   const int  num_predictors,             // Number of predictor variables
   const int num_observations,            // Number of cases we will be training (must be at least 1)
   const bool use_covariance_updates,     // Use fast covariance updates rather than slow naive method
   const int num_lambdas_to_trial         // Number of lambdas TrainLambda() will save (may be zero)
)
  {
   m_nvars = num_predictors ;
   m_observs = num_observations ;
   m_covarupdates = use_covariance_updates ;
   m_nlambda = num_lambdas_to_trial ;
   m_initialized = false ;                // Only flipped to true once everything below succeeded
   m_ymean = m_yscale = m_explained = 0 ;

   if(m_nvars<0 || m_observs<0 || m_nlambda<0)
     {
      Print("Invalid parameter value, neither num_predictors ,num_observations, nor num_lambdas_to_trial can be negative");
      return;
     }

//--- every mean / inner product divides by the case count, so zero cases cannot be trained
   if(m_observs==0)
     {
      Print("Invalid parameter value: num_observations must be at least 1");
      return;
     }

//--- compute the array sizes in 64 bits so that an oversized request is
//--- rejected instead of silently wrapping around in int arithmetic
   const long x_cells      = (long)m_observs*m_nvars ;
   const long xinner_cells = (long)m_nvars*m_nvars ;
   const long lambda_cells = (long)m_nlambda*m_nvars ;
   if(x_cells>INT_MAX || lambda_cells>INT_MAX || (m_covarupdates && xinner_cells>INT_MAX))
     {
      Print("Invalid parameter value: requested dimensions exceed the maximum array size");
      return;
     }

//--- unconditional allocations (evaluation stops at the first failure)
   bool allocated = ArrayResize(m_x_matrix,(int)x_cells)>=(int)x_cells &&
                    ArrayResize(m_y,m_observs)>=m_observs              &&
                    ArrayResize(m_xmeans,m_nvars)>=m_nvars             &&
                    ArrayResize(m_xscales,m_nvars)>=m_nvars            &&
                    ArrayResize(m_beta,m_nvars)>=m_nvars               &&
                    ArrayResize(m_resid,m_observs)>=m_observs ;

//--- inner products are needed only for the covariance update method
   if(allocated && m_covarupdates)
      allocated = ArrayResize(m_xinner_matrix,(int)xinner_cells)>=(int)xinner_cells &&
                  ArrayResize(m_yinner,m_nvars)>=m_nvars ;

//--- storage for the lambda path is optional
   if(allocated && m_nlambda>0)
      allocated = ArrayResize(m_lambdabeta_matrix,(int)lambda_cells)>=(int)lambda_cells &&
                  ArrayResize(m_lambdas,m_nlambda)>=m_nlambda ;

   if(!allocated)
     {
      Print("Memory allocation error ", GetLastError());
      return;
     }

//--- ArrayResize does not clear new elements; start from a defined state so that
//--- a warm-started Train() or an accessor call before training never reads garbage
   ArrayInitialize(m_x_matrix,0.0) ;
   ArrayInitialize(m_y,0.0) ;
   ArrayInitialize(m_xmeans,0.0) ;
   ArrayInitialize(m_xscales,0.0) ;
   ArrayInitialize(m_beta,0.0) ;
   ArrayInitialize(m_resid,0.0) ;
   if(m_covarupdates)
     {
      ArrayInitialize(m_xinner_matrix,0.0) ;
      ArrayInitialize(m_yinner,0.0) ;
     }
   if(m_nlambda>0)
     {
      ArrayInitialize(m_lambdabeta_matrix,0.0) ;
      ArrayInitialize(m_lambdas,0.0) ;
     }

   m_initialized = true ;
  }
//+------------------------------------------------------------------+
//| Destructor                                                       |
//+------------------------------------------------------------------+
CCoordinateDescent::~CCoordinateDescent(void)
  {
//--- release the standardized training data and its statistics
   ArrayFree(m_x_matrix) ;
   ArrayFree(m_y) ;
   ArrayFree(m_xmeans) ;
   ArrayFree(m_xscales) ;
//--- release the model and residual buffers
   ArrayFree(m_beta) ;
   ArrayFree(m_resid) ;
//--- release the inner products used by covariance updates
   ArrayFree(m_xinner_matrix) ;
   ArrayFree(m_yinner) ;
//--- release the saved lambda path
   ArrayFree(m_lambdabeta_matrix) ;
   ArrayFree(m_lambdas) ;
  }
//+------------------------------------------------------------------+
//|Get and standardize the data                                      |
//|   Also compute inner products if covar_update                    |
//+------------------------------------------------------------------+
//--- Copies m_observs cases out of the caller's database, starting at row
//--- 'begin' and wrapping to row 0 after row num_observations-1, and
//--- standardizes every predictor column and the target to mean 0 and
//--- standard deviation 1. The means and scales are kept so callers can
//--- standardize new cases the same way. When covariance updates were
//--- requested, the X'Y and X'X inner products are computed as well.
//--- Returns false (leaving the object unchanged) if the object was not
//--- initialized, a parameter is illegal, or the arrays are too short.
bool CCoordinateDescent::SetData(
   const int begin,              // Starting row in the full database for the training set
   const int num_observations,   // Number of rows in the full database (we wrap back to the start if needed)
   double  &xx_matrix[],         // Full database, row-major, m_nvars columns
   double  &yy[]                 // Target vector, one value per database row
)
  {
   if(!m_initialized)
      return false;

//--- parameter checks
   if(begin<0 || num_observations<0)
     {
      Print("Invalid parameter value: neither begin nor num_observations can be negative");
      return false;
     }

//--- num_observations is the modulus of the row index and m_observs divides
//--- every mean, so neither may be zero
   if(num_observations==0 || m_observs<=0)
     {
      Print("Invalid parameter value: num_observations and the number of training cases must be greater than zero");
      return false;
     }

//--- Work out the highest database row that will actually be read. Without
//--- wrap-around that is first+m_observs-1, with wrap-around it is the last
//--- row of the database. The arrays must cover it.
   const int first = begin % num_observations ;
   const int rows_needed = (m_observs > num_observations - first) ? num_observations : first + m_observs ;
   if((long)ArraySize(xx_matrix) < (long)rows_needed*m_nvars || ArraySize(yy) < rows_needed)
     {
      Print("Insufficient data supplied relative to object specification");
      return false;
     }

   int icase, ivar, jvar, k ;
   double sum, diff ;

//--- standardize each predictor column
   for(ivar=0 ; ivar<m_nvars ; ivar++)
     {
      double xm = 0.0 ;
      for(icase=0 ; icase<m_observs ; icase++)
        {
         k = (icase + first) % num_observations ;
         xm += xx_matrix[k*m_nvars+ivar] ;
        }
      xm /= m_observs ;
      m_xmeans[ivar] = xm ;

      double xs = 1.e-60 ;  // Prevent division by zero for a constant column
      for(icase=0 ; icase<m_observs ; icase++)
        {
         k = (icase + first) % num_observations ;
         diff = xx_matrix[k*m_nvars+ivar] - xm ;
         xs += diff * diff ;
        }
      xs = sqrt(xs / m_observs) ;
      m_xscales[ivar] = xs ;

      for(icase=0 ; icase<m_observs ; icase++)
        {
         k = (icase + first) % num_observations ;
         m_x_matrix[icase*m_nvars+ivar] = (xx_matrix[k*m_nvars+ivar] - xm) / xs ;
        }
     }

//--- standardize the target
   m_ymean = 0.0 ;
   for(icase=0 ; icase<m_observs ; icase++)
     {
      k = (icase + first) % num_observations ;
      m_ymean += yy[k] ;
     }
   m_ymean /= m_observs ;

   m_yscale = 1.e-60 ;  // Prevent division by zero for a constant target
   for(icase=0 ; icase<m_observs ; icase++)
     {
      k = (icase + first) % num_observations ;
      diff = yy[k] - m_ymean ;
      m_yscale += diff * diff ;
     }
   m_yscale = sqrt(m_yscale / m_observs) ;

   for(icase=0 ; icase<m_observs ; icase++)
     {
      k = (icase + first) % num_observations ;
      m_y[icase] = (yy[k] - m_ymean) / m_yscale ;
     }

//--- If the user requested covariance updates, compute the required inner
//--- products. The full m_xinner_matrix is stored for faster addressing
//--- later, even though it is symmetric.
   if(m_covarupdates)
     {
      for(ivar=0 ; ivar<m_nvars ; ivar++)
        {
         // Inner product of predictor ivar with the target
         sum = 0.0 ;
         for(icase=0 ; icase<m_observs ; icase++)
            sum += m_x_matrix[ivar+icase*m_nvars] * m_y[icase] ;
         m_yinner[ivar] = sum / m_observs ;

         // Inner products of predictor ivar with every predictor
         for(jvar=0 ; jvar<m_nvars ; jvar++)
           {
            if(jvar == ivar)
               m_xinner_matrix[ivar*m_nvars+jvar] = 1.0 ;      // X is standardized
            else
               if(jvar < ivar)                                 // Symmetric: row jvar was filled on an earlier pass
                  m_xinner_matrix[ivar*m_nvars+jvar] = m_xinner_matrix[jvar*m_nvars+ivar] ;
               else
                 {
                  sum = 0.0 ;
                  for(icase=0 ; icase<m_observs ; icase++)
                     sum += m_x_matrix[ivar+icase*m_nvars] * m_x_matrix[icase*m_nvars+jvar] ;
                  m_xinner_matrix[ivar*m_nvars+jvar] = sum / m_observs ;
                 }
           }
        }
     }
//---
   return true;
  }
//+------------------------------------------------------------------+
//|Core training routine                                             |
//+------------------------------------------------------------------+
//--- Fits the coefficients m_beta for one (alpha, lambda) pair by cyclic
//--- coordinate descent on the standardized data loaded by SetData().
//--- Each coefficient is updated with the soft-thresholding operator using
//--- threshold alpha*lambda and divisor 1+lambda*(1-alpha).
//--- After a complete pass over all variables, only the active set
//--- (non-zero coefficients) is iterated until convergence; then another
//--- complete pass is made, and training stops when that pass converges
//--- without changing the active set, or when maxits passes were done.
//--- On exit m_explained holds the fraction of target variance explained.
//--- The method returns without training if the object is not initialized
//--- or a parameter is illegal.
void CCoordinateDescent::Train(
   const double alpha,                   // Elastic-net mixing parameter in [0,1] (0 problematic for descending lambda)
   const double lambda,                  // Regularization strength (>=0); usually supplied by TrainLambda()
   const int maxits,                     // Maximum number of passes, for safety only
   const double convergence_criterion,   // Convergence criterion, typically 1.e-5 or so
   const bool fast_test,                 // true: converge on max coefficient change; false: on change of the penalized criterion
   const bool warm_start                 // Start from the existing m_beta rather than from zero?
)
  {
   if(!m_initialized)
      return;

   if(alpha<0 || alpha>1)
     {
      Print("Invalid parameter value: Legal values for alpha are between 0 and 1 inclusive");
      return;
     }

   if(lambda<0)
     {
      Print("Invalid parameter value: lambda must be zero or positive");
      return;
     }

   if(maxits<=0)
     {
      Print("Invalid parameter value: maxits accepts only non zero positive values");
      return;
     }

//--- every average below divides by the number of cases
   if(m_observs<=0)
     {
      Print("Invalid state: the object holds no observations to train on");
      return;
     }

   int i, icase, ivar, xptr ;
   double sum, crit ;

//--- constants of this training run
   const double S_threshold   = alpha * lambda ;                 // Threshold of the soft-thresholding operator S()
   const double update_factor = 1.0 + lambda * (1.0 - alpha) ;   // 1.0 is the mean square of a standardized predictor
   const double YmeanSquare   = 1.0 ;                            // Mean square of the standardized target; used only for m_explained

   bool   do_active_only = false ;   // Begin with a complete pass
   bool   converged      = false ;
   double prior_crit     = 1.0e60 ;  // For the slow convergence test

//--- initialize coefficients and residuals
   if(warm_start)                    // Pick up with the current coefficients
     {
      if(! m_covarupdates)           // Naive updates need residuals consistent with m_beta
        {
         for(icase=0 ; icase<m_observs ; icase++)
           {
            xptr = icase * m_nvars ;
            sum = 0.0 ;
            for(ivar=0 ; ivar<m_nvars ; ivar++)
               sum += m_beta[ivar] * m_x_matrix[xptr+ivar] ;
            m_resid[icase] = m_y[icase] - sum ;
           }
        }
     }
   else                              // Cold start: all coefficients are zero
     {
      for(i=0 ; i<m_nvars ; i++)
         m_beta[i] = 0.0 ;
      for(i=0 ; i<m_observs ; i++)   // so the residuals are just the target
         m_resid[i] = m_y[i] ;
     }

//--- outermost loop iterates until converged or the maxits limit is hit
   for(int iter=0 ; iter<maxits ; iter++)
     {
      bool   active_set_changed = false ;  // Did any coefficient go to/from 0.0 during this pass?
      double max_change = 0.0 ;            // Largest coefficient change, for the fast convergence test

      for(ivar=0 ; ivar<m_nvars ; ivar++)  // Descend on this coefficient
        {
         if(do_active_only  &&  m_beta[ivar] == 0.0)
            continue ;

         double argument ;                 // Argument of S()

         if(m_covarupdates)
           {
            // Residual correlation obtained from the stored inner products
            sum = 0.0 ;
            for(int kvar=0 ; kvar<m_nvars ; kvar++)
               sum += m_xinner_matrix[ivar*m_nvars+kvar] * m_beta[kvar] ;
            argument = (m_yinner[ivar] - sum) + m_beta[ivar] ;
           }
         else
           {
            // Definitional formula: mean of X_ij * RESID_i over the cases
            double residual_sum = 0.0 ;
            for(icase=0 ; icase<m_observs ; icase++)
               residual_sum += m_x_matrix[ivar+icase*m_nvars] * m_resid[icase] ;
            residual_sum /= m_observs ;
            argument = residual_sum + m_beta[ivar] ;
           }

         // Apply the soft-thresholding operator S()
         double new_beta ;
         if(argument > 0.0  &&  S_threshold < argument)
            new_beta = (argument - S_threshold) / update_factor ;
         else
            if(argument < 0.0  &&  S_threshold < -argument)
               new_beta = (argument + S_threshold) / update_factor ;
            else
               new_beta = 0.0 ;

         // Track the largest move and apply the update if the coefficient changed
         const double correction = new_beta - m_beta[ivar] ;
         if(fabs(correction) > max_change)
            max_change = fabs(correction) ;

         if(correction != 0.0)
           {
            if(! m_covarupdates)          // Naive updates keep the residual vector current
              {
               for(icase=0 ; icase<m_observs ; icase++)
                  m_resid[icase] -= correction * m_x_matrix[ivar+icase*m_nvars] ;
              }
            if((m_beta[ivar] == 0.0  &&  new_beta != 0.0)  || (m_beta[ivar] != 0.0  &&  new_beta == 0.0))
               active_set_changed = true ;
            m_beta[ivar] = new_beta ;
           }
        } // For all variables; a pass

      /*
         A pass (complete or active only) through the variables has been done.
         The fast test only looks at the largest coefficient change.
         The slow test compares the penalized criterion with that of the prior
         pass, which needs residuals; with covariance updates these have not
         been maintained and are computed here.
      */
      if(fast_test)
         converged = (max_change < convergence_criterion) ;
      else
        {
         if(m_covarupdates)
           {
            for(icase=0 ; icase<m_observs ; icase++)
              {
               xptr = icase * m_nvars ;
               sum = 0.0 ;
               for(ivar=0 ; ivar<m_nvars ; ivar++)
                  sum += m_beta[ivar] * m_x_matrix[xptr+ivar] ; // Cumulate predicted value
               m_resid[icase] = m_y[icase] - sum ;              // Residual = true - predicted
              }
           }

         sum = 0.0 ;
         for(i=0 ; i<m_observs ; i++)
            sum += m_resid[i] * m_resid[i] ;
         crit = sum / m_observs ;             // MSE component of the optimization criterion

         double penalty = 0.0 ;
         for(i=0 ; i<m_nvars ; i++)
            penalty += 0.5 * (1.0 - alpha) * m_beta[i] * m_beta[i]  +  alpha * fabs(m_beta[i]) ;
         penalty *= 2.0 * lambda ;            // Regularization component of the optimization criterion

         crit += penalty ;                    // This is what we are minimizing

         converged = (prior_crit - crit < convergence_criterion) ;
         prior_crit = crit ;
        }

      /*
         After a complete (all variables) pass we iterate on only the active
         set until convergence, then do a complete pass again. If that complete
         pass converged and did not change the active set, we are done.
         A coefficient moving from one non-zero value to another during the
         complete pass is not detected as a change of the active set; after the
         active set was iterated to convergence such a move is expected to be tiny.
      */
      if(do_active_only)
        {
         if(converged)
            do_active_only = false ;          // Next is a complete pass
        }
      else
        {
         if(converged  &&  ! active_set_changed)
            break ;
         do_active_only = true ;              // Next is an active-only pass
        }
     } // Outer loop iterations

   /*
      Done. Compute and save the explained variance.
      With the fast test and covariance updates the residuals were never
      computed, so they must be computed now.
   */
   if(fast_test  &&  m_covarupdates)
     {
      for(icase=0 ; icase<m_observs ; icase++)
        {
         xptr = icase * m_nvars ;
         sum = 0.0 ;
         for(ivar=0 ; ivar<m_nvars ; ivar++)
            sum += m_beta[ivar] * m_x_matrix[xptr+ivar] ;
         m_resid[icase] = m_y[icase] - sum ;
        }
     }

   sum = 0.0 ;
   for(i=0 ; i<m_observs ; i++)
      sum += m_resid[i] * m_resid[i] ;
   crit = sum / m_observs ;                   // Mean squared error of the final model

   m_explained = (YmeanSquare - crit) / YmeanSquare ;
  }
//+------------------------------------------------------------------+
//|Compute minimum lambda such that all betas remain at zero         |
//+------------------------------------------------------------------+
//--- Returns the largest absolute mean product between a standardized
//--- predictor column and the standardized target, divided by alpha
//--- (plus 1.e-60 so that alpha==0 does not divide by zero).
//--- Returns 0 if the object is not initialized or alpha is outside [0,1].
double CCoordinateDescent::GetLambdaThreshold(const double alpha)
  {
   if(!m_initialized)
      return 0;

   if(alpha>1 || alpha<0)
     {
      Print("Invalid parameter for Alpha, legal values are between 0 and 1 inclusive");
      return 0;
     }

   double largest = 0.0 ;

   for(int col=0 ; col<m_nvars ; col++)
     {
      // Mean over the cases of X[row,col] * Y[row]
      double dot = 0.0 ;
      for(int row=0 ; row<m_observs ; row++)
         dot += m_x_matrix[col+row*m_nvars] * m_y[row] ;
      dot /= m_observs ;

      const double magnitude = fabs(dot) ;
      if(magnitude > largest)
         largest = magnitude ;
     }

   return largest / (alpha + 1.e-60) ;
  }
//+----------------------------------------------------------------------------------------+
//| Multiple-lambda training routine calls Train() repeatedly, saving each m_beta vector   |
//+----------------------------------------------------------------------------------------+
//--- Trains the model for m_nlambda lambdas that decrease geometrically
//--- from the starting lambda down to 0.001 times that value. The first
//--- lambda is trained from zero coefficients, every later one is
//--- warm-started from the previous solution. Each lambda and its
//--- coefficient vector are saved in m_lambdas / m_lambdabeta_matrix.
//--- Nothing is trained if the object is not initialized, fewer than two
//--- lambdas were reserved, alpha is outside [0,1], or no positive
//--- starting lambda can be determined.
void CCoordinateDescent::TrainLambda(
   const double alpha,                   // Elastic-net mixing parameter in [0,1] (0 problematic for descending lambda)
   const int maxits,                     // Maximum iterations per lambda, for safety only
   const double convergence_criterion,   // Convergence criterion, typically 1.e-5 or so
   const bool fast_test,                 // Base convergence on max coefficient change vs the penalized criterion?
   const double maxlambda,               // Starting lambda; zero or negative requests automatic computation
   const bool print_steps                // Print the lambda / active count / explained variance table?
)
  {
   if(!m_initialized)
      return;

   if(m_nlambda <= 1)
      return ;

//--- Reject an illegal alpha once, here. Otherwise GetLambdaThreshold() would
//--- return 0 and Train() would report the same error for every lambda.
   if(alpha<0 || alpha>1)
     {
      Print("Invalid parameter value: Legal values for alpha are between 0 and 1 inclusive");
      return;
     }

   /*
      The automatic starting lambda is slightly below the smallest lambda
      for which all coefficients remain at zero.
   */
   double max_lambda = maxlambda ;
   if(max_lambda <= 0.0)
      max_lambda = 0.999 * GetLambdaThreshold(alpha) ;

//--- A zero threshold (no predictor has any association with the target)
//--- would make the ratio below 0/0, so there is no path to descend.
   if(!(max_lambda > 0.0))
     {
      Print("Unable to determine a positive starting lambda; no training was done");
      return;
     }

   const double min_lambda = 0.001 * max_lambda ;
   const double lambda_factor = exp(log(min_lambda / max_lambda) / (m_nlambda-1)) ;

   string fprint = "" ;
   if(print_steps)
      fprint+="\nDescending lambda path...";

   /*
      Repeatedly train with decreasing lambdas
   */
   double lambda = max_lambda ;
   for(int ilambda=0 ; ilambda<m_nlambda ; ilambda++)
     {
      m_lambdas[ilambda] = lambda ;   // Save in case the caller wants it later
      Train(alpha, lambda, maxits, convergence_criterion, fast_test, ilambda>0) ;  // Warm start after the first lambda

      for(int ivar=0 ; ivar<m_nvars ; ivar++)
         m_lambdabeta_matrix[ilambda*m_nvars+ivar] = m_beta[ivar] ;

      if(print_steps)
        {
         int n_active = 0 ;           // Number of non-zero coefficients at this lambda
         for(int jvar=0 ; jvar<m_nvars ; jvar++)
           {
            if(fabs(m_beta[jvar]) > 0.0)
               ++n_active ;
           }
         fprint+=StringFormat("\n %8.4lf %4d %12.4lf", lambda, n_active, m_explained) ;
        }

      lambda *= lambda_factor ;
     }

   if(print_steps)
      Print(fprint);
  }

//+------------------------------------------------------------------------------------------+
//|   Cross-validation training routine calls TrainLambda() repeatedly to optimize lambda    |
//+------------------------------------------------------------------------------------------+
//--- Chooses lambda by n_folds-fold cross validation.
//--- The whole dataset is used once to find the starting lambda. Then, for
//--- every fold, a model path of n_lambda lambdas is trained on the
//--- in-sample cases and its squared error on the out-of-sample cases is
//--- accumulated per lambda. out_lambdas receives the lambdas tested and
//--- out_lambda_OOS the out-of-sample explained fraction for each of them.
//--- Returns the lambda with the best out-of-sample explained fraction, or
//--- 0.0 if the parameters are unusable or a step fails.
double OptimizeLambda(
   int n_observations,               // Number of cases in full database
   int n_predictors,                 // Number of variables (columns in database)
   int n_folds,                      // Number of folds (at least 2)
   bool covar_updates,               // Does user want (usually faster) covariance update method?
   int n_lambda,                     // This many lambdas are tested (must be at least 2)
   double alpha,                     // User-specified alpha, (0,1) (0 problematic for descending lambda)
   int maxits,                       // Maximum iterations, for safety only
   double convergence_criterion,     // Convergence criterion, typically 1.e-5 or so
   bool fast_test,                   // Base convergence on max beta change vs explained variance?
   double &in_matrix[],              // Full database (n_observations rows, n_predictors columns)
   double &in_targets[],             // Predicted variable vector, n_observations long
   double &out_lambdas[],            // Returns the lambdas tested
   double &out_lambda_OOS[],         // Returns OOS explained for each of the above lambdas
   bool print_output = false         // show full output
)
  {
   if(n_lambda < 2)
      return 0.0 ;

//--- With a single fold or a single case one training set would be empty,
//--- and with no folds at all there would be nothing to evaluate.
   if(n_observations < 2 || n_folds < 2)
     {
      Print(__FUNCTION__,": at least two observations and two folds are required");
      return 0.0 ;
     }

//--- make sure the output arrays can hold one entry per lambda
   if((ArraySize(out_lambdas) < n_lambda && ArrayResize(out_lambdas, n_lambda) < n_lambda) ||
      (ArraySize(out_lambda_OOS) < n_lambda && ArrayResize(out_lambda_OOS, n_lambda) < n_lambda))
     {
      Print(__FUNCTION__,": output arrays cannot hold ", n_lambda, " entries");
      return 0.0 ;
     }

   int icase, ivar, ilambda ;

   /*
      Use the entire dataset to find the max lambda that will be used for all descents.
   */
   CCoordinateDescent *cd = new CCoordinateDescent(n_predictors, n_observations, covar_updates, n_lambda) ;
   if(CheckPointer(cd) == POINTER_INVALID)
      return 0.0 ;
   if(!cd.SetData(0, n_observations, in_matrix, in_targets))   // Also verifies the input array sizes
     {
      delete cd ;
      return 0.0 ;
     }
   const double max_lambda = cd.GetLambdaThreshold(alpha) ;
   delete cd ;

   if(!(max_lambda > 0.0))           // Illegal alpha, or no predictor is associated with the target
     {
      Print(__FUNCTION__,": unable to determine a positive starting lambda");
      return 0.0 ;
     }

   if(print_output)
      PrintFormat("%s starting for %d folds with max lambda=%.9lf",__FUNCTION__, n_folds, max_lambda) ;

   int i_IS = 0 ;                    // Training data starts at this index in complete database
   int n_done = 0 ;                  // Number of cases treated as OOS so far

   for(ilambda=0 ; ilambda<n_lambda ; ilambda++)
      out_lambda_OOS[ilambda] = 0.0 ;  // Will cumulate squared error across folds here

   double YsumSquares = 0.0 ;        // Will cumulate to compute explained fraction

   /*
      Process the folds
   */
   for(int ifold=0 ; ifold<n_folds ; ifold++)
     {
      const int n_OOS = (n_observations - n_done) / (n_folds - ifold) ;  // Number OOS (test set)
      const int n_IS  = n_observations - n_OOS ;                         // Number IS (training set)
      const int i_OOS = (i_IS + n_IS) % n_observations ;                 // OOS starts at this index

      // Train the model path with this IS set
      cd = new CCoordinateDescent(n_predictors, n_IS, covar_updates, n_lambda) ;
      if(CheckPointer(cd) == POINTER_INVALID)
         return 0.0 ;
      if(!cd.SetData(i_IS, n_observations, in_matrix, in_targets))
        {
         delete cd ;
         return 0.0 ;
        }
      cd.TrainLambda(alpha, maxits, convergence_criterion, fast_test, max_lambda, print_output) ;

      // The target statistics of this fold do not depend on the lambda or the case
      const double fold_ymean  = cd.GetYmean() ;
      const double fold_yscale = cd.GetYscale() ;

      // Compute OOS performance for each lambda and sum across folds.
      // Normalization of X and Y is repeated for every lambda, when it could be
      // done once and saved; the relative cost is minimal and this is simpler.
      for(ilambda=0 ; ilambda<n_lambda ; ilambda++)
        {
         out_lambdas[ilambda] = cd.GetLambdaAt(ilambda) ;  // This will be the same for all folds
         const int coefs = ilambda * n_predictors ;        // Offset of this lambda's coefficients
         double sum = 0.0 ;
         for(icase=0 ; icase<n_OOS ; icase++)
           {
            const int k = (icase + i_OOS) % n_observations ;
            double pred = 0.0 ;
            for(ivar=0 ; ivar<n_predictors ; ivar++)
               pred += cd.GetLambdaBetaAt(coefs+ivar) * (in_matrix[k*n_predictors+ivar] - cd.GetXmeansAt(ivar)) / cd.GetXscalesAt(ivar) ;
            const double Ynormalized = (in_targets[k] - fold_ymean) / fold_yscale ;
            const double diff = Ynormalized - pred ;

            if(ilambda == 0)                               // Count each OOS case once
               YsumSquares += Ynormalized * Ynormalized ;
            sum += diff * diff ;
           }
         out_lambda_OOS[ilambda] += sum ;  // Cumulate for this fold
        }  // For ilambda

      delete cd ;

      n_done += n_OOS ;                           // Cumulate OOS cases just processed
      i_IS = (i_IS + n_OOS) % n_observations ;    // Next IS starts at this index
     }  // For ifold

   /*
      Compute OOS explained variance for each lambda, and keep track of the best
   */
   int ibest = 0 ;                   // Defined even if no lambda beats the initial 'best'
   double best = -1.e60 ;
   for(ilambda=0 ; ilambda<n_lambda ; ilambda++)
     {
      if(YsumSquares > 0.0)
         out_lambda_OOS[ilambda] = (YsumSquares - out_lambda_OOS[ilambda]) / YsumSquares ;
      else
         out_lambda_OOS[ilambda] = 0.0 ;          // No OOS variance to explain
      if(out_lambda_OOS[ilambda] > best)
        {
         best = out_lambda_OOS[ilambda] ;
         ibest = ilambda ;
        }
     }

   if(print_output)
      PrintFormat("\n%s ending with best lambda=%9.9lf  explained=%9.9lf",__FUNCTION__, out_lambdas[ibest], best) ;

   return out_lambdas[ibest] ;
  }
//+------------------------------------------------------------------+
