//+------------------------------------------------------------------+
//|                                                         fsca.mqh |
//|                                  Copyright 2024, MetaQuotes Ltd. |
//|                                             https://www.mql5.com |
//+------------------------------------------------------------------+
#property copyright "Copyright 2024, MetaQuotes Ltd."
#property link      "https://www.mql5.com"
#include<np.mqh>
//+------------------------------------------------------------------+
//| standardize a matrix                                             |
//+------------------------------------------------------------------+
matrix            stdmat(matrix &in)
  {
//--- statistics taken along axis 0 of the input
   vector col_means = in.Mean(0);
   vector col_devs  = in.Std(0);
//--- tiny offset keeps the division finite when a deviation is zero
   col_devs+=1e-10;

   matrix scaled = in;
   ulong  total_rows = scaled.Rows();
   ulong  r = 0;

//--- replace each row with its centred and scaled version
   while(r<total_rows)
     {
      vector centred = in.Row(r)-col_means;
      bool   stored  = scaled.Row(centred/col_devs,r);

      if(!stored)
        {
         //--- on failure the caller receives an all-zero matrix of the input's shape
         Print(__FUNCTION__, " error ", GetLastError());
         return matrix::Zeros(in.Rows(), in.Cols());
        }

      ++r;
     }

   return scaled;
  }
//+------------------------------------------------------------------+
//| fsca class implementation                                        |
//+------------------------------------------------------------------+
class CFsca
  {
private:
   bool              m_fitted;              //true only after fit() has completed successfully
   matrix m_corrmat,             //correlation matrix of the standardized data
          m_covar,               //working matrix built from the kept columns
          m_data,                //standardized data is here
          m_eigvectors,          //eigenvectors of m_corrmat (column order reversed after decomposition)
          m_structmat,           //factor loading matrix of m_corrmat
          m_principal_components,//principal components
          m_fscv_struct,         //factor structure of the backward refined selection
          m_fscv_eigvects,       //eigenvectors of the backward refined selection
          m_Fsca,                //ordered fsca components
          m_coeffs,              //fsca component coefficients
          m_Fscv;                //backward refined fsca components
   vector m_eigvalues,           //eigenvalues of m_corrmat (order reversed after decomposition)
          m_sqcorr,              //per variable: (sum of squared correlations - 1)/(m_preds-1)
          m_fscv_eigvals,        //eigenvalues of the backward refined selection
          m_fscv_cumeigvals,     //cumulative variance contribution (percent) of the refined selection
          m_cumeigvalues;        //cumulative variance contribution (percent) of m_corrmat
   ulong             m_num_comps;           //number of eigenvalues of m_corrmat above 1.e-8
   ulong             m_preds;               //number of variables (columns) in dataset (m_data)
   ulong  m_keptorderedcolumns[],//indices of columns upon which components are calculated for ordered fsca
          m_keptrefinedcolumns[],//indices of columns upon which components are calculated for backward refined fsca
          m_keptcolumns[],       //scratch index array handed to compute_criterion() by fit()
          m_bestcolumn;          //index of the best candidate column found in the current selection step
   double            m_best_crit;           //best criterion value

   //+------------------------------------------------------------------+
   //|   adjust correlation matrix according to specific columns        |
   //|   dat      - data whose columns are combined                     |
   //|   keptcols - indices of the columns of dat to use                |
   //|   Result is stored in m_covar: a square matrix, one row/column   |
   //|   per kept column, holding the sums of cross-products of those   |
   //|   columns with the diagonal forced to the number of rows of dat. |
   //+------------------------------------------------------------------+

   void              adjust_corr_matrix(matrix &dat,ulong &keptcols[])
     {
      ulong nkept = ulong(keptcols.Size());

      //--- m_corrmat is only used as a correctly sized starting buffer
      m_covar = m_corrmat;

      //--- clear the strictly lower triangle of the leading nkept x nkept corner
      for(ulong i=1 ; i<nkept ; i++)
        {
         for(ulong j=0 ; j<i ; j++)
            m_covar[i][j] = 0.0 ;
        }

      //--- accumulate cross-products of the kept columns into the lower triangle
      for(ulong i=0; i<dat.Rows(); i++)
        {
         for(ulong j=1; j<nkept; j++)
           {
            double dtemp = dat[i][keptcols[j]] ;
            for(ulong k=0; k<j ; k++)
               m_covar[j][k] += dtemp * dat[i][keptcols[k]] ;
           }
        }

      //--- mirror the lower triangle into the upper triangle
      for(ulong j=1; j<nkept; j++)
        {
         for(ulong k=0; k<j; k++)
            m_covar[k][j] = m_covar[j][k] ;
        }

      //--- diagonal is set to the row count rather than accumulated
      for(ulong j=0; j<nkept; j++)
         m_covar[j][j] = double(dat.Rows());

      //--- keep only the leading corner that was filled in above
      m_covar = np::sliceMatrix(m_covar,0,nkept,1,0,nkept);
     }

   //+------------------------------------------------------------------------+
   //|  forward stepwise component selection with optional backward refinement|
   //|  keptcols - receives the selected column indices (size = m_num_comps)  |
   //|  kept     - receives the number of columns selected                    |
   //|  refine   - run backward_refinement() after every added column         |
   //|  Returns false when no candidate raises the criterion above the best   |
   //|  value seen so far, or when keptcols has no room for a selection.      |
   //+------------------------------------------------------------------------+

   bool              stepwise_comp_selection(ulong &keptcols[],ulong &kept,bool refine)
     {
      kept = 0;

      if(keptcols.Size()==0)
        {
         Print(__FUNCTION__," no room to store selected columns");
         return false;
        }

      //--- first column: the variable with the largest entry of m_sqcorr
      kept  = 1;
      keptcols[0] = m_sqcorr.ArgMax();
      m_best_crit = -1.e50;
      double crit = 0.0;

      while((kept)<ulong(m_num_comps))
        {
         m_bestcolumn = ULONG_MAX;

         for(ulong icol=0 ; icol<m_preds ; icol++)
           {
            //--- skip candidates that are already part of the selection
            bool already_kept = false;
            for(ulong i=0 ; i<kept ; i++)
              {
               if(keptcols[i] == icol)
                 {
                  already_kept = true;
                  break ;
                 }
              }

            if(already_kept)
               continue ;

            crit = compute_criterion(m_corrmat, keptcols,kept,icol) ;

            //--- m_best_crit is deliberately not reset per step: a new column must beat it
            if(crit > m_best_crit)
              {
               m_best_crit = crit ;
               m_bestcolumn = icol ;
              }
           }

         if(m_bestcolumn==ULONG_MAX)
           {
            Print(__FUNCTION__," Adding a new column failed to improve the criterion!");
            return false;
           }

         keptcols[kept] = m_bestcolumn;
         ++kept;

         //Print(" Added column ", m_bestcolumn, " with crit ", m_best_crit);

         //--- repeat refinement passes until one completes without a replacement
         if(refine)
            while(backward_refinement(m_corrmat,keptcols,kept,crit));
        }

      return true;
     }
   //+------------------------------------------------------------------+
   //| backward refinement routine                                      |
   //| Tries to replace each kept column with an unused one whenever    |
   //| that raises the criterion returned by substvar().                |
   //|  covar        - matrix the criterion is computed from            |
   //|  kept_columns - current selection, updated in place              |
   //|  nkept        - number of valid entries in kept_columns          |
   //|  best_crit    - receives the best criterion value reached        |
   //| Returns 1 if at least one column was replaced, otherwise 0.      |
   //+------------------------------------------------------------------+

   ulong             backward_refinement(matrix &covar, ulong &kept_columns[], ulong nkept, double &best_crit)
     {
      ulong i, old_col, new_col, best_col, refined ;
      double crit;

      //--- substituting a column with itself yields the criterion of the current selection
      best_crit = substvar(covar,kept_columns,nkept,0,kept_columns[0]);

      refined = 0;

      for(old_col=0 ; old_col<nkept ; old_col++)
        {
         //--- the last slot is only revisited if an earlier slot changed in this pass
         if(old_col == nkept-1  &&  !refined)
            break ;

         best_col = ULONG_MAX ;
         for(new_col=0 ; new_col<m_preds ; new_col++)
           {
            //--- candidates must not already be in the selection
            for(i=0 ; i<nkept ; i++)
              {
               if(new_col == kept_columns[i])
                  break ;
              }
            if(i < nkept)
               continue ;

            crit = substvar(covar, kept_columns,nkept, old_col, new_col) ;

            if(crit > best_crit)
              {
               best_crit = crit ;
               best_col = new_col ;
              }
           }

         if(best_col!=ULONG_MAX)
           {
            //Print(__FUNCTION__,"  Replaced predictor at column ",kept_columns[old_col], " with ",best_col," to get criterion = ", best_crit) ;
            kept_columns[old_col] = best_col ;
            refined = 1 ;
           }
        }
      return refined;
     }
   //+------------------------------------------------------------------+
   //|   Gram Schmidt routine                                           |
   //|   Returns a copy of input_ whose columns have been made          |
   //|   orthogonal to all preceding columns and scaled to unit length. |
   //|   Returns an empty (0x0) matrix if a column has zero length      |
   //|   or a column could not be written back.                         |
   //+------------------------------------------------------------------+

   matrix            gram_schmidt(matrix &input_)
     {
      ulong irow, icol, inner ;
      double dtemp, sum ;
      ulong nrows = input_.Rows();
      ulong ncols = input_.Cols();
      matrix output = input_;

      //--- first column only needs normalising
      vector colsum = output.Col(0);
      colsum = MathPow(colsum,2.0);
      sum = colsum.Sum();

      sum = sqrt(sum) ;

      if(sum == 0.0)
        {
         Print(__FUNCTION__, " sum == 0.0 ");
         return matrix::Zeros(0,0);
        }

      if(!output.Col(output.Col(0)/sum,0))
        {
         Print(__FUNCTION__, " failed column insertion ", GetLastError());
         return matrix::Zeros(0,0);
        }

      for(icol=1 ; icol<ncols ; icol++)
        {
         //--- remove the projection onto every previously processed column
         for(inner=0 ; inner<icol ; inner++)
           {
            sum = 0.0 ;
            for(irow=0 ; irow<nrows ; irow++)
               sum += (output[irow][icol] * output[irow][inner]);
            for(irow=0 ; irow<nrows ; irow++)
               output[irow][icol] -= (sum * output[irow][inner]) ;
           }

         //--- length of what is left of the column
         sum = 0.0 ;
         for(irow=0 ; irow<nrows ; irow++)
           {
            dtemp = output[irow][icol] ;
            sum += dtemp * dtemp ;
           }

         sum = sqrt(sum) ;

         if(sum == 0.0)
           {
            Print(__FUNCTION__, " sum == 0.0 ");
            return matrix::Zeros(0,0);
           }

         if(!output.Col(output.Col(icol)/sum,icol))
           {
            Print(__FUNCTION__, " failed column insertion ", GetLastError());
            return matrix::Zeros(0,0);
           }
        }

      return output;
     }
   //+------------------------------------------------------------------+
   //| variable substitution routine                                    |
   //| Criterion of the selection obtained by temporarily putting       |
   //| new_col into slot old_col of keptcols.                           |
   //|  covar    - matrix the criterion is computed from                |
   //|  keptcols - current selection (restored before returning)        |
   //|  nkept    - number of valid entries in keptcols                  |
   //| Returns the criterion, or -DBL_MAX if the sub-matrix of the      |
   //| trial selection could not be inverted to the expected size.      |
   //+------------------------------------------------------------------+

   double            substvar(matrix &covar,ulong &keptcols[],ulong nkept, ulong old_col,ulong new_col)
     {
      ulong i, j, k, irow, saved_col ;
      double sum, crit, dtemp ;
      matrix mt(nkept,nkept);

      saved_col = keptcols[old_col] ;
      keptcols[old_col] = new_col ;

      //--- sub-matrix of covar restricted to the trial selection
      for(i=0 ; i<nkept ; i++)
        {
         irow = keptcols[i] ;
         for(j=0 ; j<nkept ; j++)
            mt[i][j] = covar[irow][keptcols[j]] ;
        }

      matrix mtinv = mt.Inv();

      //--- guard against a failed inversion before indexing the result
      if(mtinv.Rows()!=nkept || mtinv.Cols()!=nkept)
        {
         keptcols[old_col] = saved_col ;
         return -DBL_MAX ;
        }

      vector vec(nkept);

      //--- for every variable j accumulate the quadratic form vec' * mtinv * vec,
      //--- using only the diagonal and lower triangle of mtinv (off-diagonal counted twice)
      crit = 0.0 ;
      for(j=0 ; j<m_preds ; j++)
        {
         for(i=0 ; i<nkept ; i++)
            vec[i] = covar[j][keptcols[i]] ;

         sum = 0.0 ;
         for(i=0 ; i<nkept ; i++)
            sum += vec[i] * vec[i] * mtinv[i][i] ;
         crit += sum ;

         sum = 0.0 ;
         for(i=1 ; i<nkept ; i++)
           {
            dtemp = vec[i] ;
            for(k=0 ; k<i ; k++)
               sum += dtemp * vec[k] * mtinv[i][k] ;
           }
         crit += 2.0 * sum ;
        }

      keptcols[old_col] = saved_col ;

      return crit ;
     }
   //+------------------------------------------------------------------+
   //|  calculates the criterion for assessing a component              |
   //|  Criterion of the selection formed by the first nkept entries    |
   //|  of keptcols plus trial_col.                                     |
   //|  covar     - matrix the criterion is computed from               |
   //|  keptcols  - current selection (not modified)                    |
   //|  nkept     - number of valid entries in keptcols (may be 0)      |
   //|  trial_col - candidate column appended to the selection          |
   //|  Returns the criterion, or -DBL_MAX if the sub-matrix could not  |
   //|  be inverted to the expected size.                               |
   //+------------------------------------------------------------------+

   double            compute_criterion(matrix &covar, ulong &keptcols[],ulong nkept, ulong trial_col)
     {
      ulong i, j, k, irow, new_kept;
      double sum, crit, dtemp ;
      new_kept = nkept+1;
      matrix mt(new_kept,new_kept);

      //--- sub-matrix of covar for the kept columns followed by the trial column
      for(i=0 ; i<new_kept ; i++)
        {
         if(i < nkept)
            irow = keptcols[i] ;
         else
            irow = trial_col ;
         for(j=0 ; j<nkept ; j++)
            mt[i][j] = covar[irow][keptcols[j]] ;
         mt[i][nkept] = covar[irow][trial_col] ;
        }

      matrix mtinv = mt.Inv();

      //--- guard against a failed inversion before indexing the result
      if(mtinv.Rows()!=new_kept || mtinv.Cols()!=new_kept)
         return -DBL_MAX ;

      vector vec(new_kept);

      //--- same quadratic form as in substvar(), summed over all variables
      crit = 0.0;
      for(j=0 ; j<m_preds ; j++)
        {
         for(i=0 ; i<nkept ; i++)
            vec[i] = covar[j][keptcols[i]] ;
         vec[nkept] = covar[j][trial_col] ;

         sum = 0.0 ;
         for(i=0 ; i<new_kept ; i++)
            sum += vec[i] * vec[i] * mtinv[i][i] ;
         crit += sum ;

         sum = 0.0 ;
         for(i=1 ; i<new_kept ; i++)
           {
            dtemp = vec[i] ;
            for(k=0 ; k<i ; k++)
               sum += dtemp * vec[k] * mtinv[i][k] ;
           }
         crit += 2.0 * sum ;
        }

      return crit ;
     }
   //+------------------------------------------------------------------+
   //|   calculates the principal components                            |
   //|   Projects every row of m_data onto the first m_num_comps        |
   //|   eigenvectors, dividing by the square root of the eigenvalue.   |
   //+------------------------------------------------------------------+

   matrix            compute_principal_components(void)
     {
      matrix out(m_data.Rows(),ulong(m_num_comps));
      vector drow;
      double sum;

      for(ulong i=0; i<m_data.Rows(); i++)
        {
         drow = m_data.Row(i);
         for(ulong j = 0; j<m_num_comps; j++)
           {
            sum = 0.0;
            for(ulong k = 0; k<m_data.Cols(); k++)
              {
               sum+=drow[k]*m_eigvectors[k][j]/sqrt(m_eigvalues[j]);
              }
            out[i][j]=sum;
           }
        }

      return out;
     }
   //+------------------------------------------------------------------+
   //|   calculates the ordered fsca components                         |
   //|   in - standardized data the columns are selected from           |
   //|   Stores the coefficient matrix in m_coeffs and returns the      |
   //|   components (rows of m_data x number of kept columns).          |
   //|   Returns an empty (0x0) matrix on any failure.                  |
   //+------------------------------------------------------------------+

   matrix            compute_fsca_components(matrix &in)
     {
      ulong numkept;

      ArrayInitialize(m_keptorderedcolumns,ULONG_MAX);

      if(!stepwise_comp_selection(m_keptorderedcolumns,numkept,false))
         return matrix::Zeros(0,0);

      //--- cross-product matrix of the selected columns ends up in m_covar
      adjust_corr_matrix(in,m_keptorderedcolumns);

      matrix kept_matrixcols = np::selectMatrixCols(in,m_keptorderedcolumns);

      kept_matrixcols = gram_schmidt(kept_matrixcols);

      //--- gram_schmidt() signals failure with an empty matrix
      if(kept_matrixcols.Rows()!=in.Rows() || kept_matrixcols.Cols()!=numkept)
        {
         Print(__FUNCTION__, " orthogonalization of the selected columns failed ");
         return matrix::Zeros(0,0);
        }

      //--- rescale the unit length columns by sqrt(number of rows)
      for(ulong i=0 ; i<kept_matrixcols.Rows() ; i++)
        {
         for(ulong j=0 ; j<kept_matrixcols.Cols() ; j++)
            kept_matrixcols[i][j] *= sqrt((double) kept_matrixcols.Rows()) ;
        }

      matrix inv_covar = m_covar.Inv();

      if(inv_covar.Rows()!=numkept || inv_covar.Cols()!=numkept)
        {
         Print(__FUNCTION__, " matrix inversion failed ", GetLastError());
         return matrix::Zeros(0,0);
        }

      //--- zm = (selected columns)' * (orthogonalized columns)
      matrix zm(numkept,numkept);

      for(ulong i=0 ; i<numkept ; i++)
        {
         for(ulong j=0 ; j<numkept ; j++)
           {
            double sum = 0.0 ;
            for(ulong k=0 ; k<in.Rows() ; k++)
               sum += in[k][m_keptorderedcolumns[i]] * kept_matrixcols[k][j] ;
            zm[i][j] = sum ;
           }
        }

      //--- coefs = inv_covar * zm
      matrix coefs(numkept,numkept);

      for(ulong i=0 ; i<(numkept) ; i++)
        {
         for(ulong j=0 ; j<(numkept) ; j++)
           {
            double sum = 0.0 ;
            for(ulong k=0 ; k<(numkept) ; k++)
               sum += inv_covar[i][k] * zm[k][j] ;
            coefs[i][j] = sum ;
           }
        }

      m_coeffs = coefs;

      //--- components = selected columns of m_data * coefs
      matrix out(m_data.Rows(),coefs.Cols());
      vector drow;
      double sum;

      for(ulong i=0; i<m_data.Rows(); i++)
        {
         drow = m_data.Row(i);
         for(ulong j = 0; j<coefs.Cols(); j++)
           {
            sum = 0.0;
            for(ulong k = 0; k<coefs.Rows(); k++)
               sum+=drow[m_keptorderedcolumns[k]]*coefs[k][j];
            out[i][j]=sum;
           }
        }

      return out;
     }

   //+------------------------------------------------------------------+
   //|  computes the backward refined fsca components                   |
   //|  in - standardized data the columns are selected from            |
   //|  Fills m_covar, m_fscv_struct, m_fscv_eigvects, m_fscv_eigvals   |
   //|  and m_fscv_cumeigvals. Returns an empty (0x0) matrix on failure.|
   //+------------------------------------------------------------------+

   matrix            compute_fscv_components(matrix &in)
     {
      ulong numkept;

      ArrayInitialize(m_keptrefinedcolumns,ULONG_MAX);

      if(!stepwise_comp_selection(m_keptrefinedcolumns,numkept,true))
         return matrix::Zeros(0,0);

      matrix selected = np::selectMatrixCols(in,m_keptrefinedcolumns);

      m_covar = selected.CorrCoef(false);

      m_fscv_struct = compute_factor_structure(m_covar,m_fscv_eigvects,m_fscv_eigvals,m_fscv_cumeigvals);

      //--- a failed decomposition leaves the eigen structure with unexpected dimensions
      if(m_fscv_eigvects.Rows()!=numkept || m_fscv_eigvects.Cols()!=numkept || m_fscv_eigvals.Size()!=numkept)
        {
         Print(__FUNCTION__, " factor structure of the selected columns is not available ");
         return matrix::Zeros(0,0);
        }

      matrix out(m_data.Rows(),numkept);
      vector drow;
      double sum;

      for(ulong i=0; i<m_data.Rows(); i++)
        {
         drow = m_data.Row(i);
         for(ulong j = 0; j<numkept; j++)
           {
            sum = 0.0;
            //--- NOTE(review): divides by the eigenvalue, whereas compute_principal_components()
            //--- divides by its square root - confirm this difference is intended
            for(ulong k = 0; k<numkept; k++)
               sum+=drow[m_keptrefinedcolumns[k]]*m_fscv_eigvects[k][j]/m_fscv_eigvals[j];
            out[i][j]=sum;
           }
        }

      return out;
     }
   //+------------------------------------------------------------------+
   //| computes the factor structure of a correlation matrix            |
   //|  covar          - symmetric matrix to decompose                  |
   //|  eigenvectors   - receives the eigenvectors, column order        |
   //|                   reversed after decomposition                   |
   //|  eigenvalues    - receives the eigenvalues, order reversed       |
   //|  cumeigenvalues - receives the running sum of the eigenvalues    |
   //|                   above 1.e-8, as percent of their total         |
   //| Returns the eigenvectors scaled by sqrt(eigenvalue) and clipped  |
   //| to [-1,1]. A 1x1 zero matrix is returned on any failure.         |
   //+------------------------------------------------------------------+

   matrix            compute_factor_structure(matrix &covar,matrix &eigenvectors,vector &eigenvalues,vector &cumeigenvalues)
     {
      if(!covar.EigenSymmetricDC(EIGVALUES_V,eigenvalues,eigenvectors))
        {
         Print(__FUNCTION__, " error ", GetLastError());
         return matrix::Zeros(1,1);
        }
      double sum = 0.0;

      //--- presumably the decomposition yields ascending eigenvalues; reversing puts the largest first
      if(!np::reverseVector(eigenvalues) ||  !np::reverseMatrixCols(eigenvectors))
        {
         Print(__FUNCTION__, " reverse operation error ", GetLastError());
         return matrix::Zeros(1,1);
        }

      //--- running total over the eigenvalues that exceed the threshold
      double cumulate[];
      for(ulong i=0 ; i<eigenvalues.Size() ; i++)
        {
         if(eigenvalues[i]>1.e-8)
           {
            sum += eigenvalues[i] ;
            if(!cumulate.Push(sum))
              {
               Print(__FUNCTION__," error adding element ", GetLastError());
               return matrix::Zeros(1,1);
              }
           }
        }

      //--- nothing to normalise: indexing the last element below would be out of range
      if(cumulate.Size()==0)
        {
         Print(__FUNCTION__," no eigenvalue above threshold ");
         return matrix::Zeros(1,1);
        }

      if(!cumeigenvalues.Assign(cumulate))
        {
         Print(__FUNCTION__," vector assignment error ", GetLastError());
         return matrix::Zeros(1,1);
        }

      //--- express the running totals as a percentage of the final total
      cumeigenvalues/=cumeigenvalues[cumeigenvalues.Size()-1];

      cumeigenvalues*=100.0;

      matrix structmat=eigenvectors;

      //--- negative eigenvalues are treated as zero when taking the square root
      for(ulong i = 0; i<structmat.Cols(); i++)
         if(!structmat.Col(eigenvectors.Col(i)*sqrt(eigenvalues[i]>=0.0?eigenvalues[i]:0.0),i))
           {
            Print(__FUNCTION__, "error ", GetLastError());
            return matrix::Zeros(1,1);
           }

      if(!structmat.Clip(-1.0,1.0))
        {
         Print(__FUNCTION__, "error ", GetLastError());
         return matrix::Zeros(1,1);
        }

      return structmat;
     }
public:
   //+------------------------------------------------------------------+
   //|  constructor                                                     |
   //|  Puts the object into a defined "not fitted" state.              |
   //+------------------------------------------------------------------+

                     CFsca(void)
     {
      m_fitted = false;
      m_num_comps = 0;
      m_preds = 0;
      m_bestcolumn = ULONG_MAX;
      m_best_crit = -1.e50;
     }
   //+------------------------------------------------------------------+
   //| destructor                                                       |
   //+------------------------------------------------------------------+

                    ~CFsca(void)
     {

     }
   //+------------------------------------------------------------------+
   //| perform forward selection component analysis on a raw dataset    |
   //|  data - raw observations, one variable per column                |
   //| Returns true when both the ordered and the backward refined      |
   //| components were computed; results are then available through    |
   //| the get_* methods.                                               |
   //+------------------------------------------------------------------+

   bool              fit(matrix &data)
     {
      m_preds = data.Cols();
      m_fitted = false;

      if(data.Rows()==0 || m_preds==0)
        {
         Print(__FUNCTION__, " empty dataset ");
         return false;
        }

      m_sqcorr = vector::Zeros(m_preds);

      m_data = stdmat(data);
      m_corrmat = m_data.CorrCoef(false);

      m_structmat = compute_factor_structure(m_corrmat,m_eigvectors,m_eigvalues,m_cumeigvalues);

      //--- a single row is the error sentinel of compute_factor_structure()
      if(m_structmat.Rows()==1)
         return false;

      m_num_comps = m_cumeigvalues.Size();

      if(ArrayResize(m_keptorderedcolumns,int(m_num_comps))<0 || ArrayResize(m_keptrefinedcolumns,int(m_num_comps))<0 ||
         ArrayResize(m_keptcolumns,int(m_num_comps))<0  || ArrayInitialize(m_keptcolumns,ULONG_MAX)<0)
        {
         Print(__FUNCTION__, " array error ", GetLastError());
         return false;
        }

      m_principal_components = compute_principal_components();

      //--- with no kept columns the criterion is evaluated for column i alone
      for(ulong i=0; i<m_preds; i++)
         m_sqcorr[i] = (compute_criterion(m_corrmat,m_keptcolumns,0,i) - 1.0)/double(m_preds-1);

      //--- shrink the off-diagonal correlations until every eigenvalue is positive.
      //--- The smallest eigenvalue is tested with Min() because m_eigvalues was reversed
      //--- while the values recomputed inside the loop are not.
      vector evd_vals = m_eigvalues;
      while(evd_vals.Min()<= 0.0)
        {
         for(ulong j=1 ; j<m_preds ; j++)
           {
            for(ulong k=0 ; k<j ; k++)
              {
               m_corrmat[j][k] *= 0.99999 ;
               m_corrmat[k][j] = m_corrmat[j][k] ;
              }
           }

         matrix empty;
         if(!m_corrmat.EigenSymmetricDC(EIGVALUES_N,evd_vals,empty))
           {
            Print(__FUNCTION__, " failed eig decomp ", GetLastError());
            return false;
           }
        }

      m_Fsca = compute_fsca_components(m_data);

      m_Fscv = compute_fscv_components(m_data);

      m_fitted = (m_Fsca.Rows()>1 && m_Fscv.Rows()>1);

      return m_fitted;
     }

   //+------------------------------------------------------------------+
   //| get the principal components                                     |
   //| Returns an empty matrix if the object has not been fitted.       |
   //+------------------------------------------------------------------+

   matrix            get_principal_components(void)
     {
      if(!m_fitted)
        {
         Print(__FUNCTION__," either fit() returned an error or it was not called ");
         return matrix::Zeros(0,0);
        }

      return m_principal_components;
     }


   //+------------------------------------------------------------------+
   //| get the ordered fsca components                                  |
   //| Returns an empty matrix if the object has not been fitted.       |
   //+------------------------------------------------------------------+

   matrix            get_fsca_components(void)
     {
      if(!m_fitted)
        {
         Print(__FUNCTION__," either fit() returned an error or it was not called ");
         return matrix::Zeros(0,0);
        }

      return m_Fsca;
     }


   //+------------------------------------------------------------------+
   //| get the backward refined fsca components                         |
   //| Returns an empty matrix if the object has not been fitted.       |
   //+------------------------------------------------------------------+

   matrix            get_fscv_components(void)
     {
      if(!m_fitted)
        {
         Print(__FUNCTION__," either fit() returned an error or it was not called ");
         return matrix::Zeros(0,0);
        }

      return m_Fscv;
     }


   //+------------------------------------------------------------------+
   //| get indices of variables defining the ordered fsca components    |
   //| Returns false if not fitted or if nothing was copied.            |
   //+------------------------------------------------------------------+

   bool              get_fsca_var_indices(ulong &indices[])
     {
      if(!m_fitted)
        {
         Print(__FUNCTION__," either fit() returned an error or it was not called ");
         return false;
        }

      return (ArrayCopy(indices,m_keptorderedcolumns,0,0,int(m_num_comps))>0);
     }

   //+---------------------------------------------------------------------------+
   //| get indices of variables defining the backward refined fsca components    |
   //| Returns false if not fitted or if nothing was copied.                     |
   //+---------------------------------------------------------------------------+

   bool              get_fscv_var_indices(ulong &indices[])
     {
      if(!m_fitted)
        {
         Print(__FUNCTION__," either fit() returned an error or it was not called ");
         return false;
        }

      return (ArrayCopy(indices,m_keptrefinedcolumns,0,0,int(m_num_comps))>0);
     }

   //+-------------------------------------------------------------------+
   //| get cumulative variance contribution based on principal components|
   //+-------------------------------------------------------------------+
   vector            get_principal_components_cumulative_variance_contribution(void)
     {
      if(!m_fitted)
        {
         Print(__FUNCTION__," either fit() returned an error or it was not called ");
         return vector::Zeros(0);
        }

      return m_cumeigvalues;
     }
   //+-------------------------------------------------------------------+
   //| get cumulative variance contribution of the backward refined FSCs |
   //+-------------------------------------------------------------------+
   vector            get_fscv_cumulative_variance_contribution(void)
     {
      if(!m_fitted)
        {
         Print(__FUNCTION__," either fit() returned an error or it was not called ");
         return vector::Zeros(0);
        }

      return m_fscv_cumeigvals;
     }
   //+-------------------------------------------------------------------+
   //| get eigen structure of principal components                       |
   //+-------------------------------------------------------------------+
   bool              get_principal_components_eigstructure(matrix &vectors,vector &values)
     {
      if(!m_fitted)
        {
         Print(__FUNCTION__," either fit() returned an error or it was not called ");
         return false;
        }

      vectors = m_eigvectors;
      values = m_eigvalues;

      return true;
     }
   //+-------------------------------------------------------------------+
   //| get eigen structure of backward refined FSCs                      |
   //+-------------------------------------------------------------------+
   bool              get_fscv_eigstructure(matrix &vectors,vector &values)
     {
      if(!m_fitted)
        {
         Print(__FUNCTION__," either fit() returned an error or it was not called ");
         return false;
        }

      vectors = m_fscv_eigvects;
      values = m_fscv_eigvals;

      return true;
     }
   //+-------------------------------------------------------------------+
   //| get the factor structure of the principal components              |
   //+-------------------------------------------------------------------+
   matrix            get_principal_components_factorstructure(void)
     {
      if(!m_fitted)
        {
         Print(__FUNCTION__," either fit() returned an error or it was not called ");
         return matrix::Zeros(0,0);
        }

      return m_structmat;
     }
   //+-------------------------------------------------------------------+
   //| get the factor structure of FSC with backward refinement          |
   //+-------------------------------------------------------------------+
   matrix            get_fscv_factorstructure(void)
     {
      if(!m_fitted)
        {
         Print(__FUNCTION__," either fit() returned an error or it was not called ");
         return matrix::Zeros(0,0);
        }

      return m_fscv_struct;
     }
   //+------------------------------------------------------------------+
   //|get mean squared correlations                                     |
   //+------------------------------------------------------------------+
   vector            get_avg_correlations(void)
     {
      if(!m_fitted)
        {
         Print(__FUNCTION__," either fit() returned an error or it was not called ");
         return vector::Zeros(0);
        }

      return m_sqcorr;
     }

   //+-------------------------------------------------------------------+
   //| get forward selection component coefficients matrix               |
   //+-------------------------------------------------------------------+
   matrix            get_fsca_component_coeffs(void)
     {
      if(!m_fitted)
        {
         Print(__FUNCTION__," either fit() returned an error or it was not called ");
         return matrix::Zeros(0,0);
        }

      return m_coeffs;
     }

  };
//+------------------------------------------------------------------+
