//+------------------------------------------------------------------+
//|                                                          OLS.mqh |
//|                                  Copyright 2023, MetaQuotes Ltd. |
//|                                             https://www.mql5.com |
//+------------------------------------------------------------------+
#property copyright "Copyright 2023, MetaQuotes Ltd."
#property link      "https://www.mql5.com"
//+------------------------------------------------------------------+
//| matrix multiplication                                            |
//+------------------------------------------------------------------+
template<typename T>
matrix<T> matmul(const matrix<T>& matrix_a, const matrix<T>& matrix_b)
  {
   matrix<T> matrix_c = matrix<T>::Zeros(0,0);

   if(matrix_a.Cols()!=matrix_b.Rows())
      return(matrix_c);

   ulong M=matrix_a.Rows();
   ulong K=matrix_a.Cols();
   ulong N=matrix_b.Cols();
   matrix_c=matrix<T>::Zeros(M,N);

   for(ulong m=0; m<M; ++m)
      for(ulong k=0; k<K; ++k)
         for(ulong n=0; n<N; ++n)
            matrix_c[m][n]+=matrix_a[m][k]*matrix_b[k][n];

   return(matrix_c);
  }
//+------------------------------------------------------------------+
//| Matrix multiply with vector                                      |
//+------------------------------------------------------------------+
template<typename T>
vector<T> matmul(const matrix<T>& matrix_a, const vector<T>& vector_b)
  {
   matrix<T> matrix_b = matrix<T>::Zeros(vector_b.Size(),1);
   matrix_b.Col(vector_b,0);
   matrix<T>out = matmul(matrix_a,matrix_b);
   return out.Col(0);
  }
//+------------------------------------------------------------------+
//| Ordinary least squares class                                     |
//+------------------------------------------------------------------+
class OLS
  {
private:
   bool   m_const_prepended;    //check column for constant in design matrix
   matrix m_exog,               //design matrix
          m_pinv,               //pseudo-inverse of matrix
          m_cov_params,         //covariance of matrix
          m_m_error,            //error matrix
          m_norm_cov_params;    //normalized covariance matrix
   vector m_endog,              //dependent variables
          m_weights,            //weights
          m_singularvalues,     //singular values of solution
          m_params,             //coefficients of regression model(solution)
          m_tvalues,            //test statistics of model
          m_bse,                //standard errors of model
          m_const_cols,         //mark constant columns in design matrix
          m_resid;              //residuals of model
   ulong  m_obs,                //number of observations
          m_model_dof,          //degrees of freedom of model
          m_resid_dof,          //degrees of freedom of residuals
          m_kconstant,          //number of constants
          m_rank;               //rank of design matrix
   double m_aic,                //Akiake information criteria
          m_bic,                //Bayesian information criteria
          m_scale,              //scale of model
          m_llf,                //loglikelihood of model
          m_sse,                //sum of squared errors
          m_rsqe,               //r-squared of model
          m_centeredtss,        //centered sum of squares
          m_uncenteredtss;      //uncentered sum of squares
   uint              m_error;              //error flag
   // private methods
   ulong             countconstants(void);
   void              scale(void);
   void              sse(void);
   void              rsqe(void);
   void              centeredtss(void);
   void              uncenteredtss(void);
   void              aic(void);
   void              bic(void);
   void              bse(void);
   void              llf(void);
   void              tvalues(void);
   void              covariance_matrix(void);


public:
   //constructor
                     OLS(void);
   //destructor
                    ~OLS(void);
   //public methods
   bool              Fit(vector &y_vars,matrix &x_vars);
   double            Predict(vector &inputs);
   double            Predict(double _input);
   //get properties of OLS model
   ulong             ModelDOF(void) { if(m_error) return 0; else return m_model_dof;}
   ulong             ResidDOF(void) { if(m_error) return 0; else return m_resid_dof;}
   double            Scale(void)  { if(m_error) return EMPTY_VALUE; else return m_scale;    }
   double            Aic(void)    { if(m_error) return EMPTY_VALUE; else return m_aic;      }
   double            Bic(void)    { if(m_error) return EMPTY_VALUE; else return m_bic;    }
   double            Sse(void)    { if(m_error) return EMPTY_VALUE; else return m_sse;    }
   double            Rsqe(void)   { if(m_error) return EMPTY_VALUE; else return m_rsqe;   }
   double            C_tss(void)  { if(m_error) return EMPTY_VALUE; else return m_centeredtss;}
   double            Loglikelihood(void) { if(m_error) return EMPTY_VALUE; return m_llf; }
   vector            Tvalues(void) { if(m_error) return m_m_error.Col(0); return m_tvalues; }
   vector            Residuals(void) { if(m_error) return m_m_error.Col(0); return m_resid; }
   vector            ModelParameters(void) { if(m_error) return m_m_error.Col(0); return m_params; }
   vector            Bse(void) { if(m_error) return m_m_error.Col(0);  return m_bse; } 
   matrix            CovarianceMatrix(void) { if(m_error) return m_m_error; return m_cov_params; }
  };
//+------------------------------------------------------------------+
//|  constructor                                                     |
//+------------------------------------------------------------------+
OLS::OLS(void)
  {
   m_kconstant = 0;
   m_obs = 0;
   m_model_dof = 0;
   m_resid_dof = 0;
   m_kconstant = 0;
   m_rank = 0;
   m_aic = 0;
   m_bic = 0;
   m_scale = 0;
   m_llf   = 0;
   m_error = 1;

   m_m_error.Resize(2,2);
   m_m_error.Fill(EMPTY_VALUE);

  }
//+------------------------------------------------------------------+
//|Destructor                                                        |
//+------------------------------------------------------------------+
OLS::~OLS(void)
  {

  }
//+------------------------------------------------------------------+
//| Fit data                                                         |
//+------------------------------------------------------------------+
bool OLS::Fit(vector &y_vars,matrix &x_vars)
  {
//---re-initialize internal properties
   m_endog = y_vars;
//---
   m_exog  = x_vars;
//---
   m_kconstant = countconstants();
//---
   m_error = 0;
//---
   m_model_dof = m_resid_dof = m_rank = m_obs= 0;
//---
   m_aic = m_bic= m_llf=0;
//---
   m_obs=m_exog.Rows();
//---
   m_weights = vector::Ones(m_obs);
//---
   m_rank = m_exog.Rank();
//---
   m_model_dof = m_rank - m_kconstant;
//---
   m_resid_dof = m_obs - m_rank;
//---
   if(y_vars.Size()!=x_vars.Rows())
     {
      Print(__FUNCTION__," ",__LINE__," Error Invalid inputs Rows in x_vars not equal to length of y_vars");
      m_error = 1;
      return false;
     }
//---
   matrix U,V;
//---
   ::ResetLastError();
//--- SVD operation
   if(!m_exog.SVD(U,V,m_singularvalues))
     {
      Print(__FUNCTION__," ",__LINE__," SVD operation failed : ",::GetLastError());
      m_error = 1;
      return false;
     }
//--- Compute the pseudo-inverse of a matrix by the Moore-Penrose method
   m_pinv = m_exog.PInv();
//--- transpose m_pinv
   matrix pinv_t = m_pinv.Transpose();
//--- normalize covariance matrix
   m_norm_cov_params = matmul(m_pinv,pinv_t);
//---
   matrix diag;
   if(!diag.Diag(m_singularvalues))
     {
      Print(__FUNCTION__," ",__LINE__," Diag operation failed : ",::GetLastError());
      m_error = 1;
      return false;
     }
//--- rank of diag
   m_rank = diag.Rank();
//--- estimate parameters of the model
   m_params = matmul(m_pinv,m_endog);
//--- caluclate its degrees of freedom
   m_model_dof = m_rank - m_kconstant;
//--- as well as those of the residuals
   m_resid_dof = m_obs - m_rank;
//---get the residuals
   m_resid = m_endog - matmul(m_exog,m_params);
//---compute scale of model
   scale();
//--- compute covariance matrix
   covariance_matrix();
//--- compute standard errors
   bse();
//--- compute sum of squared errors
   sse();
//--- compute loglikelihood function
   llf();
//--- compute centered sum of squares
   centeredtss();
//--- compute uncentered sum of squares
   uncenteredtss();
//--- compute r-squared
   rsqe();
//--- compute AIC
   aic();
//--- compute BIC
   bic();
//--- compute tvalues
   tvalues();
//---
   return true;
  }
//+------------------------------------------------------------------+
//| Predict next value based on model parameters                     |
//+------------------------------------------------------------------+
double OLS::Predict(vector &inputs)
  {
//---
   if(m_error)
     {
      Print("Invalid model");
      return EMPTY_VALUE;
     }
//---
   if(inputs.Size()!=(m_params.Size()-m_kconstant))
     {
      Print("invalid inputs: supplied vector does not match size of model parameters");
      return EMPTY_VALUE;
     }
//---
   double prediction = 0;
//---   
   for(ulong i = 0,k = 0;i<m_const_cols.Size(); i++)
      {
       if(!m_const_cols[i])
          prediction+=(m_params[i]*inputs[k++]); 
       else
          prediction+=(m_params[i]);   
      }           
//---
   return prediction;
  }
//+------------------------------------------------------------------+
//| Predict next value based on model parameters                     |
//+------------------------------------------------------------------+
double OLS::Predict(double _input)
  {
//---
   if(m_error || (m_params.Size()-m_kconstant)>1)
     {
      if(m_error)
         Print("Invalid model");
      else
         Print("invalid inputs: insufficient number of predictors supplied");   
      return EMPTY_VALUE;
     }
//---
  double prediction = 0;
//---
  if(m_kconstant)
      prediction=(m_const_cols[0])?m_params[0]+(m_params[1]*_input):m_params[1]+(m_params[0]*_input);
  else
      prediction=m_params[0]*_input;    
//---
   return prediction;
  }
//+------------------------------------------------------------------+
//|Count the number of constants in RHS matrix                       |
//+------------------------------------------------------------------+
ulong OLS::countconstants(void)
  {
   ulong count = 0;
   vector temp;
   
   m_const_cols = vector::Zeros(m_exog.Cols());
   
   for(ulong i =0; i<m_exog.Cols(); i++)
     {
      temp = m_exog.Col(i);
      if((temp.Max()-temp.Min()) == 0.0 && temp.Max() == 1.0)
         {
          count++;
          m_const_cols[i]=1.0;
         }

     }
     
   return count;
  }
//+------------------------------------------------------------------+
//|scale factor for the covariance matrix                            |
//+------------------------------------------------------------------+
void OLS::scale(void)
  {
   m_scale =  m_resid.Dot(m_resid)/double(m_resid_dof);
  }
//+------------------------------------------------------------------+
//| the variance/covariance matrix                                   |
//+------------------------------------------------------------------+
void OLS::covariance_matrix(void)
  {
//---
   m_cov_params=m_norm_cov_params*m_scale;
  }
//+------------------------------------------------------------------+
//| standard errors of the parameter estimates                       |
//+------------------------------------------------------------------+
void OLS::bse(void)
  {
   m_bse=m_cov_params.Diag();
   m_bse=MathSqrt(m_bse);
  }
//+------------------------------------------------------------------+
//| sum of squared errors                                            |
//+------------------------------------------------------------------+
void OLS::sse(void)
  {
   vector sqresid=MathPow(m_resid,2);

   m_sse = sqresid.Sum();

  }
//+------------------------------------------------------------------+
//|likelihood function for the OLS model                             |
//+------------------------------------------------------------------+
void OLS::llf(void)
  {
   double obs2=double(m_obs)/2.0;
   m_llf = -obs2*log(2.0*M_PI) - obs2*log(m_sse / double(m_obs)) - obs2;
//m_llf = -1*obs2*MathLog(2.0*M_PI*m_scale) - m_sse/(2.0*m_scale);
  }
//+------------------------------------------------------------------+
//| Akaike's information criteria                                    |
//+------------------------------------------------------------------+
void OLS::aic(void)
  {
   double params = double(m_model_dof)+double(m_kconstant);

   m_aic = -2*m_llf+2*params;

  }
//+------------------------------------------------------------------+
//|Bayes' information criteria                                       |
//+------------------------------------------------------------------+
void OLS::bic(void)
  {
   double params = double(m_model_dof)+double(m_kconstant);

   m_bic = -2*m_llf+MathLog(m_obs)*params;

  }
//+------------------------------------------------------------------+
//|t-statistic for a given parameter estimate                        |
//+------------------------------------------------------------------+
void OLS::tvalues(void)
  {
   m_tvalues = m_params/m_bse;
  }
//+------------------------------------------------------------------+
//|total  sum of squares centered about the mean                     |
//+------------------------------------------------------------------+
void OLS::centeredtss(void)
  {
   vector centered_endog = m_endog - m_endog.Mean();
//---
   m_centeredtss = centered_endog.Dot(centered_endog);
  }
//+------------------------------------------------------------------+
//| The sum of the squared values of the endogenous response variable|
//+------------------------------------------------------------------+
void OLS::uncenteredtss(void)
  {
   m_uncenteredtss = m_endog.Dot(m_endog);
  }
//+------------------------------------------------------------------+
//|R-squared of the model                                            |
//+------------------------------------------------------------------+
void OLS::rsqe(void)
  {
   m_rsqe = (m_kconstant)? 1 - m_sse/m_centeredtss: 1 - m_sse/m_uncenteredtss;
  }
//+------------------------------------------------------------------+
