//+------------------------------------------------------------------+
//|                                                        Solve.mq5 |
//|                                               Gamuchirai Ndawana |
//|                    https://www.mql5.com/en/users/gamuchiraindawa |
//+------------------------------------------------------------------+
#property copyright "Gamuchirai Ndawana"
#property link      "https://www.mql5.com/en/users/gamuchiraindawa"
#property version   "1.00"
#property script_show_inputs

//+------------------------------------------------------------------+
//| System definitions                                               |
//+------------------------------------------------------------------+
//--- HORIZON: extra bar offset added to the shift of every input (X / X_test) fetch in OnStart,
//--- so each input bar sits HORIZON bars further back in history than the training target it is paired with.
//--- START: base bar offset added to every CopyRates shift in OnStart.
#define HORIZON 10
#define START   0

//+------------------------------------------------------------------+
//| User inputs                                                      |
//+------------------------------------------------------------------+
//--- FETCH is the bar count of every CopyRates call and the column count of X and X_test.
input int FETCH =  10;//How many bars should we fetch?

//+------------------------------------------------------------------+
//| Global variables                                                 |
//+------------------------------------------------------------------+
//--- Number of rows in the design matrices: row 0 stays at its initial value of one,
//--- rows 1..4 are overwritten in OnStart with the open, high, low and close series.
int ROWS        =   5;
//--- Dependent variable: close prices copied in OnStart for the training (y) and test (y_test) windows
matrix y,y_test;
//--- Independent variables: ROWS x FETCH matrices initialised to ones, filled row by row in OnStart
matrix X = matrix::Ones(ROWS,FETCH);
matrix X_test = matrix::Ones(ROWS,FETCH);
//--- Regression coefficients obtained in OnStart from the pseudo-inverse of X
matrix b;
//--- Scratch vector that receives each price series returned by CopyRates
vector temp;
//--- One statistic per price row of X (open, high, low, close):
//--- OnStart first stores the row norms here and later overwrites them with the row means
vector row_norms = vector::Zeros(4);
//--- Not referenced anywhere in the visible part of this file
vector error_vector = vector::Zeros(4);

//+------------------------------------------------------------------+
//| Copy FETCH values of one price field into `series`.              |
//|   field  : COPY_RATES_* flag selecting the price series          |
//|   shift  : starting position handed to CopyRates                 |
//|   series : receives the copied values                            |
//| Returns false, after logging the reason, when the copy fails or  |
//| delivers a different number of values than requested.            |
//+------------------------------------------------------------------+
bool LoadPriceSeries(const ulong field,const int shift,vector &series)
  {
   ResetLastError();
   if(!series.CopyRates(Symbol(),PERIOD_CURRENT,field,shift,FETCH))
     {
      PrintFormat("Failed to copy %d bars of %s from shift %d, error %d",FETCH,Symbol(),shift,GetLastError());
      return(false);
     }
   if(series.Size()!=(ulong)FETCH)
     {
      PrintFormat("Expected %d bars of %s from shift %d but received %d",FETCH,Symbol(),shift,(int)series.Size());
      return(false);
     }
   return(true);
  }

//+------------------------------------------------------------------+
//| Fill rows 1..4 of X and X_test with open, high, low and close.   |
//|   centre = false : every training row is divided by its norm     |
//|   centre = true  : every training row has its mean subtracted    |
//| The statistic measured on the training row is stored in          |
//| row_norms and re-used for the matching test row, so the test     |
//| window is always transformed with training statistics.           |
//| Returns false, after logging the reason, when any step fails.    |
//+------------------------------------------------------------------+
bool BuildDesignMatrices(const bool centre)
  {
//--- Order matters: field k lands in row k+1 of both matrices
   ulong fields[4] = {COPY_RATES_OPEN,COPY_RATES_HIGH,COPY_RATES_LOW,COPY_RATES_CLOSE};
   const int train_shift = START+HORIZON+(FETCH*2);
   const int test_shift  = START+HORIZON+(FETCH);

   for(int i=0; i<4; i++)
     {
      //--- Training row
      if(!LoadPriceSeries(fields[i],train_shift,temp))
         return(false);

      if(centre)
        {
         row_norms[i] = temp.Mean();
         if(!X.Row(temp-row_norms[i],i+1))
            return(false);
        }
      else
        {
         row_norms[i] = temp.Norm(VECTOR_NORM_P);
         //--- A zero norm would turn the whole row into inf/nan
         if(row_norms[i]==0.0)
           {
            PrintFormat("Price series %d of %s has zero norm, cannot scale it",i,Symbol());
            return(false);
           }
         if(!X.Row(temp/row_norms[i],i+1))
            return(false);
        }

      //--- Test row, transformed with the statistic of the training row
      if(!LoadPriceSeries(fields[i],test_shift,temp))
         return(false);

      if(centre)
        {
         if(!X_test.Row(temp-row_norms[i],i+1))
            return(false);
        }
      else
        {
         if(!X_test.Row(temp/row_norms[i],i+1))
            return(false);
        }
     }
   return(true);
  }

//+------------------------------------------------------------------+
//| Script program start function                                    |
//| Fits a linear model of future close prices on scaled OHLC data   |
//| with the pseudo-inverse, the native SVD and the OpenBLAS SVD,    |
//| compares their errors on a test window, then repeats the         |
//| exercise on the leading principal components of centred data.    |
//| Everything is reported through Print; the script stops early     |
//| with a log message when data or a factorization is unavailable.  |
//+------------------------------------------------------------------+
void OnStart()
  {
//--- Every CopyRates call below requests FETCH bars
   if(FETCH<1)
     {
      Print("FETCH must be at least 1");
      return;
     }

//--- Observe the input matrix in its original form
   PrintFormat("Input Matrix Gathered From %s",Symbol());
   Print(X);

//--- Fetch the train and test data, each row scaled by the norm of its training row
   if(!BuildDesignMatrices(false))
      return;

//--- The train data
   Print("Input");
   Print(X);

//--- Fill the target
//--- Training inputs start at START+HORIZON+2*FETCH and their targets at START+2*FETCH,
//--- so each target leads its input by HORIZON bars. The test inputs start at
//--- START+HORIZON+FETCH, therefore the test target must start at START+FETCH to keep
//--- the same lead.
   ResetLastError();
   if(!y.CopyRates(Symbol(),PERIOD_CURRENT,COPY_RATES_CLOSE,START+(FETCH*2),FETCH) ||
      !y_test.CopyRates(Symbol(),PERIOD_CURRENT,COPY_RATES_CLOSE,START+(FETCH),FETCH))
     {
      PrintFormat("Failed to copy the target prices of %s, error %d",Symbol(),GetLastError());
      return;
     }
   if(y.Cols()!=(ulong)FETCH || y_test.Cols()!=(ulong)FETCH)
     {
      PrintFormat("Not enough history on %s to fill the targets",Symbol());
      return;
     }

   Print("Target");
   Print(y);

//--- Moore-Penrose pseudo-inverse solution provided natively by MQL5
   b = y.MatMul(X.PInv());

   Print("Pseudo Inverse Solution: ");
   Print(b);

//--- Native MQL5 SVD Solution are also possible without relying on OpenBLAS
   Print("Computing Singular Value Decomposition using MQL5");
   matrix U,VT;
   vector S;
   if(!X.SVD(U,VT,S))
     {
      Print("Native SVD failed");
      return;
     }

   Print("U");
   Print(U);

   Print("VT");
   Print(VT);

   Print("S");
   Print(S);
   matrix SIGMA;
   SIGMA.Diag(S);

//--- OpenBLAS SVD Solution, a substitute for the closed solution provided by the MQL5 developers
   matrix OB_U,OB_VT,OB_SIGMA;
   vector OB_S;

//--- Perform truncated SVD, we will explore what 'truncated' means later.
   PrintFormat("Computing Singular Value Decomposition of %s Data using OpenBLAS",Symbol());
   if(!X.SingularValueDecompositionDC(SVDZ_S,OB_S,OB_U,OB_VT))
     {
      Print("OpenBLAS SVD failed");
      return;
     }

//--- Left singular vectors
   Print("Open BLAS U");
   Print(OB_U);

//--- Right singular vectors
   Print("Open BLAS VT");
   Print(OB_VT);

//--- Sigma is a diagonal matrix; the factorization hands it back as the vector of its
//--- diagonal entries, so it is expanded into matrix form before being inverted
   Print("Open BLAS S");
   Print(OB_S);
   OB_SIGMA.Diag(OB_S);

   Print("Comparing OLS Solutions");
   Print("Native MQL5 Solution");
//--- The native pseudo-inverse solution is used as the reference
   Print(b);

//--- Pseudo-inverse rebuilt from the OpenBLAS factors: VT' * inverse(Sigma) * U'
   Print("OpenBLAS Solution");
   matrix ob_solution = y.MatMul(OB_VT.Transpose().MatMul(OB_SIGMA.Inv()).MatMul(OB_U.Transpose()));
   Print(ob_solution);

//--- Same construction from the native SVD factors.
//--- NOTE(review): the MQL5 reference for matrix::SVD names its second output V and rebuilds
//--- the input as U * Sigma * V', so the matrix called VT here may actually hold V, and its
//--- shape for a ROWS x FETCH input should be confirmed before trusting this product.
   Print("Manual SVD Solution");
   matrix svd_solution = y.MatMul(VT).MatMul(SIGMA.Inv()).MatMul(U.Transpose());
   Print(svd_solution);

//--- Measuring the amount of error
//--- Each error is the Frobenius norm of (prediction - target) on the test window
   PrintFormat("Information Loss in Forcasting %s Market : ",Symbol());
   Print("PInv: ");
   matrix pinv_error = ((b.MatMul(X_test)) - y_test);
   Print(pinv_error.Norm(MATRIX_NORM_FROBENIUS));

//--- Information lost by Manual SVD solution
   Print("Manual SVD: ");
   matrix svd_error = ((svd_solution.MatMul(X_test)) - y_test);
   Print(svd_error.Norm(MATRIX_NORM_FROBENIUS));

//--- Information lost by OpenBLAS SVD solution
   Print("OpenBLAS SVD: ");
   matrix ob_error = ((ob_solution.MatMul(X_test)) - y_test);
   Print(ob_error.Norm(MATRIX_NORM_FROBENIUS));

//+------------------------------------------------------------------+
//| What are we demonstrating here?                                  |
//| 1) We have shown you that any matrix of market data you have,    |
//|    can be analyzed intelligently, to build a linear regression   |
//|    model, using just the raw data.                               |
//| 2) We have demonstrated that the solution to such linear         |
//|    regression problems can be obtained through efficient and     |
//|    dedicated functions available in MQL5 or through matrix       |
//|    factorization.                                                |
//|__________________________________________________________________|
//| I now ask the reader the following question:                     |
//|  "If dedicated functions exist, why bother learning matrix       |
//|   factorization?"                                                |
//+------------------------------------------------------------------+

//--- Matrix factorization gives us a description of the data and its properties
//--- Questions such as: "How stable/chaotic is the market we are in?" can be answered by the factorization we have just performed
//--- Or even questions such as: "How best can I expose the hidden trends in all of this market data?" can still be answered by the factorization we have just performed
//--- These are only a few examples of why these factorizations are worth learning, even though dedicated functions exist.

//--- Any given matrix A can be written as the product of three factors, A = U * S * VT; this is the theorem behind the Singular Value Decomposition.
//--- Each factor is special because each describes different characteristics of its parent.

//--- Let's get to know Sigma, represented as the S in A = U * S * VT.
//--- Sigma tells us how many different modes our market appears to exist in, and how important each mode is.
//--- Reinterpreted in terms of market data, these modes may correspond to investor sentiment.
   PrintFormat("Taking a closer look at The Eigenvalues of %s Market Data: ",Symbol());
   Print(OB_S/OB_S.Sum());
   Print("If sigma has a only few values that are far from 0, then investor's sentiment in this market appears well established and hardly changes");
//--- If Sigma has a lot of values that are all far away from 0, then the market is chaotic and it appears investor's sentiment and expectations constantly change
//--- If Sigma has a few, or even just one value that is far away from 0, then investor sentiment in that market appears stable, and hardly changes.
//--- Traders explicitly looking for fast-action scalping opportunities may use Sigma as a filter of how much energy the market has.
//--- A quiet market will have a few dominant values in Sigma, not ideal for scalpers, better suited for long-term trend traders.

//--- Fetch the train and test data again, this time centred on the mean of each training row, and prepare to perform PCA
   if(!BuildDesignMatrices(true))
      return;

//--- Perform truncated SVD, we will explore what 'truncated' means later.
   Print("Computing Singular Value Decomposition using OpenBLAS");
   if(!X.SingularValueDecompositionDC(SVDZ_S,OB_S,OB_U,OB_VT))
     {
      Print("OpenBLAS SVD of the centred training data failed");
      return;
     }

   OB_SIGMA.Diag(OB_S);

//--- Calculating Principal Components
   Print("Principal Components");
   matrix pc = OB_SIGMA.MatMul(OB_VT);
   Print(pc);

//--- PCA reduces the amount of correlation in our dataset
   Print("How correlated is our new representation of the data?");
//--- NOTE(review): this first figure is the Frobenius norm of the centred data matrix itself,
//--- not of its correlation matrix. X still carries a constant row of ones, so confirm which
//--- comparison is intended before switching it to X.CorrCoef().
   Print(X.Norm(MATRIX_NORM_FROBENIUS));
//--- Size of the correlation matrix of the principal components
   Print(pc.CorrCoef().Norm(MATRIX_NORM_FROBENIUS));

   PrintFormat("Most Important Principal Components of %s Market Data",Symbol());
   Print(OB_S / OB_S.Sum());

   matrix mpc;

//--- Keep only the three leading components as regressors
   mpc.Row(pc.Row(0),0);
   mpc.Row(pc.Row(1),1);
   mpc.Row(pc.Row(2),2);

//--- Note that, unlike X, this reduced input has no row of ones
   Print(mpc);

//--- Regress the target on the leading components
   Print("Performing PCA");
   matrix pca_coefs = y.MatMul(mpc.PInv());

//--- Performing PCA on the test data
//--- NOTE(review): the test window is factorized on its own, so its components are expressed
//--- in a basis (and with signs) that need not match the training basis - confirm this is intended.
   if(!X_test.SingularValueDecompositionDC(SVDZ_S,OB_S,OB_U,OB_VT))
     {
      Print("OpenBLAS SVD of the centred test data failed");
      return;
     }
//--- Rebuild Sigma from the singular values of the TEST data; without this the test
//--- components would be scaled by the training singular values left over from above
   OB_SIGMA.Diag(OB_S);

   Print("Principal Components of Test Data");
   pc = OB_SIGMA.MatMul(OB_VT);

   Print(pc);

   PrintFormat("Most Important Principal Components in %s Market Test Data",Symbol());
   Print(OB_S / OB_S.Sum());

//--- Main principal components
   mpc.Row(pc.Row(0),0);
   mpc.Row(pc.Row(1),1);
   mpc.Row(pc.Row(2),2);

   matrix pca_error = pca_coefs.MatMul(mpc) - y_test;

   Print("PCA Error: ");
   Print(pca_error.Norm(MATRIX_NORM_FROBENIUS));

   Print("OpenBLAS Error: ");
   Print(ob_error.Norm(MATRIX_NORM_FROBENIUS));

   Print("Manual Error: ");
   Print(svd_error.Norm(MATRIX_NORM_FROBENIUS));
  }
//+------------------------------------------------------------------+