Mahalanobis Distance with the matrix libraries of MQL5, and beer.

Mahalanobis Distance with the matrix libraries of MQL5, and beer.

18 December 2023, 23:41
Lorentzos Roussos
0
130

First, what is it?


Secondly some docs 

Wikipedia page 

We will need : 

  • Covariance matrix
  • Matrix inversion
  • Matrix multiplication
  • Vector mean

The matrix and vector library that MQL5 has covers all of our needs, actually.

Okay so we will build a structure that we can prep once and reuse for as long as the sample set is not changing.

What do we mean by a sample set?

A set of observations with properties.

To simplify, let's say you have 10 candles (chart candles), and they have OHLC. So you have 10 samples and 4 properties, or 4 features.

Here is an example usage

/*
collect some bar data :
OHLC of the 10 most recent closed bars (shifts 1..10)
*/
double open[],high[],low[],close[];
ArrayResize(open,10,0);
ArrayResize(high,10,0);
ArrayResize(low,10,0);
ArrayResize(close,10,0);
for(int i=0;i<10;i++){
   open[i]=iOpen(_Symbol,_Period,i+1);
   high[i]=iHigh(_Symbol,_Period,i+1);
   low[i]=iLow(_Symbol,_Period,i+1);
   close[i]=iClose(_Symbol,_Period,i+1);
   }
//okay now these are our 4 features 
mahalanober M;
M.setup(4,10);//4 features 10 samples
//fill up the 4 features 
  M.fill_feature(0,open);
  M.fill_feature(1,high);
  M.fill_feature(2,low);
  M.fill_feature(3,close);
//done 
  //now let's get the distance of the sample with index 2 (the 3rd collected bar) to the distribution 
    double md=M.distanceOfSampleToDistribution(2);
    Print("Mahalanobis Distance of bar 2 to the distribution "+DoubleToString(md,4));
  //or between 2 samples , index 5 (6th) and index 0 (1st)
    md=M.distanceOfSampleToSample(5,0);
    Print("Mahalanobis Distance of bar[0] to bar[5] in the distribution "+DoubleToString(md,4));

and here is the structure 

struct mahalanober{
       /*
       Mahalanobis distance calculator over a fixed sample set.
       features is an array of vectors so you could say
       a 2D array .
       features[x][y] means we want the value of: 
          the feature with index X
          the sample with index Y
          For example features[0][3] means we want the open price of bar 4 [0][1][2][3]...
       Prep once with setup() + fill_feature() per feature , then reuse for
       as long as the sample set is not changing : the inverse covariance
       matrix is cached and rebuilt whenever the last feature is (re)filled.
       */
       private:
vector features[];                 //one vector of sample values per feature
  bool filled[];                   //true once the matching feature received data
vector feature_means;              //mean of each feature across all samples
matrix covariance_matrix_inverse;  //cached inverse of the covariance matrix
   int total_features,total_samples;
       public:
       mahalanober(void){reset();}
      ~mahalanober(void){reset();}
       //free all storage and zero the dimensions
  void reset(){
       total_features=0;
       total_samples=0;
       ArrayFree(features);
       ArrayFree(filled);
       feature_means.Init(0);
       covariance_matrix_inverse.Init(0,0);
       }
       //allocate storage for _total_features features of _total_samples samples each
  void setup(int _total_features,
             int _total_samples){
       total_features=_total_features;
       total_samples=_total_samples;
       ArrayResize(features,total_features,0);
       ArrayResize(filled,total_features,0);
       ArrayFill(filled,0,total_features,false);
       feature_means.Init(total_features);
       for(int i=0;i<ArraySize(features);i++){
          features[i].Init(total_samples);
          }
       }
       //load the values of one feature across all samples , returns true on success.
       //When the last missing feature arrives the inverse covariance matrix is rebuilt.
  bool fill_feature(int which_feature_ix,
                    double &values_across_samples[]){
       //guard against negative and out of range indices
       if(which_feature_ix>=0&&which_feature_ix<ArraySize(features)){
       if(ArraySize(values_across_samples)==total_samples){
       for(int i=0;i<total_samples;i++){
          features[which_feature_ix][i]=values_across_samples[i];
          }
       feature_means[which_feature_ix]=features[which_feature_ix].Mean();
       filled[which_feature_ix]=true;
       //if all features are filled (re)build the cached inverse covariance matrix
         if(all_filled()){
           calculate_inverse_covariance_matrix();
           }
       return(true);
       }else{
       Print("MHLNB::fill_feature::Amount of values does not match total samples");
       }
       }else{
       Print("MHLNB::fill_feature::Feature("+IntegerToString(which_feature_ix)+") does not exist");
       }
       return(false);
       }
       //distance of one sample to the distribution of all samples ,
       //i.e. sqrt( (x-mean)' * S^-1 * (x-mean) ) . Returns 0.0 on invalid input.
double distanceOfSampleToDistribution(int which_sample){
       if(all_filled()){
       if(which_sample>=0&&which_sample<total_samples){
       //deltas of each feature with it's mean for this sample ,
       //as a column matrix : 1 column , rows as many as features
         matrix deltas;
         deltas.Init(total_features,1);
         for(int i=0;i<total_features;i++){
            deltas[i][0]=features[i][which_sample]-feature_means[i];
            } 
         return(distance_from_deltas(deltas));
         }else{
         Print("MHLNB::distanceOfSampleToDistribution()::Sample ("+IntegerToString(which_sample)+") does not exist returning 0.0");
         }
       }else{
       list_unfilled("distanceOfSampleToDistribution()");
       }
       return(0.0);
       }
       //distance between two samples of the set ,
       //i.e. sqrt( (a-b)' * S^-1 * (a-b) ) . Returns 0.0 on invalid input.
double distanceOfSampleToSample(int sample_a,int sample_b){
       if(all_filled()){
       if(sample_a>=0&&sample_a<total_samples){
       if(sample_b>=0&&sample_b<total_samples){
       //deltas of each feature of a minus each feature of b ,
       //as a column matrix : 1 column , rows as many as features
         matrix deltas;
         deltas.Init(total_features,1);
         for(int i=0;i<total_features;i++){
            deltas[i][0]=features[i][sample_a]-features[i][sample_b];
            } 
         return(distance_from_deltas(deltas));
           }else{
           Print("MHLNB::distanceOfSampleToSample()::Sample ("+IntegerToString(sample_b)+") does not exist returning 0.0");
           }
         }else{
         Print("MHLNB::distanceOfSampleToSample()::Sample ("+IntegerToString(sample_a)+") does not exist returning 0.0");
         }
       }else{
       list_unfilled("distanceOfSampleToSample()");
       }
       return(0.0);
       }
       private:
       //shared core of both distance functions :
       //computes sqrt( deltas' * S^-1 * deltas ) for a (features x 1) delta column
double distance_from_deltas(matrix &deltas){
       //deltas' is 1 x f , times the f x f inverse covariance , times f x 1 deltas
       //resulting in a 1x1 matrix of which we want the square root
       matrix last_term=deltas.Transpose().MatMul(covariance_matrix_inverse).MatMul(deltas);
       double q=last_term[0][0];
       //clamp tiny negative values (numerical error / near singular covariance)
       //so we never hand MathSqrt a negative number
       return(MathSqrt(q>0.0?q:0.0));
       }
       //build the covariance matrix of the sample set and cache its inverse
  void calculate_inverse_covariance_matrix(){  
       /*
       so what happens here ?
       We fill up a matrix where : 
          each row is a sample 
          each column is a feature
       */
       matrix samples_by_features;
       samples_by_features.Init(total_samples,total_features);
       //fill up
       //loop to features
         for(int f=0;f<total_features;f++){
         //loop to samples
           for(int s=0;s<total_samples;s++){
              samples_by_features[s][f]=features[f][s];
              } 
           }
       //build covariance matrix with features in the columns , so false
       matrix covariance_matrix=samples_by_features.Cov(false);
       //but we need the inverse 
       covariance_matrix_inverse=covariance_matrix.Inv();
       }
       //true when setup() ran and every feature has been filled
  bool all_filled(){
       if(total_features>0){
       for(int i=0;i<total_features;i++){
          if(!filled[i]){
            return(false);
            }
          }
       return(true);
       }
       return(false);
       }
       //print one line per still-unfilled feature , fx names the caller
  void list_unfilled(string fx){
       for(int i=0;i<total_features;i++){
          if(!filled[i]){
            Print("MHLNB::"+fx+"::Feature("+IntegerToString(i)+") is not filled!");
            }
          }
       }
};

If you see mistakes let me know 

cheers


Share it with friends: