Calculating Correlation

 

Hi All,

I'm trying to calculate the correlation coefficient between two arrays. I basically want to replicate the Excel "CORREL" function, where I input the two arrays and the function outputs the correlation value.

Does anyone have working code for this that they would be willing to share?

The best I have found so far is the code below, which I found on another forum, but it's giving me incorrect answers. Well, answers that differ from what I get in Excel, anyway!

Any help much appreciated!

Thanks!

#define RET_OK    0
#define RET_ERROR EMPTY
#define VAL_ERROR EMPTY_VALUE

/* =====================================================================
   PearsonCorr_r -- Pearson product-moment correlation coefficient of two
                    arrays (the statistic Excel's CORREL reports).

   vectorX[], vectorY[]   input samples, paired by array index. Only the
                          first N elements of each are used, where N is the
                          smaller of the two first-dimension sizes.
                          NOTE(review): Excel's CORREL rejects ranges of
                          different length instead of truncating, and pairing
                          here is purely by index -- callers should confirm
                          both arrays have the same length and ordering.
   pearson_r              receives r on success, VAL_ERROR on failure.

   returns                RET_OK on success;
                          RET_ERROR when there are fewer than two paired
                          samples or when either input has zero variance
                          (r is undefined there and the formula would
                          divide by zero).

   WORKED EXAMPLE (deviation scores)      >>> http://onlinestatbook.com/2/describing_bivariate_data/calculation.html
        X[]  Y[]  x      y      xy    x^2    y^2
        1    4   -3     -5      15    9     25
        3    6   -1     -3       3    1      9
        5   10    1      1       1    1      1
        5   12    1      3       3    1      9
        6   13    2      4       8    4     16
       _______________________________________

SUM    20   45    0      0      30   16     60
MEAN    4    9    0      0       6

       r = SUM(xy) / SQRT(  SUM( x^2 ) * SUM( y^2 ) )
       r =      30 / SQRT( 960 )
       r = 0.968
   ===================================================================== */
int   PearsonCorr_r( double const &vectorX[], //   |-> INPUT X[]      = { 1, 3,  5,  5,  6 }
                     double const &vectorY[], //   |-> INPUT Y[]      = { 4, 6, 10, 12, 13 }
                     double       &pearson_r  // <=|   returns RESULT = 0.968
                     )
{
      // Number of usable pairs = the shorter of the two inputs.
      int lenX    = ArrayRange( vectorX, 0 );
      int lenY    = ArrayRange( vectorY, 0 );
      int samples = ( lenX < lenY ) ? lenX : lenY;

      // With 0 or 1 pairs every deviation is zero, so r is undefined.
      if ( samples < 2 ){
           pearson_r = VAL_ERROR;
           return( RET_ERROR );
      }

      // Pass 1: means of both series.
      double sumX = 0,
             sumY = 0;
      for ( int ii = 0; ii < samples; ii++ ){
            sumX += vectorX[ii];
            sumY += vectorY[ii];
      }
      double meanX = sumX / samples;
      double meanY = sumY / samples;

      // Pass 2: sums of deviation products, accumulated without temp arrays.
      double sumXY = 0,
             sumX2 = 0,
             sumY2 = 0;
      for ( int jj = 0; jj < samples; jj++ ){
            double devX = vectorX[jj] - meanX;
            double devY = vectorY[jj] - meanY;
            sumXY += devX * devY;
            sumX2 += devX * devX;
            sumY2 += devY * devY;
      }

      // A constant series has no variance: refuse instead of dividing by zero.
      // Written as a negated conjunction so NaN inputs are rejected as well.
      if ( !( sumX2 > 0.0 && sumY2 > 0.0 ) ){
           pearson_r = VAL_ERROR;
           return( RET_ERROR );
      }

      // Taking the roots separately keeps sumX2 * sumY2 from overflowing
      // or underflowing before the square root is applied.
      pearson_r = sumXY / ( MathSqrt( sumX2 ) * MathSqrt( sumY2 ) );
      return( RET_OK );
}
 
James Parker:

Hi All,

I'm trying to calculate the correlation coefficient between two arrays. I basically want to replicate the Excel "CORREL" function, where I input the two arrays and the function outputs the correlation value.

Does anyone have working code for this that they would be willing to share?

The best I have found so far is the code below, which I found on another forum, but it's giving me incorrect answers. Well, answers that differ from what I get in Excel, anyway!

Any help much appreciated!

Thanks!

#define RET_OK    0
#define RET_ERROR EMPTY
#define VAL_ERROR EMPTY_VALUE

/* =====================================================================
   PearsonCorr_r -- Pearson product-moment correlation coefficient of two
                    arrays (the statistic Excel's CORREL reports).

   vectorX[], vectorY[]   input samples, paired by array index. Only the
                          first N elements of each are used, where N is the
                          smaller of the two first-dimension sizes.
                          NOTE(review): Excel's CORREL rejects ranges of
                          different length instead of truncating, and pairing
                          here is purely by index -- callers should confirm
                          both arrays have the same length and ordering.
   pearson_r              receives r on success, VAL_ERROR on failure.

   returns                RET_OK on success;
                          RET_ERROR when there are fewer than two paired
                          samples or when either input has zero variance
                          (r is undefined there and the formula would
                          divide by zero).

   WORKED EXAMPLE (deviation scores)      >>> http://onlinestatbook.com/2/describing_bivariate_data/calculation.html
        X[]  Y[]  x      y      xy    x^2    y^2
        1    4   -3     -5      15    9     25
        3    6   -1     -3       3    1      9
        5   10    1      1       1    1      1
        5   12    1      3       3    1      9
        6   13    2      4       8    4     16
       _______________________________________

SUM    20   45    0      0      30   16     60
MEAN    4    9    0      0       6

       r = SUM(xy) / SQRT(  SUM( x^2 ) * SUM( y^2 ) )
       r =      30 / SQRT( 960 )
       r = 0.968
   ===================================================================== */
int   PearsonCorr_r( double const &vectorX[], //   |-> INPUT X[]      = { 1, 3,  5,  5,  6 }
                     double const &vectorY[], //   |-> INPUT Y[]      = { 4, 6, 10, 12, 13 }
                     double       &pearson_r  // <=|   returns RESULT = 0.968
                     )
{
      // Number of usable pairs = the shorter of the two inputs.
      int lenX    = ArrayRange( vectorX, 0 );
      int lenY    = ArrayRange( vectorY, 0 );
      int samples = ( lenX < lenY ) ? lenX : lenY;

      // With 0 or 1 pairs every deviation is zero, so r is undefined.
      if ( samples < 2 ){
           pearson_r = VAL_ERROR;
           return( RET_ERROR );
      }

      // Pass 1: means of both series.
      double sumX = 0,
             sumY = 0;
      for ( int ii = 0; ii < samples; ii++ ){
            sumX += vectorX[ii];
            sumY += vectorY[ii];
      }
      double meanX = sumX / samples;
      double meanY = sumY / samples;

      // Pass 2: sums of deviation products, accumulated without temp arrays.
      double sumXY = 0,
             sumX2 = 0,
             sumY2 = 0;
      for ( int jj = 0; jj < samples; jj++ ){
            double devX = vectorX[jj] - meanX;
            double devY = vectorY[jj] - meanY;
            sumXY += devX * devY;
            sumX2 += devX * devX;
            sumY2 += devY * devY;
      }

      // A constant series has no variance: refuse instead of dividing by zero.
      // Written as a negated conjunction so NaN inputs are rejected as well.
      if ( !( sumX2 > 0.0 && sumY2 > 0.0 ) ){
           pearson_r = VAL_ERROR;
           return( RET_ERROR );
      }

      // Taking the roots separately keeps sumX2 * sumY2 from overflowing
      // or underflowing before the square root is applied.
      pearson_r = sumXY / ( MathSqrt( sumX2 ) * MathSqrt( sumY2 ) );
      return( RET_OK );
}
Check here : https://www.mql5.com/en/forum/180028
 
#include <Math\Stat\Math.mqh>
//+------------------------------------------------------------------+
//| Script program start function                                    |
//+------------------------------------------------------------------+
void OnStart()
  {
//--- Request 100 closes per symbol on the chart timeframe, starting at
//--- position 1. CopyClose sizes the dynamic buffers itself, so no
//--- ArrayInitialize call is needed on the still-empty arrays.
   double buf1[],buf2[];
   int copied1=CopyClose("EURJPY",PERIOD_CURRENT,1,100,buf1);
   int copied2=CopyClose("USDJPY",PERIOD_CURRENT,1,100,buf2);
//--- A failed request reports a non-positive count, so two failures would
//--- compare equal; reject those explicitly before comparing the counts.
//--- NOTE(review): equal counts do not by themselves prove that the two
//--- series cover the same bar times - confirm if the symbols have gaps.
   if(copied1<=0 || copied2<=0 || copied1!=copied2)
     {
      PrintFormat("CopyClose failed or returned unequal counts: EURJPY=%d USDJPY=%d",copied1,copied2);
      return;
     }
//--- Each library call returns true only when it produced a result.
   double pearson=0.0,spearman=0.0,kendall=0.0;
   if(MathCorrelationPearson(buf1,buf2,pearson))
      PrintFormat("Pearson =%5.8f",pearson);
   if(MathCorrelationSpearman(buf1,buf2,spearman))
      PrintFormat("Spearman =%5.8f",spearman);
   if(MathCorrelationKendall(buf1,buf2,kendall))
      PrintFormat("Kendall =%5.8f",kendall);
  }

 
Mladen Rakic:
Check here : https://www.mql5.com/en/forum/180028

Thank you!

Indicator will be very useful! 

 
ivanivan_11:
#include <Math\Stat\Math.mqh>
//+------------------------------------------------------------------+
//| Script program start function                                    |
//+------------------------------------------------------------------+
void OnStart()
  {
//--- Request 100 closes per symbol on the chart timeframe, starting at
//--- position 1. CopyClose sizes the dynamic buffers itself, so no
//--- ArrayInitialize call is needed on the still-empty arrays.
   double buf1[],buf2[];
   int copied1=CopyClose("EURJPY",PERIOD_CURRENT,1,100,buf1);
   int copied2=CopyClose("USDJPY",PERIOD_CURRENT,1,100,buf2);
//--- A failed request reports a non-positive count, so two failures would
//--- compare equal; reject those explicitly before comparing the counts.
//--- NOTE(review): equal counts do not by themselves prove that the two
//--- series cover the same bar times - confirm if the symbols have gaps.
   if(copied1<=0 || copied2<=0 || copied1!=copied2)
     {
      PrintFormat("CopyClose failed or returned unequal counts: EURJPY=%d USDJPY=%d",copied1,copied2);
      return;
     }
//--- Each library call returns true only when it produced a result.
   double pearson=0.0,spearman=0.0,kendall=0.0;
   if(MathCorrelationPearson(buf1,buf2,pearson))
      PrintFormat("Pearson =%5.8f",pearson);
   if(MathCorrelationSpearman(buf1,buf2,spearman))
      PrintFormat("Spearman =%5.8f",spearman);
   if(MathCorrelationKendall(buf1,buf2,kendall))
      PrintFormat("Kendall =%5.8f",kendall);
  }

Thanks. Do you know where I can get the Math.mqh include file?
 

Quick update on this - I have it working now.

I had a problem with the arrays I was feeding into the function!

Thanks for the responses all. 

Reason: