# Calculating Correlation

Hi All,

I'm trying to calculate the correlation coefficient between two arrays. I basically want to replicate the Excel "CORREL" function, where I input the two arrays and the function outputs the correlation value.

Does anyone have working code for this that they would be willing to share?

The best I have found so far is the code below, which I found on another forum, but it's giving me incorrect answers. Well, answers that differ from what I get in Excel, anyway!

Any help much appreciated!

Thanks!

#define RET_OK    0            // returned when pearson_r holds a valid coefficient
#define RET_ERROR EMPTY        // returned when no coefficient could be computed
#define VAL_ERROR EMPTY_VALUE  // sentinel stored in pearson_r whenever RET_ERROR is returned

//+------------------------------------------------------------------+
//| Pearson product-moment correlation coefficient of two arrays     |
//| (the quantity Excel's CORREL reports).                           |
//|                                                                  |
//| vectorX, vectorY : input samples. If the arrays differ in length |
//|                    only the first min(lenX, lenY) elements of    |
//|                    each are paired; the rest is ignored.         |
//| pearson_r        : receives r on success, VAL_ERROR on failure.  |
//|                                                                  |
//| Returns RET_OK on success. Returns RET_ERROR when there is no    |
//| data, or when either series has zero variance (constant series   |
//| or a single sample), because r is undefined in that case.        |
//|                                                                  |
//| Worked example  >>> http://onlinestatbook.com/2/describing_bivariate_data/calculation.html
//|                                                                  |
//|        X[]  Y[]   x     y     xy   x^2   y^2                     |
//|         1    4   -3    -5     15    9    25                      |
//|         3    6   -1    -3      3    1     9                      |
//|         5   10    1     1      1    1     1                      |
//|         5   12    1     3      3    1     9                      |
//|         6   13    2     4      8    4    16                      |
//|        ______________________________________                    |
//|  SUM   20   45    0     0     30   16    60                      |
//|  MEAN   4    9    0     0      6                                 |
//|                                                                  |
//|  r = SUM(xy) / SQRT( SUM(x^2) * SUM(y^2) )                       |
//|  r =      30 / SQRT( 960 )                                       |
//|  r = 0.968                                                       |
//+------------------------------------------------------------------+
int   PearsonCorr_r( double const &vectorX[], //   |-> INPUT X[]      = { 1, 3,  5,  5,  6 }
                     double const &vectorY[], //   |-> INPUT Y[]      = { 4, 6, 10, 12, 13 }
                     double       &pearson_r  // <=|   returns RESULT = 0.968
                     )
{
   // Number of (x, y) pairs: the shorter of the two first dimensions.
   // Pure integer comparison avoids the double round-trip through MathMin().
   int lenX  = ArrayRange( vectorX, 0 );
   int lenY  = ArrayRange( vectorY, 0 );
   int count = ( lenX < lenY ) ? lenX : lenY;

   if ( count <= 0 ) {
      pearson_r = VAL_ERROR;
      return( RET_ERROR );
   }

   // Pass 1: arithmetic means (count > 0 is guaranteed by the guard above).
   double sumX = 0,
          sumY = 0;
   for ( int i = 0; i < count; i++ ) {
      sumX += vectorX[i];
      sumY += vectorY[i];
   }
   double meanX = sumX / count;
   double meanY = sumY / count;

   // Pass 2: sums of deviation products; each deviation is computed once.
   double sumXY = 0,
          sumX2 = 0,
          sumY2 = 0;
   for ( int k = 0; k < count; k++ ) {
      double dx = vectorX[k] - meanX;
      double dy = vectorY[k] - meanY;
      sumXY += dx * dy;
      sumX2 += dx * dx;
      sumY2 += dy * dy;
   }

   // A series with no spread makes the denominator zero. Dividing anyway
   // would trigger a zero-divide runtime error, so report failure instead.
   // Written as a negated test so a NaN sum is rejected as well.
   if ( !( sumX2 > 0 && sumY2 > 0 ) ) {
      pearson_r = VAL_ERROR;
      return( RET_ERROR );
   }

   // Square roots are taken separately so the product sumX2 * sumY2 cannot
   // overflow or underflow before the root is applied.
   pearson_r = sumXY / ( MathSqrt( sumX2 ) * MathSqrt( sumY2 ) );
   return( RET_OK );
}

James Parker:

Hi All,

I'm trying to calculate the correlation coefficient between two arrays. I basically want to replicate the Excel "CORREL" function, where I input the two arrays and the function outputs the correlation value.

Does anyone have working code for this that they would be willing to share?

The best I have found so far is the code below, which I found on another forum, but it's giving me incorrect answers. Well, answers that differ from what I get in Excel, anyway!

Any help much appreciated!

Thanks!

#define RET_OK    0            // returned when pearson_r holds a valid coefficient
#define RET_ERROR EMPTY        // returned when no coefficient could be computed
#define VAL_ERROR EMPTY_VALUE  // sentinel stored in pearson_r whenever RET_ERROR is returned

//+------------------------------------------------------------------+
//| Pearson product-moment correlation coefficient of two arrays     |
//| (the quantity Excel's CORREL reports).                           |
//|                                                                  |
//| vectorX, vectorY : input samples. If the arrays differ in length |
//|                    only the first min(lenX, lenY) elements of    |
//|                    each are paired; the rest is ignored.         |
//| pearson_r        : receives r on success, VAL_ERROR on failure.  |
//|                                                                  |
//| Returns RET_OK on success. Returns RET_ERROR when there is no    |
//| data, or when either series has zero variance (constant series   |
//| or a single sample), because r is undefined in that case.        |
//|                                                                  |
//| Worked example  >>> http://onlinestatbook.com/2/describing_bivariate_data/calculation.html
//|                                                                  |
//|        X[]  Y[]   x     y     xy   x^2   y^2                     |
//|         1    4   -3    -5     15    9    25                      |
//|         3    6   -1    -3      3    1     9                      |
//|         5   10    1     1      1    1     1                      |
//|         5   12    1     3      3    1     9                      |
//|         6   13    2     4      8    4    16                      |
//|        ______________________________________                    |
//|  SUM   20   45    0     0     30   16    60                      |
//|  MEAN   4    9    0     0      6                                 |
//|                                                                  |
//|  r = SUM(xy) / SQRT( SUM(x^2) * SUM(y^2) )                       |
//|  r =      30 / SQRT( 960 )                                       |
//|  r = 0.968                                                       |
//+------------------------------------------------------------------+
int   PearsonCorr_r( double const &vectorX[], //   |-> INPUT X[]      = { 1, 3,  5,  5,  6 }
                     double const &vectorY[], //   |-> INPUT Y[]      = { 4, 6, 10, 12, 13 }
                     double       &pearson_r  // <=|   returns RESULT = 0.968
                     )
{
   // Number of (x, y) pairs: the shorter of the two first dimensions.
   // Pure integer comparison avoids the double round-trip through MathMin().
   int lenX  = ArrayRange( vectorX, 0 );
   int lenY  = ArrayRange( vectorY, 0 );
   int count = ( lenX < lenY ) ? lenX : lenY;

   if ( count <= 0 ) {
      pearson_r = VAL_ERROR;
      return( RET_ERROR );
   }

   // Pass 1: arithmetic means (count > 0 is guaranteed by the guard above).
   double sumX = 0,
          sumY = 0;
   for ( int i = 0; i < count; i++ ) {
      sumX += vectorX[i];
      sumY += vectorY[i];
   }
   double meanX = sumX / count;
   double meanY = sumY / count;

   // Pass 2: sums of deviation products; each deviation is computed once.
   double sumXY = 0,
          sumX2 = 0,
          sumY2 = 0;
   for ( int k = 0; k < count; k++ ) {
      double dx = vectorX[k] - meanX;
      double dy = vectorY[k] - meanY;
      sumXY += dx * dy;
      sumX2 += dx * dx;
      sumY2 += dy * dy;
   }

   // A series with no spread makes the denominator zero. Dividing anyway
   // would trigger a zero-divide runtime error, so report failure instead.
   // Written as a negated test so a NaN sum is rejected as well.
   if ( !( sumX2 > 0 && sumY2 > 0 ) ) {
      pearson_r = VAL_ERROR;
      return( RET_ERROR );
   }

   // Square roots are taken separately so the product sumX2 * sumY2 cannot
   // overflow or underflow before the root is applied.
   pearson_r = sumXY / ( MathSqrt( sumX2 ) * MathSqrt( sumY2 ) );
   return( RET_OK );
}
Check here: https://www.mql5.com/en/forum/180028

#include <Math\Stat\Math.mqh>
//+------------------------------------------------------------------+
//| Script program start function                                    |
//+------------------------------------------------------------------+
//+------------------------------------------------------------------+
//| Script entry point: copies the last 100 completed close prices   |
//| of EURJPY and USDJPY on the current chart timeframe and prints   |
//| their Pearson, Spearman and Kendall correlation coefficients.    |
//| Prints a diagnostic and stops if the price data is unavailable.  |
//+------------------------------------------------------------------+
void OnStart()
{
//--- dynamic receive buffers; ArrayInitialize() on them would be a no-op
//--- because they have zero size until CopyClose fills them
   double buf1[],buf2[];
//--- start at bar 1 so the still-forming bar 0 is skipped
   int copied1=CopyClose("EURJPY",PERIOD_CURRENT,1,100,buf1);
   int copied2=CopyClose("USDJPY",PERIOD_CURRENT,1,100,buf2);
//--- CopyClose returns -1 on error: comparing the two counts alone would
//--- let a double failure (-1 == -1) slip through
   if(copied1<=0 || copied2<=0)
     {
      PrintFormat("CopyClose failed: EURJPY=%d USDJPY=%d, error %d",copied1,copied2,GetLastError());
      return;
     }
//--- the correlation functions need series of equal length
   if(copied1!=copied2)
     {
      PrintFormat("Bar count mismatch: EURJPY=%d USDJPY=%d",copied1,copied2);
      return;
     }
   double pearson,spearman,kendall;
   if(MathCorrelationPearson(buf1,buf2,pearson))
      PrintFormat("Pearson =%5.8f",pearson);
   else
      Print("MathCorrelationPearson failed");
   if(MathCorrelationSpearman(buf1,buf2,spearman))
      PrintFormat("Spearman =%5.8f",spearman);
   else
      Print("MathCorrelationSpearman failed");
   if(MathCorrelationKendall(buf1,buf2,kendall))
      PrintFormat("Kendall =%5.8f",kendall);
   else
      Print("MathCorrelationKendall failed");
}

Check here: https://www.mql5.com/en/forum/180028

Thank you!

The indicator will be very useful!

ivanivan_11:
#include <Math\Stat\Math.mqh>
//+------------------------------------------------------------------+
//| Script program start function                                    |
//+------------------------------------------------------------------+
//+------------------------------------------------------------------+
//| Script entry point: copies the last 100 completed close prices   |
//| of EURJPY and USDJPY on the current chart timeframe and prints   |
//| their Pearson, Spearman and Kendall correlation coefficients.    |
//| Prints a diagnostic and stops if the price data is unavailable.  |
//+------------------------------------------------------------------+
void OnStart()
{
//--- dynamic receive buffers; ArrayInitialize() on them would be a no-op
//--- because they have zero size until CopyClose fills them
   double buf1[],buf2[];
//--- start at bar 1 so the still-forming bar 0 is skipped
   int copied1=CopyClose("EURJPY",PERIOD_CURRENT,1,100,buf1);
   int copied2=CopyClose("USDJPY",PERIOD_CURRENT,1,100,buf2);
//--- CopyClose returns -1 on error: comparing the two counts alone would
//--- let a double failure (-1 == -1) slip through
   if(copied1<=0 || copied2<=0)
     {
      PrintFormat("CopyClose failed: EURJPY=%d USDJPY=%d, error %d",copied1,copied2,GetLastError());
      return;
     }
//--- the correlation functions need series of equal length
   if(copied1!=copied2)
     {
      PrintFormat("Bar count mismatch: EURJPY=%d USDJPY=%d",copied1,copied2);
      return;
     }
   double pearson,spearman,kendall;
   if(MathCorrelationPearson(buf1,buf2,pearson))
      PrintFormat("Pearson =%5.8f",pearson);
   else
      Print("MathCorrelationPearson failed");
   if(MathCorrelationSpearman(buf1,buf2,spearman))
      PrintFormat("Spearman =%5.8f",spearman);
   else
      Print("MathCorrelationSpearman failed");
   if(MathCorrelationKendall(buf1,buf2,kendall))
      PrintFormat("Kendall =%5.8f",kendall);
   else
      Print("MathCorrelationKendall failed");
}

Thanks. Do you know where I can get the Math.mqh include file?

Quick update on this - I have it working now.

I had a problem with the arrays I was feeding into the function!

Thanks for the responses all.

Reason: