统计分布在交易者工作中的作用

31 十二月 2013, 10:09
0
1 358

(Georgiy Aleksandrov)

1. 指定分布下随机数的生成

//+------------------------------------------------------------------+
//|                    Random class definition                       |
//+------------------------------------------------------------------+
class Random
{
private:
   // Generator state: three unsigned 64-bit words, all updated by int64().
   ulong u;
   ulong v;
   ulong w;
public:
   //+------------------------------------------------------------------+
   //| Constructor: seeds the generator with a fixed default value      |
   //+------------------------------------------------------------------+
   void Random()
   {
      randomSet(184467440737095516);
   }
   //+------------------------------------------------------------------+
   //| Seed the generator with j                                        |
   //| The state words are initialised in sequence; int64() is called   |
   //| after each assignment so that every word is advanced before the  |
   //| next one is derived from it.                                     |
   //+------------------------------------------------------------------+
   void randomSet(ulong j)
   {
      w=1;
      v=4101842887655102017;
      u=j^v;
      int64();
      v=u;
      int64();
      w=v;
      int64();
   }
   //+------------------------------------------------------------------+
   //| Advance the state and return the next 64-bit random integer      |
   //+------------------------------------------------------------------+
   ulong int64()
   {
      uint mult=4294957665;
      // u: multiply-and-add update
      u=u*2862933555777941757+7046029254386353087;
      // v: three successive shift-xor steps
      v^=v>>17;
      v^=v<<31;
      v^=v>>8;
      // w: multiplier times the low 32 bits plus the high 32 bits
      w=mult*(w&0xffffffff)+(w>>32);
      // combine: shift-xor scramble of u, added to v, xor-ed with w
      ulong mixed=u^(u<<21);
      mixed^=mixed>>35;
      mixed^=mixed<<4;
      return (mixed+v)^w;
   }
   //+------------------------------------------------------------------+
   //| Return the next random value as a double (int64() scaled by      |
   //| 5.42101086242752217e-20), intended range 0. to 1.                |
   //+------------------------------------------------------------------+
   double doub()
   {
      ulong raw=int64();
      return 5.42101086242752217e-20*raw;
   }
   //+------------------------------------------------------------------+
   //| Return the next random value truncated to 32 bits                |
   //+------------------------------------------------------------------+
   uint int32()
   {
      ulong raw=int64();
      return (uint)raw;
   }
};

//+------------------------------------------------------------------+
//|                    CNormaldev class definition                   |
//+------------------------------------------------------------------+
class CNormaldev : public Random
{
public:
   // Distribution object whose mu and sig fields parameterise dev().
   CNormaldist       N;
   //+------------------------------------------------------------------+
   //| Constructor: default-constructed distribution, fixed seed        |
   //+------------------------------------------------------------------+
   void CNormaldev()
   {
      CNormaldist defaults;
      setNormaldev(defaults,18446744073709);
   }
   //+------------------------------------------------------------------+
   //| Copy mu and sig from Nn and re-seed the generator with j         |
   //+------------------------------------------------------------------+
   void setNormaldev(CNormaldist &Nn,ulong j)
   {
      randomSet(j);
      N.sig=Nn.sig;
      N.mu=Nn.mu;
   }
   //+------------------------------------------------------------------+
   //| Return one deviate: N.mu + N.sig * (b/a), where the pair (a,b)   |
   //| is redrawn from doub() until it passes the acceptance tests      |
   //+------------------------------------------------------------------+
   double dev()
   {
      double a,b,dx,dy,quad;
      while(true)
      {
         a=doub();
         b=1.7156*(doub()-0.5);
         dx=a-0.449871;
         dy=fabs(b)+0.386595;
         quad=pow(dx,2)+dy*(0.19600*dy-0.25472*dx);
         // accept immediately unless quad exceeds the inner bound
         if(!(quad>0.27597))
            break;
         // reject immediately when quad exceeds the outer bound
         if(quad>0.27846)
            continue;
         // in-between band: decide with the logarithmic test
         if(pow(b,2)>-4.*log(a)*pow(a,2))
            continue;
         break;
      }
      return N.mu+N.sig*b/a;
   }
};
//+------------------------------------------------------------------+

// C++ excerpt shown for comparison with the MQL5 class above (it is not MQL5).
// It declares two type aliases and a struct Normaldev that derives from Ran;
// the constructor's body and the rest of the struct are elided ("...").
typedef double Doub;              // Doub is an alias for double
typedef unsigned __int64 Ullong;  // Ullong is an alias for an unsigned 64-bit integer

struct Normaldev : Ran
{
Doub mu,sig; // presumably the mean and standard deviation — confirm against the full source
Normaldev(Doub mmu, Doub ssig, Ullong i) // constructor taking two Doub values and one Ullong
...
}

2. 分布参数的估计、统计假设

2.1 使用 CExpStatistics 类处理样本

//+------------------------------------------------------------------+
//|             Expected Statistics class definition                 |
//+------------------------------------------------------------------+
// Declaration of the sample-statistics class; only the constructor and
// destructor are defined inline (both empty), every other method is
// declared here and implemented elsewhere.
// NOTE(review): most methods take a flag `bool A` whose meaning is not
// visible in this declaration — presumably it selects between the initial
// and the processed array; confirm against the implementation.
class CExpStatistics
{
private:
   // initial array
   double arr[];
   // initial array size
   int N;
   // processed array
   double Parr[];
   // processed array size
   int pN;
   // standardization
   void stdz(double &outArr_st[],bool A);

public:
   // set array for processing
   void setArrays(bool A,double &Arr[],int &n);
   // array processed?
   bool isProcessed;
   // constructor
   void CExpStatistics() {}
   // set the initial array for the class
   void setCExpStatistics(double &Arr[]);
   // check the input array for zero elements
   void ZeroCheckArray(bool A);
   // get the initial array length
   int get_arr_N();
   // median
   double median(bool A);
   // median of 50% interquantile range (midquartile range)
   double median50(bool A);
   // mean of the entire initial sample
   double mean(bool A);
   // mean of 50% interquantile range
   double mean50(bool A);
   // interquartile range
   double interqtlRange(bool A);
   // range center
   double RangeCenter(bool A);
   // mean of the top five estimates
   double meanCenter(bool A);
   // estimated variance
   double expVariance(bool A);
   // shifted estimate of sample variance
   double expSampleVariance(bool A);
   // estimated standard deviation
   double expStddev(bool A);
   // moment of distribution
   double Moment(int index,bool A,int sw,double xm);
   // estimated kurtosis (returned) and skewness (written to Skewness)
   double expKurtosis(bool A,double &Skewness);
   // censoring coefficient
   double censorR(bool A);
   // deletion of outliers from the sample
   int outlierDelete();
   // processed array output
   int pArrOutput(double &outArr[],bool St);
   // destructor
   void ~CExpStatistics() {}
};
//+------------------------------------------------------------------+

2.2 创建经过处理的样本直方图

int Sturges(int n)
/*
Function for determining the number of class intervals using Sturges' rule.
Variables:
n is the number of sampling observations.
Returned value:
number of class intervals, floor(1 + log2(n)), capped at 15;
1 when n is less than 1 (the logarithm is undefined there).
*/
{
   if(n<1)             // no observations: fall back to a single class
      return(1);
   double s;           // Returned value
   s=1.+log2((double)n);
   if(s>15)            // Empirical rule: never use more than 15 classes
      s=15;
   return((int)floor(s));
}

void  Allocate(double &data[],int n,double &f[],double &b[],int k)
/*
Function for allocating observations to classes.
Variables:
1) data — initial sample (array)
2) n — sample size (number of leading elements of data that are grouped)
3) f — calculated array of observations allocated to classes
4) b — array of class midpoints
5) k — number of classes
Notes:
- f and b must already hold at least k elements; they are not resized here.
- Nothing is written when k is less than 1 or when data is empty.
- The first class has no lower bound and the last class has no upper bound,
  so an observation lying exactly on the outer edge of the range (for
  example a sample minimum equal to 0, which the 1% widening cannot move)
  is still counted.
*/
{
   if(k<1)
      return;                          // no classes requested; also avoids division by zero
   int minIdx=ArrayMinimum(data);
   int maxIdx=ArrayMaximum(data);
   if(minIdx<0 || maxIdx<0)
      return;                          // empty array: no minimum/maximum to work with
   int i,j;                            // Loop counters
   double lo,hi,half;                  // Range bounds and half of the class interval
   lo=data[minIdx];                    // Sample minimum
   lo=lo>0 ? lo*0.99 : lo*1.01;        // widen the range downwards by 1%
   hi=data[maxIdx];                    // Sample maximum
   hi=hi>0 ? hi*1.01 : hi*0.99;        // widen the range upwards by 1%
   half=(hi-lo)/k/2;                   // Half of the class interval
   b[0]=lo+half;                       // Array of class interval midpoints
   f[0]=0;
   for(i=1; i<k; i++)
   {
      b[i]=b[i-1]+half+half;
      f[i]=0;
   }
   // Grouping: each observation goes to the first class whose interval contains it
   for(i=0; i<n; i++)
      for(j=0; j<k; j++)
      {
         bool aboveLower=(j==0)   || data[i]>b[j]-half;
         bool belowUpper=(j==k-1) || data[i]<=b[j]+half;
         if(aboveLower && belowUpper)
         {
            f[j]++;
            break;
         }
      }
}

2.3 正态性假设

//+------------------------------------------------------------------+
//                   the Jarque-Bera Test                            |
//+------------------------------------------------------------------+
void jarqueberatest(double &x[],double &p)
/*
Jarque-Bera normality test: checks the hypothesis that the sample x was
drawn from a normal random variable with unknown mean and variance.
Variables:
x - sample;
p - receives the p-value (set to 1.0 when the sample has fewer than
    5 elements, in which case the statistic is not computed).
*/
{
   int count=ArraySize(x);
   double stat;
   p=0.;
   if(count>=5)
   {
      // sample is large enough: compute the statistic, then its p-value
      jarquebera_jarqueberastatistic(x,count,stat);
      p=jarquebera_jarqueberaapprox(count,stat);
   }
   else
   {
      // sample is too small
      p=1.0;
   }
}
//+------------------------------------------------------------------+

3. 分布拟合

3.1. 估算与测试

void chsone(double &f[],double &ebins[],double &df,
            double &chsq,double &prob,const int knstrn=1)
/*
Chi-square test of observed class frequencies against expected ones.
1) f — array of observations allocated to classes (modified: small classes are pooled)
2) ebins - array of expected frequencies (modified: small classes are pooled)
3) df - receives the number of degrees of freedom (never less than 1)
4) chsq — receives the chi-square statistic
5) prob - receives gam.gammq(0.5*df,0.5*chsq), the chi-square probability
6) knstrn — constraint subtracted from the number of classes to obtain df
Processing:
- Input is validated before anything is modified. If ebins is shorter than
  f, or any expected frequency is negative, or an expected frequency is 0
  while the observed one is positive, the function returns with chsq=0 and
  prob=0.
- Classes are scanned from both tails towards the middle. A class whose
  expected frequency is not above 5 is pooled into its inner neighbour and
  df is reduced by one; otherwise its term is added to chsq.
- The last class reached (index nbins/2) has no unprocessed neighbour left,
  so it is never pooled: its term is added when its expected frequency is
  positive, otherwise df is reduced by one.
*/
{
   CGamma gam;
   int j,nbins=ArraySize(f);
   int mid=nbins/2;              // first index handled by the right-hand pass
   double temp;
   df=nbins-knstrn;
   chsq=0.0;
   prob=0.0;
   //--- validate the input before modifying the arrays
   if(ArraySize(ebins)<nbins)
      return;
   for(j=0;j<nbins;j++)
      if(ebins[j]<0.0 || (ebins[j]==0. && f[j]>0.))
         return;                 // bad expected number
   //--- passing through the left side of the distribution
   for(j=0;j<mid;j++)
   {
      if(ebins[j]<=5.0)
      {
         --df;
         ebins[j+1]+=ebins[j];   // pool into the next class towards the middle
         f[j+1]+=f[j];
      }
      else
      {
         temp=f[j]-ebins[j];
         chsq+=temp*temp/ebins[j];
      }
   }
   //--- passing through the right side of the distribution, starting with the last class
   for(j=nbins-1;j>=mid;j--)
   {
      if(j>mid && ebins[j]<=5.0)
      {
         --df;
         ebins[j-1]+=ebins[j];   // pool into the next class towards the middle
         f[j-1]+=f[j];
      }
      else if(ebins[j]>0.0)
      {
         temp=f[j]-ebins[j];
         chsq+=temp*temp/ebins[j];
      }
      else
         --df;                   // empty class: nothing expected, nothing observed
   }
   if(df<1)df=1; //compensate
   prob=gam.gammq(0.5*df,0.5*chsq); //Chi-square probability function
}

"Chi-square statistic:1.89; probability of rejecting a true null hypothesis: 0.8648"

"Chi-square statistic:6.17; probability of rejecting a true null hypothesis: 0.4040"

Jarque-Bera 测试："The Jarque-Bera test:probability of rejecting a true null hypothesis is 0.9381";
Parameter estimation:Normal distribution:X~Nor(3.58, 2.94);
Chi-square test results:"Chi-square statistic:0.38; probability of rejecting a true null hypothesis: 0.9843".

3.2 随机变量值的概率

cdf(750) - cdf(500) = 0.84 - 0.59 = 0.25.

总结

# 文件 路径 说明
1 Distribution_class.mqh %MetaTrader%\MQL5\Include 分布类的库
2 DistributionFigure_class.mqh %MetaTrader%\MQL5\Include 分布的图表显示类
3 Random_class.mqh %MetaTrader%\MQL5\Include 随机数样本生成类
4 ExpStatistics_class.mqh %MetaTrader%\MQL5\Include 统计特征估计的类和函数
5 volatilityTest.mq5 %MetaTrader%\MQL5\Scripts EURUSD H4 波动样本估计的脚本
6 returnsTest.mq5 %MetaTrader%\MQL5\Scripts EURUSD H4 收益率样本估计的脚本
7 randomTest.mq5 %MetaTrader%\MQL5\Scripts 随机变量样本估计的脚本
8 fitAll.mq5 %MetaTrader%\MQL5\Scripts 所有分布的拟合及估计的脚本
9 Volat.csv %MetaTrader%\MQL5\Files EURUSD H4 波动样本数据文件
10 Returns_std.csv %MetaTrader%\MQL5\Files EURUSD H4 收益率样本数据文件
11 Randoms.csv %MetaTrader%\MQL5\Files 随机变量样本数据文件
12 Histogram.htm %MetaTrader%\MQL5\Files HTML 格式的样本直方图
13 Histogram2.htm %MetaTrader%\MQL5\Files HTML 格式的样本双直方图
14 chi_test.htm %MetaTrader%\MQL5\Files 样本估计的统计 HTML 报告
15 dataHist.txt %MetaTrader%\MQL5\Files 用于显示样本直方图的数据
16 dataHist2.txt %MetaTrader%\MQL5\Files 用于显示样本双直方图的数据
17 dataFitAll.txt %MetaTrader%\MQL5\Files 用于显示 HTML 报告的数据
18 highcharts.js %MetaTrader%\MQL5\Files 互动式图表的 JavaScript 库
19 jquery.min.js %MetaTrader%\MQL5\Files JavaScript 库
20 ReturnsIndicator.mq5 %MetaTrader%\MQL5\Indicators 对数收益率指标

data.zip (64.83 KB)
random_class.mqh (51.46 KB)
fitall.mq5 (14.57 KB)
randomtest.mq5 (9.37 KB)
returnstest.mq5 (8.89 KB)
volatilitytest.mq5 (4.65 KB)