//+------------------------------------------------------------------+
//|                                             transfer_entropy.mqh |
//|                                  Copyright 2024, MetaQuotes Ltd. |
//|                                             https://www.mql5.com |
//+------------------------------------------------------------------+
#property copyright "Copyright 2024, MetaQuotes Ltd."
#property link      "https://www.mql5.com"
#include<TestUtilities.mqh>
#include<OLS.mqh>
#include<np.mqh>
//+------------------------------------------------------------------+
//|significance results struct                                       |
//+------------------------------------------------------------------+
struct SigResult
  {
   vector            pvalue;   // [0],[1] : share of shuffled runs whose TE exceeded the observed TE
   vector            zscore;   // [0],[1] : (observed TE - mean shuffled TE) / std of shuffled TE
   vector            mean;     // [0],[1] : mean TE over the shuffled runs

   //--- Sizes every vector to two elements (one per direction) and gives
   //--- them defined defaults. Resize() alone is not relied upon to set the
   //--- element values, and significance() can return early without writing
   //--- anything, so the defaults are chosen to read as "no evidence":
   //--- p-value 1, z-score 0, mean 0.
                     SigResult(void)
     {
      if(!pvalue.Resize(2) || !zscore.Resize(2) || !mean.Resize(2))
        {
         Print(__FUNCTION__, " error ", GetLastError());
         return;
        }

      pvalue.Fill(1.0);
      zscore.Fill(0.0);
      mean.Fill(0.0);
     }

  };
//+------------------------------------------------------------------+
//| Transfer entropy results struct                                  |
//+------------------------------------------------------------------+
struct TEResult
  {
   //--- Every vector holds one element per data window, filled by
   //--- CTransEntropy::Calculate_Linear_TE / Calculate_NonLinear_TE.
   //--- "XY" members come from column 0 of the per-window result matrices
   //--- (dependent = dataset column 0, independent = dataset column 1),
   //--- "YX" members from column 1 (the roles swapped).
   vector            TE_XY;       // transfer entropy, column 0 of the result matrix
   vector            TE_YX;       // transfer entropy, column 1 of the result matrix
   vector            p_value_XY;  // SigResult.pvalue[0] per window (set only when shuffling is requested)
   vector            p_value_YX;  // SigResult.pvalue[1] per window
   vector            z_score_XY;  // SigResult.zscore[0] per window
   vector            z_score_YX;  // SigResult.zscore[1] per window
   vector            Ave_TE_XY;   // SigResult.mean[0] per window (mean TE of the shuffled runs)
   vector            Ave_TE_YX;   // SigResult.mean[1] per window
  };

//+------------------------------------------------------------------+
//| Type of entropy transfer analysis                                |
//+------------------------------------------------------------------+
enum ENUM_TE_TYPE
  {
   // Selects the estimator used by significance(): LINEAR_TE runs
   // Calculate_Linear_TE, NONLINEAR_TE runs Calculate_NonLinear_TE.
   // NOTE(review): in MQL5 the trailing comment of an enum member is shown
   // as its label in input dialogs - keep those comments unchanged.
   LINEAR_TE=0,//linear transfer entropy
   NONLINEAR_TE//nonlinear transfer entropy
  };
//+------------------------------------------------------------------+
//|class that generates windows of the dataset to be analyzed        |
//+------------------------------------------------------------------+
class CDataWindows
  {
private:
   matrix m_dwins[],       // generated windows (a single element when windowing is off)
          m_data;          // working dataset, lag columns appended when a lag is given
   ulong  m_lag,           // lag used to build the lagged columns
          m_win_size,      // number of rows per window
          m_stride_size;   // row step between consecutive windows

   bool m_max_lag_only,    // true : append only the series shifted by m_lag
        m_has_windows;     // true when a window size was supplied

   //--- Builds the lagged dataset from m_data into 'out'.
   //--- The first m_lag rows are dropped and lagged copies of the first two
   //--- columns are appended after the existing columns:
   //---   max-lag mode : one lagged column per series (2 extra columns)
   //---   all-lags mode: lags 1..m_lag of column 0, then of column 1
   //--- Returns false when any resize / column assignment fails.
   bool              applylags(matrix &out)
     {
      out = np::sliceMatrixRows(m_data,m_lag);

      //--- lag columns start right after the original ones; for the
      //--- two-column input used by CTransEntropy this is index 2
      ulong base = m_data.Cols();
      ulong extra = 2;
      if(!m_max_lag_only)
         extra = m_lag*2;

      if(!out.Resize(out.Rows(),base+extra))
        {
         Print(__FUNCTION__, " error ", GetLastError());
         return false;
        }

      if(m_max_lag_only)
        {
         for(ulong i = 0; i<2; i++)
           {
            vector col = m_data.Col(i);
            col = np::sliceVector(col,0,col.Size()-m_lag);

            if(!out.Col(col,base+i))
              {
               Print(__FUNCTION__, " error ", GetLastError());
               return false;
              }
           }
        }
      else
        {
         ulong k = base;
         for(ulong i = 0; i<2; i++)
           {
            for(ulong t = 1; t<(m_lag+1); t++,k++)
              {
               vector col = m_data.Col(i);
               col = np::sliceVector(col,m_lag-t,col.Size()-t);

               if(!out.Col(col,k))
                 {
                  Print(__FUNCTION__, " error ", GetLastError());
                  return false;
                 }
              }
           }
        }

      return true;
     }

   //--- Cuts m_data into windows of m_win_size rows.
   //--- The window end index starts at m_stride_size+m_win_size and advances
   //--- by the stride (at least 1) while it stays below the row count.
   bool              applywindows(void)
     {
      if(m_dwins.Size())
         ArrayFree(m_dwins);

      ulong step = ulong(MathMax(m_stride_size,1));

      for(ulong i = (m_stride_size+m_win_size); i<m_data.Rows(); i+=step)
        {
         if(ArrayResize(m_dwins,int(m_dwins.Size()+1),100)<0)
           {
            Print(__FUNCTION__," error ", GetLastError());
            return false;
           }
         //--- rows [i-m_win_size, i) as interpreted by np::sliceMatrixRows
         m_dwins[m_dwins.Size()-1] = np::sliceMatrixRows(m_data,i-m_win_size,i);
        }

      return true;
     }


public:
                     CDataWindows(void)
     {
      //--- start from a defined state
      m_lag = 0;
      m_win_size = 0;
      m_stride_size = 0;
      m_max_lag_only = true;
      m_has_windows = false;
     }

                    ~CDataWindows(void)
     {

     }

   //--- Prepares the dataset.
   //---   data          : input matrix, at least two columns
   //---   lag           : when non-zero, lagged columns are appended
   //---   max_lag_only  : append only the maximum lag instead of lags 1..lag
   //---   window_size   : when non-zero, the data is split into windows
   //---   window_stride : row step between windows
   //--- Returns false on invalid input or when a matrix operation fails.
   bool              Initialize(matrix &data, ulong lag, bool max_lag_only=true, ulong window_size=0, ulong window_stride =0)
     {
      if(data.Cols()<2)
        {
         Print(__FUNCTION__, " matrix should contain at least 2 columns ");
         return false;
        }

      //--- the lag is subtracted from unsigned sizes further down, so it
      //--- has to be smaller than the number of rows
      if(lag>=data.Rows())
        {
         Print(__FUNCTION__, " lag should be less than the number of rows ");
         return false;
        }

      m_data = data;
      m_lag = lag;
      m_max_lag_only = max_lag_only;

      if(lag)
        {
         matrix lagged;
         if(!applylags(lagged))
            return false;
         m_data = lagged;
        }

      if(window_size)
        {
         m_win_size = window_size;
         m_stride_size = window_stride;
         m_has_windows = true;
         if(!applywindows())
            return false;
        }
      else
        {
         m_has_windows = false;

         if(m_dwins.Size())
            ArrayFree(m_dwins);

         if(ArrayResize(m_dwins,1)<0)
           {
            Print(__FUNCTION__," error ", GetLastError());
            return false;
           }

         //--- no windowing : the whole dataset is the only window
         m_dwins[0]=m_data;
        }

      return true;
     }

   //--- Returns the window at 'ind', or a 1x1 zero matrix when out of range.
   matrix            getWindowAt(ulong ind)
     {
      if(ind < ulong(m_dwins.Size()))
         return m_dwins[ind];
      else
        {
         Print(__FUNCTION__, " Index out of bounds ");
         return matrix::Zeros(1,1);
        }
     }

   //--- Number of stored windows.
   ulong             numWindows(void)
     {
      return ulong(m_dwins.Size());
     }

   //--- True when the last Initialize call requested windowing.
   bool              hasWindows(void)
     {
      return m_has_windows;
     }
  };

//+------------------------------------------------------------------+
//| class implementing transfer entropy analysis                     |
//+------------------------------------------------------------------+
class CTransEntropy
  {
private:
   matrix            m_dataset;             // column 0 = endogenous series, column 1 = exogenous series
   TEResult          m_results;             // per-window results of the last calculation
   ulong             m_endog,m_exog,m_tlag; // source column indices and the lag
   bool              m_maxlagonly;          // true : only the maximum lag is used
   vector            m_transfer_entropies;  // [0],[1] : values of the most recent window
   CDataWindows      m_wins;                // lagged / windowed view of m_dataset

   //--- Histogram based transfer entropy towards column 'dep_index'.
   //--- Builds four column sets and combines their entropies:
   //---   one   = dependent, lagged dependent, lagged independent
   //---   two   = lagged independent, lagged dependent
   //---   three = dependent, lagged dependent
   //---   four  = lagged dependent
   //--- Returns 0.0 when a matrix operation or an entropy estimate fails.
   double            nonlinear_transfer(matrix &testdata,long dep_index, long indep_index, ulong numbins)
     {
      double entropy=0.0;

      matrix one;
      matrix two;
      matrix three;
      matrix four;

      if(m_maxlagonly)
        {
         //--- layout : [series 0, series 1, lagged series 0, lagged series 1]
         //--- 'four' holds the lagged dependent series, matching the
         //--- all-lags branch below (h3-h4 is the entropy of the dependent
         //--- series conditioned on its own lag)
         if(!one.Resize(testdata.Rows(),3) || !two.Resize(testdata.Rows(),2) || !three.Resize(testdata.Rows(),2) || !four.Resize(testdata.Rows(),1) ||
            !one.Col(testdata.Col(dep_index),0) || !one.Col(testdata.Col(dep_index+2),1) || !one.Col(testdata.Col(indep_index+2),2) ||
            !two.Col(testdata.Col(indep_index+2),0) || !two.Col(testdata.Col(dep_index+2),1) ||
            !three.Col(testdata.Col(dep_index),0) || !three.Col(testdata.Col(dep_index+2),1) ||
            !four.Col(testdata.Col(dep_index+2),0))
           {
            Print(__FUNCTION__, " error ", GetLastError());
            return entropy;
           }
        }
      else
        {

         if(!one.Resize(testdata.Rows(), testdata.Cols()-1) || !two.Resize(testdata.Rows(), testdata.Cols()-2) ||
            !three.Resize(testdata.Rows(), m_tlag+1))
           {
            Print(__FUNCTION__, " error ", GetLastError());
            return entropy;
           }

         //--- lag blocks : columns [2,2+lag) belong to series 0, the rest to series 1
         matrix deplag = np::sliceMatrixCols(testdata,dep_index?dep_index+m_tlag+1:2,dep_index?END:2+m_tlag);
         matrix indlag = np::sliceMatrixCols(testdata,indep_index?indep_index+m_tlag+1:2,indep_index?END:2+m_tlag);
         //one
         if(!np::matrixCopyCols(one,deplag,1,1+m_tlag) || !np::matrixCopyCols(one,indlag,1+m_tlag) || !one.Col(testdata.Col(dep_index),0))
           {
            Print(__FUNCTION__, " error ", GetLastError());
            return entropy;
           }
         //two
         //--- independent lags fill the leading columns, dependent lags the
         //--- remaining ones, so that no column of 'two' is left unset
         //--- (argument order follows the other matrixCopyCols calls in
         //--- this method : destination start, destination stop)
         if(!np::matrixCopyCols(two,indlag,0,indlag.Cols()) || !np::matrixCopyCols(two,deplag,indlag.Cols()))
           {
            Print(__FUNCTION__, " error ", GetLastError());
            return entropy;
           }
         //three
         if(!np::matrixCopyCols(three,deplag,1) || !three.Col(testdata.Col(dep_index),0))
           {
            Print(__FUNCTION__, " error ", GetLastError());
            return entropy;
           }
         //four
         four = deplag;
        }

      double h1=get_entropy(one,numbins);
      double h2=get_entropy(two,numbins);
      double h3=get_entropy(three,numbins);
      double h4=get_entropy(four,numbins);

      //--- get_entropy reports failure with EMPTY_VALUE; do not mix it into the result
      if(h1==EMPTY_VALUE || h2==EMPTY_VALUE || h3==EMPTY_VALUE || h4==EMPTY_VALUE)
        {
         Print(__FUNCTION__, " error : entropy estimation failed ");
         return entropy;
        }

      // entropy = independent conditional entropy (h3-h4)  - joint conditional entropy (h1-h2)
      entropy = (h3-h4) - (h1-h2);

      return entropy;

     }

   //--- Entropy of the joint histogram of the columns of 'testdata'
   //--- using natural logarithms. Returns EMPTY_VALUE when the histogram
   //--- cannot be built.
   double            get_entropy(matrix &testdata, ulong num_bins)
     {

      vector hist;
      vector bounds[];
      hist=vector::Ones(10);

      if(!np::histogramdd(testdata,num_bins,hist,bounds))
        {
         Print(__FUNCTION__, " error ");
         return EMPTY_VALUE;
        }

      vector pdf = hist/hist.Sum();
      vector lpdf = pdf;

      //--- empty bins contribute nothing : log(1) = 0 and pdf is 0 there
      for(ulong i = 0; i<pdf.Size(); i++)
        {
         if(lpdf[i]==0.0)
            lpdf[i] = 1.0;
        }

      vector ent = pdf*log(lpdf);

      return -1.0*ent.Sum();

     }

   //--- Regression based transfer entropy towards column 'dep_index':
   //--- half the log ratio of the residual variance of the restricted
   //--- model (own lags) to that of the joint model.
   //--- Returns 0.0 on any failure or when a residual variance is not positive.
   double            linear_transfer(matrix &testdata,long dep_index, long indep_index)
     {
      vector joint_residuals,independent_residuals;
      double entropy=0.0;

      OLS ols;

      vector y;
      matrix x,xx;

      matrix joint;
      if(m_maxlagonly)
         joint = np::sliceMatrixCols(testdata,2);
      else
        {
         if(!joint.Resize(testdata.Rows(), testdata.Cols()-1))
           {
            Print(__FUNCTION__, " error ", GetLastError());
            return entropy;
           }
         matrix sliced = np::sliceMatrixCols(testdata,2);
         if(!np::matrixCopyCols(joint,sliced,1) || !joint.Col(testdata.Col(indep_index),0))
           {
            Print(__FUNCTION__, " error ", GetLastError());
            return entropy;
           }
        }
      matrix indep = (m_maxlagonly)?np::sliceMatrixCols(testdata,dep_index+2,dep_index+3):np::sliceMatrixCols(testdata,(dep_index==0)?2:dep_index+m_tlag+1,(dep_index==0)?2+m_tlag:END);

      y = testdata.Col(dep_index);

      //--- reorder the joint regressors when the dependent series is the second one
      if(dep_index>indep_index)
        {
         if(m_maxlagonly)
           {
            if(!joint.SwapCols(0,1))
              {
               Print(__FUNCTION__, " error ", GetLastError());
               return entropy;
              }
           }
         else
           {
            for(ulong i = 0; i<m_tlag; i++)
              {
               if(!joint.SwapCols(i,i+m_tlag))
                 {
                  Print(__FUNCTION__, " error ", GetLastError());
                  return entropy;
                 }
              }
           }
        }

      if(!addtrend(joint,xx))
         return entropy;

      if(!ols.Fit(y,xx))
         return entropy;

      joint_residuals = ols.Residuals();

      if(!addtrend(indep,x))
         return entropy;

      if(!ols.Fit(y,x))
         return entropy;

      independent_residuals = ols.Residuals();

      double joint_var = joint_residuals.Var();
      double indep_var = independent_residuals.Var();

      //--- a zero variance would divide by zero / take the log of zero
      if(joint_var<=0.0 || indep_var<=0.0)
        {
         Print(__FUNCTION__, " error : residual variance is not positive ");
         return entropy;
        }

      entropy = log(indep_var/joint_var)/2.0;

      return entropy;

     }

   //--- Shared implementation of both public Calculate_* methods.
   //--- Computes both directions for every window and, when n_shuffles is
   //--- non-zero, the significance statistics. Result matrices are created
   //--- zero-filled so that statistics which are not computed are 0 rather
   //--- than indeterminate.
   bool              calculate(ENUM_TE_TYPE type, ulong numBins, ulong n_shuffles)
     {
      ulong c = m_wins.numWindows();

      matrix TE = matrix::Zeros(c,2);
      matrix sTE = matrix::Zeros(c,2);
      matrix pvals = matrix::Zeros(c,2);
      matrix zscores = matrix::Zeros(c,2);

      for(ulong i=0; i<c; i++)
        {
         matrix df = m_wins.getWindowAt(i);

         if(type==NONLINEAR_TE)
           {
            m_transfer_entropies[0] = nonlinear_transfer(df,0,1,numBins);
            m_transfer_entropies[1] = nonlinear_transfer(df,1,0,numBins);
           }
         else
           {
            m_transfer_entropies[0] = linear_transfer(df,0,1);
            m_transfer_entropies[1] = linear_transfer(df,1,0);
           }

         if(!TE.Row(m_transfer_entropies,i))
           {
            Print(__FUNCTION__, " error ", GetLastError());
            return false;
           }

         if(n_shuffles)
           {
            SigResult rlts;

            //--- inside a window the endogenous / exogenous series always
            //--- sit in columns 0 and 1, whatever indices the caller
            //--- supplied for the original input matrix
            significance(df,m_transfer_entropies,0,1,m_tlag,m_maxlagonly,n_shuffles,rlts,numBins,type);

            if(!sTE.Row(rlts.mean,i) || !pvals.Row(rlts.pvalue,i) || !zscores.Row(rlts.zscore,i))
              {
               Print(__FUNCTION__, " error ", GetLastError());
               return false;
              }
           }
        }

      m_results.TE_XY = TE.Col(0);
      m_results.TE_YX = TE.Col(1);
      m_results.p_value_XY = pvals.Col(0);
      m_results.p_value_YX = pvals.Col(1);
      m_results.z_score_XY = zscores.Col(0);
      m_results.z_score_YX = zscores.Col(1);
      m_results.Ave_TE_XY = sTE.Col(0);
      m_results.Ave_TE_YX = sTE.Col(1);

      return true;
     }

public:
                     CTransEntropy(void)
     {
      if(!m_transfer_entropies.Resize(2))
         Print(__FUNCTION__, " error ", GetLastError());

     }
                    ~CTransEntropy(void)
     {


     }

   //--- Selects the two series to analyse and prepares the lagged windows.
   //---   in          : input matrix, one series per column
   //---   endog_index : column of the endogenous series
   //---   exog_index  : column of the exogenous series
   //---   lag         : lag to apply, non-zero and not above rows/2
   //---   maxLagOnly  : use only the maximum lag instead of lags 1..lag
   //---   winsize     : window length in rows, 0 disables windowing
   //---   winstride   : row step between windows
   //--- Returns false on invalid parameters or when preparation fails.
   bool              Initialize(matrix &in, ulong endog_index, ulong exog_index, ulong lag, bool maxLagOnly=true, ulong winsize=0,ulong winstride=0)
     {
      if(!lag || lag>in.Rows()/2)
        {
         Print(__FUNCTION__, " Invalid parameter(s) : lag must be > 0  and < rows/2");
         return false;
        }

      if(endog_index==exog_index)
        {
         Print(__FUNCTION__, " Invalid parameter(s) : endog cannot be = exog ");
         return false;
        }

      if(endog_index>=in.Cols() || exog_index>=in.Cols())
        {
         Print(__FUNCTION__, " Invalid parameter(s) : endog and exog must be valid column indices ");
         return false;
        }

      if(!m_dataset.Resize(in.Rows(),2))
        {
         Print(__FUNCTION__, " error ", GetLastError());
         return false;
        }

      if(!m_dataset.Col(in.Col(endog_index),0) || !m_dataset.Col(in.Col(exog_index),1))
        {
         Print(__FUNCTION__, " error ", GetLastError());
         return false;
        }

      if(!m_wins.Initialize(m_dataset,lag,maxLagOnly,winsize,winstride))
         return false;

      m_tlag = lag;
      m_endog = endog_index;
      m_exog = exog_index;
      m_maxlagonly = maxLagOnly;

      return true;
     }

   //--- Linear (regression based) transfer entropy for every window.
   //--- n_shuffles > 0 additionally computes significance statistics.
   bool              Calculate_Linear_TE(ulong n_shuffles=0)
     {
      return calculate(LINEAR_TE,0,n_shuffles);
     }

   //--- Nonlinear (histogram based) transfer entropy for every window using
   //--- numBins bins. n_shuffles > 0 additionally computes significance
   //--- statistics.
   bool              Calculate_NonLinear_TE(ulong numBins, ulong n_shuffles=0)
     {
      return calculate(NONLINEAR_TE,numBins,n_shuffles);
     }

   //--- Transfer entropies of the most recently processed window.
   vector            get_transfer_entropies(void)
     {
      return m_transfer_entropies;
     }

   //--- Per-window results of the last calculation.
   TEResult            get_results(void)
     {
      return m_results;
     }
  };



//+---------------------------------------------------------------------------+
//| performs significance analysis on hypothesis test of statistical causality|
//+---------------------------------------------------------------------------+
//--- Estimates the significance of the observed transfer entropies by
//--- comparing them with values obtained from shuffled copies of the data.
//---   in_df      : dataset to shuffle
//---   TE         : observed transfer entropies, elements [0] and [1]
//---   endog/exog : column indices of the two series inside in_df
//---   lag        : lag passed on to the analysis of each shuffled copy
//---   maxlagOnly : lag mode passed on to the analysis
//---   n_shuffles : number of shuffled copies, must be > 0
//---   sresult    : receives p-values, z-scores and mean shuffled TE
//---   bins       : number of histogram bins (nonlinear analysis only)
//---   type       : estimator to run on each shuffled copy
//--- sresult is left untouched when the function aborts early.
void significance(matrix &in_df, vector &TE, ulong endog, ulong exog, ulong lag, bool maxlagOnly, ulong n_shuffles, SigResult &sresult, ulong bins = 0, ENUM_TE_TYPE type=LINEAR_TE)
  {
//--- n_shuffles is used as a divisor below
   if(!n_shuffles)
     {
      Print(__FUNCTION__, " Invalid parameter(s) : n_shuffles must be > 0 ");
      return;
     }

   matrix shuffled_TEs(2,n_shuffles);
   ulong count_0 = 0;
   ulong count_1 = 0;

   for(ulong i = 0; i<n_shuffles; i++)
     {
      matrix sdf = np::shuffleMatrix(in_df,false);

      CTransEntropy te;

      if(!te.Initialize(sdf,endog,exog,lag,maxlagOnly))
         return;

      if((type==LINEAR_TE && !te.Calculate_Linear_TE()) ||
         (type==NONLINEAR_TE && !te.Calculate_NonLinear_TE(bins)))
         return;

      vector cte = te.get_transfer_entropies();

      if(!shuffled_TEs.Col(cte,i))
        {
         Print(__FUNCTION__," error ", GetLastError());
         return;
        }

      //--- count the shuffled runs that beat the observed value
      if(TE[0]<cte[0])
         ++count_0;

      if(TE[1]<cte[1])
         ++count_1;

     }

   vector row_0 = shuffled_TEs.Row(0);
   vector row_1 = shuffled_TEs.Row(1);

   double mean_0 = row_0.Mean();
   double mean_1 = row_1.Mean();
   double std_0 = row_0.Std();
   double std_1 = row_1.Std();

   sresult.pvalue[0] = double(count_0)/double(n_shuffles);
   sresult.pvalue[1] = double(count_1)/double(n_shuffles);

//--- identical shuffled values give a zero spread; report a z-score of 0
//--- instead of dividing by zero
   sresult.zscore[0] = (std_0>0.0)?(TE[0]-mean_0)/std_0:0.0;
   sresult.zscore[1] = (std_1>0.0)?(TE[1]-mean_1)/std_1:0.0;

   sresult.mean[0] = mean_0;
   sresult.mean[1] = mean_1;

  }

//+------------------------------------------------------------------+
