//+------------------------------------------------------------------+
//|                                                          EDA.mq5 |
//|                                  Copyright 2024, MetaQuotes Ltd. |
//|                                             https://www.mql5.com |
//+------------------------------------------------------------------+
#property copyright "Copyright 2024, MetaQuotes Ltd."
#property link      "https://www.mql5.com"
#property version   "1.00"
#include<pfa.mqh>
#include<ErrorDescription.mqh>
#property script_show_inputs
//+------------------------------------------------------------------+
//|indicator type                                                    |
//+------------------------------------------------------------------+
enum SELECT_INDICATOR
  {
   ATR=0,//ATR
   MA//MA
  };
//--- input parameters
input uint     period_inc=2;//lookback increment
input uint     max_lookback=50;
input ENUM_MA_METHOD         AppliedMA = MODE_SMA;
input ENUM_APPLIED_PRICE     AppliedPrice = PRICE_CLOSE;
input datetime SampleStartDate=D'2019.12.31';
input datetime SampleStopDate=D'2022.12.31';
input string   SetSymbol="BTCUSD";
input ENUM_TIMEFRAMES SetTF = PERIOD_D1;
input ENUM_FACTOR_ROTATION AppliedFactorRotation = MODE_PROMAX;
input ENUM_DIST_CRIT AppliedDistanceCriterion = DIST_CUSTOM;
input ENUM_LINK_METHOD AppliedClusterAlgorithm = MODE_COMPLETE;
input ulong Num_Dimensions = 10;
//----
string csv_header="";                 //csv file header
int size_sample,                      //training set size
    size_observations,                //size of of both training and testing sets combined
    maxperiod,                        //maximum lookback
    indicator_handle=INVALID_HANDLE;  //long moving average indicator handle
//---
vector indicator[];                   //indicator indicator values;
//---
matrix feature_matrix;          //full matrix of features;
//+------------------------------------------------------------------+
//| Script program start function                                    |
//+------------------------------------------------------------------+
void OnStart()
  {
//---get relative shift of sample set
   int samplestart,samplestop,num_features;
   samplestart=iBarShift(SetSymbol!=""?SetSymbol:NULL,SetTF,SampleStartDate);
   samplestop=iBarShift(SetSymbol!=""?SetSymbol:NULL,SetTF,SampleStopDate);
   num_features = int((max_lookback/period_inc)*2);
//---check for errors from ibarshift calls
   if(samplestart<0 || samplestop<0)
     {
      Print(ErrorDescription(GetLastError()));
      return;
     }
//---set the size of the sample sets
   size_observations=(samplestart - samplestop) + 1 ;
   maxperiod=int(max_lookback);
//---check for input errors
   if(size_observations<=0 || maxperiod<=0)
     {
      Print("Invalid inputs ");
      return;
     }
//---allocate memory
   if(ArrayResize(indicator,num_features)<num_features)
     {
      Print(ErrorDescription(GetLastError()));
      return;
     }
//----get the full collection of indicator values
   int period_len;
   int k=0;
//---
   for(SELECT_INDICATOR select_indicator = 0; select_indicator<2; select_indicator++)
     {
      for(int iperiod=0; iperiod<int(indicator.Size()/2); iperiod++)
        {
         period_len=int((iperiod+1) * period_inc);
         if(StringLen(csv_header)>2)
            csv_header+=",";
         csv_header+=EnumToString(select_indicator)+"_"+string(period_len);
         int try
               =10;
         while(try)
           {
            switch(select_indicator)
              {
               case ATR:
                  indicator_handle=iATR(SetSymbol!=""?SetSymbol:NULL,SetTF,period_len);
                  break;
               case MA:
                  indicator_handle=iMA(SetSymbol!=""?SetSymbol:NULL,SetTF,period_len,0,AppliedMA,AppliedPrice);
                  break;
              }

            if(indicator_handle==INVALID_HANDLE)
               try
                  --;
            else
               break;
           }

         if(indicator_handle==INVALID_HANDLE)
           {
            Print("Invalid indicator handle ",EnumToString(select_indicator)," ", GetLastError());
            return;
           }

         Comment("copying data to buffer for indicator ",period_len);
         try
               = 0;
         while(!indicator[k].CopyIndicatorBuffer(indicator_handle,0,samplestop,size_observations) && try
                  <10)
              {
               try
                  ++;
               Sleep(5000);
              }

         if(try
               <10)
               ++k;
         else
           {
            Print("error copying to indicator buffers ",GetLastError());
            Comment("");
            return;
           }

         if(indicator_handle!=INVALID_HANDLE && IndicatorRelease(indicator_handle))
            indicator_handle=INVALID_HANDLE;
        }
     }

//---resize matrix
   if(!feature_matrix.Resize(size_observations,indicator.Size()))
     {
      Print(ErrorDescription(GetLastError()));
      Comment("");
      return;
     }
//---copy collected data to matrix
   for(ulong i = 0; i<feature_matrix.Cols(); i++)
      if(!feature_matrix.Col(indicator[i],i))
        {
         Print(ErrorDescription(GetLastError()));
         Comment("");
         return;
        }
//---
   Comment("");
//---test dataset for principal factor analysis suitability
//---kmo test
   vector kmo_vect;
   double kmo_stat;
   kmo(feature_matrix,kmo_vect,kmo_stat);
   Print("KMO test statistic ", kmo_stat);
//---Bartlett sphericity test
   double bs_stat,bs_pvalue;
   bartlet_sphericity(feature_matrix,bs_stat,bs_pvalue);
   Print("Bartlett sphericity test p_value ", bs_pvalue);
//---Extract the principal factors
   Cpfa fa;
//---
   if(!fa.fit(feature_matrix))
      return;
//---
   matrix fld = fa.get_factor_loadings();
   Print(" factor loading matrix ", fld);
//---
   matrix rotated_fld = fa.rotate_factorloadings(AppliedFactorRotation);
//---
   Print("\n rotated factor loading matrix ", rotated_fld);
//---
   matrix egvcts;
   vector egvals;
   fa.get_eigen_structure(egvcts,egvals);
   Print("\n vects ", egvcts);
   Print("\n evals ", egvals);
//---
   vector clusters[];
   CCluster fc;
   if(!fc.cluster(fld,Num_Dimensions,AppliedClusterAlgorithm,AppliedDistanceCriterion))
      return;
   if(!fc.get_clusters(clusters))
      return;

   string labels[];
   StringSplit(csv_header,StringGetCharacter(",",0),labels);
   string full;
   for(uint i =0; i<clusters.Size(); i++)
     {
      full="";
      for(ulong j = 0; j<clusters[i].Size(); j++)
         full+=labels[int(clusters[i][j])]+",";
      Print("cluster at ", i, "\n contains indicators ", full);
     }
  }
//+------------------------------------------------------------------+
