//+------------------------------------------------------------------+
//|                                       PairWise_Ensemble_Demo.mq5 |
//|                                  Copyright 2024, MetaQuotes Ltd. |
//|                                             https://www.mql5.com |
//+------------------------------------------------------------------+
#property copyright "Copyright 2024, MetaQuotes Ltd."
#property link      "https://www.mql5.com"
#property version   "1.00"
#property script_show_inputs
#include<multilayerperceptron.mqh>
#include<ensemble.mqh>
#include<nom2ord.mqh>
#include<Generic/SortedSet.mqh>
//+------------------------------------------------------------------+
//|  sample size enumeration                                         |
//+------------------------------------------------------------------+
//--- Total number of rows used for training. OnStart() draws one third of this
//--- value from each of the three 50-row class blocks, so every member is a
//--- multiple of 3; the rows that are not drawn form the test set.
//--- NOTE: the trailing comment on each member is what the terminal shows in the
//--- script's inputs dialog, so editing those comments changes user-visible text.
enum ENUM_SAMPLE_SIZE
  {
   SAMPLE_SIZE_SMALL = 12,//small training size, large test size
   SAMPLE_SIZE_BALANCED = 75,// balanced training and test sizes
   SAMPLE_SIZE_LARGE = 120,//large training size, small test size
  };
//--- inputs
//--- NumSamples: total number of training rows (split evenly over the three
//--- classes in OnStart); defaults to the smallest training set.
input ENUM_SAMPLE_SIZE  NumSamples = SAMPLE_SIZE_SMALL;
//+------------------------------------------------------------------+
//|   unifrand(rngstate)                                             |
//+------------------------------------------------------------------+
double unifrand(CHighQualityRandStateShell &state)
  {
//--- thin wrapper: hand the generator state to ALGLIB and return its draw
//--- NOTE(review): presumably a uniform value in (0,1) - confirm against the ALGLIB port
   const double draw = CAlglib::HQRndUniformR(state);
   return draw;
  }
//+------------------------------------------------------------------+
//|Multilayer perceptron                                             |
//+------------------------------------------------------------------+
class CMLPC:public ensemble::IClassify
  {
private:
   //--- underlying network; created in the constructor, released in the destructor
   CMlp              *m_mlfn;
   //--- expected number of predictor / target columns
   ulong              m_in;
   ulong              m_out;
   //--- sizes of the first and second hidden layers passed to CMlp::fit()
   ulong              m_hl1;
   ulong              m_hl2;
   //--- training hyperparameters passed to CMlp::fit()
   double             m_alfa;
   double             m_beyta;
   double             m_learningrate;
   double             m_tolerance;
   uint               m_epochs;

public:
                     CMLPC(ulong ins, ulong outs,ulong numhl1,ulong numhl2);
                    ~CMLPC(void);
   //--- replace the default hyperparameters set by the constructor
   void              setParams(double alpha_, double beta_,double learning_rate, double tolerance, uint num_epochs);
   //--- fit the network; returns false when the column counts do not match m_in / m_out
   bool              train(matrix &predictors,matrix&targets);
   //--- run the network on one row of predictors
   vector            classify(vector &predictors);
   //--- number of predictor columns the model was built for
   ulong             getNumInputs(void)
     {
      return m_in;
     }
   //--- number of target columns the model was built for
   ulong             getNumOutputs(void)
     {
      return m_out;
     }
  };
//+------------------------------------------------------------------+
//| constructor                                                      |
//+------------------------------------------------------------------+
CMLPC::CMLPC(ulong ins, ulong outs,ulong numhl1,ulong numhl2) : m_in(ins),
   m_out(outs),
   m_hl1(numhl1),
   m_hl2(numhl2),
   m_alfa(0.3),
   m_beyta(0.01),
   m_learningrate(0.001),
   m_tolerance(1.e-8),
   m_epochs(1000)
  {
//--- layer sizes come from the caller, hyperparameters start at the defaults above
//--- (setParams() overrides them); the network object itself is owned by this instance
   m_mlfn = new CMlp();
  }
//+------------------------------------------------------------------+
//| destructor                                                       |
//+------------------------------------------------------------------+
CMLPC::~CMLPC(void)
  {
//--- release the network only if it is a live object created with new
   const bool owns_network = (CheckPointer(m_mlfn) == POINTER_DYNAMIC);
   if(owns_network)
     {
      delete m_mlfn;
     }
  }
//+------------------------------------------------------------------+
//| set other hyperparameters of the model                           |
//+------------------------------------------------------------------+
void CMLPC::setParams(double alpha_, double beta_,double learning_rate, double tolerance, uint num_epochs)
  {
//--- stored as-is (no validation); the values are used by the next train() call
//--- optimisation controls
   m_learningrate = learning_rate;
   m_tolerance    = tolerance;
   m_epochs       = num_epochs;
//--- alpha / beta coefficients forwarded to CMlp::fit()
   m_alfa         = alpha_;
   m_beyta        = beta_;
  }
//+------------------------------------------------------------------+
//| fit a model to the data                                          |
//+------------------------------------------------------------------+
bool CMLPC::train(matrix &predictors,matrix &targets)
  {
//--- Fits the wrapped network to the supplied samples.
//--- predictors : one sample per row, must have m_in columns
//--- targets    : one sample per row, must have m_out columns and as many rows as predictors
//--- returns    : result of CMlp::fit(), or false when the inputs or the network are unusable

//--- the constructor does not verify its allocation, so guard the dereference below
   if(CheckPointer(m_mlfn) == POINTER_INVALID)
     {
      Print(__FUNCTION__, " failed training due to invalid network object");
      return false;
     }
//--- column counts must match the model; rows must pair up and there must be at least one
   const bool bad_columns = (m_in != predictors.Cols() || m_out != targets.Cols());
   const bool bad_rows = (predictors.Rows() == 0 || predictors.Rows() != targets.Rows());
   if(bad_columns || bad_rows)
     {
      Print(__FUNCTION__, " failed training due to invalid training data");
      return false;
     }

   return m_mlfn.fit(predictors,targets,m_alfa,m_beyta,m_hl1,m_hl2,m_epochs,m_learningrate,m_tolerance);
  }
//+------------------------------------------------------------------+
//| make a prediction with the trained model                         |
//+------------------------------------------------------------------+
vector CMLPC::classify(vector &predictors)
  {
//--- delegate to the wrapped network and hand its output back unchanged
   vector network_output = m_mlfn.predict(predictors);
   return network_output;
  }
//+------------------------------------------------------------------+
//| clean up dynamic array pointers                                  |
//+------------------------------------------------------------------+
void cleanup(ensemble::IClassify* &array[])
  {
//--- walk the array once and delete every element that is a live dynamic object;
//--- the array itself is not resized
   const uint total = array.Size();
   for(uint idx = 0; idx<total; idx++)
     {
      if(CheckPointer(array[idx])!=POINTER_DYNAMIC)
         continue;
      delete array[idx];
     }
  }
//+------------------------------------------------------------------+
//| global variables                                                 |
//+------------------------------------------------------------------+
//--- nsamps  : total number of training rows, copied from the NumSamples input in OnStart()
//--- nmodels : number of pairwise models, n_classes*(n_classes-1)/2
int nsamps,nmodels;
//--- number of classes; OnStart() sets it to 3
int n_classes;
//--- xmatrix holds the training rows, test holds the remaining rows (both filled in OnStart())
matrix xmatrix,test;
//--- running count of misclassified test rows
double temp_err;
//---
//--- not referenced anywhere in the visible part of this script
vector missclassification_err_raw;
//--- misclassified test rows divided by the number of test rows
double missclassification_err_pairwise ;
//+------------------------------------------------------------------+
//| ensemble model objects                                           |
//+------------------------------------------------------------------+
//--- combines the per-pair models; OnStart() calls its classify() for every test row
ensemble::CPairWise pairwise_ensemble;
//+------------------------------------------------------------------+
//| Script program start function                                    |
//+------------------------------------------------------------------+
void OnStart()
  {
//--- Demo flow:
//---   1. load iris.csv, drop its first column and one-hot encode column 4 (the class)
//---   2. draw NumSamples/3 training rows from each of the three 50-row class blocks;
//---      every row that was not drawn becomes a test row
//---   3. train one single-output CMLPC for every pair of classes
//---   4. classify the test rows with the pairwise ensemble and print the error rate
//--- Any failure along the way is reported, already created models are released and the
//--- script stops instead of printing a score based on incomplete or untrained models.
   const int rows_per_class = 50;   // rows are consumed as consecutive blocks of this size
   const int n_features = 4;        // columns 0..3 are the predictors
//--- NOTE(review): assumes iris.csv is ordered by class, 50 rows each - confirm against the file
   n_classes = 3 ;
   nsamps = NumSamples ;
   nmodels = n_classes * (n_classes-1) / 2 ;
   const int train_per_class = nsamps / n_classes;
   missclassification_err_pairwise = 0.0 ;
   temp_err = 0.0;
//--- load the data; an unreadable or truncated file cannot supply the three class blocks
   matrix data = np::readcsv("iris.csv");
   if(data.Rows()<ulong(n_classes*rows_per_class))
     {
      Print(" failed to read iris.csv or file has too few rows ");
      return;
     }
   data = np::sliceMatrixCols(data,1);
   if(data.Cols()<ulong(n_features+1))
     {
      Print(" iris.csv has too few columns ");
      return;
     }
//--- one-hot encode the class column
   COneHotEncoder enc;
   ulong colum[1] = {4};
   if(!enc.fit(data,colum))
     {
      Print(" failed to encode data ");
      return;
     }
//---
   data = enc.transform(data);
//--- the loops below read columns n_features .. n_features+n_classes-1 as class indicators
   if(data.Cols()<ulong(n_features+n_classes))
     {
      Print(" encoded data has too few columns ");
      return;
     }
//---
   ulong fullset[],trainset[],testset[],third[],selected[];
   if(!np::arange(fullset,int(data.Rows())))
     {
      Print(" failed to build array of indices ");
      return;
     }
//---
   CSortedSet<ulong> test_set(fullset);
//---
   ensemble::IClassify* models[];
   ulong ntrain_pairs[];
   if(ArrayResize(models,nmodels)<0 || ArrayResize(ntrain_pairs,nmodels)<0)
     {
      Print(" Array resize errors ", GetLastError());
      return;
     }
//--- make every slot a known-empty pointer so cleanup() is safe on any early exit
   for(int m=0; m<nmodels; m++)
      models[m] = NULL;
   ArrayInitialize(ntrain_pairs,0);
//--- stratified sampling: the same number of training rows from every class block
   for(int c=0; c<n_classes; c++)
     {
      if(!ArrayCopy(third,fullset,0,c*rows_per_class,rows_per_class) ||
         !np::sampleArray(third,train_per_class,false,selected) ||
         !ArrayCopy(trainset,selected,c*train_per_class,0,train_per_class))
        {
         Print(" failed to build datasets ");
         cleanup(models);
         return;
        }
     }
//--- test rows = all rows minus the training rows
   test_set.ExceptWith(trainset);
   test_set.CopyTo(testset);
//---
   test = np::selectMatrixRows(data,testset);
   xmatrix = np::selectMatrixRows(data,trainset);
//--- iterate over the rows that were actually selected rather than trusting nsamps
   const int ntrain = int(xmatrix.Rows());
   const int ntest = int(test.Rows());
   if(ntrain<=0 || ntest<=0)
     {
      Print(" failed to build datasets ");
      cleanup(models);
      return;
     }
//--- one model per class pair (i,j); target is 1.0 for class i and 0.0 for class j
   vector inputdata = vector::Zeros(n_features+1);
   matrix preds,targs;
   int i_model = 0;
   for(int i=0 ; i<n_classes-1 ; i++)
     {
      for(int j=i+1 ; j<n_classes ; j++)
        {
         //--- count the training rows that belong to either class of the pair
         ulong npair = 0;
         for(int z=0 ; z<ntrain ; z++)
           {
            if((xmatrix[z][n_features+i]> 0.5)
               || (xmatrix[z][n_features+j] > 0.5))
               ++npair ;
           }
         ntrain_pairs[i_model] = npair ;
         //--- without rows there is nothing to train on, and npair-1 below would wrap around
         if(npair==0)
           {
            Print(" no training rows for class pair ", i, "/", j);
            cleanup(models);
            return;
           }
         //--- at most 3 hidden neurons, fewer when the pair has very few rows
         ulong hidl = (npair - 1)>3 ? 3 : npair - 1;
         models[i_model] = new CMLPC(n_features, 1, hidl,0) ;
         if(CheckPointer(models[i_model])==POINTER_INVALID)
           {
            Print(" failed to create model for class pair ", i, "/", j);
            cleanup(models);
            return;
           }
         //--- the row count is already known, so size the training matrix once
         matrix training;
         if(!training.Resize(npair,inputdata.Size()))
           {
            Print(" failed to allocate training matrix ", GetLastError());
            cleanup(models);
            return;
           }
         ulong msize=0;
         for(int z=0 ; z<ntrain ; z++)
           {
            if(xmatrix[z][n_features+i]> 0.5)
               inputdata[n_features] = 1.0 ;
            else
               if(xmatrix[z][n_features+j] > 0.5)
                  inputdata[n_features] = 0.0 ;
               else
                  continue ;   // row belongs to a class outside this pair
            for(int f=0 ; f<n_features ; f++)
               inputdata[f] = xmatrix[z][f];
            training.Row(inputdata,msize++);
           }
         preds = np::sliceMatrixCols(training,0,n_features);
         targs = np::sliceMatrixCols(training,n_features);
         //--- an untrained member would silently corrupt the ensemble score
         if(!models[i_model].train(preds,targs))
           {
            Print(" failed to train model for class pair ", i, "/", j);
            cleanup(models);
            return;
           }
         ++i_model ;
        }
     }
//---pairwise ensemble: count the test rows whose predicted class differs from the target
   for(int z=0; z<ntest; z++)
     {
      vector row = test.Row(z);
      vector rowtest = np::sliceVector(row,0,n_features);
      vector rowtarg = np::sliceVector(row,n_features);
      if(pairwise_ensemble.classify(ulong(n_classes),rowtest,models,ntrain_pairs) != rowtarg.ArgMax())
         temp_err += 1.0 ;
     }
   cleanup(models);
//--- ntest is known to be positive here, so the division is safe
   missclassification_err_pairwise += temp_err / double(ntest) ;
//---Print results
   PrintFormat("Pairwise ensemble misclassification score = %8.8lf", missclassification_err_pairwise) ;
//---
  }
//+------------------------------------------------------------------+
