//+------------------------------------------------------------------+
//|                                  ClassificationEnsemble_Demo.mq5 |
//|                                  Copyright 2024, MetaQuotes Ltd. |
//|                                             https://www.mql5.com |
//+------------------------------------------------------------------+
#property copyright "Copyright 2024, MetaQuotes Ltd."
#property link      "https://www.mql5.com"
#property version   "1.00"
#property script_show_inputs
#include<ensemble.mqh>
#include<multilayerperceptron.mqh>
//--- input parameters
input int      NumSamples=10;
input int      NumClasses=3;
input int      NumModels=3;
input int      NumReplications=1000;
input double   ClassificationDifficultyFactor=0.0;

//+------------------------------------------------------------------+
//|  normal(rngstate)                                                |
//+------------------------------------------------------------------+
double normal(CHighQualityRandStateShell &state)
  {
//--- forward to ALGLIB's normal sampler; the generator state is advanced
//--- through the reference argument
   double draw = CAlglib::HQRndNormal(state);
   return draw;
  }
//+------------------------------------------------------------------+
//|   unifrand(rngstate)                                             |
//+------------------------------------------------------------------+
double unifrand(CHighQualityRandStateShell &state)
  {
//--- forward to ALGLIB's uniform real sampler; the generator state is
//--- advanced through the reference argument
   double draw = CAlglib::HQRndUniformR(state);
   return draw;
  }
//+------------------------------------------------------------------+
//|Multilayer perceptron                                             |
//+------------------------------------------------------------------+
class CMLPC:public ensemble::IClassify
  {
private:
   //--- layer sizes: inputs, outputs and the two hidden-layer sizes handed to CMlp::fit()
   ulong              m_in;
   ulong              m_out;
   ulong              m_hl1;
   ulong              m_hl2;
   //--- training settings forwarded unchanged to CMlp::fit();
   //--- NOTE(review): the exact meaning of m_alfa / m_beyta is defined by CMlp - confirm there
   double             m_alfa;
   double             m_beyta;
   uint               m_epochs;
   double             m_learningrate;
   double             m_tolerance;
   //--- wrapped network object, created in the constructor and released in the destructor
   CMlp              *m_mlfn;

public:
   //--- construction / destruction
                     CMLPC(ulong ins, ulong outs,ulong numhl1,ulong numhl2);
                    ~CMLPC(void);
   //--- override the default training settings
   void              setParams(double alpha_, double beta_,double learning_rate, double tolerance, uint num_epochs);
   //--- fit the network; returns false when the data do not match the configured sizes
   bool              train(matrix &predictors,matrix&targets);
   //--- run the network on one predictor vector
   vector            classify(vector &predictors);
   //--- size accessors
   ulong             getNumInputs(void)
     {
      return m_in;
     }
   ulong             getNumOutputs(void)
     {
      return m_out;
     }
  };
//+------------------------------------------------------------------+
//| constructor                                                      |
//+------------------------------------------------------------------+
CMLPC::CMLPC(ulong ins, ulong outs,ulong numhl1,ulong numhl2)
  {
//--- network dimensions supplied by the caller
   m_in  = ins;
   m_out = outs;
   m_hl1 = numhl1;
   m_hl2 = numhl2;
//--- default training settings; setParams() can replace them later
   m_alfa         = 0.3;
   m_beyta        = 0.01;
   m_learningrate = 0.001;
   m_tolerance    = 1.e-8;
   m_epochs       = 1000;
//--- the wrapped network is owned by this object (released in the destructor)
   m_mlfn = new CMlp();
  }
//+------------------------------------------------------------------+
//| destructor                                                       |
//+------------------------------------------------------------------+
CMLPC::~CMLPC(void)
  {
//--- only an object created with new may be deleted
   if(CheckPointer(m_mlfn) != POINTER_DYNAMIC)
      return;

   delete m_mlfn;
  }
//+------------------------------------------------------------------+
//| set other hyperparameters of the model                           |
//+------------------------------------------------------------------+
void CMLPC::setParams(double alpha_, double beta_,double learning_rate, double tolerance, uint num_epochs)
  {
//--- store the settings; they take effect on the next call to train()
   m_epochs       = num_epochs;
   m_tolerance    = tolerance;
   m_learningrate = learning_rate;
   m_beyta        = beta_;
   m_alfa         = alpha_;
  }
//+------------------------------------------------------------------+
//| fit a model to the data                                          |
//+------------------------------------------------------------------+
bool CMLPC::train(matrix &predictors,matrix &targets)
  {
//--- Fits the wrapped network to the supplied data.
//---   predictors : one sample per row, must have m_in columns
//---   targets    : one sample per row, must have m_out columns
//--- Returns false (after printing a message) when the network object is
//--- missing or the data are unusable; otherwise returns the result of CMlp::fit().

//--- the constructor's allocation may have failed; never dereference a bad pointer
   if(CheckPointer(m_mlfn) == POINTER_INVALID)
     {
      Print(__FUNCTION__, " failed training due to missing network object");
      return false;
     }

//--- column counts must match the configured sizes, and both matrices must
//--- describe the same, non-empty set of samples
   if(m_in != predictors.Cols() || m_out != targets.Cols() ||
      predictors.Rows() == 0 || predictors.Rows() != targets.Rows())
     {
      Print(__FUNCTION__, " failed training due to invalid training data");
      return false;
     }

   return m_mlfn.fit(predictors,targets,m_alfa,m_beyta,m_hl1,m_hl2,m_epochs,m_learningrate,m_tolerance);
  }
//+------------------------------------------------------------------+
//| make a prediction with the trained model                         |
//+------------------------------------------------------------------+
vector CMLPC::classify(vector &predictors)
  {
//--- hand the predictor vector to the wrapped network and pass its output back
   vector network_output = m_mlfn.predict(predictors);
   return network_output;
  }
//+------------------------------------------------------------------+
//| clean up dynamic array pointers                                  |
//+------------------------------------------------------------------+
void cleanup(ensemble::IClassify* &array[])
  {
//--- walk every slot and delete only objects that were created with new;
//--- slots holding anything else are left untouched
   uint count = array.Size();
   for(uint idx = 0; idx<count; idx++)
     {
      if(CheckPointer(array[idx])!=POINTER_DYNAMIC)
         continue;
      delete array[idx];
     }
  }
//+------------------------------------------------------------------+
//| global variables                                                 |
//+------------------------------------------------------------------+
//--- run configuration, copied from the script inputs at the top of OnStart()
int nreplications;
int nsamps;
int nmodels;
int divisor;        // set to 1 in OnStart(), not read anywhere in this file
int nreps_done;     // number of replications completed so far
int n_classes;
int nnn;            // not referenced in this file
int n_pairs;        // number of distinct class pairs: n_classes*(n_classes-1)/2
int nh_g;           // hidden-layer size chosen for the current pairwise model
//--- number of training samples available to each pairwise model
ulong ntrain_pair[];
//--- data sets: columns 0-1 hold the predictors, the remaining columns the class flags
matrix xdata;
matrix xbad_data;
matrix xtainted_data;
matrix test[];
matrix x_targ;      // not referenced in this file
matrix xbad_targ;   // not referenced in this file
matrix xwild_targ;  // not referenced in this file
//--- scratch row used while assembling pairwise training sets
vector inputdata;
//--- class separation factor and per-replication error counters
double cd_factor;
double err_score;
double err_score_1;
double err_score_2;
double err_score_3;
//--- per-model error sums and a scratch vector for model/ensemble outputs
vector classification_err_raw;
vector output_vector;
//--- per-ensemble error sums, accumulated over replications and averaged at the end
double classification_err_average;
double classification_err_median;
double classification_err_maxmax;
double classification_err_maxmin;
double classification_err_intersection_1;
double classification_err_intersection_2;
double classification_err_intersection_3;
double classification_err_union_1;
double classification_err_union_2;
double classification_err_union_3;
double classification_err_majority;
double classification_err_borda;
double classification_err_logit;
double classification_err_logitsep;
double classification_err_localacc;
double classification_err_fuzzyint;
double classification_err_pairwise;
//+------------------------------------------------------------------+
//| ensemble model objects                                           |
//+------------------------------------------------------------------+
//--- One global instance of every combining rule evaluated by OnStart().
//--- The first four, plus majority and Borda, are used through classify() only.
ensemble::CAvgClass average_ensemble ;
ensemble::CMedian median_ensemble ;
ensemble::CMaxMax maxmax_ensemble ;
ensemble::CMaxMin maxmin_ensemble ;
//--- intersection and union are fitted on the training data before use and
//--- are additionally queried through proba()
ensemble::CIntersection intersection_ensemble ;
ensemble::CUnion union_rule ;
ensemble::CMajority majority_ensemble ;
ensemble::CBorda borda_ensemble ;
//--- the next four rules are fitted on the training data in every replication
ensemble::ClogitReg logit_ensemble ;
ensemble::ClogitRegSep logitsep_ensemble ;
ensemble::ClocalAcc localacc_ensemble ;
ensemble::CFuzzyInt fuzzyint_ensemble ;
//--- pairwise coupling works on the per-class-pair models, not on the component models
ensemble::CPairWise pairwise_ensemble ;

//--- upper bound used when sizing the hidden layer of each pairwise model
int n_hid = 4 ;
//+------------------------------------------------------------------+
//| Script program start function                                    |
//+------------------------------------------------------------------+
void OnStart()
  {
//--- Monte-Carlo comparison of classifier-combination rules.
//--- Each replication generates a fresh training set and 10 test sets,
//--- trains the component models and one model per class pair, then scores
//--- every model and every combining rule on the test sets. The error rates
//--- averaged over all completed replications are printed at the end.
   CHighQualityRandStateShell rngstate;
   CHighQualityRand::HQRndRandomize(rngstate.GetInnerObj());
//--- copy the script inputs into the working globals
   nsamps = NumSamples ;
   n_classes = NumClasses ;
   nmodels = NumModels ;
   nreplications = NumReplications ;
   cd_factor = ClassificationDifficultyFactor ;

//--- NOTE(review): when n_classes exceeds nsamps, classes with index >= nsamps
//--- never receive a training sample, so some class pairs have no training data;
//--- consider rejecting that configuration here as well
   if((nsamps <= 3)  || (n_classes <= 1)  || (nmodels <= 0)  || (nreplications <= 0) || (cd_factor < 0.0))
     {
      Alert(" Invalid inputs ");
      return;
     }

   divisor = 1 ;
   ensemble::IClassify* models[];
   ensemble::IClassify* model_pairs[];
   /*
      Allocate memory and initialize
   */
   n_pairs = n_classes * (n_classes-1) / 2 ;
   if(ArrayResize(models,nmodels)<0 || ArrayResize(model_pairs,n_pairs)<0 || ArrayResize(test,10)<0 ||
      ArrayResize(ntrain_pair,n_pairs)<0)
     {
      Print(" Array resize errors ", GetLastError());
      cleanup(models);
      cleanup(model_pairs);
      return;
     }

   ArrayInitialize(ntrain_pair,0);

//--- component models: 2 inputs, one output per class, hidden-layer sizes 4 and 0
   for(int i=0 ; i<nmodels ; i++)
      models[i] = new CMLPC(2,ulong(n_classes),4,0) ;

//--- every data matrix has 2 predictor columns followed by n_classes class-flag columns
   xdata = matrix::Zeros(nsamps,(2+n_classes));
   xbad_data = matrix::Zeros(nsamps,(2+n_classes));
   xtainted_data = matrix::Zeros(nsamps,(2+n_classes));
   inputdata = vector::Zeros(3);

   for(uint i = 0; i<test.Size(); i++)
      test[i] = matrix::Zeros(nsamps,(2+n_classes));

//--- reset all error accumulators
   classification_err_raw = vector::Zeros(nmodels);
   classification_err_average = 0.0 ;
   classification_err_median = 0.0 ;
   classification_err_maxmax = 0.0 ;
   classification_err_maxmin = 0.0 ;
   classification_err_intersection_1 = 0.0 ;
   classification_err_intersection_2 = 0.0 ;
   classification_err_intersection_3 = 0.0 ;
   classification_err_union_1 = 0.0 ;
   classification_err_union_2 = 0.0 ;
   classification_err_union_3 = 0.0 ;
   classification_err_majority = 0.0 ;
   classification_err_borda = 0.0 ;
   classification_err_logit = 0.0 ;
   classification_err_logitsep = 0.0 ;
   classification_err_localacc = 0.0 ;
   classification_err_fuzzyint = 0.0 ;
   classification_err_pairwise = 0.0 ;

   for(int i_rep=0 ; i_rep<nreplications ; i_rep++)
     {
      nreps_done = i_rep + 1 ;

      //--- the matrices are already zero on the first pass
      if(i_rep>0)
         xdata.Fill(0.0);
      /*
         Build the training set. The first n_classes samples are assigned
         classes 0..n_classes-1 in order, the rest get a random class.
         Class z shifts predictor 0 by +z*cd_factor and predictor 1 by -z*cd_factor.
      */
      for(int i=0, z=0; i<nsamps ; i++)
        {
         xdata[i][0] = normal(rngstate) ;
         xdata[i][1] = normal(rngstate) ;
         if(i < n_classes)
            z = i ;
         else
            z = (int)(unifrand(rngstate) * n_classes) ;
         if(z >= n_classes)     // guard against a draw that maps past the last class
            z = n_classes - 1 ;
         xdata[i][2+z] = 1.0 ;
         xdata[i][0] += double(z) * cd_factor ;
         xdata[i][1] -= double(z) * cd_factor ;
        }

      /*
         Training set for model 3: same predictors, but every class flag is
         redrawn independently as 1 with probability 1/n_classes.
         NOTE(review): np::matrixCopy() is assumed to write 'arm' back into the
         class columns of xbad_data - confirm its argument convention.
      */
      if(nmodels >= 4)
        {
         xbad_data = xdata;
         matrix arm = np::sliceMatrixCols(xbad_data,2);
         for(int i = 0; i<nsamps; i++)
            for(int z = 0; z<n_classes; z++)
               arm[i][z] = (unifrand(rngstate)<(1.0/double(n_classes)))?1.0:0.0;

         np::matrixCopy(xbad_data,arm,0,xbad_data.Rows(),1,2);
        }

      /*
         Training set for model 4: same predictors, but each class flag is
         replaced with probability 0.1 by flag*1000-500 (i.e. +500 or -500).
      */
      if(nmodels >= 5)
        {
         xtainted_data = xdata;
         matrix arm = np::sliceMatrixCols(xtainted_data,2);
         for(int i = 0; i<nsamps; i++)
            for(int z = 0; z<n_classes; z++)
               if(unifrand(rngstate)<0.1)
                  arm[i][z] = xdata[i][2+z] * 1000.0 - 500.0 ;

         np::matrixCopy(xtainted_data,arm,0,xtainted_data.Rows(),1,2);
        }

      for(int i=0 ; i<10 ; i++)         // Build a test dataset
        {
         if(i_rep>0)
            test[i].Fill(0.0);
         //--- same generator as the training set, but every class is drawn at random
         for(int j=0,z=0; j<nsamps; j++)
           {
            test[i][j][0] = normal(rngstate) ;
            test[i][j][1] = normal(rngstate) ;
            z = (int)(unifrand(rngstate) * n_classes) ;
            if(z >= n_classes)
               z = n_classes - 1 ;
            test[i][j][2+z] = 1.0 ;
            test[i][j][0] += double(z) * cd_factor ;
            test[i][j][1] -= double(z) * cd_factor ;
           }
        }

      /*
         Train every component model and accumulate its own test error.
         Model 3 learns from xbad_data, model 4 from xtainted_data,
         all others from the clean training set.
      */
      for(int i_model=0 ; i_model<nmodels ; i_model++)
        {
         matrix preds,targs;
         if(i_model == 3)
           {
            targs = np::sliceMatrixCols(xbad_data,2);
            preds = np::sliceMatrixCols(xbad_data,0,2);
           }
         else
            if(i_model == 4)
              {
               targs = np::sliceMatrixCols(xtainted_data,2);
               preds = np::sliceMatrixCols(xtainted_data,0,2);
              }
            else
              {
               targs = np::sliceMatrixCols(xdata,2);
               preds = np::sliceMatrixCols(xdata,0,2);
              }

         if(!models[i_model].train(preds,targs))
           {
            Print(" failed to train i_model at shift ", i_model);
            cleanup(model_pairs);
            cleanup(models);
            return;
           }

         //--- a test case is an error when the strongest output is not the true class
         err_score = 0.0 ;
         for(int i=0 ; i<10 ; i++)
           {
            vector testvec,testin,testtarg;
            for(int j=0; j<nsamps; j++)
              {
               testvec = test[i].Row(j);
               testtarg = np::sliceVector(testvec,2);
               testin = np::sliceVector(testvec,0,2);
               output_vector = models[i_model].classify(testin) ;
               if(output_vector.ArgMax() != testtarg.ArgMax())
                  err_score += 1.0 ;
              }
           }
         classification_err_raw[i_model] += err_score / (10 * nsamps) ;
        }

      /*
         Train one single-output model per class pair (i,j), using only the
         training samples that belong to class i (target 1.0) or class j (target 0.0).
      */
      int i_model = 0;
      for(int i=0 ; i<n_classes-1 ; i++)
        {
         for(int j=i+1 ; j<n_classes ; j++)
           {
            //--- count the training samples available to this pair
            ntrain_pair[i_model] = 0 ;
            for(int z=0 ; z<nsamps ; z++)
              {
               if((xdata[z][2+i]> 0.5)
                  || (xdata[z][2+j] > 0.5))
                  ++ntrain_pair[i_model] ;
              }
            //--- hidden-layer size is limited by both n_hid and the sample count
            nh_g = (n_hid < int(ntrain_pair[i_model]) - 1) ? n_hid : int(ntrain_pair[i_model]) - 1;
            model_pairs[i_model] = new CMLPC(2, 1, ulong(nh_g+1),0) ;
            matrix training;
            matrix preds,targs;
            ulong msize=0;
            for(int z=0 ; z<nsamps ; z++)
              {
               inputdata[0] = xdata[z][0] ;
               inputdata[1] = xdata[z][1] ;
               if(xdata[z][2+i]> 0.5)
                  inputdata[2] = 1.0 ;
               else
                  if(xdata[z][2+j] > 0.5)
                     inputdata[2] = 0.0 ;
                  else
                     continue ;     // sample belongs to neither class of the pair
               training.Resize(msize+1,inputdata.Size());
               training.Row(inputdata,msize++);
              }
            preds = np::sliceMatrixCols(training,0,2);
            targs = np::sliceMatrixCols(training,2);
            //--- a pairwise model that failed to train must not be scored;
            //--- abort the same way as for a failed component model
            if(!model_pairs[i_model].train(preds,targs))
              {
               Print(" failed to train pairwise model at shift ", i_model);
               cleanup(model_pairs);
               cleanup(models);
               return;
              }
            ++i_model ;
           }
        }

      /*
      average_ensemble
      */

      err_score = 0.0 ;
      for(int i=0 ; i<10 ; i++)
        {
         for(int z=0;z<nsamps;z++)
           {
            vector row = test[i].Row(z);
            vector rowtest = np::sliceVector(row,0,2);
            vector rowtarg = np::sliceVector(row,2);
            if(average_ensemble.classify(rowtest,models) != rowtarg.ArgMax())
               err_score += 1.0 ;
           }
        }
      classification_err_average += err_score / (10 * nsamps) ;

      /*
      median_ensemble
      */

      err_score = 0.0 ;
      for(int i=0 ; i<10 ; i++)
        {
         for(int z=0;z<nsamps;z++)
           {
            vector row = test[i].Row(z);
            vector rowtest = np::sliceVector(row,0,2);
            vector rowtarg = np::sliceVector(row,2);
            if(median_ensemble.classify(rowtest,models) != rowtarg.ArgMax())
               err_score += 1.0 ;
           }
        }
      classification_err_median += err_score / (10 * nsamps) ;

      /*
      maxmax_ensemble
      */

      err_score = 0.0 ;
      for(int i=0 ; i<10 ; i++)
        {
         for(int z=0;z<nsamps;z++)
           {
            vector row = test[i].Row(z);
            vector rowtest = np::sliceVector(row,0,2);
            vector rowtarg = np::sliceVector(row,2);
            if(maxmax_ensemble.classify(rowtest,models) != rowtarg.ArgMax())
               err_score += 1.0 ;
           }
        }
      classification_err_maxmax += err_score / (10 * nsamps) ;

      /*
      maxmin_ensemble
      */

      err_score = 0.0 ;
      for(int i=0 ; i<10 ; i++)
        {
         for(int z=0;z<nsamps;z++)
           {
            vector row = test[i].Row(z);
            vector rowtest = np::sliceVector(row,0,2);
            vector rowtarg = np::sliceVector(row,2);
            if(maxmin_ensemble.classify(rowtest,models) != rowtarg.ArgMax())   // If predicted class not true class
               err_score += 1.0 ;      // Count this misclassification
           }
        }
      classification_err_maxmin += err_score / (10 * nsamps) ;

      /*
      intersection_ensemble
      The rules from here on that need fitting are fitted on the clean training set.
      A test case counts against all three scores when proba() for the true
      class is below 0.5; otherwise score k is charged when classify() returns
      more than k.
      NOTE(review): classify() presumably returns the size of the retained
      class set - confirm in ensemble.mqh.
      */

      matrix preds,targs;
      err_score_1 = err_score_2 = err_score_3 = 0.0 ;
      preds = np::sliceMatrixCols(xdata,0,2);
      targs = np::sliceMatrixCols(xdata,2);

      intersection_ensemble.fit(preds,targs,models);
      for(int i=0 ; i<10 ; i++)
        {
         for(int z=0;z<nsamps;z++)
           {
            vector row = test[i].Row(z);
            vector rowtest = np::sliceVector(row,0,2);
            vector rowtarg = np::sliceVector(row,2);
            ulong class_ = intersection_ensemble.classify(rowtest,models) ;
            output_vector = intersection_ensemble.proba();

            if(output_vector[rowtarg.ArgMax()] < 0.5)
              {
               err_score_1 += 1.0 ;
               err_score_2 += 1.0 ;
               err_score_3 += 1.0 ;
              }
            else
              {
               if(class_ > 3)
                  err_score_3 += 1.0 ;
               if(class_ > 2)
                  err_score_2 += 1.0 ;
               if(class_ > 1)
                  err_score_1 += 1.0 ;
              }
           }
        }
      classification_err_intersection_1 += err_score_1 / (10 * nsamps) ;
      classification_err_intersection_2 += err_score_2 / (10 * nsamps) ;
      classification_err_intersection_3 += err_score_3 / (10 * nsamps) ;

      /*
      union_rule (scored exactly like the intersection rule)
      */

      union_rule.fit(preds,targs,models);
      err_score_1 = err_score_2 = err_score_3 = 0.0 ;
      for(int i=0 ; i<10 ; i++)
        {
         for(int z=0;z<nsamps;z++)
           {
            vector row = test[i].Row(z);
            vector rowtest = np::sliceVector(row,0,2);
            vector rowtarg = np::sliceVector(row,2);
            ulong clss = union_rule.classify(rowtest,models) ;
            output_vector = union_rule.proba();

            if(output_vector[rowtarg.ArgMax()] < 0.5)
              {
               err_score_1 += 1.0 ;
               err_score_2 += 1.0 ;
               err_score_3 += 1.0 ;
              }
            else
              {
               if(clss > 3)
                  err_score_3 += 1.0 ;
               if(clss > 2)
                  err_score_2 += 1.0 ;
               if(clss > 1)
                  err_score_1 += 1.0 ;
              }
           }
        }

      classification_err_union_1 += err_score_1 / (10 * nsamps) ;
      classification_err_union_2 += err_score_2 / (10 * nsamps) ;
      classification_err_union_3 += err_score_3 / (10 * nsamps) ;

      /*
      majority_ensemble
      */

      err_score = 0.0 ;
      for(int i=0 ; i<10 ; i++)
        {
         for(int z=0;z<nsamps;z++)
           {
            vector row = test[i].Row(z);
            vector rowtest = np::sliceVector(row,0,2);
            vector rowtarg = np::sliceVector(row,2);
            if(majority_ensemble.classify(rowtest,models) != rowtarg.ArgMax())
               err_score += 1.0 ;
           }
        }
      classification_err_majority += err_score / (10 * nsamps) ;

      /*
      borda_ensemble
      */

      err_score = 0.0 ;
      for(int i=0 ; i<10 ; i++)
        {
         for(int z=0;z<nsamps;z++)
           {
            vector row = test[i].Row(z);
            vector rowtest = np::sliceVector(row,0,2);
            vector rowtarg = np::sliceVector(row,2);
            if(borda_ensemble.classify(rowtest,models) != rowtarg.ArgMax())
               err_score += 1.0 ;
           }
        }
      classification_err_borda += err_score / (10 * nsamps) ;

      /*
      logit_ensemble
      */

      err_score = 0.0 ;
      logit_ensemble.fit(preds,targs,models);
      for(int i=0 ; i<10 ; i++)
        {
         for(int z=0;z<nsamps;z++)
           {
            vector row = test[i].Row(z);
            vector rowtest = np::sliceVector(row,0,2);
            vector rowtarg = np::sliceVector(row,2);
            if(logit_ensemble.classify(rowtest,models) != rowtarg.ArgMax())
               err_score += 1.0 ;
           }
        }
      classification_err_logit += err_score / (10 * nsamps) ;

      /*
      logitsep_ensemble
      */

      err_score = 0.0 ;
      logitsep_ensemble.fit(preds,targs,models);
      for(int i=0 ; i<10 ; i++)
        {
         for(int z=0;z<nsamps;z++)
           {
            vector row = test[i].Row(z);
            vector rowtest = np::sliceVector(row,0,2);
            vector rowtarg = np::sliceVector(row,2);
            if(logitsep_ensemble.classify(rowtest,models) != rowtarg.ArgMax())
               err_score += 1.0 ;
           }
        }
      classification_err_logitsep += err_score / (10 * nsamps) ;

      /*
      localacc_ensemble
      */

      err_score = 0.0 ;
      localacc_ensemble.fit(preds,targs,models);
      for(int i=0 ; i<10 ; i++)
        {
         for(int z=0;z<nsamps;z++)
           {
            vector row = test[i].Row(z);
            vector rowtest = np::sliceVector(row,0,2);
            vector rowtarg = np::sliceVector(row,2);
            if(localacc_ensemble.classify(rowtest,models) != rowtarg.ArgMax())
               err_score += 1.0 ;
           }
        }
      classification_err_localacc += err_score / (10 * nsamps) ;

      /*
      fuzzyint_ensemble
      */

      err_score = 0.0 ;
      fuzzyint_ensemble.fit(preds,targs,models);
      for(int i=0 ; i<10 ; i++)
        {
         for(int z=0;z<nsamps;z++)
           {
            vector row = test[i].Row(z);
            vector rowtest = np::sliceVector(row,0,2);
            vector rowtarg = np::sliceVector(row,2);
            if(fuzzyint_ensemble.classify(rowtest,models) != rowtarg.ArgMax())
               err_score += 1.0 ;
           }
        }
      classification_err_fuzzyint += err_score / (10 * nsamps) ;

      /*
      pairwise_ensemble (uses the per-pair models and their sample counts)
      */

      err_score = 0.0 ;
      for(int i=0 ; i<10 ; i++)
        {
         for(int z=0;z<nsamps;z++)
           {
            vector row = test[i].Row(z);
            vector rowtest = np::sliceVector(row,0,2);
            vector rowtarg = np::sliceVector(row,2);
            if(pairwise_ensemble.classify(ulong(n_classes),rowtest,model_pairs,ntrain_pair)  != rowtarg.ArgMax())
               err_score += 1.0 ;
           }
        }
      classification_err_pairwise += err_score / (10 * nsamps) ;
      //--- the pairwise models are rebuilt from scratch in every replication
      cleanup(model_pairs);
     }

//--- report the error rates averaged over the completed replications
   err_score = 0.0 ;
   PrintFormat("Test Config: Classification Difficulty - %8.8lf\nNumber of classes - %5d\nNumber of component models - %5d\n Sample Size - %5d", ClassificationDifficultyFactor,NumClasses,NumModels,NumSamples);
   PrintFormat("%5d    Replications:", nreps_done) ;
   for(int i_model=0 ; i_model<nmodels ; i_model++)
     {
      PrintFormat("  %.8lf", classification_err_raw[i_model] / nreps_done) ;
      err_score += classification_err_raw[i_model] / nreps_done ;
     }
   PrintFormat("       Mean raw error = %8.8lf", err_score / nmodels) ;
   PrintFormat("        average_ensemble error = %8.8lf", classification_err_average / nreps_done) ;
   PrintFormat("         median_ensemble error = %8.8lf", classification_err_median / nreps_done) ;
   PrintFormat("         maxmax_ensemble error = %8.8lf", classification_err_maxmax / nreps_done) ;
   PrintFormat("         maxmin_ensemble error = %8.8lf", classification_err_maxmin / nreps_done) ;
   PrintFormat("       majority_ensemble error = %8.8lf", classification_err_majority / nreps_done) ;
   PrintFormat("          borda_ensemble error = %8.8lf", classification_err_borda / nreps_done) ;
   PrintFormat("          logit_ensemble error = %8.8lf", classification_err_logit / nreps_done) ;
   PrintFormat("       logitsep_ensemble error = %8.8lf", classification_err_logitsep / nreps_done) ;
   PrintFormat("       localacc_ensemble error = %8.8lf", classification_err_localacc / nreps_done) ;
   PrintFormat("       fuzzyint_ensemble error = %8.8lf", classification_err_fuzzyint / nreps_done) ;
   PrintFormat("       pairwise_ensemble error = %8.8lf", classification_err_pairwise / nreps_done) ;
   PrintFormat(" intersection_ensemble error 1 = %8.8lf", classification_err_intersection_1 / nreps_done) ;
   PrintFormat(" intersection_ensemble error 2 = %8.8lf", classification_err_intersection_2 / nreps_done) ;
   PrintFormat(" intersection_ensemble error 3 = %8.8lf", classification_err_intersection_3 / nreps_done) ;
   PrintFormat("        Union error 1 = %8.8lf", classification_err_union_1 / nreps_done) ;
   PrintFormat("        Union error 2 = %8.8lf", classification_err_union_2 / nreps_done) ;
   PrintFormat("        Union error 3 = %8.8lf", classification_err_union_3 / nreps_done) ;
   cleanup(models);
  }

//+------------------------------------------------------------------+

