//+------------------------------------------------------------------+
//|                                    error_variance_estimation.mqh |
//|                                  Copyright 2024, MetaQuotes Ltd. |
//|                                             https://www.mql5.com |
//+------------------------------------------------------------------+
#property copyright "Copyright 2024, MetaQuotes Ltd."
#property link      "https://www.mql5.com"
#include<imodel.mqh>
#include<np.mqh>
//+------------------------------------------------------------------+
//|  class for estimating error variance                             |
//+------------------------------------------------------------------+
class CErrorVar
  {
public:
                     CErrorVar(void);
                    ~CErrorVar(void);

   virtual double    error_fun(const double truevalue,const double predictedvalue);
   virtual bool      cross_validation(matrix &predictors, matrix &targets, IModel & model,double &out_err);
   virtual bool      boot_strap(ulong nboot,matrix &predictors, matrix &targets, IModel & model,double &out_err);
   virtual bool      efrons_0(ulong nboot,matrix &predictors, matrix &targets, IModel & model,double &out_err);
   virtual bool      efrons_632(ulong nboot,matrix &predictors, matrix &targets, IModel & model,double &out_err);
  };
//+------------------------------------------------------------------+
//| constructor                                                      |
//+------------------------------------------------------------------+
CErrorVar::CErrorVar(void)
  {
  }
//+------------------------------------------------------------------+
//| destructor                                                       |
//+------------------------------------------------------------------+
CErrorVar::~CErrorVar(void)
  {
  }
//+------------------------------------------------------------------+
//| calculate the error                                              |
//+------------------------------------------------------------------+
double CErrorVar::error_fun(const double truevalue,const double predictedvalue)
  {
   return pow(truevalue-predictedvalue,2.0);
  }
//+------------------------------------------------------------------+
//| estimate error variance using cross validation testing           |
//+------------------------------------------------------------------+
bool CErrorVar::cross_validation(matrix &predictors, matrix &targets, IModel & model,double &out_err)
  {
   out_err = 0.0;
   vector test;
   double test_target;
   matrix preds,targs;

   for(ulong i = 0; i<predictors.Rows(); i++)
     {
      test = predictors.Row(i);
      test_target = targets[i][0];
      
      predictors.SwapRows(i,(predictors.Rows()-1));
      targets.SwapRows(i,(targets.Rows()-1));
      
      preds = np::sliceMatrixRows(predictors,0,long(predictors.Rows()-1));
      targs = np::sliceMatrixRows(targets,0,long(targets.Rows()-1));


      if(!model.train(preds,targs))
        {
         Print(__FUNCTION__," failed to train model ");
         return false;
        }

      out_err += error_fun(test_target,model.forecast(test));
      
      predictors.SwapRows(i,(predictors.Rows()-1));
      targets.SwapRows(i,(targets.Rows()-1));
     }

   out_err/=double(predictors.Rows());

   return true;
  }
//+------------------------------------------------------------------+
//| estimate error variance using ordinary bootstrap                 |
//+------------------------------------------------------------------+
bool CErrorVar::boot_strap(ulong nboot,matrix &predictors, matrix &targets, IModel & model,double &out_err)
  {
   double err,apparent,excess;
   excess = 0.0;
   ulong nsize = predictors.Rows();
   ulong count[];
   ulong k;

   ArrayResize(count,int(nsize));

   vector predicted(nsize);

   CHighQualityRandStateShell rstate;
   CHighQualityRand::HQRndRandomize(rstate.GetInnerObj());

   matrix preds = predictors;
   matrix targs = targets;

   for(ulong boot = 0; boot<nboot; boot++)
     {
      ArrayInitialize(count,0);
      //---
      for(ulong i=0; i<nsize; i++)
        {
         k=(int)(CAlglib::HQRndUniformR(rstate)*nsize);
         //---
         if(k>=nsize)
            k=nsize-1;
         //---
         preds.Row(predictors.Row(k),i);
         targs.Row(targets.Row(k),i);
         ++count[k];
        }

      if(!model.train(preds,targs))
        {
         Print(__FUNCTION__," failed to train model ", boot);
         return false;
        }

      for(ulong i=0; i<nsize; i++)
        {
         predicted[i] = model.forecast(predictors.Row(i));
         err = error_fun(targets[i][0],predicted[i]);
         excess+=(1.0 - double(count[i]))*err;
        }
     }

   excess/=double(nsize*nboot);

   if(!model.train(predictors,targets))
     {
      Print(__FUNCTION__," failed to train model ");
      return false;
     }
   apparent = 0.0;
   for(ulong i=0; i<nsize; i++)
     {
      predicted[i] = model.forecast(predictors.Row(i));
      err = error_fun(targets[i][0],predicted[i]);
      apparent+=err;
     }
   apparent/=double(nsize);
   out_err = apparent+excess;
   return true;
  }
//+------------------------------------------------------------------+
//| estimate error variance using efron's E0 bootstrap               |
//+------------------------------------------------------------------+
bool CErrorVar::efrons_0(ulong nboot,matrix &predictors, matrix &targets, IModel & model,double &out_err)
  {
   out_err = 0.0;
   ulong tot = 0;
   ulong nsize = predictors.Rows();
   ulong count[];
   ulong k;

   ArrayResize(count,int(nsize));

   vector predicted(nsize);

   CHighQualityRandStateShell rstate;
   CHighQualityRand::HQRndRandomize(rstate.GetInnerObj());

   matrix preds = predictors;
   matrix targs = targets;

   for(ulong boot = 0; boot<nboot; boot++)
     {
      ArrayInitialize(count,0);
      //---
      for(ulong i=0; i<nsize; i++)
        {
         k=(int)(CAlglib::HQRndUniformR(rstate)*nsize);
         //---
         if(k>=nsize)
            k=nsize-1;
         //---
         preds.Row(predictors.Row(k),i);
         targs.Row(targets.Row(k),i);
         ++count[k];
        }

      if(!model.train(preds,targs))
        {
         Print(__FUNCTION__," failed to train model ", boot);
         continue;//return false;
        }

      for(ulong i=0; i<nsize; i++)
        {
         if(count[i])
            continue;
         predicted[i] = model.forecast(predictors.Row(i));
         out_err+= error_fun(targets[i][0],predicted[i]);
         ++tot;
        }
     }

   if(tot)
      out_err/=double(tot);
   else
     {
      Print(__FUNCTION__, " zero denominator ");
      return false;
     }
   return true;
  }
//+------------------------------------------------------------------+
//| estimate error variance using efron's E632 bootstrap             |
//+------------------------------------------------------------------+
bool CErrorVar::efrons_632(ulong nboot,matrix &predictors, matrix &targets, IModel & model,double &out_err)
  {
   double apparent;

   if(!efrons_0(nboot,predictors,targets,model,out_err))
      return false;


   if(!model.train(predictors,targets))
     {
      Print(__FUNCTION__," failed to train model ");
      return false;
     }

   apparent = 0.0;
   
   vector predicted(predictors.Rows());

   for(ulong i=0; i<predictors.Rows(); i++)
     {
      predicted[i] = model.forecast(predictors.Row(i));
      apparent+= error_fun(targets[i][0],predicted[i]);
     }

   apparent/=double(predictors.Rows());

   out_err = 0.632*out_err + 0.368*apparent;

   return true;
  }

//+------------------------------------------------------------------+