//+------------------------------------------------------------------+
//|                                                        Arima.mqh |
//|                        Copyright 2023, MetaQuotes Software Corp. |
//|                                             https://www.mql5.com |
//+------------------------------------------------------------------+
#property copyright "Copyright 2023, MetaQuotes Software Corp."
#property link      "https://www.mql5.com"
#include <Math\Stat\Math.mqh>
#include <Math\Stat\Normal.mqh>
#include <Powells.mqh>
#include <Files\FileBin.mqh>

//+------------------------------------------------------------------+
//|                                                                  |
//+------------------------------------------------------------------+
class CArima:public PowellsMethod
  {
private:
   string            m_modelname;
   bool              m_const,m_istrained;
   uint              m_diff_order,m_ar_order,m_ma_order,m_insize,m_infactor;
   uint              m_arlags[],m_malags[];
   double            m_model[],m_sse;
   double            m_differenced[],m_innovation[],m_leads[];


   void              difference(const uint difference_degree, double &data[], double &differenced[], double &leads[]);
   void              integrate(double &differenced[], double &leads[], double &integrated[]);
   void              evaluate(const uint num_p);
   virtual double    func(const double &p[]);


public :
                     CArima(void);
                     CArima(const uint p,const uint d, const uint q,bool use_const_term=false);
                    ~CArima(void);

   uint              GetMaxArLag(void)    { if(m_ar_order) return m_arlags[ArrayMaximum(m_arlags)]; else return 0;}
   uint              GetMinArLag(void)    { if(m_ar_order) return m_arlags[ArrayMinimum(m_arlags)]; else return 0;}
   uint              GetMaxMaLag(void)    { if(m_ma_order) return m_malags[ArrayMaximum(m_malags)]; else return 0;}
   uint              GetMinMaLag(void)    { if(m_ma_order) return m_malags[ArrayMinimum(m_malags)]; else return 0;}
   uint              GetArOrder(void)     { return m_ar_order;  }
   uint              GetMaOrder(void)     { return m_ma_order;  }
   uint              GetDiffOrder(void)   { return m_diff_order;}
   bool              IsTrained(void)      { return m_istrained; }
   double            GetSSE(void)         { return m_sse;      }
   uint              GetArLagAt(const uint shift);
   uint              GetMaLagAt(const uint shift);


   bool              SetARLags(uint &ar_lags[]);
   bool              SetMALags(uint &ma_lags[]);
   bool              Fit(double &input_series[]);
   bool              Fit(double &input_series[],const uint p,const uint d, const uint q,bool use_const_term=false);
   bool              Predict(const uint num_pred,double &predictions[]);
   bool              Predict(const uint num_pred,double &in_raw[], double &predictions[]);
   bool              SaveModel(const string model_name);
   bool              LoadModel(const string model_name);
   double            BIC(void);
   double            AIC(void);
   void              Summary(void);
   void              SetMaInputsFactor(const uint factor)  { if(factor>2) m_infactor=factor;    }
   void              GetModelParameters(double &out_array[]) { ArrayCopy(out_array,m_model);      }
   void              GetModelInnovation(double &out_array[]) { ArrayCopy(out_array,m_innovation); }
   uint              GetMinModelInputs(void)                 { return(m_diff_order + GetMaxArLag() + (GetMaxMaLag()*m_infactor));}
   string            GetModelName(void)                      { return m_modelname;}

  };

//+------------------------------------------------------------------+
//|                                                                  |
//+------------------------------------------------------------------+
CArima::CArima(void)
  {
   m_ar_order=1;
//---
   ArrayResize(m_arlags,m_ar_order);
   for(uint i=0; i<m_ar_order; i++)
      m_arlags[i]=i+1;
//---
   m_ma_order=m_diff_order=0;
   m_istrained=false;
   m_const=false;
   m_modelname="";
   m_insize=0;
   m_infactor=3;
   m_sse=0;
   ArrayResize(m_model,m_ar_order+m_ma_order+1);
   ArrayInitialize(m_model,0);
  }
//+------------------------------------------------------------------+
//|                                                                  |
//+------------------------------------------------------------------+
CArima::CArima(const uint p,const uint d,const uint q,bool use_const_term=false)
  {
   m_ar_order=m_ma_order=m_diff_order=m_insize=0;
   m_infactor=3;

   if(d)
      m_diff_order=d;
   if(p)
     {
      m_ar_order=p;
      ArrayResize(m_arlags,p);
      for(uint i=0; i<m_ar_order; i++)
         m_arlags[i]=i+1;
     }
   if(q)
     {
      m_ma_order=q;
      ArrayResize(m_malags,q);
      for(uint i=0; i<m_ma_order; i++)
         m_malags[i]=i+1;
     }


   m_istrained=false;
   m_const=use_const_term;
   m_modelname="";
   m_sse=0;
   ArrayResize(m_model,m_ar_order+m_ma_order+1);
   ArrayInitialize(m_model,0);
  }
//+------------------------------------------------------------------+
//|                                                                  |
//+------------------------------------------------------------------+
CArima::~CArima(void)
  {
   ArrayFree(m_differenced);
   ArrayFree(m_leads);
   ArrayFree(m_innovation);
   ArrayFree(m_model);
   ArrayFree(m_arlags);
   ArrayFree(m_malags);

  }
//+------------------------------------------------------------------+
//|                                                                  |
//+------------------------------------------------------------------+
uint CArima::GetMaLagAt(const uint shift)
  {
   if(m_ma_order)
      return(m_malags[shift]);
   else
      return 0;
  }
//+------------------------------------------------------------------+
//|                                                                  |
//+------------------------------------------------------------------+
uint CArima::GetArLagAt(const uint shift)
  {
   if(m_ar_order)
      return(m_arlags[shift]);
   else
      return 0;
  }
//+------------------------------------------------------------------+
//|                                                                  |
//+------------------------------------------------------------------+
bool CArima::SetARLags(uint &ar_lags[])
  {
   if(ArraySize(ar_lags)!=(int)m_ar_order)
     {
      Print("Array size inadequate for set AR order");
      return false;
     }
//---
   if(!ArraySort(ar_lags))
     {
      Print("Failed to sort array: ",__FUNCTION__);
      return false;
     }
//---
   int found =ArrayBsearch(ar_lags,0);
   if(ar_lags[found]<1)
     {
      Print("Cannot set lag to zero");
      return false;
     }
//---
   ArrayCopy(m_arlags,ar_lags);
//---
   return true;

  }
//+------------------------------------------------------------------+
//|                                                                  |
//+------------------------------------------------------------------+
bool CArima::SetMALags(uint &ma_lags[])
  {
   if(ArraySize(ma_lags)!=(int)m_ma_order)
     {
      Print("Array size inadequate for set MA order");
      return false;
     }
//---
   if(!ArraySort(ma_lags))
     {
      Print("Failed to sort array: ",__FUNCTION__);
      return false;
     }
//---
   int found =ArrayBsearch(ma_lags,0);
   if(ma_lags[found]<1)
     {
      Print("Cannot set lag to zero");
      return false;
     }
//---
   ArrayCopy(m_malags,ma_lags);
//---
   return true;
  }
//+------------------------------------------------------------------+
//|                                                                  |
//+------------------------------------------------------------------+
void CArima::difference(const uint difference_degree, double &data[], double &differenced[], double &leads[])
  {
   int n=ArraySize(data);

   if(difference_degree)
     {
      ArrayResize(leads,difference_degree);
      ArrayResize(differenced,ArraySize(data));
      ArrayCopy(differenced,data);
     }

   for(uint pass=0 ; pass<difference_degree ; pass++)
     {
      --n ;
      leads[pass] = differenced[0] ;   // Preserve first point for undo
      for(int i=0 ; i<n ; i++)
        {
         differenced[i] = differenced[i+1] - differenced[i] ;
        }
     }

   if(difference_degree>0)
      ArrayRemove(differenced,n,ArraySize(data)-n);
  }
//+------------------------------------------------------------------+
//|                                                                  |
//+------------------------------------------------------------------+
void CArima::integrate(double &differenced[],double &leads[],double &integrated[])
  {
   int i, pass, passes ;
   double temp, sum;

   passes=ArraySize(leads);

   int n = ArraySize(differenced);

   ArrayResize(differenced,n+passes);


   for(pass=0 ; pass<passes ; pass++)
     {
      sum = leads[passes-pass-1] ;
      for(i=0 ; i<n ; i++)
        {
         temp = differenced[i] ;
         differenced[i] = sum ;
         sum += temp ;
        }
      differenced[n++] = sum ;
     }

   ArrayCopy(integrated,differenced);
  }
//+------------------------------------------------------------------+
//|                                                                  |
//+------------------------------------------------------------------+
double CArima::func(const double &p[])
  {
   double error=0;
   double sum_error=0;
   double pred=0;
   uint start_shift=(m_ar_order)?GetMaxArLag():(m_ma_order)?GetMaxMaLag():0;

   int p_shift;

   for(uint i=start_shift; i<m_differenced.Size(); i++)
     {
      p_shift=0;
      pred=0;
      if(m_const)
         pred+=p[p_shift++];
      for(uint j=0; j<m_ar_order; j++)
         pred+=p[p_shift++]*m_differenced[i-m_arlags[j]];
      for(uint k=0; i>=GetMaxMaLag()&& k<m_ma_order; k++)
         pred+=p[p_shift++]*m_innovation[i-m_malags[k]];
      error=pred-m_differenced[i];
      m_innovation[i]=error;
      error*=error;
      sum_error+=error;
     }

   return sum_error;
  }
//+------------------------------------------------------------------+
//|                                                                  |
//+------------------------------------------------------------------+
void CArima::evaluate(const uint num_p)
  {

   double pred=0;
   uint start_shift=(m_ma_order)?((!m_innovation[m_insize-1])?m_insize-(GetMaxMaLag()*m_infactor):m_insize):m_insize;
   uint d_size=(uint)ArraySize(m_differenced);

   int p_shift;

   for(uint i=start_shift; i<d_size; i++)
     {
      p_shift=0;
      pred=0;
      if(i>=m_insize)
         m_innovation[i]=0.0;
      if(m_const)
         pred+=m_model[p_shift++];
      for(uint j=0; j<m_ar_order; j++)
         pred+=m_model[p_shift++]*m_differenced[i-m_arlags[j]];
      for(uint k=0; i>=GetMaxMaLag() && k<m_ma_order; k++)
         pred+=m_model[p_shift++]*m_innovation[i-m_malags[k]];
      if(i>=m_insize)
         m_differenced[i]=pred;
      if(i<m_insize)
         m_innovation[i]=pred-m_differenced[i];
     }

   return;
  }
//+------------------------------------------------------------------+
//|                                                                  |
//+------------------------------------------------------------------+
bool CArima::Fit(double &input_series[])
  {

   uint input_size=ArraySize(input_series);

   uint in = GetMaxArLag()+(GetMaxMaLag()*m_infactor);

   if(in==0)
     {
      Print("Invalid model order");
      return false;
     }

   if(input_size<=0 || input_size<in)
     {
      Print("Inadequate training dataset size.Minimum size required is: ",in+m_const);
      return false;
     }

   if(m_diff_order)
      difference(m_diff_order,input_series,m_differenced,m_leads);
   else
      ArrayCopy(m_differenced,input_series);

   ArrayResize(m_innovation,ArraySize(m_differenced));
   double parameters[];
   ArrayResize(parameters,(m_const)?m_ar_order+m_ma_order+1:m_ar_order+m_ma_order);
   ArrayInitialize(parameters,0.0);

   int iterations = Optimize(parameters);

   if(iterations>0)
      m_istrained=true;
   else
     {
      Print("Model training failed.");
      return false;
     }

   m_sse=PowellsMethod::GetFret();
   m_modelname="";
   m_insize=ArraySize(m_differenced);

   ArrayCopy(m_model,parameters,(m_const)?0:1,0,WHOLE_ARRAY);


   return true;

  }
//+------------------------------------------------------------------+
//|                                                                  |
//+------------------------------------------------------------------+
bool CArima::Fit(double&input_series[],const uint p,const uint d,const uint q,bool use_const_term=false)
  {
   m_ar_order=m_ma_order=m_diff_order=0;

   ZeroMemory(m_arlags);
   ZeroMemory(m_malags);

   if(d)
      m_diff_order=d;
   if(p)
     {
      m_ar_order=p;
      ArrayResize(m_arlags,p);
      for(uint i=0; i<m_ar_order; i++)
         m_arlags[i]=i+1;
     }
   if(q)
     {
      m_ma_order=q;
      ArrayResize(m_malags,q);
      for(uint i=0; i<m_ma_order; i++)
         m_malags[i]=i+1;
     }

   m_istrained=false;
   m_const=use_const_term;
   m_infactor=3;

   ZeroMemory(m_innovation);
   ZeroMemory(m_model);
   ZeroMemory(m_differenced);

   ArrayResize(m_model,m_ar_order+m_ma_order+1);
   ArrayInitialize(m_model,0);

   return Fit(input_series);

  }
//+------------------------------------------------------------------+
//|                                                                  |
//+------------------------------------------------------------------+
void CArima::Summary(void)
  {

   string print = m_modelname+" Arima("+IntegerToString(m_ar_order)+","+IntegerToString(m_diff_order)+","+IntegerToString(m_ma_order)+")\n";
   print+= "SSE : "+string(m_sse);

   int k=0;
   if(m_const)
      print+="\nConstant: "+string(m_model[k++]);
   else
      k++;
   for(uint i=0; i<m_ar_order; i++)
      print+="\nAR coefficient at lag "+IntegerToString(m_arlags[i])+": "+string(m_model[k++]);
   for(uint j=0; j<m_ma_order; j++)
      print+="\nMA coefficient at lag "+IntegerToString(m_malags[j])+": "+string(m_model[k++]);

   Print(print);

   return;

  }
//+------------------------------------------------------------------+
//|                                                                  |
//+------------------------------------------------------------------+
bool CArima::Predict(const uint num_pred,double &predictions[])
  {
   if(!num_pred)
     {
      Print("Invalid number of predictions");
      return false;
     }

   if(!m_istrained || !m_insize)
     {
      ZeroMemory(predictions);
      if(m_istrained)
         Print("Model not trained");
      else
         Print("No input data available to make predictions");
      return false;
     }

   ArrayResize(m_differenced,ArraySize(m_differenced)+num_pred,num_pred);

   ArrayResize(m_innovation,ArraySize(m_differenced));

   evaluate(num_pred);

   if(m_diff_order)
     {
      double raw[];
      integrate(m_differenced,m_leads,raw);
      ArrayPrint(raw,_Digits,NULL,m_insize-5);
      ArrayCopy(predictions,raw,0,m_insize+m_diff_order);
      ArrayFree(raw);
     }
   else
      ArrayCopy(predictions,m_differenced,0,m_insize);

   return true;
  }
//+------------------------------------------------------------------+
//|                                                                  |
//+------------------------------------------------------------------+
bool CArima::Predict(const uint num_pred,double &in_raw[],double &predictions[])
  {
   if(!num_pred)
     {
      Print("Invalid number of predictions");
      return false;
     }

   if(!m_istrained)
     {
      ZeroMemory(predictions);
      Print("Model not trained");
      return false;
     }

   int numofinputs=0;

   if(m_ar_order)
      numofinputs+=(int)GetMaxArLag();
   if(m_ma_order)
      numofinputs+=int(GetMaxMaLag()*m_infactor);
   if(m_diff_order)
      numofinputs+=(int)m_diff_order;

   if(in_raw.Size()<(uint)numofinputs)
     {
      ZeroMemory(predictions);
      Print("Input dataset size inadequate. Size required: ",numofinputs);
      return false;
     }

   ZeroMemory(m_differenced);

   if(m_diff_order)
     {
      difference(m_diff_order,in_raw,m_differenced,m_leads);
      m_insize=m_differenced.Size();
     }
   else
     {
      m_insize=in_raw.Size();
      ArrayCopy(m_differenced,in_raw);
     }


   if(m_differenced.Size()!=(m_insize+num_pred))
      ArrayResize(m_differenced,m_insize+num_pred,num_pred);

   ArrayFill(m_differenced,m_insize,num_pred,0.0);

   if(m_innovation.Size()!=m_insize+num_pred)
      ArrayResize(m_innovation,ArraySize(m_differenced));

   ArrayInitialize(m_innovation,0.0);

   evaluate(num_pred);

   if(m_diff_order)
     {
      double raw[];
      integrate(m_differenced,m_leads,raw);
      ArrayCopy(predictions,raw,0,m_insize+m_diff_order);
      ArrayFree(raw);
     }
   else
      ArrayCopy(predictions,m_differenced,0,m_insize);

   return true;

  }
//+------------------------------------------------------------------+
//|                                                                  |
//+------------------------------------------------------------------+
bool CArima::SaveModel(const string model_name)
  {
   uint model_order[]= {m_const,m_ar_order,m_diff_order,m_ma_order};

   CFileBin file;
   ResetLastError();
   if(!file.Open("models\\"+model_name+".model",FILE_WRITE|FILE_COMMON))
     {
      Print("Failed to save model.Error: ",GetLastError());
      return false;
     }

   m_modelname=(m_modelname=="")?model_name:m_modelname;

   long written=0;

   written = file.WriteIntegerArray(model_order);

   if(!written)
     {
      Print("Failed write operation, ",__LINE__,".Error: ",GetLastError());
      return false;
     }

   if(m_ar_order)
     {
      written = file.WriteIntegerArray(m_arlags);

      if(!written)
        {
         Print("Failed write operation, ",__LINE__,".Error: ",GetLastError());
         return false;
        }
     }

   if(m_ma_order)
     {
      written = file.WriteIntegerArray(m_malags);

      if(!written)
        {
         Print("Failed write operation, ",__LINE__,".Error: ",GetLastError());
         return false;
        }
     }

   written = file.WriteDoubleArray(m_model);

   if(!written)
     {
      Print("Failed write operation, ",__LINE__,".Error: ",GetLastError());
      return false;
     }

   written = file.WriteDouble(m_sse);

   if(!written)
     {
      Print("Failed write operation, ",__LINE__,".Error: ",GetLastError());
      return false;
     }


   file.Close();

   return true;

  }
//+------------------------------------------------------------------+
//|                                                                  |
//+------------------------------------------------------------------+
bool CArima::LoadModel(const string model_name)
  {
   int found=StringFind(model_name,".model");

   if(found>=0)
      m_modelname=StringSubstr(model_name,0,found);
   else
      m_modelname=model_name;

   if(StringFind(m_modelname,"\\")>=0)
      return false;

   string filename="models\\"+m_modelname+".model";

   if(!FileIsExist(filename,FILE_COMMON))
     {
      Print("Failed to find model, ",__LINE__,".Error: ",GetLastError());
      return false;
     }

   CFileBin file;
   ResetLastError();
   if(file.Open(filename,FILE_READ|FILE_COMMON)<0)
     {
      Print("Failed open operation, ",__LINE__,".Error: ",GetLastError());
      return false;
     }

   uint model_order[];


   file.Seek(0,SEEK_SET);

   if(!file.ReadIntegerArray(model_order,0,4))
     {
      Print("Failed read operation, ",__LINE__,".Error: ",GetLastError());
      return false;
     }

   m_const=bool(model_order[0]);
   m_ar_order=model_order[1];
   m_diff_order=model_order[2];
   m_ma_order=model_order[3];

   file.Seek(sizeof(uint)*4,SEEK_SET);

   if(m_ar_order && !file.ReadIntegerArray(m_arlags,0,m_ar_order))
     {
      Print("Failed read operation, ",__LINE__,".Error: ",GetLastError());
      return false;
     }

   if(!m_ar_order)
      ArrayFree(m_arlags);



   if(m_ar_order)
      file.Seek(sizeof(uint)*(4+m_ar_order),SEEK_SET);


   if(m_ma_order && !file.ReadIntegerArray(m_malags,0,m_ma_order))
     {
      Print("Failed read operation, ",__LINE__,".Error: ",GetLastError());
      return false;
     }

   ArrayPrint(m_malags);

   if(!m_ma_order)
      ArrayFree(m_malags);


   if(m_ar_order || m_ma_order)
      file.Seek(sizeof(uint)*(4+m_ar_order+m_ma_order),SEEK_SET);



   if(!file.ReadDoubleArray(m_model,0,m_ma_order+m_ar_order+1))
     {
      Print("Failed read operation, ",__LINE__,".Error: ",GetLastError());
      return false;
     }

   file.Seek(sizeof(uint)*(4+m_ar_order+m_ma_order) + sizeof(double)*ArraySize(m_model),SEEK_SET);

   if(!file.ReadDouble(m_sse))
     {
      Print("Failed read operation, ",__LINE__,".Error: ",GetLastError());
      return false;
     }

   if(m_model[1])
      m_istrained=true;
   else
      m_istrained=false;

   ZeroMemory(m_differenced);
   ZeroMemory(m_leads);
   ZeroMemory(m_innovation);

   m_insize=0;

   return true;
  }
//+------------------------------------------------------------------+
//| calculate the bayesian information criterion                     |
//+------------------------------------------------------------------+
double CArima::BIC(void)
  {
   if(!m_istrained||!m_sse)
     {
      Print(m_modelname," Model not trained. Train the model first to calculate the BIC.");
      return 0;
     }

   if(!m_differenced.Size())
     {
      Print("To calculate the BIC, supply a training data set");
      return 0;
     }
   uint n = m_differenced.Size();
   uint k = m_ar_order+m_ma_order+m_diff_order+uint(m_const);

   return((n*MathLog(m_sse/n)) + (k*MathLog(n)));

  }
//+------------------------------------------------------------------+
//|                                                                  |
//+------------------------------------------------------------------+
double CArima::AIC(void)
  {
   if(!m_istrained||!m_sse)
     {
      Print(m_modelname," Model not trained. Train the model first to calculate the AIC.");
      return 0;
     }

   if(!m_differenced.Size())
     {
      Print("To calculate the AIC, supply a training data set");
      return 0;
     }

   uint n = m_differenced.Size();
   uint k = m_ar_order+m_ma_order+m_diff_order+uint(m_const);

   return((2.0*k)+(double(n)*MathLog(m_sse/double(n))));
  }
//+------------------------------------------------------------------+
