//+------------------------------------------------------------------+
//|                                                   SignalDDPG.mqh |
//|                             Copyright 2000-2024, MetaQuotes Ltd. |
//|                                             https://www.mql5.com |
//+------------------------------------------------------------------+
#include <Expert\ExpertSignal.mqh>
#include <SRI\57_X.mqh>
// wizard description start
//+------------------------------------------------------------------+
//| Description of the class                                         |
//| Title=Signals of Reinforcement Learning with MA & Stochastic.    |
//| Type=SignalAdvanced                                              |
//| Name=DDPG                                                        |
//| ShortName=Reinforcement Learning with DDPG                       |
//| Class=CSignal_DDPG                                               |
//| Page=signal_ma_sto_ddpg                                          |
//| Parameter=Pattern_0,int,50,Pattern 0 [0...100]                   |
//| Parameter=Pattern_1,int,50,Pattern 1 [0...100]                   |
//| Parameter=Pattern_2,int,50,Pattern 2 [0...100]                   |
//| Parameter=Pattern_3,int,50,Pattern 3 [0...100]                   |
//| Parameter=Pattern_4,int,50,Pattern 4 [0...100]                   |
//| Parameter=Pattern_5,int,50,Pattern 5 [0...100]                   |
//| Parameter=Pattern_8,int,50,Pattern 8 [0...100]                   |
//| Parameter=PatternUsed,uchar,0,Pattern Used [0...8]               |
//| Parameter=Reinforce,bool,false,Use Reinforcement [false...true]  |
//| Parameter=PeriodUsed,int,8,Used Period [3...55]                  |
//+------------------------------------------------------------------+
// wizard description end
//+------------------------------------------------------------------+
//| Class CSignal_DDPG.                                              |
//| Purpose: Class of generator of trade signals based on            |
//|          Reinforcement Learning with MA & Stochastic.            |
//| Is derived from the CExpertSignal class.                         |
//+------------------------------------------------------------------+
//--- supervised-learning models, one per pattern (0..5 and 8);
//--- the constructor loads them into m_handles[]
#resource "Python/57_0.onnx" as uchar __57_0[]
#resource "Python/57_1.onnx" as uchar __57_1[]
#resource "Python/57_2.onnx" as uchar __57_2[]
#resource "Python/57_3.onnx" as uchar __57_3[]
#resource "Python/57_4.onnx" as uchar __57_4[]
#resource "Python/57_5.onnx" as uchar __57_5[]
#resource "Python/57_8.onnx" as uchar __57_8[]
//--- actor networks, one per pattern; the constructor loads them into m_handles_a[]
#resource "Python/58_a_0.onnx" as uchar __58_a_0[]
#resource "Python/58_a_1.onnx" as uchar __58_a_1[]
#resource "Python/58_a_2.onnx" as uchar __58_a_2[]
#resource "Python/58_a_3.onnx" as uchar __58_a_3[]
#resource "Python/58_a_4.onnx" as uchar __58_a_4[]
#resource "Python/58_a_5.onnx" as uchar __58_a_5[]
#resource "Python/58_a_8.onnx" as uchar __58_a_8[]
//--- critic networks, one per pattern; the constructor loads them into m_handles_c[]
#resource "Python/58_c_0.onnx" as uchar __58_c_0[]
#resource "Python/58_c_1.onnx" as uchar __58_c_1[]
#resource "Python/58_c_2.onnx" as uchar __58_c_2[]
#resource "Python/58_c_3.onnx" as uchar __58_c_3[]
#resource "Python/58_c_4.onnx" as uchar __58_c_4[]
#resource "Python/58_c_5.onnx" as uchar __58_c_5[]
#resource "Python/58_c_8.onnx" as uchar __58_c_8[]
//--- size of the last input dimension of each supervised model, in m_handles[] slot order
//--- (slots 0..5 hold patterns 0..5, slot 6 holds pattern 8); used by ValidationSettings()
int __IN_SHAPES[7] = {  4, 4, 4, 6, 4, 4, 4 };
class CSignal_DDPG : public CExpertSignal
{
protected:
   //--- indicator objects created in InitIndicator()
   CiMA              m_ma;             // moving average over m_periods bars
   CiMA              m_ma_lag;         // moving average over 2 * m_periods bars
   CiStochastic      m_sto;            // stochastic oscillator over m_periods bars

   //--- ONNX session handles; slots 0..5 serve patterns 0..5, slot 6 serves pattern 8
   long              m_handles[7];     // supervised-learning models
   long              m_handles_a[7];   // actor networks
   long              m_handles_c[7];   // critic networks

   //--- "weights" of market models (0-100)
   int               m_pattern_0;      // model 0
   int               m_pattern_1;      // model 1
   int               m_pattern_2;      // model 2
   int               m_pattern_3;      // model 3
   int               m_pattern_4;      // model 4
   int               m_pattern_5;      // model 5
   int               m_pattern_8;      // model 8

   //--- adjusted parameters
   int               m_periods;        // indicator period
   uchar             m_pattern_used;   // number of the single pattern that votes
   bool              m_reinforce;      // pass the supervised output through actor/critic
   //--- m_patterns_usage is not redeclared here; the constructor reads it without a local declaration

public:
                     CSignal_DDPG(void);
                    ~CSignal_DDPG(void);

   //--- setters for the "weights" of market models
   void              Pattern_0(int value)     { m_pattern_0 = value; }
   void              Pattern_1(int value)     { m_pattern_1 = value; }
   void              Pattern_2(int value)     { m_pattern_2 = value; }
   void              Pattern_3(int value)     { m_pattern_3 = value; }
   void              Pattern_4(int value)     { m_pattern_4 = value; }
   void              Pattern_5(int value)     { m_pattern_5 = value; }
   void              Pattern_8(int value)     { m_pattern_8 = value; }

   //--- setters for the remaining adjustable parameters
   void              PatternUsed(uchar value) { m_pattern_used = value; }
   void              Reinforce(bool value)    { m_reinforce = value; }
   void              PeriodUsed(int value)    { m_periods = value; }

   //--- verification of settings
   virtual bool      ValidationSettings(void);
   //--- creation of the indicators and timeseries
   virtual bool      InitIndicators(CIndicators *indicators);
   //--- votes for opening long / short positions
   virtual int       LongCondition(void);
   virtual int       ShortCondition(void);

protected:
   //--- creates and registers m_ma, m_ma_lag and m_sto
   bool              InitIndicator(CIndicators *indicators);

   //--- data accessors: each refreshes its source, then reads the value at index ind
   double            MA(int ind)
   {
      m_ma.Refresh(-1);
      return(m_ma.Main(ind));
   }
   double            Close(int ind)
   {
      m_close.Refresh(-1);
      return(m_close.GetData(ind));
   }
   //--- NOTE(review): High()/Low() read m_high/m_low, while the constructor requests only
   //--- the close and time series - confirm those series exist before calling these.
   double            High(int ind)
   {
      m_high.Refresh(-1);
      return(m_high.GetData(ind));
   }
   double            Low(int ind)
   {
      m_low.Refresh(-1);
      return(m_low.GetData(ind));
   }
   //--- index of the bar the signal is evaluated on
   int               X()
   {
      return(StartIndex());
   }

   //--- model forward passes
   double            Supervise(int Index, ENUM_POSITION_TYPE T);
   double            Reinforce(int Index, ENUM_POSITION_TYPE T, double State);
};
//+------------------------------------------------------------------+
//| Constructor                                                      |
//+------------------------------------------------------------------+
CSignal_DDPG::CSignal_DDPG(void) : m_pattern_0(50),
   m_pattern_1(50),
   m_pattern_2(50),
   m_pattern_3(50),
   m_pattern_4(50),
   m_pattern_5(50),
   m_pattern_8(50),
   m_periods(8),
   m_pattern_used(0),
   m_reinforce(false)
{
//--- Every adjustable parameter gets a defined default (the same values as the
//--- wizard description block), so the object is usable even if a setter is never called.
//--- initialization of protected data
   m_used_series = USE_SERIES_CLOSE + USE_SERIES_TIME;
   PatternsUsage(m_patterns_usage);
//--- create the supervised-learning models from the embedded resources
//--- (slots 0..5 hold patterns 0..5, slot 6 holds pattern 8)
   m_handles[0] = OnnxCreateFromBuffer(__57_0, ONNX_DEFAULT);
   m_handles[1] = OnnxCreateFromBuffer(__57_1, ONNX_DEFAULT);
   m_handles[2] = OnnxCreateFromBuffer(__57_2, ONNX_DEFAULT);
   m_handles[3] = OnnxCreateFromBuffer(__57_3, ONNX_DEFAULT);
   m_handles[4] = OnnxCreateFromBuffer(__57_4, ONNX_DEFAULT);
   m_handles[5] = OnnxCreateFromBuffer(__57_5, ONNX_DEFAULT);
   m_handles[6] = OnnxCreateFromBuffer(__57_8, ONNX_DEFAULT);
//--- create the actor networks, same slot layout
   m_handles_a[0] = OnnxCreateFromBuffer(__58_a_0, ONNX_DEFAULT);
   m_handles_a[1] = OnnxCreateFromBuffer(__58_a_1, ONNX_DEFAULT);
   m_handles_a[2] = OnnxCreateFromBuffer(__58_a_2, ONNX_DEFAULT);
   m_handles_a[3] = OnnxCreateFromBuffer(__58_a_3, ONNX_DEFAULT);
   m_handles_a[4] = OnnxCreateFromBuffer(__58_a_4, ONNX_DEFAULT);
   m_handles_a[5] = OnnxCreateFromBuffer(__58_a_5, ONNX_DEFAULT);
   m_handles_a[6] = OnnxCreateFromBuffer(__58_a_8, ONNX_DEFAULT);
//--- create the critic networks, same slot layout
   m_handles_c[0] = OnnxCreateFromBuffer(__58_c_0, ONNX_DEFAULT);
   m_handles_c[1] = OnnxCreateFromBuffer(__58_c_1, ONNX_DEFAULT);
   m_handles_c[2] = OnnxCreateFromBuffer(__58_c_2, ONNX_DEFAULT);
   m_handles_c[3] = OnnxCreateFromBuffer(__58_c_3, ONNX_DEFAULT);
   m_handles_c[4] = OnnxCreateFromBuffer(__58_c_4, ONNX_DEFAULT);
   m_handles_c[5] = OnnxCreateFromBuffer(__58_c_5, ONNX_DEFAULT);
   m_handles_c[6] = OnnxCreateFromBuffer(__58_c_8, ONNX_DEFAULT);
}
//+------------------------------------------------------------------+
//| Destructor                                                       |
//+------------------------------------------------------------------+
CSignal_DDPG::~CSignal_DDPG(void)
{
//--- release every ONNX session opened by the constructor so the models
//--- do not stay allocated after the signal object is destroyed
   for(int i = 0; i < ArraySize(m_handles); i++)
   {  if(m_handles[i] != INVALID_HANDLE)
      {  OnnxRelease(m_handles[i]);
         m_handles[i] = INVALID_HANDLE;
      }
   }
   for(int i = 0; i < ArraySize(m_handles_a); i++)
   {  if(m_handles_a[i] != INVALID_HANDLE)
      {  OnnxRelease(m_handles_a[i]);
         m_handles_a[i] = INVALID_HANDLE;
      }
   }
   for(int i = 0; i < ArraySize(m_handles_c); i++)
   {  if(m_handles_c[i] != INVALID_HANDLE)
      {  OnnxRelease(m_handles_c[i]);
         m_handles_c[i] = INVALID_HANDLE;
      }
   }
}
//+------------------------------------------------------------------+
//| Validation settings protected data.                              |
//+------------------------------------------------------------------+
bool CSignal_DDPG::ValidationSettings(void)
{
//--- validation settings of additional filters
   if(!CExpertSignal::ValidationSettings())
      return(false);
//--- the period feeds all three indicators, so it must be positive
   if(m_periods <= 0)
   {  printf(__FUNCTION__ + ": used period must be greater than 0");
      return(false);
   }
//--- only patterns 0..5 and 8 have models; any other value makes both conditions vote 0
   if(m_pattern_used > 5 && m_pattern_used != 8)
   {  Print(__FUNCTION__, ": pattern ", int(m_pattern_used), " has no model, no signals will be generated");
   }
//--- configure the input/output shapes of every model
   const long _out_shape[] = {1, 1, 1};
   for(int i = 0; i < 7; i++)
   {  // A failed OnnxCreateFromBuffer leaves INVALID_HANDLE; report it explicitly
      // instead of letting the shape calls fail with a less specific error.
      if(m_handles[i] == INVALID_HANDLE || m_handles_a[i] == INVALID_HANDLE || m_handles_c[i] == INVALID_HANDLE)
      {  Print(__FUNCTION__, ": ONNX model in slot ", i, " was not created");
         return(false);
      }
      // Set input shapes
      const long _in_shape[] = {1, 1, __IN_SHAPES[i]};
      if(!OnnxSetInputShape(m_handles[i], ONNX_DEFAULT, _in_shape))
      {  Print("OnnxSetInputShape error ", GetLastError());
         return(false);
      }
      // Set output shapes
      if(!OnnxSetOutputShape(m_handles[i], 0, _out_shape))
      {  Print("OnnxSetOutputShape error ", GetLastError());
         return(false);
      }
      // Set input actor shapes
      const long _in_shape_a[] = {1, 1, 1};
      if(!OnnxSetInputShape(m_handles_a[i], ONNX_DEFAULT, _in_shape_a))
      {  Print("OnnxSetInputShape error ", GetLastError());
         return(false);
      }
      // Set output actor shapes
      if(!OnnxSetOutputShape(m_handles_a[i], 0, _out_shape))
      {  Print("OnnxSetOutputShape error ", GetLastError());
         return(false);
      }
      // Set input critic shapes
      // NOTE(review): Reinforce() runs the critic with two inputs (state, action) but only
      // one input shape is set here - confirm the second input has a fixed shape in the model.
      const long _in_shape_c[] = {1, 1};
      if(!OnnxSetInputShape(m_handles_c[i], ONNX_DEFAULT, _in_shape_c))
      {  Print("OnnxSetInputShape error ", GetLastError());
         return(false);
      }
      // Set output critic shapes
      const long _out_shape_c[] = {1, 1};
      if(!OnnxSetOutputShape(m_handles_c[i], 0, _out_shape_c))
      {  Print("OnnxSetOutputShape error ", GetLastError());
         return(false);
      }
   }
//--- ok
   return(true);
}
//+------------------------------------------------------------------+
//| Create indicators.                                               |
//+------------------------------------------------------------------+
bool CSignal_DDPG::InitIndicators(CIndicators *indicators)
{
//--- nothing can be registered without a collection
   if(indicators == NULL)
      return(false);
//--- base-class indicators and timeseries first, then this signal's own indicators;
//--- the second step is skipped when the first one fails
   return(CExpertSignal::InitIndicators(indicators) && InitIndicator(indicators));
}
//+------------------------------------------------------------------+
//| Initialize MA indicators.                                        |
//+------------------------------------------------------------------+
bool CSignal_DDPG::InitIndicator(CIndicators *indicators)
{
//--- nothing can be registered without a collection
   if(indicators == NULL)
      return(false);
//--- values shared by the three indicators
   const string symbol_name = m_symbol.Name();
   const int    lag_periods = 2 * m_periods;
//--- moving average over m_periods: register, then create
   if(!indicators.Add(GetPointer(m_ma)))
   {
      printf(__FUNCTION__ + ": error adding object");
      return(false);
   }
   if(!m_ma.Create(symbol_name, m_period, m_periods, 0, MODE_SMA, PRICE_CLOSE))
   {
      printf(__FUNCTION__ + ": error initializing object");
      return(false);
   }
//--- moving average over twice that period: register, then create
   if(!indicators.Add(GetPointer(m_ma_lag)))
   {
      printf(__FUNCTION__ + ": error adding object");
      return(false);
   }
   if(!m_ma_lag.Create(symbol_name, m_period, lag_periods, 0, MODE_SMA, PRICE_CLOSE))
   {
      printf(__FUNCTION__ + ": error initializing object");
      return(false);
   }
//--- stochastic oscillator: register, then create
   if(!indicators.Add(GetPointer(m_sto)))
   {
      printf(__FUNCTION__ + ": error adding object");
      return(false);
   }
   if(!m_sto.Create(symbol_name, m_period, m_periods, 3, 3, MODE_EMA, STO_CLOSECLOSE))
   {
      printf(__FUNCTION__ + ": error initializing object");
      return(false);
   }
//--- ok
   return(true);
}
//+------------------------------------------------------------------+
//| "Voting" that price will grow.                                   |
//+------------------------------------------------------------------+
int CSignal_DDPG::LongCondition(void)
{
//--- Only the pattern selected by m_pattern_used votes; pick its weight.
//--- Patterns without a model (6, 7, anything above 8) produce no vote.
   int weight = 0;
   switch(m_pattern_used)
   {
      case 0:  weight = m_pattern_0;  break;
      case 1:  weight = m_pattern_1;  break;
      case 2:  weight = m_pattern_2;  break;
      case 3:  weight = m_pattern_3;  break;
      case 4:  weight = m_pattern_4;  break;
      case 5:  weight = m_pattern_5;  break;
      case 8:  weight = m_pattern_8;  break;
      default: return(0);
   }
   const int index = int(m_pattern_used);
//--- supervised forecast for a long position
   double signal = Supervise(index, POSITION_TYPE_BUY);
//--- optionally pass it through the actor/critic pair of the SAME pattern
//--- (pattern 8 previously used the networks of pattern 0)
   if(m_reinforce)
   {  signal = Reinforce(index, POSITION_TYPE_BUY, signal);
   }
//--- return the weighted vote
   return(int(round(weight * signal)));
}
//+------------------------------------------------------------------+
//| "Voting" that price will fall.                                   |
//+------------------------------------------------------------------+
int CSignal_DDPG::ShortCondition(void)
{
//--- Only the pattern selected by m_pattern_used votes; pick its weight.
//--- Patterns without a model (6, 7, anything above 8) produce no vote.
   int weight = 0;
   switch(m_pattern_used)
   {
      case 0:  weight = m_pattern_0;  break;
      case 1:  weight = m_pattern_1;  break;
      case 2:  weight = m_pattern_2;  break;
      case 3:  weight = m_pattern_3;  break;
      case 4:  weight = m_pattern_4;  break;
      case 5:  weight = m_pattern_5;  break;
      case 8:  weight = m_pattern_8;  break;
      default: return(0);
   }
   const int index = int(m_pattern_used);
//--- supervised forecast for a short position
   double signal = Supervise(index, POSITION_TYPE_SELL);
//--- optionally pass it through the actor/critic pair of the SAME pattern
//--- (pattern 8 previously used the networks of pattern 0)
   if(m_reinforce)
   {  signal = Reinforce(index, POSITION_TYPE_SELL, signal);
   }
//--- return the weighted vote
   return(int(round(weight * signal)));
}
//+------------------------------------------------------------------+
//| Supervised Learning Model Forward Pass.                          |
//+------------------------------------------------------------------+
double CSignal_DDPG::Supervise(int Index, ENUM_POSITION_TYPE T)
{
//--- Runs the supervised model of pattern Index and converts its forecast into
//--- a vote strength for position type T.
//--- Index : pattern number (0..5 or 8)
//--- T     : POSITION_TYPE_BUY or POSITION_TYPE_SELL
//--- Returns 2*(y-0.5) for a buy when y > 0.5, 2*(0.5-y) for a sell when y < 0.5,
//--- and 0.0 when the forecast does not support T, the index is unsupported or
//--- inference fails.
//--- map the pattern number onto its slot in m_handles[]: 0..5 directly, 8 -> slot 6
   int _i = Index;
   if(_i == 8)
   {  _i -= 2;
   }
   else if(_i < 0 || _i > 5)
   {  printf(__FUNCSIG__ + " unsupported pattern index: %i", Index);
      return(0.0);
   }
//--- build the model input for the current bar; Get() is defined outside this file
   vectorf _x = Get(Index, m_time.GetData(X()), m_close, m_ma, m_ma_lag, m_sto);
   vectorf _y(1);
   _y.Fill(0.0);
   ResetLastError();
   if(!OnnxRun(m_handles[_i], ONNX_NO_CONVERSION, _x, _y))
   {  printf(__FUNCSIG__ + " failed to get y forecast, err: %i", GetLastError());
      return(0.0);
   }
//--- normalize the forecast into a vote for the requested direction
   if(T == POSITION_TYPE_BUY && _y[0] > 0.5f)
   {  _y[0] = 2.0f * (_y[0] - 0.5f);
   }
   else if(T == POSITION_TYPE_SELL && _y[0] < 0.5f)
   {  _y[0] = 2.0f * (0.5f - _y[0]);
   }
   else
   {  // The forecast points the other way (or is exactly 0.5): no vote. Without this
      // branch the raw forecast leaked through as a positive vote, unlike Reinforce().
      _y[0] = 0.0f;
   }
   return(double(_y[0]));
}
//+------------------------------------------------------------------+
//| Reinforcement Learning Model Forward Pass.                       |
//+------------------------------------------------------------------+
double CSignal_DDPG::Reinforce(int Index, ENUM_POSITION_TYPE T, double State)
{
//--- Runs the actor and critic networks of pattern Index on State.
//--- Index : pattern number (0..5 or 8)
//--- T     : POSITION_TYPE_BUY or POSITION_TYPE_SELL
//--- State : value fed to the actor and, together with the actor's action, to the critic
//--- Returns the direction-normalized action multiplied by the critic output, or 0.0
//--- when the index is unsupported or either inference fails.
//--- map the pattern number onto its slot in the handle arrays: 0..5 directly, 8 -> slot 6
   int _i = Index;
   if(_i == 8)
   {  _i -= 2;
   }
   else if(_i < 0 || _i > 5)
   {  printf(__FUNCSIG__ + " unsupported pattern index: %i", Index);
      return(0.0);
   }
//--- actor: state -> action
   vectorf _x(1);
   _x.Fill(float(State));
   vectorf _y(1);
   _y.Fill(0.0);
   ResetLastError();
   if(!OnnxRun(m_handles_a[_i], ONNX_NO_CONVERSION, _x, _y))
   {  printf(__FUNCSIG__ + " failed to get y action forecast, err: %i", GetLastError());
      // Without a valid action _y stays 0, which the sell branch below would
      // normalize to a full-strength 1.0 - so stop here instead.
      return(0.0);
   }
//--- critic: (state, action) -> reward forecast
   vectorf _y_state(1);
   _y_state.Fill(float(State));
   vectorf _y_action(1);
   _y_action.Fill(0.0);
   _y_action[0] = _y[0];
   vectorf _z(1);
   _z.Fill(0.0);
   ResetLastError();
   if(!OnnxRun(m_handles_c[_i], ONNX_NO_CONVERSION, _y_state, _y_action, _z))
   {  printf(__FUNCSIG__ + " failed to get z reward forecast, err: %i", GetLastError());
      return(0.0);
   }
//--- normalize action output to be 0.0-1.0 range
   if(T == POSITION_TYPE_BUY && _y[0] > 0.5f)
   {  _y[0] = 2.0f * (_y[0] - 0.5f);
   }
   else if(T == POSITION_TYPE_SELL && _y[0] < 0.5f)
   {  _y[0] = 2.0f * (0.5f - _y[0]);
   }
   else
   {  _y[0] = 0.0f;
   }
   return(double(_y[0]*_z[0]));
}
//+------------------------------------------------------------------+
