//+------------------------------------------------------------------+
//|                                                     SignalTD.mqh |
//|                   Copyright 2009-2017, MetaQuotes Software Corp. |
//|                                              http://www.mql5.com |
//+------------------------------------------------------------------+
#include <Expert\ExpertSignal.mqh>
#include <My\Cql.mqh>
#include <My\Cmlp+.mqh>
//+------------------------------------------------------------------+
// wizard description start
//+------------------------------------------------------------------+
//| Description of the class                                         |
//| Title=Signals based on Reinforcement-Learning Temporal Difference.|
//| Type=SignalAdvanced                                              |
//| Name=Reinforcement-Learning Temporal Difference                  |
//| ShortName=TD                                                     |
//| Class=CSignalTD                                                  |
//| Page=signal_temporal_difference                                  |
//| Parameter=QL_Scale,int,5, Matrix Row-to-Col Scale Ratio          |
//| Parameter=QL_Markov,bool,true, Use Markov                        |
//| Parameter=QL_Epsilon,double,0.05, Epsilon                        |
//| Parameter=QL_Policy,bool,true, Use Policy                        |
//+------------------------------------------------------------------+
// wizard description end
//+------------------------------------------------------------------+
//| Class CSignalTD.                                                 |
//| Purpose: Temporal Difference for Reinforcement-Learning.         |
//|            Derives from class CExpertSignal.                     |
//+------------------------------------------------------------------+
//+------------------------------------------------------------------+
//|                                                                  |
//+------------------------------------------------------------------+
class CSignalTD   : public CExpertSignal
{
protected:

   int                           m_actions;           // Number of possible actions
   int                           m_environments;      // Environments, per matrix axis
   int                           m_scale;             // Environments, row-to-col scale
   bool                          m_use_markov;        // Use Markov
   double                        m_epsilon;           // Epsilon
   bool                          m_policy;            // On Policy
   
public:
   void                          CSignalTD(void);
   void                          ~CSignalTD(void);

   //--- methods of setting adjustable parameters
   //--- (names match the "Parameter=" entries of the wizard description)
   void                          QL_Scale(int value)
   {  m_scale = value;
   }
   void                          QL_Markov(bool value)
   {  m_use_markov = value;
   }
   //--- epsilon is declared as a double parameter (default 0.05) in the wizard
   //--- description; it must be accepted as double, otherwise any non-zero
   //--- input is converted to true and stored as 1.0
   void                          QL_Epsilon(double value)
   {  m_epsilon = value;
   }
   void                          QL_Policy(bool value)
   {  m_policy = value;
   }

   //--- method of verification of settings
   virtual bool      ValidationSettings(void);
   //--- method of creating the indicator and timeseries
   virtual bool      InitIndicators(CIndicators *indicators);
   //--- methods of checking if the market models are formed
   virtual int       LongCondition(void);
   virtual int       ShortCondition(void);

protected:
   //--- runs one learn-and-infer step and returns the chosen action index
   int               GetOutput(Cql *QL, CNeuralNetwork &PN);
   //--- settings structure handed to the Cql constructor
   Sql               RL;
   //--- separate learners for the long and the short side (owned, deleted in destructor)
   Cql               *QL_BUY, *QL_SELL;
   //--- separate policy networks for the long and the short side
   CNeuralNetwork    POLICY_NETWORK_BUY,POLICY_NETWORK_SELL;
};
//+------------------------------------------------------------------+
//| Constructor                                                      |
//+------------------------------------------------------------------+
void CSignalTD::CSignalTD(void) :    m_actions(3),
   m_environments(3),
   m_scale(5),
   m_use_markov(true),
   m_epsilon(0.05),
   m_policy(true)

{
//--- every member is given a defined default above; in particular m_epsilon
//--- is read a few lines below and must not be left uninitialised
//--- (0.05 is the default listed for QL_Epsilon in the wizard description)
//--- initialization of protected data
   m_used_series = USE_SERIES_OPEN + USE_SERIES_HIGH + USE_SERIES_LOW + USE_SERIES_CLOSE + USE_SERIES_SPREAD + USE_SERIES_TIME;
//--- settings for the two learners
   RL.actions  = m_actions;//buy, sell, do nothing
   RL.environments = m_environments;//bullish, bearish, flat
   RL.use_markov = m_use_markov;
   RL.epsilon = m_epsilon;
//--- NOTE(review): RL is filled from the member defaults at construction time;
//--- values supplied later through QL_Markov()/QL_Epsilon() are stored in the
//--- members, but nothing visible in this file forwards them to RL or to the
//--- Cql objects created here - confirm whether that is intended
   QL_BUY = new Cql(RL);
   QL_SELL = new Cql(RL);
//--- policy network for the long side: 9 -> 3 (softmax)
   POLICY_NETWORK_BUY.AddDenseLayer(9, AF_SIGMOID, 3);
   POLICY_NETWORK_BUY.AddDenseLayer(3, AF_SOFTMAX);
   POLICY_NETWORK_BUY.Init(0.0004,LOSS_BCE);
//--- policy network for the short side, same layout
   POLICY_NETWORK_SELL.AddDenseLayer(9, AF_SIGMOID, 3);
   POLICY_NETWORK_SELL.AddDenseLayer(3, AF_SOFTMAX);
   POLICY_NETWORK_SELL.Init(0.0004,LOSS_BCE);
}
//+------------------------------------------------------------------+
//| Destructor                                                       |
//+------------------------------------------------------------------+
void CSignalTD::~CSignalTD(void)
{
//--- release the learners created with 'new' in the constructor;
//--- only delete pointers that actually refer to a dynamic object, so a
//--- failed allocation does not produce an invalid-pointer delete at shutdown
   if(CheckPointer(QL_BUY) == POINTER_DYNAMIC)
      delete QL_BUY;
   QL_BUY = NULL;
   if(CheckPointer(QL_SELL) == POINTER_DYNAMIC)
      delete QL_SELL;
   QL_SELL = NULL;
}
//+------------------------------------------------------------------+
//| Validation of the protected settings data.                       |
//+------------------------------------------------------------------+
bool CSignalTD::ValidationSettings(void)
{
//--- validation settings of additional filters
   if(!CExpertSignal::ValidationSettings())
      return(false);
//--- initial data checks
//--- GetOutput() copies m_scale+1 prices and indexes element [m_scale-1],
//--- so the scale has to be at least 1
   if(m_scale < 1)
   {  printf(__FUNCTION__ + ": QL_Scale must be greater than 0");
      return(false);
   }
//--- ok
   return(true);
}
//+------------------------------------------------------------------+
//| Create indicators.                                               |
//+------------------------------------------------------------------+
bool CSignalTD::InitIndicators(CIndicators *indicators)
{
//--- pessimistic default: stays false when the collection pointer is missing
   bool initialised = false;
//--- this signal adds no indicators of its own; it only delegates to the
//--- base class, and only when a collection was actually supplied
   if(indicators != NULL)
      initialised = CExpertSignal::InitIndicators(indicators);
//--- done
   return(initialised);
}
//+------------------------------------------------------------------+
//| "Voting" that price will grow.                                   |
//+------------------------------------------------------------------+
int CSignalTD::LongCondition(void)
{
//--- ask the buy-side learner / policy network for an action index
   const int chosen_action = GetOutput(QL_BUY, POLICY_NETWORK_BUY);
//--- action 0 yields a full-strength vote of 100, anything else yields no vote
   return((chosen_action == 0) ? 100 : 0);
}
//+------------------------------------------------------------------+
//| "Voting" that price will fall.                                   |
//+------------------------------------------------------------------+
int CSignalTD::ShortCondition(void)
{
//--- ask the sell-side learner / policy network for an action index
   const int chosen_action = GetOutput(QL_SELL, POLICY_NETWORK_SELL);
//--- action 2 yields a full-strength vote of 100, anything else yields no vote
   return((chosen_action == 2) ? 100 : 0);
}
//+------------------------------------------------------------------+
//| One learn-and-infer step for the given learner / policy network. |
//| INPUT:  QL - learner to update,                                  |
//|         PN - policy network to train and query.                  |
//| OUTPUT: action index: 0, 1 or 2. LongCondition() votes on 0 and  |
//|         ShortCondition() on 2; 1 is also the fallback returned   |
//|         when the inputs or price data are not usable.            |
//+------------------------------------------------------------------+
int CSignalTD::GetOutput(Cql *QL, CNeuralNetwork &PN)
{  int _td_act = 1;
//--- nothing can be computed without a learner, a symbol, or with a scale
//--- below 1 (element [m_scale-1] is indexed further down)
   if(CheckPointer(QL) == POINTER_INVALID || m_symbol == NULL || m_scale < 1)
      return(_td_act);
//---
   const string _symbol = m_symbol.Name();
   const ulong  _count  = (ulong)m_scale + 1;
   vector _in_row, _in_row_old, _in_col, _in_col_old;
//--- close prices: "row" pair is shifted by 1 bar, "col" pair by m_scale bars
   if
   (
      _in_row.Init(m_scale) &&
      _in_row.CopyRates(_symbol, m_period, COPY_RATES_CLOSE, 0, _count) &&
      _in_row.Size() == _count
      &&
      _in_row_old.Init(m_scale) &&
      _in_row_old.CopyRates(_symbol, m_period, COPY_RATES_CLOSE, 1, _count) &&
      _in_row_old.Size() == _count
      &&
      _in_col.Init(m_scale) &&
      _in_col.CopyRates(_symbol, m_period, COPY_RATES_CLOSE, 0, _count) &&
      _in_col.Size() == _count
      &&
      _in_col_old.Init(m_scale) &&
      _in_col_old.CopyRates(_symbol, m_period, COPY_RATES_CLOSE, m_scale, _count) &&
      _in_col_old.Size() == _count
   )
   {  //--- turn prices into price changes and keep the first m_scale of them
      _in_row -= _in_row_old;
      _in_col -= _in_col_old;
      _in_row.Resize(m_scale);
      _in_col.Resize(m_scale);
      //--- map the changes to environment states
      vector _in_e;
      _in_e.Init(m_scale);
      QL.Environment(_in_row, _in_col, _in_e);
      //--- _row/_col are passed to SetMarkov but not used afterwards in this method
      int _row = 0, _col = 0;
      QL.SetMarkov(int(_in_e[m_scale - 1]), _row, _col);
      //--- reward from the last retained change relative to the window extremes
      const double _last_change = _in_row[m_scale - 1];
      double _reward_max = _in_row.Max();
      double _reward_min = _in_row.Min();
      double _reward = QL.GetReward(_reward_max, _reward_min, _last_change);
      if(m_policy)
      {  QL.SetOnPolicy(_reward, _in_e);
      }
      else
      {  QL.SetOffPolicy(_reward, _in_e);
      }
      //--- train on row 1 of the policy history, labelled by the sign of the last change
      //--- NOTE(review): assumes policy_history has at least two rows - confirm in Cql
      PN.Forward(QL.policy_history.Row(1));
      vector _label;
      _label.Init(3);
      _label.Fill(0.0);
      if(_last_change > 0.0)
      {  _label[0] = 1.0;
      }
      else if(_last_change < 0.0)
      {  _label[2] = 1.0;
      }
      else
      {  _label[1] = 1.0;
      }
      PN.Backward(_label);
      //--- infer on row 0 and pick the arg-max; ties resolve to 0, then 2, else 1
      vector _td_output = PN.Forward(QL.policy_history.Row(0));
      if(_td_output[0] >= _td_output[1] && _td_output[0] >= _td_output[2])
      {  _td_act = 0;
      }
      else if(_td_output[2] >= _td_output[0] && _td_output[2] >= _td_output[1])
      {  _td_act = 2;
      }
   }
   return(_td_act);
}
//+------------------------------------------------------------------+
