//+------------------------------------------------------------------+
//|                                      CMicrostructureFeatures.mqh |
//|                                               Patrick M. Njoroge |
//|                  https://www.mql5.com/en/users/patricknjoroge743 |
//+------------------------------------------------------------------+
//|  Bar-level market microstructure feature kernels.                |
//|                                                                  |
//|  Implements:                                                     |
//|    Roll (1984)            RollMeasure, RollImpact                |
//|    Corwin-Schultz (2012)  CSSpread, CSSigma                      |
//|    Kyle (1985)            KyleLambda                             |
//|    Amihud (2002)          AmihudLambda                           |
//|    Hasbrouck (2009)       HasbrouckLambda                        |
//|                                                                  |
//|  All features require only OHLCV bar data; no tick feed needed.  |
//|  Results are double arrays indexed by bar offset,                |
//|  with the most recent bar at index 0 (time-series order).        |
//|                                                                  |
//|  Usage:                                                          |
//|    CMicrostructureFeatures f(_Symbol, PERIOD_H1, 20);            |
//|    if(f.Calculate(1, 100))                                       |
//|      {                                                           |
//|       double roll = f.RollMeasure(0);   // most recent bar       |
//|       double cs   = f.CSSpread(0);                               |
//|       double kyle = f.KyleLambda(0);                             |
//|      }                                                           |
//|                                                                  |
//|  Place this file in:                                             |
//|    MQL5/Include/Features/CMicrostructureFeatures.mqh             |
//|  Include with angle brackets from any EA or script:              |
//|    #include <Features\CMicrostructureFeatures.mqh>               |
//+------------------------------------------------------------------+

#ifndef CMICROSTRUCTURE_FEATURES_MQH
#define CMICROSTRUCTURE_FEATURES_MQH

//+------------------------------------------------------------------+
//|  Constants                                                       |
//+------------------------------------------------------------------+
#define MICRO_EMPTY -1.0e38   // sentinel for NaN / insufficient data

//+------------------------------------------------------------------+
//|  CMicrostructureFeatures                                         |
//|  Rolling bar-level liquidity / spread estimators for one symbol  |
//|  and timeframe. Call Calculate() first, then read the buffers    |
//|  through the accessors; empty slots hold MICRO_EMPTY.            |
//+------------------------------------------------------------------+
class CMicrostructureFeatures
  {
private:
   //--- configuration and state
   string            m_symbol;       // symbol passed to the Copy* calls
   ENUM_TIMEFRAMES   m_tf;           // timeframe passed to the Copy* calls
   int               m_window;       // rolling window length, in bars
   int               m_n;            // number of bars last calculated

   //--- raw OHLCV input (filled by _CopyBars; index 0 = newest bar)
   double            m_open[];
   double            m_high[];
   double            m_low[];
   double            m_close[];
   long              m_volume[];

   //--- feature output (same indexing as the raw input)
   double            m_roll[];
   double            m_roll_impact[];
   double            m_cs_spread[];
   double            m_cs_sigma[];
   double            m_kyle[];
   double            m_amihud[];
   double            m_hasbrouck[];

   //--- data loading
   bool              _CopyBars(int start_bar, int n_bars);

   //--- per-family estimators, run in this order by Calculate()
   void              _ComputeRoll();
   void              _ComputeCS();
   void              _ComputeLambdas();

   //--- shared helpers
   double            _TickRule(int i);
   bool              _OLS(const double &x[], const double &y[],
                          int start, int len,
                          double &beta, double &t_stat);

public:
                     CMicrostructureFeatures(string   symbol,
                           ENUM_TIMEFRAMES tf,
                           int      window = 20);
                    ~CMicrostructureFeatures();

   //--- loads n_bars bars starting at start_bar and fills every buffer
   bool              Calculate(int start_bar, int n_bars);

   //--- number of bars covered by the last successful Calculate()
   int               Count() const { return(m_n); }

   //--- feature readers (bar_offset 0 = newest bar that was loaded)
   double            RollMeasure(int bar_offset)     const;
   double            RollImpact(int bar_offset)      const;
   double            CSSpread(int bar_offset)        const;
   double            CSSigma(int bar_offset)         const;
   double            KyleLambda(int bar_offset)      const;
   double            AmihudLambda(int bar_offset)    const;
   double            HasbrouckLambda(int bar_offset) const;
  };

//+------------------------------------------------------------------+
//|  Constructor                                                     |
//|  Stores the symbol/timeframe and the rolling window length.      |
//|  A window shorter than 5 bars is replaced by the default of 20.  |
//+------------------------------------------------------------------+
CMicrostructureFeatures::CMicrostructureFeatures(
   string          symbol,
   ENUM_TIMEFRAMES tf,
   int             window = 20
)
  {
//--- nothing has been calculated yet
   m_n      = 0;

   m_tf     = tf;
   m_symbol = symbol;

//--- reject windows that are too short to be useful
   if(window >= 5)
      m_window = window;
   else
      m_window = 20;
  }

//+------------------------------------------------------------------+
//|  Destructor                                                      |
//|  Releases every dynamic buffer owned by the object.              |
//+------------------------------------------------------------------+
CMicrostructureFeatures::~CMicrostructureFeatures()
  {
//--- raw bar data
   ArrayFree(m_volume);
   ArrayFree(m_close);
   ArrayFree(m_low);
   ArrayFree(m_high);
   ArrayFree(m_open);

//--- derived feature buffers
   ArrayFree(m_hasbrouck);
   ArrayFree(m_amihud);
   ArrayFree(m_kyle);
   ArrayFree(m_cs_sigma);
   ArrayFree(m_cs_spread);
   ArrayFree(m_roll_impact);
   ArrayFree(m_roll);
  }

//+------------------------------------------------------------------+
//|  Calculate: copy bars and compute all features                   |
//|  start_bar : start position handed to the Copy* series calls     |
//|  n_bars    : bars to load; must be at least window + 2           |
//|  Returns true on success. If a result buffer cannot be sized,    |
//|  Count() is reset to 0 so accessors return MICRO_EMPTY.          |
//+------------------------------------------------------------------+
bool CMicrostructureFeatures::Calculate(int start_bar, int n_bars)
  {
   if(n_bars < m_window + 2)
     {
      Print("CMicrostructureFeatures::Calculate: n_bars too small");
      return(false);
     }

   if(!_CopyBars(start_bar, n_bars))
      return(false);

//--- from here on the result buffers are being rebuilt: keep the
//--- published bar count at zero until every buffer has its new size,
//--- otherwise a partial failure would leave m_n larger than some arrays
   m_n = 0;

   if(ArrayResize(m_roll,        n_bars) < 0 ||
      ArrayResize(m_roll_impact, n_bars) < 0 ||
      ArrayResize(m_cs_spread,   n_bars) < 0 ||
      ArrayResize(m_cs_sigma,    n_bars) < 0 ||
      ArrayResize(m_kyle,        n_bars) < 0 ||
      ArrayResize(m_amihud,      n_bars) < 0 ||
      ArrayResize(m_hasbrouck,   n_bars) < 0)
      return(false);

//--- all buffers are consistent with n_bars; the estimators read m_n
   m_n = n_bars;

//--- every slot starts as "no value"; estimators overwrite what they can
   ArrayFill(m_roll,        0, n_bars, MICRO_EMPTY);
   ArrayFill(m_roll_impact, 0, n_bars, MICRO_EMPTY);
   ArrayFill(m_cs_spread,   0, n_bars, MICRO_EMPTY);
   ArrayFill(m_cs_sigma,    0, n_bars, MICRO_EMPTY);
   ArrayFill(m_kyle,        0, n_bars, MICRO_EMPTY);
   ArrayFill(m_amihud,      0, n_bars, MICRO_EMPTY);
   ArrayFill(m_hasbrouck,   0, n_bars, MICRO_EMPTY);

   _ComputeRoll();
   _ComputeCS();
   _ComputeLambdas();

   return(true);
  }

//+------------------------------------------------------------------+
//|  _CopyBars: fill raw OHLCV arrays in time-series order           |
//|  (index 0 = most recent bar, increasing index = older bars)      |
//|  Returns false as soon as any price series comes back short, or  |
//|  when neither tick volume nor real volume can be copied.         |
//+------------------------------------------------------------------+
bool CMicrostructureFeatures::_CopyBars(int start_bar, int n_bars)
  {
//--- all raw buffers share the same series indexing
   ArraySetAsSeries(m_open,   true);
   ArraySetAsSeries(m_high,   true);
   ArraySetAsSeries(m_low,    true);
   ArraySetAsSeries(m_close,  true);
   ArraySetAsSeries(m_volume, true);

//--- price series, requested in O-H-L-C order; evaluation stops at the
//--- first request that delivers fewer than n_bars values
   bool prices_ok = CopyOpen(m_symbol, m_tf, start_bar, n_bars, m_open)   >= n_bars
                    && CopyHigh(m_symbol, m_tf, start_bar, n_bars, m_high)  >= n_bars
                    && CopyLow(m_symbol, m_tf, start_bar, n_bars, m_low)    >= n_bars
                    && CopyClose(m_symbol, m_tf, start_bar, n_bars, m_close) >= n_bars;
   if(!prices_ok)
      return(false);

//--- volume: tick volume first
   if(CopyTickVolume(m_symbol, m_tf, start_bar, n_bars, m_volume) >= n_bars)
      return(true);

//--- tick volume came back short: try real volume instead
   Print("CMicrostructureFeatures: CopyTickVolume failed; using real volume");
   if(CopyRealVolume(m_symbol, m_tf, start_bar, n_bars, m_volume) >= n_bars)
      return(true);

   Print("CMicrostructureFeatures: CopyRealVolume also failed");
   return(false);
  }

//+------------------------------------------------------------------+
//|  _TickRule: sign of the close-to-close move at bar i             |
//|  Series indexing: i is the newer bar, i+1 the bar before it.     |
//|  Result: +1 (close rose), -1 (close fell), 0 (unchanged, or no   |
//|  older bar inside the loaded range). Ties are not carried        |
//|  forward here; a caller that wants that must do it itself.       |
//+------------------------------------------------------------------+
double CMicrostructureFeatures::_TickRule(int i)
  {
   int older = i + 1;

//--- the oldest loaded bar has nothing to be compared with
   if(older >= m_n)
      return(0.0);

   double close_new = m_close[i];
   double close_old = m_close[older];

   double direction = 0.0;
   if(close_new > close_old)
      direction = 1.0;
   else
      if(close_new < close_old)
         direction = -1.0;

   return(direction);
  }

//+------------------------------------------------------------------+
//|  _ComputeRoll                                                    |
//|  Roll (1984) spread: 2 * sqrt(-Cov(dp_t, dp_{t-1})) per window,  |
//|  with dp[i] = close[i] - close[i+1] (newer minus older).         |
//|  A non-negative covariance leaves the slot at MICRO_EMPTY.       |
//|  RollImpact = spread / (close * volume) of the window's newest   |
//|  bar, written only when all three quantities are positive.       |
//+------------------------------------------------------------------+
void CMicrostructureFeatures::_ComputeRoll()
  {
   const int window    = m_window;
   const int last_slot = m_n - window - 1;   // last offset with a full window behind it

   for(int bar = 0; bar <= last_slot; bar++)
     {
      //--- running sums over (change, preceding change) pairs
      double s_now  = 0.0;
      double s_prev = 0.0;
      double s_prod = 0.0;
      int    pairs  = 0;

      for(int newer = bar; newer < bar + window - 1; newer++)
        {
         const int older = newer + 1;
         //--- the lagged change needs the bar after 'older' as well
         if(older + 1 >= m_n)
            continue;

         const double chg_now  = m_close[newer] - m_close[older];
         const double chg_prev = m_close[older] - m_close[older + 1];

         s_now  += chg_now;
         s_prev += chg_prev;
         s_prod += chg_now * chg_prev;
         pairs++;
        }

      if(pairs < 3)
         continue;

      //--- sample covariance from the three sums
      const double cov = (s_prod - s_now * s_prev / pairs) / (pairs - 1);

      //--- the estimator is only defined for negative serial covariance
      if(!(cov < 0.0))
        {
         m_roll[bar] = MICRO_EMPTY;
         continue;
        }

      const double spread = 2.0 * MathSqrt(-cov);
      m_roll[bar] = spread;

      //--- roll impact: spread per unit of close * volume
      if(spread > 0.0 && m_close[bar] > 0.0 && m_volume[bar] > 0)
         m_roll_impact[bar] = spread / (m_close[bar] * (double)m_volume[bar]);
     }
  }

//+------------------------------------------------------------------+
//|  _ComputeCS                                                      |
//|  Corwin-Schultz (2012) two-bar spread and high-low volatility.   |
//|  Index convention: bar i is paired with the older bar i+1.       |
//|                                                                  |
//|  beta  = (ln H_t/L_t)^2 + (ln H_{t+1}/L_{t+1})^2                 |
//|  gamma = (ln max(H,H+1) / min(L,L+1))^2                          |
//|  alpha = (sqrt(2*beta) - sqrt(beta)) / denom - sqrt(gamma/denom) |
//|  spread = 2*(exp(alpha)-1) / (1+exp(alpha))                      |
//|  sigma  = sqrt( beta / (2 * 4*ln2) )   [Parkinson-Beckers]       |
//|                                                                  |
//|  denom = 3 - 2*sqrt(2), k1 = 4*ln(2); both computed once.        |
//|  Negative alpha leaves the spread slot at MICRO_EMPTY.           |
//+------------------------------------------------------------------+
void CMicrostructureFeatures::_ComputeCS()
  {
   const double cs_denom  = 3.0 - 2.0 * MathSqrt(2.0);
   const double park_norm = 4.0 * MathLog(2.0);

   for(int bar = 0; bar + 1 < m_n; bar++)
     {
      const double high_new = m_high[bar];
      const double low_new  = m_low[bar];
      const double high_old = m_high[bar + 1];
      const double low_old  = m_low[bar + 1];

      //--- logarithms below need strictly positive prices
      if(low_new <= 0.0 || low_old <= 0.0 || high_new <= 0.0 || high_old <= 0.0)
         continue;

      //--- single-bar log ranges
      const double range_new = MathLog(high_new / low_new);
      const double range_old = MathLog(high_old / low_old);
      const double beta      = range_new * range_new + range_old * range_old;

      //--- log range of the combined two-bar interval
      const double range_two = MathLog(MathMax(high_new, high_old) / MathMin(low_new, low_old));
      const double gamma     = range_two * range_two;

      //--- volatility estimate does not depend on alpha
      m_cs_sigma[bar] = MathSqrt(beta / (2.0 * park_norm));

      const double alpha = (MathSqrt(2.0 * beta) - MathSqrt(beta)) / cs_denom
                           - MathSqrt(gamma / cs_denom);

      if(alpha < 0.0)
         m_cs_spread[bar] = MICRO_EMPTY;
      else
        {
         const double exp_alpha = MathExp(alpha);
         m_cs_spread[bar] = 2.0 * (exp_alpha - 1.0) / (1.0 + exp_alpha);
        }
     }
  }

//+------------------------------------------------------------------+
//|  _OLS                                                            |
//|  Least-squares slope of y on x over [start, start+len).          |
//|  beta is the intercept-adjusted slope (Cov(x,y)/Var(x)); the     |
//|  residuals and t-stat use the same model (n-2 deg. of freedom).  |
//|  Pairs holding MICRO_EMPTY or a non-finite value are skipped.    |
//|  Returns false if fewer than 4 usable pairs remain, if x has     |
//|  no spread (|n*Sxx - Sx^2| < 1e-20), if the window leaves the    |
//|  arrays, or if the slope variance is not positive.               |
//+------------------------------------------------------------------+
bool CMicrostructureFeatures::_OLS(
   const double &x[],
   const double &y[],
   int           start,
   int           len,
   double       &beta,
   double       &t_stat
)
  {
   if(len < 4)
      return(false);

//--- the requested window must lie inside both arrays
   if(start < 0 || start + len > ArraySize(x) || start + len > ArraySize(y))
      return(false);

   double sx = 0.0, sy = 0.0, sxx = 0.0, sxy = 0.0;
   int    n  = 0;

   for(int j = start; j < start + len; j++)
     {
      if(!MathIsValidNumber(x[j]) || !MathIsValidNumber(y[j]))
         continue;
      //--- MICRO_EMPTY is an ordinary finite double, so the validity test
      //--- above does not catch it; placeholder pairs are dropped here
      if(x[j] <= MICRO_EMPTY || y[j] <= MICRO_EMPTY)
         continue;
      sx  += x[j];
      sy  += y[j];
      sxx += x[j] * x[j];
      sxy += x[j] * y[j];
      n++;
     }

   if(n < 4)
      return(false);

   double denom_val = (double)n * sxx - sx * sx;
   if(MathAbs(denom_val) < 1e-20)
      return(false);

   beta = ((double)n * sxy - sx * sy) / denom_val;

//--- intercept implied by the slope formula above; the residuals must
//--- use it, otherwise the t-statistic describes a different model
   double intercept = (sy - beta * sx) / (double)n;

//--- residual variance -> std(beta), over the same pairs as the sums
   double ss_res = 0.0;
   for(int j = start; j < start + len; j++)
     {
      if(!MathIsValidNumber(x[j]) || !MathIsValidNumber(y[j]))
         continue;
      if(x[j] <= MICRO_EMPTY || y[j] <= MICRO_EMPTY)
         continue;
      double e = y[j] - intercept - beta * x[j];
      ss_res  += e * e;
     }

   double s2    = ss_res / (double)(n - 2);   // two fitted parameters
   double var_b = s2 / (sxx - sx * sx / (double)n);
   if(var_b <= 0.0)
      return(false);

   t_stat = beta / MathSqrt(var_b);
   return(true);
  }

//+------------------------------------------------------------------+
//|  _ComputeLambdas                                                 |
//|  Kyle (1985), Amihud (2002), Hasbrouck (2009)                    |
//|  using a rolling window of length m_window.                      |
//|                                                                  |
//|  In time-series order (index 0 = newest):                        |
//|    dp[i]       = close[i] - close[i+1]   (newer minus older)     |
//|    b[i]        = _TickRule(i); a tie reuses the older bar's b    |
//|    x_kyle[i]   = b[i] * volume[i]                                |
//|    x_amihud[i] = |dp[i]| / (close[i] * volume[i])                |
//|    x_hbck[i]   = b[i] * sqrt(close[i] * volume[i])               |
//|                                                                  |
//|  Bars with non-positive volume or close keep MICRO_EMPTY inputs. |
//+------------------------------------------------------------------+
void CMicrostructureFeatures::_ComputeLambdas()
  {
   int W = m_window;
   int n = m_n;

   double x_kyle[];
   double x_amihud[];
   double x_hbck[];
   double y[];

   if(ArrayResize(x_kyle,   n) < 0)
      return;
   if(ArrayResize(x_amihud, n) < 0)
      return;
   if(ArrayResize(x_hbck,   n) < 0)
      return;
   if(ArrayResize(y,        n) < 0)
      return;

   ArrayFill(x_kyle,   0, n, MICRO_EMPTY);
   ArrayFill(x_amihud, 0, n, MICRO_EMPTY);
   ArrayFill(x_hbck,   0, n, MICRO_EMPTY);
   ArrayFill(y,        0, n, MICRO_EMPTY);

//--- build per-bar regressors, walking from the oldest usable bar to the
//--- newest so that an unchanged close can inherit the previous direction
   double last_b = 0.0;   // stays 0 until the first non-zero move is seen
   for(int i = n - 2; i >= 0; i--)
     {
      double b = _TickRule(i);
      if(b != 0.0)
         last_b = b;
      else
         b = last_b;      // tie: carry the previous sign forward

      double vol_i = (double)m_volume[i];
      if(vol_i <= 0.0 || m_close[i] <= 0.0 || m_close[i + 1] <= 0.0)
         continue;

      //--- strictly positive here because close and volume both are
      double dv = m_close[i] * vol_i;
      double dp = m_close[i] - m_close[i + 1];

      y[i]        = dp;
      x_kyle[i]   = b * vol_i;
      x_amihud[i] = MathAbs(dp) / dv;
      x_hbck[i]   = b * MathSqrt(dv);
     }

//--- rolling estimation
   for(int i = 0; i <= n - W - 1; i++)
     {
      //--- Kyle's Lambda (OLS: dp ~ b * volume)
      double beta_k = 0.0, t_k = 0.0;
      if(_OLS(x_kyle, y, i, W, beta_k, t_k))
         m_kyle[i] = beta_k;

      //--- Amihud's ILLIQ (rolling mean of |dp| / dv over filled bars)
      double sum_a = 0.0;
      int    cnt_a = 0;
      for(int k = i; k < i + W; k++)
        {
         if(x_amihud[k] > MICRO_EMPTY + 1.0)
           {
            sum_a += x_amihud[k];
            cnt_a++;
           }
        }
      if(cnt_a > 0)
         m_amihud[i] = sum_a / (double)cnt_a;

      //--- Hasbrouck's Lambda (OLS: dp ~ b * sqrt(dv))
      double beta_h = 0.0, t_h = 0.0;
      if(_OLS(x_hbck, y, i, W, beta_h, t_h))
         m_hasbrouck[i] = beta_h;
     }

   ArrayFree(x_kyle);
   ArrayFree(x_amihud);
   ArrayFree(x_hbck);
   ArrayFree(y);
  }

//+------------------------------------------------------------------+
//|  RollMeasure: Roll spread estimate for the given bar offset      |
//|  Returns MICRO_EMPTY when bar_offset is outside [0, Count()).    |
//+------------------------------------------------------------------+
double CMicrostructureFeatures::RollMeasure(int bar_offset) const
  {
//--- a negative offset would index before the start of the buffer
   if(bar_offset < 0 || bar_offset >= m_n)
      return(MICRO_EMPTY);
   return(m_roll[bar_offset]);
  }

//+------------------------------------------------------------------+
//|  RollImpact: Roll spread per unit of close * volume              |
//|  Returns MICRO_EMPTY when bar_offset is outside [0, Count()).    |
//+------------------------------------------------------------------+
double CMicrostructureFeatures::RollImpact(int bar_offset) const
  {
//--- a negative offset would index before the start of the buffer
   if(bar_offset < 0 || bar_offset >= m_n)
      return(MICRO_EMPTY);
   return(m_roll_impact[bar_offset]);
  }

//+------------------------------------------------------------------+
//|  CSSpread: Corwin-Schultz spread for the given bar offset        |
//|  Returns MICRO_EMPTY when bar_offset is outside [0, Count()).    |
//+------------------------------------------------------------------+
double CMicrostructureFeatures::CSSpread(int bar_offset) const
  {
//--- a negative offset would index before the start of the buffer
   if(bar_offset < 0 || bar_offset >= m_n)
      return(MICRO_EMPTY);
   return(m_cs_spread[bar_offset]);
  }

//+------------------------------------------------------------------+
//|  CSSigma: high-low volatility estimate for the given bar offset  |
//|  Returns MICRO_EMPTY when bar_offset is outside [0, Count()).    |
//+------------------------------------------------------------------+
double CMicrostructureFeatures::CSSigma(int bar_offset) const
  {
//--- a negative offset would index before the start of the buffer
   if(bar_offset < 0 || bar_offset >= m_n)
      return(MICRO_EMPTY);
   return(m_cs_sigma[bar_offset]);
  }

//+------------------------------------------------------------------+
//|  KyleLambda: Kyle lambda for the given bar offset                |
//|  Returns MICRO_EMPTY when bar_offset is outside [0, Count()).    |
//+------------------------------------------------------------------+
double CMicrostructureFeatures::KyleLambda(int bar_offset) const
  {
//--- a negative offset would index before the start of the buffer
   if(bar_offset < 0 || bar_offset >= m_n)
      return(MICRO_EMPTY);
   return(m_kyle[bar_offset]);
  }

//+------------------------------------------------------------------+
//|  AmihudLambda: Amihud illiquidity for the given bar offset       |
//|  Returns MICRO_EMPTY when bar_offset is outside [0, Count()).    |
//+------------------------------------------------------------------+
double CMicrostructureFeatures::AmihudLambda(int bar_offset) const
  {
//--- a negative offset would index before the start of the buffer
   if(bar_offset < 0 || bar_offset >= m_n)
      return(MICRO_EMPTY);
   return(m_amihud[bar_offset]);
  }

//+------------------------------------------------------------------+
//|  HasbrouckLambda: Hasbrouck lambda for the given bar offset      |
//|  Returns MICRO_EMPTY when bar_offset is outside [0, Count()).    |
//+------------------------------------------------------------------+
double CMicrostructureFeatures::HasbrouckLambda(int bar_offset) const
  {
//--- a negative offset would index before the start of the buffer
   if(bar_offset < 0 || bar_offset >= m_n)
      return(MICRO_EMPTY);
   return(m_hasbrouck[bar_offset]);
  }

#endif // CMICROSTRUCTURE_FEATURES_MQH
