#!/usr/bin/env python
# coding: utf-8

# **We start by loading the necessary dependencies**

# In[1]:


#pip install pandas if you haven't installed it already
#We'll use pandas to store and retrieve data
import pandas as pd       

#pip install pandas-ta if you haven't installed it already
#We'll use pandas_ta to calculate technical indicators
import pandas_ta as ta    

#pip install numpy if you haven't installed it already
#We'll use numpy to perform optimized vector calculations
import numpy as np

import matplotlib.pyplot as plt

#pip install MetaTrader5 if you haven't installed it already
#We'll use the MetaTrader5 package to connect to and control our MetaTrader 5 Terminal
import MetaTrader5 as MT5

#Standard python library
import time


# Then we enter our login credentials

# In[2]:


# Credentials can be supplied through the MT5_LOGIN, MT5_PASSWORD and MT5_SERVER
# environment variables so they do not have to be edited into this file.
# The original demo-account values are kept as fallbacks so the script behaves
# exactly as before when the variables are not set.
# NOTE(review): a password committed to source should be treated as leaked -
# rotate it and remove the fallback once the environment is configured.
import os

login = int(os.environ.get("MT5_LOGIN", "30516497"))
password = os.environ.get("MT5_PASSWORD", "**dARERECHIMURENGA__2008")
server = os.environ.get("MT5_SERVER", "Deriv-Demo")


# Now we initialize our MetaTrader 5 Terminal and login

# In[3]:


#Start the terminal and authenticate in one call, then report which way it went
print(
    "Logged in succesfully"
    if MT5.initialize(login=login, password=password, server=server)
    else "Failed to initialize the terminal and login"
)


# Let's request one hundred thousand rows of M1 data on the Volatility 75 Index, or whichever Symbol you prefer.

# In[4]:


#Download 100000 one-minute bars, counting back from position 0, into a DataFrame
data = pd.DataFrame(
    data=MT5.copy_rates_from_pos("Volatility 75 Index", MT5.TIMEFRAME_M1, 0, 100000)
)
#Bare expression: displays the frame when this runs as a notebook cell
data


# Let's calculate technical indicators that may help us forecast price

# In[5]:


#20 period exponential moving average
data["ema_20"] = data.ta.ema(length=20)
#40 period exponential moving average
data["ema_40"] = data.ta.ema(length=40)
#100 period exponential moving average
data["ema_100"] = data.ta.ema(length=100)
#20 period relative strength index
data.ta.rsi(length=20,append=True)
#20 period Bollinger Bands, 2 standard deviations wide.
#pandas_ta names the width parameter `std`. An `sd=3` keyword is not a bbands
#parameter and is silently ignored, leaving the default width of 2 in effect -
#which is why the predictor columns are called "BB*_20_2.0". The width is now
#stated explicitly so the code and the column names agree.
data.ta.bbands(length=20,std=2,append=True)
#14 period average true range
data.ta.atr(length=14,append=True)
#Awesome oscillator with default settings
data.ta.ao(append=True)
#Moving average convergence divergence (MACD)
data.ta.macd(append=True)
#Commodity channel index
data.ta.cci(append=True)
#Know sure thing oscillator
data.ta.kst(append=True)
#True strength index
data.ta.tsi(append=True)
#Rate of change
data.ta.roc(append=True)
#Slope between 2 points
data.ta.slope(append=True)
#Directional movement
data.ta.dm(append=True)


# Setting up the target

# In[6]:


#The label is the close price `look_ahead` bars into the future
look_ahead = 30
data["target"] = data["close"].shift(-look_ahead)


# In[7]:


#Drop the first 100 rows (the 100 period EMA needs that many bars) and the last
#`look_ahead` rows, whose shifted target is NaN.
#The end label is derived from the frame length instead of being hard-coded, so
#the trim stays correct when the terminal returns fewer rows than requested
#(for 100000 rows it is the same label as before, 99969).
#.copy() makes `data` an independent frame so the column assignments made
#further down do not operate on a slice of the original.
data = data.loc[100:len(data) - look_ahead - 1,:].copy()


# **Let's setup our black-box model**

# In[8]:


from catboost import CatBoostRegressor


# Preparing our training and testing splits

# In[9]:


#Row labels that delimit the training window
train_start, train_end = 100, 10000

#The test window opens 100 rows after training stops and spans 30000 rows
test_start = train_end + 100
test_end = test_start + 30000

#Model inputs. The order here is the column order of the feature matrices.
predictors = [
    #Raw prices
    "open", "high", "low", "close",
    #Know sure thing (signal, line)
    "KSTs_9", "KST_10_15_20_30_10_10_10_15",
    #Commodity channel index, awesome oscillator, average true range
    "CCI_14_0.015", "AO_5_34", "ATRr_14",
    #Bollinger Bands
    "BBM_20_2.0", "BBP_20_2.0", "BBB_20_2.0", "BBU_20_2.0", "BBL_20_2.0",
    #Relative strength index
    "RSI_20",
    #Exponential moving averages
    "ema_20", "ema_40", "ema_100",
    #Slope and rate of change
    "SLOPE_1", "ROC_10",
    #True strength index (signal, line)
    "TSIs_13_25_13", "TSI_13_25_13",
    #MACD (line, histogram, signal)
    "MACD_12_26_9", "MACDh_12_26_9", "MACDs_12_26_9",
    #Directional movement (positive, negative)
    "DMP_14", "DMN_14",
]

#Name of the label column
target = "target"


# Decision trees are sensitive to scale so we will normalise input values 
# 
# We will store the first reading from each feature in this dictionary called "first_values"

# In[10]:


#Reference value per model input, keyed by column name
first_values = {}


# In[11]:


#Record the reading each predictor column has on the first training row
first_values.update(
    (name, data[name][train_start]) for name in data.columns if name in predictors
)

#Express every predictor as a multiple of that first training reading
for name, anchor in first_values.items():
    data[name] = data[name] / anchor


# Train test split

# In[12]:


#Training window: feature matrix first, label series second
train_x, train_y = (
    data.loc[train_start:train_end, columns] for columns in (predictors, target)
)

#Test window, built the same way
test_x, test_y = (
    data.loc[test_start:test_end, columns] for columns in (predictors, target)
)


# Fitting our black-box model

# In[13]:


#CatBoost regressor with default settings, trained on every predictor
cat_full = CatBoostRegressor()
cat_full.fit(X=train_x, y=train_y)


# **Obtaining predictions from our black-box model**

# In[14]:


#Forecasts for the test window, carried on the test rows' own index
cat_full_predictions = pd.DataFrame(
    {"predictions": cat_full.predict(test_x)}, index=test_x.index
)
cat_full_predictions.plot(label=True)
test_y.plot()


# **Black Box Explanation Algorithms**

# **Drop Column Importance**
# 
# We will use the Recursive Feature Elimination algorithm from the sklearn library

# In[15]:


from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR


# The Recursive Feature Elimination algorithm expects us to pass a supervised learning model which it can fit and assess.
# 
# The model should provide information about feature importance either through its coefficients or a dedicated function.
# 
# The model doesn't have to be the same as the model we are using in our problem. 
# 
# The model doesn't have to be from the sklearn library, but it should at least have an sklearn wrapper
# 
# We'll use a linear model and assess its accuracy as we recursively drop the least important features.

# In[16]:


#Linear regression is the estimator RFE refits while it prunes features
lm = LinearRegression()


# In[17]:


rfe = RFE(estimator=lm, step=1)


# The step argument denotes how many features to drop with each iteration

# In[18]:


rfe = rfe.fit(X=train_x, y=train_y)


# Let's inspect which features our RFE algorithm found informative

# In[19]:


#Boolean mask with one entry per predictor column
rfe.support_


# We can also get rankings of each feature

# In[20]:


rfe.ranking_


# The sklearn implementation furnishes us with a mask we can apply to the columns in our train_x dataframe, to see the names of the columns RFE found important.

# In[21]:


train_x.columns[rfe.support_]


# So from all the indicators we applied it appears that the following indicators had valuable information:
# 
#     1)Open
#     
#     2)High
#     
#     3)Low
#     
#     4)Close
#     
#     5)KSTs_9
#     
#     6)KST_10_15_20_30_10_10_10_15
#     
#     7)All 3 exponential moving averages
#     
#     8)4 Bollinger Band Components: 'BBM_20_2.0' 'BBB_20_2.0' 'BBU_20_2.0' 'BBL_20_2.0'
#     
#     9)MACD: 'MACDs_12_26_9'
#     
#     10)Directional movement negative 
#     
# Remember these are only estimations and serve as guides! 
# 
# We cannot conclude that this is precisely the truth of the matter, however it's still a reasonable assertion for us to make.

# **Next we move on to assessing Permutation Importance**
# 
# We will use a python library called Explain Like I'm 5 (ELI5) 

# In[22]:


#pip install eli5 if you don't already have it installed
import eli5
from eli5.sklearn import PermutationImportance
from sklearn.ensemble import GradientBoostingRegressor


# In[23]:


#Gradient boosted trees fitted on the training window
gbr = GradientBoostingRegressor()
gbr.fit(train_x, train_y)

#Permutation importance of that model, evaluated on the test window
permutation = PermutationImportance(gbr)
permutation.fit(test_x, test_y)


# In[24]:


eli5.show_weights(permutation, feature_names=list(test_x.columns))


# **Partial Dependence Plots**

# In[59]:


#Import partial dependence display from sklearn
from sklearn.inspection import PartialDependenceDisplay


# In[60]:


#One partial dependence plot per predictor, each shown with a grid
for feature_name in predictors:
    PartialDependenceDisplay.from_estimator(
        estimator=cat_full, X=test_x, features=[feature_name]
    )
    plt.grid()
    plt.show()


# In[61]:


#Second sweep over the same predictors, this time requesting kind='both'
for feature in predictors:
    PartialDependenceDisplay.from_estimator(
        estimator=cat_full, X=test_x, features=[feature], kind='both'
    )


# In[62]:


#2D partial dependence for two feature pairs, each on its own 10x5 figure,
#computed from the first 1000 rows of the test set
for column_names in ([('ROC_10','ATRr_14')], [('high','low')]):
    #Setting up the plot
    fig , ax = plt.subplots(figsize=(10,5))

    #Plotting 2D PDP
    disp_4 = PartialDependenceDisplay.from_estimator(cat_full, test_x[0:1000],column_names, ax=ax)
    plt.show()


# **Shapley Additive Explanations (SHAP) Values**

# In[65]:


#Import SHAP
import shap

#Initialise the shap package
shap.initjs()


# In[66]:


#Before using shap we should note that shap values assume uncorrelated features
#Print the pairwise correlation matrix of the predictors to check that assumption
print(data.loc[:, predictors].corr())


# In[67]:


#Explainer bound to the full CatBoost model
tree_explainer = shap.TreeExplainer(cat_full)

#SHAP values for every row of the test set
shap_values = tree_explainer.shap_values(test_x)

#Summary plot of those values against the test features
shap.summary_plot(shap_values, test_x)


# **Mutual Information**

# In[68]:


from sklearn.feature_selection import mutual_info_regression


# In[69]:


#Mutual information between each predictor and the target, largest first
mi_scores = pd.Series(
    data=mutual_info_regression(train_x, train_y),
    index=train_x.columns,
    name="MI Scores",
).sort_values(ascending=False)


# In[70]:


mi_scores


# In[71]:


def plot_mi_scores(scores):
    """Draw a horizontal bar chart of the scores on the current figure.

    ``scores`` is sorted in ascending order before plotting, and its index
    supplies the tick label for each bar. Nothing is returned.
    """
    ordered = scores.sort_values(ascending=True)
    #One bar position per score: 0, 1, 2, ...
    positions = np.arange(len(ordered))
    plt.barh(positions, ordered)
    plt.yticks(positions, list(ordered.index))
    plt.title("Mutual Information Scores")


# In[72]:


#Open an 8x5 figure at 100 dpi, switch the grid on, then draw the score bars
plt.figure(figsize=(8, 5), dpi=100)
plt.grid()
plot_mi_scores(mi_scores)


# In[73]:


from sklearn.linear_model import LinearRegression
from catboost import CatBoostRegressor
from sklearn.metrics import mean_squared_error


# In[74]:


#First we fit the simpler model, using only the four raw price columns
lm = LinearRegression()

lm.fit(train_x[["open","high","low","close"]],train_y)


# In[75]:


#In-sample fit: predicted once, kept both as a raw array and as an indexed frame
lm_fit = lm.predict(train_x[["open","high","low","close"]])
lm_predictions = pd.DataFrame(lm_fit, index = train_y.index)


# In[76]:


#What the linear model leaves unexplained on the training window
residuals = (train_y - lm_fit).to_frame()


# In[77]:


residuals


# In[79]:


#Now we bring in our more powerful black-box model.
#It is trained to predict the linear model's residuals from the Bollinger Band
#and moving average columns only.
cat = CatBoostRegressor()
cat.fit(
    X=train_x[["BBM_20_2.0","BBL_20_2.0","BBU_20_2.0","ema_40","ema_20","ema_100"]],
    y=residuals,
)


# In[81]:


#Test-window forecasts from the linear model, the full model and the residual model
lm_test_predictions = pd.DataFrame(
    lm.predict(test_x[["open","high","low","close"]]),
    index=test_y.index,
)
cat_full_test_predictions = cat_full.predict(test_x[predictors])

cat_residuals_predictions = pd.DataFrame(
    cat.predict(test_x[["BBM_20_2.0","BBL_20_2.0","BBU_20_2.0","ema_40","ema_20","ema_100"]]),
    index=test_y.index,
)


# In[84]:


#Mean squared error of the full black-box model on the test window
full_error = mean_squared_error(test_y, cat_full_test_predictions)


# In[85]:


#Hybrid forecast = linear forecast + predicted residual
hybrid_predictions = lm_test_predictions[0] + cat_residuals_predictions[0]
hybrid_error = mean_squared_error(test_y, hybrid_predictions)


# In[88]:


#Error reduction of the hybrid over the full model, as a percentage of the full model's error
delta_error = full_error - hybrid_error
(delta_error / full_error) * 100


# In[94]:


hybrid_predictions.plot()


# In[95]:


test_y.plot()


# **Bringing It All Together**

# In[116]:


#Symbol traded by the live loop
MARKET_SYMBOL = "Volatility 75 Index"
#NOTE(review): DEVIATION is not referenced in the visible part of this script - confirm it is still needed
DEVIATION = 100
symbol_info = MT5.symbol_info(MARKET_SYMBOL)
#symbol_info() gives back None when the symbol cannot be retrieved; stop here
#with a readable message instead of an AttributeError on the next line
if symbol_info is None:
    raise RuntimeError(
        "Could not read symbol info for {!r}: {}".format(MARKET_SYMBOL, MT5.last_error())
    )
#Trade the minimum volume reported for the symbol; raise the multiplier to trade larger size
VOLUME = symbol_info.volume_min * 1


# In[117]:


def preprocess(df):
    """Append the indicator columns used by the hybrid model and trim the warm-up rows.

    The indicator columns are written onto ``df`` itself, so a caller that
    ignores the return value still sees them on the frame it passed in.

    Parameters
    ----------
    df : pandas.DataFrame
        Price bars. The pandas_ta ``.ta`` accessor is called on it.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` restricted to index labels 100 and above.
    """
    #20 period exponential moving average
    df["ema_20"] = df.ta.ema(length=20)
    #40 period exponential moving average
    df["ema_40"] = df.ta.ema(length=40)
    #100 period exponential moving average
    df["ema_100"] = df.ta.ema(length=100)
    #20 period Bollinger Bands, 2 standard deviations wide.
    #pandas_ta names the width parameter `std`; an `sd` keyword is not a bbands
    #parameter and is silently ignored. The width is spelled out so the columns
    #are guaranteed to carry the "BB*_20_2.0" names the models were trained on.
    df.ta.bbands(length=20,std=2,append=True)
    #Rebinding the local name `df` to the slice would be invisible to the caller,
    #so the trimmed frame is handed back explicitly
    return df.loc[100:,:].copy()


# In[118]:


def fetch_prices():
    """Download 200 M1 bars for MARKET_SYMBOL and attach the indicator columns.

    Returns
    -------
    pandas.DataFrame
        The downloaded bars with the columns added by ``preprocess``.

    Raises
    ------
    RuntimeError
        If the terminal returns no bars.
    """
    rates = MT5.copy_rates_from_pos(MARKET_SYMBOL,MT5.TIMEFRAME_M1,0,200)
    #A failed request yields None; building a frame from it would only fail later,
    #and less clearly, when the price columns are looked up
    if rates is None or len(rates) == 0:
        raise RuntimeError(
            "No price data received for {!r}: {}".format(MARKET_SYMBOL, MT5.last_error())
        )
    current_prices = pd.DataFrame(rates)
    #preprocess writes the indicator columns onto the frame it is given
    preprocess(current_prices)
    return current_prices


# In[129]:


def normalise_prices(raw_data):
    """Scale, in place, every column that has a stored reference value.

    Each column of ``raw_data`` whose name is a key of the module-level
    ``first_values`` mapping is divided by the value stored under that key.
    Other columns are left untouched. Nothing is returned.
    """
    for name in [label for label in raw_data.columns if label in first_values]:
        raw_data[name] = raw_data[name] / first_values[name]


# In[130]:


#Placeholder so the name exists at module level; the trading loop below overwrites it with each new forecast
model_forecast = 0


# In[149]:


def hybrid_forecast(model_1,model_2):
    """Forecast from the newest bar by adding the two models' predictions.

    Fresh prices are fetched and normalised on every call.

    Parameters
    ----------
    model_1 :
        Model whose ``predict`` receives the open/high/low/close columns.
    model_2 :
        Model whose ``predict`` receives the Bollinger Band and EMA columns.

    Returns
    -------
    The sum of the two ``predict`` results for the single newest row.
    """
    market_data = fetch_prices()
    normalise_prices(market_data)
    #Take the newest bar by position. The previous label slice 199:200 picked the
    #same row only when exactly 200 bars came back, and selected nothing otherwise.
    latest_bar = market_data.iloc[[-1]]
    forecast_1 = model_1.predict(latest_bar[["open","high","low","close"]])
    forecast_2 = model_2.predict(latest_bar[["BBM_20_2.0","BBL_20_2.0","BBU_20_2.0","ema_40","ema_20","ema_100"]])
    out = forecast_1 + forecast_2
    return(out)


# In[153]:


#Account balance when the script starts; the trading loop subtracts it from the live balance to report profit
INITIAL_BALANCE = MT5.account_info().balance
#Placeholder; the trading loop refreshes it from the account on every pass
CURRENT_BALANCE = 0


# In[154]:


#Let's see how performance changes if we just rely on the baseline forecast.

if __name__ == "__main__":
    #Direction of the trade this loop opened last: 1 = buy, 0 = sell.
    #Starts as None so that a position which is already open when the script
    #starts is left alone, instead of raising NameError on the first
    #"Checking model forecast" pass.
    last_trade = None

    while True:

        #Account standing
        info = MT5.account_info()
        CURRENT_BALANCE = info.balance
        profit = CURRENT_BALANCE - INITIAL_BALANCE

        model_forecast = hybrid_forecast(lm,cat)
        print("Current forecast: ",model_forecast)

        #The forecast arrives as a one-element array; compare it as a plain float
        forecast_price = float(np.ravel(model_forecast)[-1])
        #Read the ask and the position count once so every decision in this
        #pass is made against the same snapshot
        ask = MT5.symbol_info(MARKET_SYMBOL).ask
        open_positions = MT5.positions_total()

        #We have no open positions
        if open_positions == 0:
            print("No open positions")

            #Buy
            if forecast_price > ask:
                print("Following model forecast buy")
                MT5.Buy(MARKET_SYMBOL,VOLUME)
                last_trade = 1

            #Sell
            elif forecast_price < ask:
                print("Following model forecast sell")
                MT5.Sell(MARKET_SYMBOL,VOLUME)
                last_trade = 0

        elif open_positions > 0:
            print("Checking model forecast")

            #NOTE(review): the package's Close helper is believed to take the
            #symbol as its first argument - confirm against the installed
            #MetaTrader5 version
            if forecast_price > ask and last_trade == 0:
                print("Model is forecasting a move that hurts our exposure. Closing positions")
                MT5.Close(MARKET_SYMBOL)

            elif forecast_price < ask and last_trade == 1:
                print("Model is forecasting a move that hurts our exposure. Closing positions")
                MT5.Close(MARKET_SYMBOL)

        print("Total Profit/Loss: ",profit)
        #Wait one minute before re-evaluating
        time.sleep(60)

