In [7]:
# imports 
from datetime import datetime
import MetaTrader5 as mt5
import pandas as pd
import numpy  as np
import pytz
import os
from category_encoders import OrdinalEncoder, OneHotEncoder, BinaryEncoder,TargetEncoder, CountEncoder, HashingEncoder, LeaveOneOutEncoder,JamesSteinEncoder

In [8]:
# Connect to the MetaTrader 5 terminal before requesting any data.
# On failure raise instead of calling exit(): inside a notebook exit() shuts
# the kernel down (discarding all state), whereas an exception stops this cell,
# halts "Run All", and tells the reader why.
if not mt5.initialize():
    init_error = mt5.last_error()  # captured before the connection is torn down
    mt5.shutdown()
    raise RuntimeError(f"initialize() failed, error = {init_error}")

In [9]:
# Download the daily price history for one symbol from the MetaTrader 5 terminal.

# Set up timezone information. Passing a pytz zone through `tzinfo=` is only
# safe for fixed-offset zones such as UTC; other zones need tz.localize().
tz = pytz.timezone("Etc/UTC")

# Bounds of the history window, as timezone-aware datetimes.
# NOTE: the names are inverted relative to their use -- `stopdate` is the
# EARLIER bound and `startdate` the LATER one. They are kept as-is because
# other cells may refer to them; the call below passes them in
# (from, to) = (stopdate, startdate) order.
startdate = datetime(2023, 12, 31, hour=23, minute=59, second=59, tzinfo=tz)
stopdate = datetime(2017, 12, 31, hour=23, minute=59, second=59, tzinfo=tz)

# Symbol to download.
symbol = "BTCUSD"

# Get the price history. If the request returns None, pd.DataFrame(None)
# yields an empty frame, so the length check below covers that case as well.
prices = pd.DataFrame(mt5.copy_rates_range(symbol, mt5.TIMEFRAME_D1, stopdate, startdate))

if len(prices) < 1:
    download_error = mt5.last_error()  # captured before the connection is torn down
    mt5.shutdown()
    # Raise rather than exit(): exit() would shut the notebook kernel down.
    raise RuntimeError(
        f"Error downloading rates history for {symbol}, error = {download_error}"
    )

# Shut down the mt5 connection; as the last expression, its return value is
# what the cell displays.
mt5.shutdown()


True

In [10]:
# Initialize the categorical feature columns.

# Remove the columns that are not used as features. errors="ignore" keeps the
# cell re-runnable: on a second run the columns are already gone and a plain
# drop would raise KeyError.
prices.drop(
    columns=["time", "tick_volume", "spread", "real_volume"],
    errors="ignore",
    inplace=True,
)

# Direction of each bar: close at or above the open is "bullish", else "bearish".
prices["bar_type"] = np.where(prices["close"] >= prices["open"], "bullish", "bearish")

# Placeholder columns, filled in by a later cell. Use an explicit empty string:
# np.empty() returns uninitialised memory, so its contents are not guaranteed
# to be empty strings.
prices["body_type"] = ""
prices["bar_pattern"] = ""
prices.head()

Unnamed: 0,open,high,low,close,bar_type,body_type,bar_pattern
0,13743.0,13855.0,12362.69,13347.0,bearish,,
1,13348.0,15381.0,12535.67,14689.0,bullish,,
2,14232.48,15408.0,14110.57,15130.0,bullish,,
3,15114.0,15370.0,13786.18,15139.0,bullish,,
4,15055.8,16894.0,14349.84,16725.0,bullish,,


In [11]:
# Set the feature values for `body_type` and `bar_pattern`.
# Columns are addressed by name and computed for all rows at once, so the
# result no longer depends on the positional order of the columns.

# body_type: size of the bar body relative to the full bar range,
# |close - open| / |high - low|, bucketed into four labels.
body_ratio = (prices["close"] - prices["open"]).abs() / (prices["high"] - prices["low"]).abs()

# np.select takes the first matching condition, so each threshold only needs
# its lower bound. A bar with zero range and zero body gives NaN (0/0), matches
# nothing, and lands in the default "<0.25" bucket.
prices["body_type"] = np.select(
    [
        (body_ratio >= 0.75).to_numpy(),
        (body_ratio >= 0.5).to_numpy(),
        (body_ratio >= 0.25).to_numpy(),
    ],
    [">=0.75", ">=0.5<0.75", ">=0.25<0.5"],
    default="<0.25",
)

# bar_pattern: compare each bar with the one before it.
prev_bar = prices[["high", "low", "bar_type"]].shift(1)

# "higherHigh": two bullish bars in a row with both high and low rising.
higher_high = (
    (prices["bar_type"] == "bullish")
    & (prev_bar["bar_type"] == "bullish")
    & (prices["high"] > prev_bar["high"])
    & (prices["low"] > prev_bar["low"])
)
# "lowerLow": two bearish bars in a row with both low and high falling.
lower_low = (
    (prices["bar_type"] == "bearish")
    & (prev_bar["bar_type"] == "bearish")
    & (prices["low"] < prev_bar["low"])
    & (prices["high"] < prev_bar["high"])
)

bar_pattern = pd.Series(
    np.select(
        [higher_high.to_numpy(), lower_low.to_numpy()],
        ["higherHigh", "lowerLow"],
        default="flat",
    ),
    index=prices.index,
    dtype=object,
)
# The first bar has no predecessor, so its pattern is left missing.
bar_pattern.iloc[:1] = None
prices["bar_pattern"] = bar_pattern

prices.head()

Unnamed: 0,open,high,low,close,bar_type,body_type,bar_pattern
0,13743.0,13855.0,12362.69,13347.0,bearish,>=0.25<0.5,
1,13348.0,15381.0,12535.67,14689.0,bullish,>=0.25<0.5,flat
2,14232.48,15408.0,14110.57,15130.0,bullish,>=0.5<0.75,higherHigh
3,15114.0,15370.0,13786.18,15139.0,bullish,<0.25,flat
4,15055.8,16894.0,14349.84,16725.0,bullish,>=0.5<0.75,higherHigh


In [12]:
# Calculate the target: the change in log close price over the next
# `look_ahead` bars, stored on the current row.
look_ahead = 1
log_close = np.log(prices["close"])
# diff(look_ahead) is log_close[t] - log_close[t - look_ahead]; shifting it up
# by look_ahead rows places the change to the following bar(s) on row t.
# The last look_ahead rows have no following bar and are left as NaN.
prices["target"] = log_close.diff(look_ahead).shift(-look_ahead)
prices

Unnamed: 0,open,high,low,close,bar_type,body_type,bar_pattern,target
0,13743.000,13855.000,12362.690,13347.000,bearish,>=0.25<0.5,,0.095807
1,13348.000,15381.000,12535.670,14689.000,bullish,>=0.25<0.5,flat,0.029581
2,14232.480,15408.000,14110.570,15130.000,bullish,>=0.5<0.75,higherHigh,0.000595
3,15114.000,15370.000,13786.180,15139.000,bullish,<0.25,flat,0.099630
4,15055.800,16894.000,14349.840,16725.000,bullish,>=0.5<0.75,higherHigh,-0.032758
...,...,...,...,...,...,...,...,...
2134,42537.338,43698.738,42144.501,43452.538,bullish,>=0.5<0.75,flat,-0.019842
2135,43452.538,43846.738,42319.776,42598.838,bearish,>=0.5<0.75,flat,-0.011923
2136,42598.838,43169.338,41286.845,42093.938,bearish,>=0.25<0.5,lowerLow,0.001877
2137,42093.938,42652.845,41551.538,42173.038,bullish,<0.25,flat,0.003307


In [13]:
# Drop every row that holds a missing value in any column, in place, and
# renumber the remaining rows from 0.
prices.dropna(how="any", axis="index", ignore_index=True, inplace=True)
prices


Unnamed: 0,open,high,low,close,bar_type,body_type,bar_pattern,target
0,13348.000,15381.000,12535.670,14689.000,bullish,>=0.25<0.5,flat,0.029581
1,14232.480,15408.000,14110.570,15130.000,bullish,>=0.5<0.75,higherHigh,0.000595
2,15114.000,15370.000,13786.180,15139.000,bullish,<0.25,flat,0.099630
3,15055.800,16894.000,14349.840,16725.000,bullish,>=0.5<0.75,higherHigh,-0.032758
4,15699.530,16474.000,15672.990,16186.000,bullish,>=0.5<0.75,flat,-0.082785
...,...,...,...,...,...,...,...,...
2132,43610.738,43633.838,41598.908,42537.338,bearish,>=0.5<0.75,flat,0.021287
2133,42537.338,43698.738,42144.501,43452.538,bullish,>=0.5<0.75,flat,-0.019842
2134,43452.538,43846.738,42319.776,42598.838,bearish,>=0.5<0.75,flat,-0.011923
2135,42598.838,43169.338,41286.845,42093.938,bearish,>=0.25<0.5,lowerLow,0.001877


In [14]:
# Ordinal encoding of the three categorical columns; the other columns are
# passed to the encoder unchanged as part of `prices`.
categorical_cols = ["bar_type", "body_type", "bar_pattern"]
ord_encoder = OrdinalEncoder(cols=categorical_cols)
ordinal_data = ord_encoder.fit_transform(prices)

ordinal_data.head()


Unnamed: 0,open,high,low,close,bar_type,body_type,bar_pattern,target
0,13348.0,15381.0,12535.67,14689.0,1,1,1,0.029581
1,14232.48,15408.0,14110.57,15130.0,1,2,2,0.000595
2,15114.0,15370.0,13786.18,15139.0,1,3,1,0.09963
3,15055.8,16894.0,14349.84,16725.0,1,2,2,-0.032758
4,15699.53,16474.0,15672.99,16186.0,1,2,1,-0.082785


In [15]:
# One-hot encoding of the three categorical columns, fitted and applied on the
# full `prices` frame.
categorical_cols = ["bar_type", "body_type", "bar_pattern"]
onehot_encoder = OneHotEncoder(cols=categorical_cols)
onehot_data = onehot_encoder.fit_transform(prices)

onehot_data.head()

Unnamed: 0,open,high,low,close,bar_type_1,bar_type_2,body_type_1,body_type_2,body_type_3,body_type_4,bar_pattern_1,bar_pattern_2,bar_pattern_3,target
0,13348.0,15381.0,12535.67,14689.0,1,0,1,0,0,0,1,0,0,0.029581
1,14232.48,15408.0,14110.57,15130.0,1,0,0,1,0,0,0,1,0,0.000595
2,15114.0,15370.0,13786.18,15139.0,1,0,0,0,1,0,1,0,0,0.09963
3,15055.8,16894.0,14349.84,16725.0,1,0,0,1,0,0,0,1,0,-0.032758
4,15699.53,16474.0,15672.99,16186.0,1,0,0,1,0,0,1,0,0,-0.082785


In [16]:
# Binary encoding of the three categorical columns, fitted and applied on the
# full `prices` frame.
categorical_cols = ["bar_type", "body_type", "bar_pattern"]
binary_encoder = BinaryEncoder(cols=categorical_cols)
binary_data = binary_encoder.fit_transform(prices)

binary_data.head()

Unnamed: 0,open,high,low,close,bar_type_0,bar_type_1,body_type_0,body_type_1,body_type_2,bar_pattern_0,bar_pattern_1,target
0,13348.0,15381.0,12535.67,14689.0,0,1,0,0,1,0,1,0.029581
1,14232.48,15408.0,14110.57,15130.0,0,1,0,1,0,1,0,0.000595
2,15114.0,15370.0,13786.18,15139.0,0,1,0,1,1,0,1,0.09963
3,15055.8,16894.0,14349.84,16725.0,0,1,0,1,0,1,0,-0.032758
4,15699.53,16474.0,15672.99,16186.0,0,1,0,1,0,0,1,-0.082785


In [17]:
# Frequency (count) encoding of the three categorical columns, fitted and
# applied on the full `prices` frame.
categorical_cols = ["bar_type", "body_type", "bar_pattern"]
freq_encoder = CountEncoder(cols=categorical_cols)
freq_data = freq_encoder.fit_transform(prices)

freq_data.head()

Unnamed: 0,open,high,low,close,bar_type,body_type,bar_pattern,target
0,13348.0,15381.0,12535.67,14689.0,1090,644,1447,0.029581
1,14232.48,15408.0,14110.57,15130.0,1090,617,370,0.000595
2,15114.0,15370.0,13786.18,15139.0,1090,634,1447,0.09963
3,15055.8,16894.0,14349.84,16725.0,1090,617,370,-0.032758
4,15699.53,16474.0,15672.99,16186.0,1090,617,1447,-0.082785


In [18]:
# Hashing encoding: the three categorical columns are hashed into
# n_components=3 output columns.
categorical_cols = ["bar_type", "body_type", "bar_pattern"]
hash_encoder = HashingEncoder(
    cols=categorical_cols,
    n_components=3,
)
hash_data = hash_encoder.fit_transform(prices)

hash_data.head()

Unnamed: 0,col_0,col_1,col_2,open,high,low,close,target
0,0,1,2,13348.0,15381.0,12535.67,14689.0,0.029581
1,1,2,0,14232.48,15408.0,14110.57,15130.0,0.000595
2,0,1,2,15114.0,15370.0,13786.18,15139.0,0.09963
3,1,2,0,15055.8,16894.0,14349.84,16725.0,-0.032758
4,1,1,1,15699.53,16474.0,15672.99,16186.0,-0.082785


In [19]:
# Target encoding: a supervised encoder, so the feature columns and the
# `target` column are passed to it separately.
categorical_cols = ["bar_type", "body_type", "bar_pattern"]
feature_cols = ["open", "high", "low", "close"] + categorical_cols
target_encoder = TargetEncoder(cols=categorical_cols)
target_data = target_encoder.fit_transform(prices[feature_cols], prices["target"])

target_data.head()

Unnamed: 0,open,high,low,close,bar_type,body_type,bar_pattern
0,13348.0,15381.0,12535.67,14689.0,-0.001408,0.001978,6.2e-05
1,14232.48,15408.0,14110.57,15130.0,-0.001408,0.000576,0.000416
2,15114.0,15370.0,13786.18,15139.0,-0.001408,-0.001658,6.2e-05
3,15055.8,16894.0,14349.84,16725.0,-0.001408,0.000576,0.000416
4,15699.53,16474.0,15672.99,16186.0,-0.001408,0.000576,6.2e-05


In [20]:
# Leave-one-out encoding: a supervised encoder, so the feature columns and the
# `target` column are passed to it separately.
categorical_cols = ["bar_type", "body_type", "bar_pattern"]
feature_cols = ["open", "high", "low", "close"] + categorical_cols
oneout_encoder = LeaveOneOutEncoder(cols=categorical_cols)
oneout_data = oneout_encoder.fit_transform(prices[feature_cols], prices["target"])

oneout_data.head()

Unnamed: 0,open,high,low,close,bar_type,body_type,bar_pattern
0,13348.0,15381.0,12535.67,14689.0,-0.001436,0.001935,4.2e-05
1,14232.48,15408.0,14110.57,15130.0,-0.00141,0.000576,0.000416
2,15114.0,15370.0,13786.18,15139.0,-0.001501,-0.001818,-7e-06
3,15055.8,16894.0,14349.84,16725.0,-0.001379,0.00063,0.000506
4,15699.53,16474.0,15672.99,16186.0,-0.001333,0.000712,0.000119


In [21]:
# James-Stein encoding: a supervised encoder, so the feature columns and the
# `target` column are passed to it separately.
categorical_cols = ["bar_type", "body_type", "bar_pattern"]
feature_cols = ["open", "high", "low", "close"] + categorical_cols
james_encoder = JamesSteinEncoder(cols=categorical_cols)
james_data = james_encoder.fit_transform(prices[feature_cols], prices["target"])

james_data.head()

Unnamed: 0,open,high,low,close,bar_type,body_type,bar_pattern
0,13348.0,15381.0,12535.67,14689.0,-0.001408,0.001738,6.2e-05
1,14232.48,15408.0,14110.57,15130.0,-0.001408,0.000564,0.000416
2,15114.0,15370.0,13786.18,15139.0,-0.001408,-0.001268,6.2e-05
3,15055.8,16894.0,14349.84,16725.0,-0.001408,0.000564,0.000416
4,15699.53,16474.0,15672.99,16186.0,-0.001408,0.000564,6.2e-05
