# Copyright 2023, MetaQuotes Ltd.
# https://www.mql5.com

# python libraries
import MetaTrader5 as mt5
import tensorflow as tf
import numpy as np
import pandas as pd
import tf2onnx
from tensorflow.keras import layers, callbacks


# Instrument whose exported tick history is used for training
symbol = "EURUSD"

# Look-back length; later used both as the number of days of history to keep
# and as the number of time steps fed to the network per sample
inp_history_size = 120

# File name of the exported ONNX model, prefixed with the symbol
inp_model_name = f"{symbol}_LSTM_120_1h_not_filtered.onnx"

# The generated ONNX file is saved next to this script so it can be used as a
# resource. The directory is derived with os.path so it is correct for both
# "\" and "/" separators and for relative invocations. The trailing separator
# is kept because the model file name is appended to data_path by plain
# string concatenation.
import os
from sys import argv
data_path = os.path.join(os.path.dirname(os.path.abspath(argv[0])), "")
print("data path to save onnx model",data_path)


import os
import sys

# Folder holding the exported tick CSV
# NOTE(review): '.../MQL5/Files/' looks like a placeholder for the terminal's
# MQL5/Files directory — confirm and adjust before running
path_csv = '.../MQL5/Files/'

file_path = path_csv + 'ticks_data_'+symbol+'.csv'

date_format = "%Y-%m-%d  %H:%M"

# Stop with a non-zero exit status when the export is missing. sys.exit is
# used rather than quit(), which is a helper installed by the site module and
# would report success (status 0) to the caller.
if not os.path.exists(file_path):
    print(f"Error: File not found - {file_path}")
    sys.exit(1)

# The file is decoded as UTF-16 LE; the columns keyed 1 and 2 are read as float32
rates = pd.read_csv(file_path, encoding='utf-16le', dtype={1: 'float32', 2: 'float32'})

print(rates)

# datetime helpers used to build the history window
from datetime import datetime, timedelta

# Two empty frames: each one is created empty and then has every row label
# dropped, so both end up with no rows and no columns
df, dfs = (
    blank.drop(index=blank.index)
    for blank in (pd.DataFrame(), pd.DataFrame())
)



# pandas is imported (again) here to configure how DataFrames are printed
import pandas as pd
pd.options.display.max_columns = 500  # number of columns to be displayed
pd.options.display.width = 1500       # max table width to display
# pytz supplies the time zone object attached to the UTC start date
import pytz

# UTC time zone
timezone = pytz.timezone("Etc/UTC")

# End of the history window: a fixed reference date moved back by 30 days
# (datetime.now() could be used instead of the fixed date)
now = datetime(2024, 3, 5, 0) - timedelta(days=30)
print("Fecha y hora actual:", now)

# The same instant rendered as text
formatted_now = f"{now:%Y-%m-%d %H:%M:%S}"
print("Fecha y hora formateadas:", formatted_now)

# Start of the history window: inp_history_size days before the end
date_n_days_ago = now - timedelta(days=int(inp_history_size))
print("Fecha y hora hace n días:", date_n_days_ago)

formated_n_time = f"{date_n_days_ago:%Y-%m-%d %H:%M:%S}"
print("Fecha y hora n days ago",formated_n_time)

# Comma-separated year, month and day of the start date, plus the three
# parts as individual strings
formated_date_n_days_ago = f"{date_n_days_ago:%Y,%m,%d}"
(formated_date_n_days_ago_y,
 formated_date_n_days_ago_m,
 formated_date_n_days_ago_d) = formated_date_n_days_ago.split(",")
print("Fecha y hora formateadas hace n días:", formated_date_n_days_ago)

# Start date (midnight) as a datetime carrying the UTC time zone, so that no
# local time zone offset is applied
utc_from = datetime(
    int(formated_date_n_days_ago_y),
    int(formated_date_n_days_ago_m),
    int(formated_date_n_days_ago_d),
    tzinfo=timezone,
)

####################################################################################################################################
print(rates)

# Wrap the loaded ticks in a DataFrame; columns labelled 0-3, if present,
# get descriptive names
rates_frame = pd.DataFrame(rates)
rates_frame.rename(columns={0: 'Time', 1: 'Bid', 2 : 'Ask',3:'spread'}, inplace=True)

# Parse the 'Time' column; values that do not match the format become NaT
rates_frame['Time'] = pd.to_datetime(rates_frame['Time'], format='%Y.%m.%d %H:%M', errors='coerce')
# Report when the conversion produced missing dates
if rates_frame['Time'].isna().any():
    print("Hay fechas inválidas en la columna 'timestamp'.")

# Bounds of the date range to keep (both given as formatted strings)
fecha_inicio, fecha_fin = formated_n_time, formatted_now
print("start date",fecha_inicio)
print("end date",fecha_fin)

# Keep only the rows inside the range (both bounds inclusive), ordered by time
inside_window = rates_frame['Time'].between(fecha_inicio, fecha_fin)
rates_frame_filtrado = rates_frame[inside_window].sort_values(by='Time')

# Mid price of every tick: the average of Ask and Bid
rates_frame_filtrado['close'] = rates_frame_filtrado['Ask'].add(rates_frame_filtrado['Bid']).div(2)

# display data
print("\nDisplay dataframe with data")
print(rates_frame_filtrado)

# Assemble the working frame in a single vectorised step instead of growing
# it batch by batch with pd.concat, which re-copies the accumulated rows on
# every iteration and, via an object-array round trip, loses the column dtypes.
# Columns, in order: close, target (a copy of close), time, Ask, Bid.
# The index is reset so rows are numbered 0..n-1.
df2 = pd.DataFrame({
    'close': rates_frame_filtrado['close'],
    'target': rates_frame_filtrado['close'],
    'time': rates_frame_filtrado['Time'],
    'Ask': rates_frame_filtrado['Ask'],
    'Bid': rates_frame_filtrado['Bid'],
}).reset_index(drop=True)

# Hold the prices in double precision for the resampling and scaling steps
df2 = df2.astype({'close': 'float64', 'target': 'float64', 'Ask': 'float64', 'Bid': 'float64'})

print("tabla df2", df2)
print(df2.dtypes)

############################################
# Force the price columns to a numeric dtype; unparsable values become NaN
df2['Bid'] = pd.to_numeric(df2['Bid'], errors='coerce')
df2['Ask'] = pd.to_numeric(df2['Ask'], errors='coerce')
df2['close'] = pd.to_numeric(df2['close'], errors='coerce')
#########################################
# Drop incomplete rows. DataFrame.dropna returns a new frame, so the result
# has to be assigned back for the rows to actually be removed.
df2 = df2.dropna()
print("df2 despues de dropna",df2)

######################################## v8
# Convert the 'time' column to datetime format
df2['time'] = pd.to_datetime(df2['time'])

# Use the 'time' column as the index
df2.set_index('time', inplace=True)

####################################################################################################################

# Resample the ticks to 1-hour bars: the first Bid and Ask of each hour, and
# the open/high/low/close of the 'close' column (the Ask/Bid average) within
# each hour. The lowercase 'h' alias is used because the uppercase 'H' alias
# is deprecated in recent pandas releases.
ohlc_data = df2.resample('1h').agg({'Bid': 'first', 'Ask': 'first', 'close': 'ohlc'})

# Flatten the multi-level columns by joining both levels with '_'
ohlc_data.columns = ohlc_data.columns.map('_'.join)

# Give the OHLC columns their short names
ohlc_data.rename(columns={'close_open': 'open', 'close_high': 'high', 'close_low': 'low', 'close_close': 'close'}, inplace=True)

# Drop bars that contain missing values
ohlc_data.dropna(inplace=True)

# Turn the time index back into a regular column
ohlc_data.reset_index(inplace=True)

# Display the resulting OHLC data
print("ohlc",ohlc_data)
#############################################
###########

# Separate DataFrame object built from the hourly bars
dff3 = pd.DataFrame(ohlc_data)

# Keep the close prices only. Going through .values discards the column
# label, so the rebuilt frame has default integer column labels.
data3 = pd.DataFrame(dff3.filter(['close']).values)
print(data3)
# Show the column labels of the rebuilt frame
print(data3.columns)

# Take the first column by position: a frame rebuilt from a bare array never
# carries a 'close' label, so a lookup by name could not succeed here
result = data3.iloc[:, 0]

###########################################################################################################
# One-column frame of close prices
df4 = pd.DataFrame(result)
print("df4", df4)
############################################################################################################# noise reduction: low-pass / high-pass filters

# Close prices as a 2-D array
data5 = df4.values

# Scale the prices into the [0, 1] range
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
# NOTE(review): the scaler is fitted on the full series, i.e. before the
# train/test split below — confirm this is intended
scaled_data = scaler.fit_transform(data5)

# The first 80% of the rows are used for training, the remainder for testing
training_size = int(len(scaled_data)*0.80)
print("Training_size:",training_size)
train_data_initial = scaled_data[:training_size]
test_data_initial = scaled_data[training_size:, :1]

# split a univariate sequence into samples
def split_sequence(sequence, n_steps):
    """Split a univariate sequence into supervised-learning samples.

    Every sample pairs a window of ``n_steps`` consecutive items with the
    item that immediately follows the window.

    Args:
        sequence: Indexable, sliceable sequence of observations that
            supports ``len()`` (for example a list or a NumPy array).
        n_steps: Number of consecutive items in each input window.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: ``X`` holds the windows and
        ``y`` the item following each window. Both arrays are empty when
        the sequence has no item left after a full window.
    """
    # A window starting at `start` is only usable when index
    # start + n_steps still exists in the sequence
    n_samples = max(0, len(sequence) - max(n_steps, 0))
    windows = [sequence[start:start + n_steps] for start in range(n_samples)]
    targets = [sequence[start + n_steps] for start in range(n_samples)]
    return np.array(windows), np.array(targets)

# Build the sliding-window samples; each window spans inp_history_size steps
time_step = inp_history_size
x_train, y_train = split_sequence(sequence=train_data_initial, n_steps=time_step)
x_test, y_test = split_sequence(sequence=test_data_initial, n_steps=time_step)

# The LSTM expects its input shaped as [samples, time steps, features]
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

# define model
from keras.models import Sequential
from keras.layers import Dense, Activation, Conv1D, MaxPooling1D, Dropout, Flatten, LSTM
from keras.metrics import RootMeanSquaredError as rmse

# Layer stack: a convolution + pooling front end, two LSTM layers each
# followed by dropout, and a single sigmoid output unit. The input is a
# window of inp_history_size steps with one feature per step.
model = Sequential([
    Conv1D(filters=256, kernel_size=2, activation='relu', padding='same', input_shape=(inp_history_size, 1)),
    MaxPooling1D(pool_size=2),
    LSTM(100, return_sequences=True),
    Dropout(0.3),
    LSTM(100, return_sequences=False),
    Dropout(0.3),
    Dense(units=1, activation='sigmoid'),
])
# Mean squared error loss, with RMSE reported as an additional metric
model.compile(optimizer='adam', loss='mse', metrics=[rmse()])

# Early-stopping callback with restore_best_weights enabled
# NOTE(review): patience equals the number of epochs passed to fit below, so
# presumably the patience window can never run out — confirm intent
early_stopping = callbacks.EarlyStopping(min_delta=0.0001, patience=300, restore_best_weights=True)

# Train for up to 300 epochs, using the test samples as validation data
history = model.fit(
    x_train,
    y_train,
    epochs=300,
    batch_size=32,
    validation_data=(x_test, y_test),
    callbacks=[early_stopping],
    verbose=2,
)

# Loss and RMSE on the training samples
train_loss, train_rmse = model.evaluate(x=x_train, y=y_train, batch_size=32)
print(f"train_loss={train_loss:.3f}")
print(f"train_rmse={train_rmse:.3f}")

# Loss and RMSE on the testing samples
test_loss, test_rmse = model.evaluate(x=x_test, y=y_test, batch_size=32)
print(f"test_loss={test_loss:.3f}")
print(f"test_rmse={test_rmse:.3f}")

# Export the trained network to an ONNX file; the target path is the data
# path followed directly by the model file name
output_path = f"{data_path}{inp_model_name}"
onnx_model = tf2onnx.convert.from_keras(model, output_path=output_path)
print(f"saved model to {output_path}")

# Predict on the testing samples and shape the true targets as one column
test_predict = model.predict(x_test)
plot_y_test = np.reshape(y_test, (-1, 1))

# Metric helpers
from sklearn import metrics
from sklearn.metrics import r2_score

# Undo the min-max scaling so the metrics are computed on real values
value1 = scaler.inverse_transform(plot_y_test)
value2 = scaler.inverse_transform(test_predict)

# Report RMSE, MSE and R2 of the predictions (value2) against the actual
# values (value1)
score = np.sqrt(metrics.mean_squared_error(value1, value2))
print("RMSE         : {}".format(score))
print("MSE          :", metrics.mean_squared_error(value1, value2))
print("R2 score     :", r2_score(value1, value2))
