# Copyright 2024, Javier Santiago Gastón de Iriarte Cabrera.
# https://www.mql5.com/en/users/jsgaston/news

# python libraries
import MetaTrader5 as mt5
import tensorflow as tf
import numpy as np
import pandas as pd


# Instrument whose exported tick file is processed.
symbol = "EURUSD"

# Number of days covered by each extracted window.
inp_history_size = 30

import os

# Folder that contains the exported tick CSV files.
path_csv = '.../MQL5/Files/'

# os.path.join yields the same path as plain concatenation when path_csv ends
# with a separator, and still works when it does not.
file_path = os.path.join(path_csv, 'ticks_data_' + symbol + '.csv')

date_format = "%Y-%m-%d  %H:%M"

# Only referenced by the disabled chunked-read option below.
chunk_size = 1000

# Guard clause: stop with a non-zero exit status when the input is missing.
# quit() is a `site` helper intended for the interactive shell and reports
# success (status 0) to the caller, which hides the failure from other tools.
if not os.path.exists(file_path):
    print(f"Error: File not found - {file_path}")
    raise SystemExit(1)

# The file is read as UTF-16 LE; columns 1 and 2 are requested as float32.
rates = pd.read_csv(file_path, encoding='utf-16le', dtype={1: 'float32', 2: 'float32'})  # , chunksize=chunk_size)

print(rates)

# ---------------------------------------------------------------------------
# Loop-invariant setup: done once instead of on every pass of the year loop.
# ---------------------------------------------------------------------------
from datetime import timedelta, datetime
# pytz module for working with time zones
import pytz

pd.set_option('display.max_columns', 500)  # number of columns to be displayed
pd.set_option('display.width', 1500)       # max table width to display

# set time zone to UTC
timezone = pytz.timezone("Etc/UTC")

# Working copy of the tick table. The rename only has an effect when the
# columns carry positional (integer) labels; named columns pass through as-is.
rates_frame = rates.rename(columns={0: 'Time', 1: 'Bid', 2: 'Ask', 3: 'spread'})

# Parse the 'Time' column once; values that do not match the format become NaT.
rates_frame['Time'] = pd.to_datetime(rates_frame['Time'], format='%Y.%m.%d %H:%M', errors='coerce')
# Report whether any timestamp failed to parse.
if rates_frame['Time'].isnull().any():
    print("Hay fechas inválidas en la columna 'timestamp'.")

# ---------------------------------------------------------------------------
# One export per year: take the window that starts on 1 February of year j and
# lasts inp_history_size days, resample it to 1-minute bars and save it as CSV.
# ---------------------------------------------------------------------------
for j in range(2015, 2024):

    # NOTE(review): df and dfs are not read anywhere in the visible part of the
    # script; they are still reset here in case later code relies on them.
    df = pd.DataFrame()
    dfs = pd.DataFrame()

    # Start of the window: 1 February of year j.
    fecha_inicio_1 = datetime(j, 2, 1)
    now = fecha_inicio_1

    # Print the window start in readable form.
    print("Fecha y hora actual:", now)

    formatted_now = now.strftime("%Y-%m-%d %H:%M:%S")
    print("Fecha y hora formateadas:", formatted_now)

    # Despite its name, this is the END of the window: start + n days.
    date_n_days_ago = now + timedelta(days=int(inp_history_size))
    print("Fecha y hora hace n días:", date_n_days_ago)

    formated_now = formatted_now
    print("Fecha y hora n days ago", formated_now)

    # Pieces of the window start date; year and month are used in the file name.
    formated_date_n_days_ago = now.strftime("%Y,%m,%d")
    formated_date_n_days_ago_y = now.strftime("%Y")
    formated_date_n_days_ago_m = now.strftime("%m")
    formated_date_n_days_ago_d = now.strftime("%d")
    print("Fecha y hora formateadas hace n días:", formated_date_n_days_ago)

    # Time-zone-aware (UTC) version of the window start.
    # NOTE(review): not used in the visible part of the script.
    utc_from = datetime(now.year, now.month, now.day, tzinfo=timezone)

    # Date range to extract (both bounds inclusive).
    fecha_inicio = formatted_now
    fecha_fin = date_n_days_ago
    print("start date", fecha_inicio)
    print("end date", fecha_fin)

    # Keep only the rows inside the window; rows with NaT never match.
    in_window = (rates_frame['Time'] >= now) & (rates_frame['Time'] <= fecha_fin)

    # Stable sort: many ticks share the same minute-resolution timestamp, and
    # their original order decides the first/open/close values further down.
    rates_frame_filtrado = rates_frame[in_window].sort_values(by='Time', kind='mergesort')

    # Mid price between ask and bid.
    rates_frame_filtrado['close'] = (rates_frame_filtrado['Ask'] + rates_frame_filtrado['Bid']) / 2

    # display data
    print("\nDisplay dataframe with data")
    print(rates_frame_filtrado)

    # Build the working table in one step (no per-batch concatenation).
    df2 = pd.DataFrame({
        'close': rates_frame_filtrado['close'],
        'target': rates_frame_filtrado['close'],
        'time': rates_frame_filtrado['Time'],
        'Ask': rates_frame_filtrado['Ask'],
        'Bid': rates_frame_filtrado['Bid'],
    }).reset_index(drop=True)

    print("tabla df2", df2)
    print(df2.dtypes)

    # Force the price columns to float64; anything non-numeric becomes NaN.
    for price_column in ('Bid', 'Ask', 'close'):
        df2[price_column] = pd.to_numeric(df2[price_column], errors='coerce').astype('float64')

    # dropna() returns a new frame, so the result has to be assigned back.
    df2 = df2.dropna()
    print("df2 despues de dropna", df2)

    # Nothing to export for this year: skip it instead of failing further down.
    if df2.empty:
        print(f"No usable tick data between {fecha_inicio} and {fecha_fin}; skipping year {j}")
        continue

    # Use the timestamp as index so the table can be resampled.
    df2 = df2.set_index('time')

    # Resample to 1-minute intervals: first bid and ask of each minute, plus
    # open/high/low/close of the mid price. 'min' replaces the deprecated 'T'.
    ohlc_data = df2.resample('1min').agg({'Bid': 'first', 'Ask': 'first', 'close': 'ohlc'})

    # Flatten the multi-level columns
    ohlc_data.columns = ohlc_data.columns.map('_'.join)

    # Rename the OHLC columns
    ohlc_data = ohlc_data.rename(
        columns={'close_open': 'open', 'close_high': 'high', 'close_low': 'low', 'close_close': 'close'}
    )

    # Drop minutes without data and turn the time index back into a column.
    ohlc_data = ohlc_data.dropna().reset_index()

    # Display the resulting OHLC data
    print("ohlc", ohlc_data)

    # Save the bars of this window as CSV in the current working directory.
    # NOTE(review): the file name says '1H' although the bars are 1-minute; the
    # name is left unchanged so existing consumers of these files keep working.
    df3 = ohlc_data
    df3.to_csv(f"{symbol}1H_from_{formated_date_n_days_ago_m}_of_{formated_date_n_days_ago_y}_.csv", index=False)