# Copyright 2023, MetaQuotes Ltd.
# https://www.mql5.com

# python libraries
import MetaTrader5 as mt5
import tensorflow as tf
import numpy as np
import pandas as pd
import tf2onnx
from tensorflow.keras import layers, callbacks
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score 
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV
import openpyxl  # engine used by pandas DataFrame.to_excel for .xlsx output
from sys import argv
# input parameters

symbol="EURUSD"
days = 120
inp_history_size = days

if not mt5.initialize():
    print("initialize() failed, error code =",mt5.last_error())
    quit()

# the generated ONNX file will be saved in the same directory as this script
data_path = argv[0]
last_index = data_path.rfind("\\") + 1
data_path = data_path[0:last_index]
print("data path to save onnx model",data_path)

# set start and end dates for the history data
from datetime import timedelta, datetime
#end_date = datetime.now()
end_date = datetime(2024, 1, 1, 0)
start_date = end_date - timedelta(days=inp_history_size)

# print start and end dates
print("data start date = ",start_date)
print("data end date = ",end_date)

# get rates
eurusd_rates = mt5.copy_rates_range(symbol, mt5.TIMEFRAME_H1, start_date, end_date)
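# Optional sanity check: copy_rates_range() returns None on failure and an empty
# array when no bars fall inside the requested range, so fail early in that case.
if eurusd_rates is None or len(eurusd_rates) == 0:
    print("copy_rates_range() returned no data, error code =", mt5.last_error())
    mt5.shutdown()
    quit()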

# create dataframe
df = pd.DataFrame(eurusd_rates)

# get close prices only
data = df.filter(['close']).values
data = pd.DataFrame(data)
print(data)
inp_history_size = len(data)  # temporarily holds the number of bars; reset to 'days' in the LSTM section below
print("inp_history_size", inp_history_size)
# Check columns in 'data'
print(data.columns)

# Use the 'Close' column if it exists; otherwise fall back to the first column
if 'Close' in data.columns:
    data['target'] = data['Close']
else:
    data['target'] = data.iloc[:, 0]

######################################################################################################

#####################################################################################################
from sklearn.model_selection import KFold, train_test_split
from keras.models import Sequential
from keras.layers import Dense, Activation, BatchNormalization, Dropout
from keras.metrics import RootMeanSquaredError as rmse
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
import numpy as np
import pandas as pd

# Use the close price (column 0) as the only feature
x_features = data[[0]]
# Target variable
y_target = data['target']

# Split the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x_features, y_target, test_size=0.2, shuffle=False)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(x_train)
X_test_scaled = scaler.transform(x_test)

scaler_y = StandardScaler()
y_train_scaled = scaler_y.fit_transform(np.array(y_train).reshape(-1, 1))
y_test_scaled = scaler_y.transform(np.array(y_test).reshape(-1, 1))
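# Note: both scalers are fitted on the training split only and then reused on the
# test split, which avoids leaking test-set statistics into the model.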

# Define parameters

window_size = 120
learning_rate = 0.001
dropout_rate = 0.5
batch_size = 1024
layer_1 = 256
epochs = 1000
k_reg = 0.0001
patience = 20
factor = 0.5
n_splits = 5  # Number of K-fold splits (adjust as needed)

def create_windows(data, window_size):
    return [data[i:i + window_size] for i in range(len(data) - window_size + 1)]
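# Illustration only: create_windows(np.arange(5), 3) returns
# [array([0, 1, 2]), array([1, 2, 3]), array([2, 3, 4])] -- one window per step.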

custom_optimizer = Adam(learning_rate=learning_rate)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=factor, patience=patience, min_lr=1e-26)

def build_model(input_shape, k_reg):
    model = Sequential()
    
    layer_sizes = [512, 1024, 512, 256, 128, 64]

    model.add(Dense(layer_1, kernel_regularizer=l2(k_reg), input_shape=input_shape))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(dropout_rate))
    for size in layer_sizes:
        model.add(Dense(size, kernel_regularizer=l2(k_reg)))
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(Dropout(dropout_rate))

    model.add(Dense(1, activation='linear'))
    model.add(BatchNormalization())
    model.compile(optimizer=custom_optimizer, loss='mse', metrics=[rmse()])
    
    return model
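# Optional: inspect the resulting layer stack before training, e.g.
# build_model((window_size, 1), k_reg).summary()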



# Define EarlyStopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=patience, restore_best_weights=True)

# KFold Cross Validation
kfold = KFold(n_splits=n_splits, shuffle=False)
history = []
loss_per_epoch = []
val_loss_per_epoch = []

for train, val in kfold.split(X_train_scaled, y_train_scaled):
    x_train_fold, x_val_fold = X_train_scaled[train], X_train_scaled[val]
    y_train_fold, y_val_fold = y_train_scaled[train], y_train_scaled[val]
    
    # Flatten the input data
    x_train_fold_flat = x_train_fold.flatten()
    x_val_fold_flat = x_val_fold.flatten()

    # Create windows for training and validation
    x_train_windows = create_windows(x_train_fold_flat, window_size)
    x_val_windows = create_windows(x_val_fold_flat, window_size)

    # Rebuild the model
    model = build_model((window_size, 1), k_reg)

    # Create a new optimizer
    custom_optimizer = Adam(learning_rate=learning_rate)
    
    # Recompile the model
    model.compile(optimizer=custom_optimizer, loss='mse', metrics=[rmse()])
    
    hist = model.fit(
        np.array(x_train_windows), y_train_fold[window_size - 1:],
        epochs=epochs,
        validation_data=(np.array(x_val_windows), y_val_fold[window_size - 1:]),
        batch_size=batch_size,
        callbacks=[reduce_lr, early_stopping]
    )
    history.append(hist)
    loss_per_epoch.append(hist.history['loss'])
    val_loss_per_epoch.append(hist.history['val_loss'])




# Mean training/validation loss per fold (each fold contributes one mean over its epochs)
mean_loss_per_epoch = [np.mean(loss) for loss in loss_per_epoch]
val_mean_loss_per_epoch = [np.mean(val_loss) for val_loss in val_loss_per_epoch]

print("mean_loss_per_epoch", mean_loss_per_epoch)
print("val_mean_loss_per_epoch", val_mean_loss_per_epoch)

# Create a DataFrame to display the mean loss values per fold
epoch_df = pd.DataFrame({
    'Fold': range(1, len(mean_loss_per_epoch) + 1),
    'Train Loss': mean_loss_per_epoch,
    'Validation Loss': val_mean_loss_per_epoch
})

print(epoch_df)



#########################################################################################################################################

from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform, randint
from sklearn.metrics import make_scorer, mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.base import BaseEstimator, RegressorMixin
from keras.models import Sequential
from keras.layers import Dense, BatchNormalization, Activation, Dropout
from keras.optimizers import Adam
from keras.regularizers import l2
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
import numpy as np
import pandas as pd

#######################################################################################################################################

# Build windowed datasets from the full scaled training and test sets
# (the same shape the model was trained on inside the K-fold loop)
x_train_windows_all = np.array(create_windows(X_train_scaled.flatten(), window_size))
y_train_windows_all = y_train_scaled[window_size - 1:]
x_test_windows_all = np.array(create_windows(X_test_scaled.flatten(), window_size))
y_test_windows_all = y_test_scaled[window_size - 1:]

# evaluate training data
train_loss, train_rmse = model.evaluate(x_train_windows_all, y_train_windows_all, batch_size=batch_size)
print(f"train_loss={train_loss:.3f}")
print(f"train_rmse={train_rmse:.3f}")

# evaluate testing data
test_loss, test_rmse = model.evaluate(x_test_windows_all, y_test_windows_all, batch_size=batch_size)
print(f"test_loss={test_loss:.3f}")
print(f"test_rmse={test_rmse:.3f}")
##############################################################################
#GRU

# Predict on the windowed test set
predictions = model.predict(x_test_windows_all)

# Check the shape of predictions
print("Predictions shape:", predictions.shape)

# Reshape predictions if necessary
predictions = predictions.reshape(-1, 1)

# Baseline: predict the mean of the (scaled) test targets.
# By construction this baseline has an R2 of exactly 0, so it is a useful floor.
baseline_predictions = np.full_like(y_test_windows_all, np.mean(y_test_windows_all))

# Calculate metrics for the baseline
baseline_mse = mean_squared_error(y_test_windows_all, baseline_predictions)
baseline_mae = mean_absolute_error(y_test_windows_all, baseline_predictions)
baseline_r2 = r2_score(y_test_windows_all, baseline_predictions)

# Verify input shapes
print("y_test_windows_all shape:", y_test_windows_all.shape)

# Calculate mean squared error
mse = mean_squared_error(y_test_windows_all, predictions)
print("Mean Squared Error:", mse)

# Calculate and print mean absolute error
mae = mean_absolute_error(y_test_windows_all, predictions)
print(f"\nMean Absolute Error: {mae}")

# Calculate and print R2 Score
r2 = r2_score(y_test_windows_all, predictions)
print(f"\nR2 Score: {r2}")

print("")


# Compare metrics
print("Baseline MSE:", baseline_mse)
print("Baseline MAE:", baseline_mae)
print("Baseline r2:", baseline_r2)


print("")
###############################################################################
from sklearn.base import BaseEstimator, RegressorMixin
from keras.models import Sequential
from keras.layers import Dense, BatchNormalization, Activation, Dropout
from keras.optimizers import Adam
from keras.regularizers import l2
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score
import numpy as np

# Function to create a Keras model
def create_keras_model():
    window_size = 120
    learning_rate = 0.001
    dropout_rate = 0.5
    batch_size = 1024
    layer_1 = 256
    epochs = 1000
    k_reg = 0.0001

    custom_optimizer = Adam(learning_rate=learning_rate)

    model = Sequential()

    model.add(Dense(layer_1, kernel_regularizer=l2(k_reg), input_shape=(window_size, 1)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(dropout_rate))

    model.add(Dense(512, kernel_regularizer=l2(k_reg)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(dropout_rate))

    model.add(Dense(1024, kernel_regularizer=l2(k_reg)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(dropout_rate))
    
    model.add(Dense(512, kernel_regularizer=l2(k_reg)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(dropout_rate))

    model.add(Dense(256, kernel_regularizer=l2(k_reg)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(dropout_rate))

    model.add(Dense(128, kernel_regularizer=l2(k_reg)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(dropout_rate))

    model.add(Dense(64, kernel_regularizer=l2(k_reg)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(dropout_rate))

    model.add(Dense(1, activation='linear'))
    model.add(BatchNormalization())
    model.compile(optimizer=custom_optimizer, loss='mse', metrics=[rmse()])
    return model

class KerasRegressorWrapper(BaseEstimator, RegressorMixin):
    def __init__(self, build_fn=create_keras_model):
        self.build_fn = build_fn
        self.model = self.build_fn()

    def fit(self, X, y, **kwargs):
        self.model.fit(X, y, **kwargs)
        return self

    def predict(self, X):
        # Ensure that the model is compiled before making predictions
        if not hasattr(self.model, '_is_compiled') or not self.model._is_compiled:
            raise RuntimeError("The model must be compiled before making predictions.")
        
        # Modify this part to handle 3D predictions correctly
        y_pred = self.model.predict(X)
        if len(y_pred.shape) == 3:
            y_pred = y_pred[:, -1, :]  # Assuming you want the last time step

        return y_pred

# Use the wrapper class in cross_val_score
keras_regressor = KerasRegressorWrapper(build_fn=create_keras_model)
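# Note: a hand-rolled wrapper is used here for transparency; the scikeras package
# provides a ready-made KerasRegressor with the same scikit-learn interface.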

# Dummy data for demonstration (shows the (samples, window_size, 1) shape the model expects)
#X_train_scaled = np.random.rand(100, 120, 1)
#y_train_scaled = np.random.rand(100, 1)

# Use the KerasRegressorWrapper in cross_val_score

cv_scores = -cross_val_score(keras_regressor, X_train_scaled, y_train_scaled, cv=5, scoring='neg_mean_squared_error')
print("cv scores: ", cv_scores)

print("Mean CV Score:", np.mean(cv_scores))

#######################################################################################################################
def create_keras_model2(dropout_rate=0.5, k_reg=0.0001, layer_1=256, learning_rate=0.001, window_size=120):
    # Build the model from the hyperparameters passed in, so that
    # RandomizedSearchCV can actually vary them during the search.
    model = Sequential()

    custom_optimizer = Adam(learning_rate=learning_rate)
    if dropout_rate > 0.0:

            model.add(Dense(layer_1, kernel_regularizer=l2(k_reg), input_shape=(window_size, 1)))
            model.add(BatchNormalization())
            model.add(Activation('relu'))
            model.add(Dropout(dropout_rate))

            model.add(Dense(512, kernel_regularizer=l2(k_reg)))
            model.add(BatchNormalization())
            model.add(Activation('relu'))
            model.add(Dropout(dropout_rate))

            model.add(Dense(1024, kernel_regularizer=l2(k_reg)))
            model.add(BatchNormalization())
            model.add(Activation('relu'))
            model.add(Dropout(dropout_rate))

            model.add(Dense(512, kernel_regularizer=l2(k_reg)))
            model.add(BatchNormalization())
            model.add(Activation('relu'))
            model.add(Dropout(dropout_rate))

            model.add(Dense(256, kernel_regularizer=l2(k_reg)))
            model.add(BatchNormalization())
            model.add(Activation('relu'))
            model.add(Dropout(dropout_rate))

            model.add(Dense(128, kernel_regularizer=l2(k_reg)))
            model.add(BatchNormalization())
            model.add(Activation('relu'))
            model.add(Dropout(dropout_rate))

            model.add(Dense(64, kernel_regularizer=l2(k_reg)))
            model.add(BatchNormalization())
            model.add(Activation('relu'))
            model.add(Dropout(dropout_rate))

            model.add(Dense(1, activation='linear'))
            model.add(BatchNormalization())
            model.compile(optimizer=custom_optimizer, loss='mse', metrics=[rmse()])
    return model

class KerasRegressorWrapper(BaseEstimator, RegressorMixin):
    def __init__(self, build_fn=create_keras_model2, batch_size=None, dropout_rate=None, epochs=1000, k_reg=0.99, learning_rate=0.99, layer_1=512, window_size=120):
        self.build_fn = build_fn
        self.batch_size = batch_size
        self.dropout_rate = dropout_rate
        self.epochs = epochs
        self.k_reg = k_reg
        self.learning_rate = learning_rate
        self.layer_1 = layer_1
        self.window_size = window_size
        self.model = self.build_fn(dropout_rate=self.dropout_rate, k_reg=self.k_reg, learning_rate=self.learning_rate, layer_1=self.layer_1, window_size=self.window_size)

    def fit(self, X, y, **kwargs):
        self.model.fit(X, y, epochs=self.epochs, **kwargs)
        return self

    def predict(self, X):
        # Ensure that the model is compiled before making predictions
        if not hasattr(self.model, '_is_compiled') or not self.model._is_compiled:
            raise RuntimeError("The model must be compiled before making predictions.")
        
        # Modify this part to handle 3D predictions correctly
        y_pred = self.model.predict(X)
        if len(y_pred.shape) == 3:
            y_pred = y_pred[:, -1, :]  # Assuming you want the last time step

        return y_pred


# Define the parameter grid for Random Search
param_dist = {
    'learning_rate': uniform(0.0001, 0.1),
    'dropout_rate': uniform(0.1, 0.8),
    'batch_size': randint(32, 1024),
    'layer_1': randint(64, 500),
    'k_reg': uniform(0.0001, 0.01),
    'epochs': randint(50, 1000),
    'window_size': randint(120, 121)
}
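# Note on the distributions: scipy's uniform(loc, scale) samples from [loc, loc + scale],
# so uniform(0.0001, 0.1) draws learning rates in [0.0001, 0.1001]; randint(a, b) draws
# integers in [a, b), so window_size is effectively fixed at 120 here.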

# Use the wrapper class in cross_val_score
keras_regressor = KerasRegressorWrapper(build_fn=create_keras_model2, batch_size=batch_size, dropout_rate=dropout_rate, epochs=epochs, k_reg=k_reg, learning_rate=learning_rate, layer_1=layer_1, window_size=window_size)



# Define R2 scorer
r2_scorer = make_scorer(r2_score)

# Perform Random Search
random_search = RandomizedSearchCV(
    keras_regressor,
    param_distributions=param_dist,
    n_iter=10,
    cv=5,
    scoring={'neg_mean_squared_error': 'neg_mean_squared_error', 'neg_mean_absolute_error': 'neg_mean_absolute_error', 'r2': r2_scorer},
    refit='neg_mean_squared_error',
    random_state=42,
    verbose=3
)


# Fit the Random Search to your data with batch_size
random_search.fit(X_train_scaled, y_train_scaled, batch_size=batch_size)  # Adjust batch_size as needed

# Print the best hyperparameters
print("Best Hyperparameters:", random_search.best_params_)

# Get the best model
best_model = random_search.best_estimator_

# Evaluate the best model
best_model.fit(X_train_scaled, y_train_scaled)
# Predict on the test set
y_pred_scaled = best_model.predict(X_test_scaled)

# Inverse transform the scaled predictions to the original scale
y_pred = scaler_y.inverse_transform(y_pred_scaled)

# Inverse transform the true labels to the original scale
y_true = scaler_y.inverse_transform(y_test_scaled)

# Calculate metrics on the original price scale
test_loss = mean_squared_error(y_true, y_pred)
test_rmse = np.sqrt(test_loss)
test_mae = mean_absolute_error(y_true, y_pred)
test_r2 = r2_score(y_true, y_pred)

print(f"Best Model Test MSE: {test_loss:.3f}")
print(f"Best Model Test RMSE: {test_rmse:.3f}")
print(f"Best Model Test MAE: {test_mae:.3f}")
print(f"Best Model Test R2: {test_r2:.3f}")

# best_score_ is the mean cross-validated score of the refit metric (negative MSE here)
best_model_scores = random_search.best_score_
print(type(best_model_scores))
print("Best Model CV Score:", best_model_scores)



print("cv scores: ", cv_scores)

print("Mean CV Score:", np.mean(cv_scores))

print("Baseline MSE:", baseline_mse)
print("Baseline MAE:", baseline_mae)
print("Baseline r2:", baseline_r2)

print("Mean Squared Error:", mse)

print(f"\nMean Absolute Error: {mae}")

print(f"\nR2 Score: {r2}")

# Extract the results
results_df = pd.DataFrame(random_search.cv_results_)
resultados_df = pd.DataFrame(random_search.cv_results_)
# Display the relevant columns in the results
relevant_columns = ['params', 'mean_test_neg_mean_squared_error', 'std_test_neg_mean_squared_error', 'rank_test_neg_mean_squared_error']
results_df = results_df[relevant_columns]

# Extract the results
results_df2 = pd.DataFrame(random_search.cv_results_)

# Convert 'params' column to string representation
results_df2['params'] = results_df2['params'].astype(str)

print(results_df2)
# Paths to the Excel files where the DataFrames will be saved
ruta_excel = 'C:/Users/jsgas/OneDrive/Trading/Articulos/2. py a onnx/GRU vs LSTM/para el articulo/Resultados finales y finetuning/results_file.xlsx'
ruta_excel2 = 'C:/Users/jsgas/OneDrive/Trading/Articulos/2. py a onnx/GRU vs LSTM/para el articulo/Resultados finales y finetuning/resultados_file.xlsx'
# Save the DataFrames to Excel files
results_df.to_excel(ruta_excel, index=False)
resultados_df.to_excel(ruta_excel2, index=False)
print(f'DataFrame saved to {ruta_excel}')

print("cv scores: ", cv_scores)

print("Mean CV Score:", np.mean(cv_scores))

print("Baseline MSE:", baseline_mse)
print("Baseline MAE:", baseline_mae)
print("Baseline r2:", baseline_r2)

print("Mean Squared Error:", mse)

print(f"\nMean Absolute Error: {mae}")

print(f"\nR2 Score: {r2}")
#################################################################################
#########################################################################################

output_path = data_path+symbol+"_"+str(days)+"_model.onnx"
onnx_model = tf2onnx.convert.from_keras(model, output_path=output_path)
print(f"saved model to {output_path}")

###############################################################################
# Plot the mean loss values (one point per fold)
plt.plot(range(1, len(mean_loss_per_epoch) + 1), mean_loss_per_epoch, 'r', label='Training loss')
plt.plot(range(1, len(val_mean_loss_per_epoch) + 1), val_mean_loss_per_epoch, 'b', label='Validation loss')
plt.title('Mean Training and Validation Loss per Fold')
plt.xlabel('Fold')
plt.ylabel('Loss')
plt.legend()
plt.show()

plt.scatter(y_test_windows_all, predictions)
plt.xlabel('True Values')
plt.ylabel('Predictions')
plt.show()

residuals = y_test_windows_all - predictions
plt.scatter(predictions, residuals)
plt.axhline(y=0, color='black', linestyle='--', linewidth=2)
plt.xlabel('Predictions')
plt.ylabel('Residuals')
plt.show()
#################################################################################


######################################################################################### LSTM
# create dataframe from the same rates
df = pd.DataFrame(eurusd_rates)

inp_history_size = days
# get close prices only
data2 = df.filter(['close']).values

# scale data
from sklearn.preprocessing import MinMaxScaler
scaler2=MinMaxScaler(feature_range=(0,1))
scaled_data = scaler2.fit_transform(data2)
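# Note: the scaler is fitted on the full series before the train/test split below;
# a stricter variant would fit MinMaxScaler on the training slice only and reuse it
# to transform the test slice, avoiding look-ahead bias.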

# training size is 80% of the data
training_size = int(len(scaled_data)*0.80) 
print("Training_size:",training_size)
train_data_initial = scaled_data[0:training_size,:]
test_data_initial = scaled_data[training_size:,:1]

# split a univariate sequence into samples
def split_sequence(sequence, n_steps):
    X, y = list(), list()
    for i in range(len(sequence)):
        # find the end of this pattern
        end_ix = i + n_steps
        # check if we are beyond the sequence
        if end_ix > len(sequence) - 1:
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)
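# Example: with sequence = np.arange(5).reshape(-1, 1) and n_steps = 3 this yields
# X of shape (2, 3, 1) -> windows [0,1,2] and [1,2,3], and y of shape (2, 1) -> [3, 4]:
# each sample is the n_steps values immediately preceding its target.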

# split into samples
time_step = inp_history_size
x_train2, y_train2 = split_sequence(train_data_initial, time_step)
x_test2, y_test2 = split_sequence(test_data_initial, time_step)

# reshape input to be [samples, time steps, features] as required by the LSTM
x_train22 = x_train2.reshape(x_train2.shape[0], x_train2.shape[1], 1)
x_test22 = x_test2.reshape(x_test2.shape[0], x_test2.shape[1], 1)

# define model
from keras.models import Sequential
from keras.layers import Dense, Activation, Conv1D, MaxPooling1D, Dropout, Flatten, LSTM
from keras.metrics import RootMeanSquaredError as rmse
# define early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=20)

model2 = Sequential()
model2.add(Conv1D(filters=256, kernel_size=2, activation='relu', padding='same', input_shape=(inp_history_size, 1)))
model2.add(MaxPooling1D(pool_size=2))
model2.add(LSTM(100, return_sequences=True))
model2.add(Dropout(0.3))
model2.add(LSTM(100, return_sequences=False))
model2.add(Dropout(0.3))
model2.add(Dense(units=1, activation='sigmoid'))  # sigmoid works here because the targets are MinMax-scaled to [0, 1]
model2.compile(optimizer='adam', loss='mse', metrics=[rmse()])

# model training for up to 1000 epochs with early stopping on validation loss
history2 = model2.fit(x_train22, y_train2, epochs=1000, validation_data=(x_test22, y_test2), batch_size=256, callbacks=[early_stopping])

# Save training loss and validation loss for each epoch
loss_per_epoch2 = history2.history['loss']
val_loss_per_epoch2 = history2.history['val_loss']

# Create a DataFrame to display the loss values for each epoch
epoch_df2 = pd.DataFrame({'Epoch': range(1, len(loss_per_epoch2) + 1), 'Train Loss': loss_per_epoch2, 'Validation Loss': val_loss_per_epoch2})
print(epoch_df2)

# evaluate training data
train_loss2, train_rmse2 = model2.evaluate(x_train22, y_train2, batch_size=32)
print(f"train_loss={train_loss2:.3f}")
print(f"train_rmse={train_rmse2:.3f}")

# evaluate testing data
test_loss2, test_rmse2 = model2.evaluate(x_test22, y_test2, batch_size=32)
print(f"test_loss={test_loss2:.3f}")
print(f"test_rmse={test_rmse2:.3f}")

##############################################################################
# LSTM


#prediction using testing data
test_predict2 = model2.predict(x_test22)
plot_y_test2 = y_test2.reshape(-1,1)

#calculate metrics
from sklearn import metrics
from sklearn.metrics import r2_score
#transform data to real values
value12=scaler2.inverse_transform(plot_y_test2)
value22=scaler2.inverse_transform(test_predict2)
#calc score
score2 = np.sqrt(metrics.mean_squared_error(value12,value22))
print("RMSE   LSTM      : {}".format(score2))
print("MSE    LSTM      :", metrics.mean_squared_error(value12,value22))

# Calculate and print R2 Score (true values first, predictions second)
r2 = r2_score(plot_y_test2, test_predict2)
print(f"\nR2 Score: {r2}")

# Plot training and validation loss
plt.plot(range(1, len(loss_per_epoch2) + 1), loss_per_epoch2, label='Training Loss')
plt.plot(range(1, len(val_loss_per_epoch2) + 1), val_loss_per_epoch2, label='Validation Loss')
plt.title('Training and Validation Loss per Epoch')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

plt.scatter(value12, value22)
plt.xlabel('True Values')
plt.ylabel('Predictions')
plt.show()

residuals = value12 - value22
plt.scatter(value22, residuals)
plt.axhline(y=0, color='black', linestyle='--', linewidth=2)
plt.xlabel('Predictions')
plt.ylabel('Residuals')
plt.show()

################################################################################################

from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, LSTM, Dropout, Dense
from keras.callbacks import EarlyStopping

from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.base import BaseEstimator, RegressorMixin

# Define early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=20)

# Function to create the Keras model
def create_model():
    model = Sequential()
    model.add(Conv1D(filters=256, kernel_size=2, activation='relu', padding='same', input_shape=(inp_history_size, 1)))
    model.add(MaxPooling1D(pool_size=2))
    model.add(LSTM(100, return_sequences=True))
    model.add(Dropout(0.3))
    model.add(LSTM(100, return_sequences=False))
    model.add(Dropout(0.3))
    model.add(Dense(units=1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='mse', metrics=['RootMeanSquaredError'])
    return model

# Wrapper class to make the Keras model compatible with scikit-learn
class KerasRegressorWrapper(BaseEstimator, RegressorMixin):
    def __init__(self, build_fn=create_model, **kwargs):
        self.build_fn = build_fn
        self.kwargs = kwargs
        self.model = None  # Leave the model uninitialized here; it is built in fit()

    def fit(self, X, y, **fit_params):
        # Build the model with the parameters given at initialization
        self.model = self.build_fn(**self.kwargs)

        # Get the fit parameters, including the number of epochs
        epochs = fit_params.get('epochs', 1000)
        batch_size = fit_params.get('batch_size', 256)
        verbose = fit_params.get('verbose', 0)

        # Fit the model with the data and the fit parameters
        self.model.fit(X, y, epochs=epochs, batch_size=batch_size, verbose=verbose, callbacks=[early_stopping])
        return self

    def predict(self, X):
        y_pred = self.model.predict(X)
        if len(y_pred.shape) == 3:
            y_pred = y_pred[:, -1, :]  # Assuming you want the last time step
        return y_pred

# Create the model wrapped in KerasRegressorWrapper
model2 = KerasRegressorWrapper(build_fn=create_model)

# Use TimeSeriesSplit for cross-validation
tscv = TimeSeriesSplit(n_splits=5)
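# TimeSeriesSplit keeps the temporal order: each fold trains on an expanding window of
# past observations and validates on the block that immediately follows, unlike KFold,
# which would mix future bars into the training folds.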

# Combine the data for TimeSeriesSplit
X_timeseries = np.concatenate((x_train22, x_test22), axis=0)
y_timeseries = np.concatenate((y_train2, y_test2), axis=0)

# Run the cross-validation; training arguments are forwarded to the wrapper's fit() via fit_params
cv_scores = cross_val_score(model2, X_timeseries, y_timeseries, cv=tscv, scoring='neg_mean_squared_error',
                            fit_params={'epochs': 1000, 'batch_size': 256, 'verbose': 0})

# Convert the negative MSE scores to positive values
cv_scores = -cv_scores

# Print the cross-validation scores
print("Cross-Validation Scores:", cv_scores)
print("Mean CV Score:", np.mean(cv_scores))

#########################################################################################

print("###############################################################33")

# Plot training loss (the LSTM curve is per epoch; the GRU curve is the per-fold mean)
plt.plot(range(1, len(loss_per_epoch2) + 1), loss_per_epoch2, label='LSTM Loss')
plt.plot(range(1, len(mean_loss_per_epoch) + 1), mean_loss_per_epoch, label='GRU Loss (per-fold mean)')
plt.title('Training Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Plot validation loss (the GRU curve is the per-fold mean; the LSTM curve is per epoch)
plt.plot(range(1, len(val_mean_loss_per_epoch) + 1), val_mean_loss_per_epoch, label='GRU Loss (per-fold mean)')
plt.plot(range(1, len(val_loss_per_epoch2) + 1), val_loss_per_epoch2, label='LSTM Loss')
plt.title('Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

