In [None]:
!pip install yfinance pandas numpy torch matplotlib scikit-learn onnx onnxruntime

Collecting onnx
  Downloading onnx-1.16.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)
Collecting onnxruntime
  Downloading onnxruntime-1.19.2-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting coloredlogs (from onnxruntime)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Downloading onnx-1.16.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.9/15.9 MB[0m [31m29.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading onnxruntime-1.19.2-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (13.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.2/13.2 MB[0m [31m43.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 k

In [3]:
import yfinance as yf
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error, r2_score
import onnx
import onnxruntime
from google.colab import drive
import os

# Mount Google Drive
drive.mount('/content/drive')

# Check if GPU is available and use it
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

# Download EUR/USD forex data
data = yf.download('EURUSD=X', start='2014-01-01', end='2024-01-01')
print(data.head())
data = data[['Close', 'High', 'Low']].copy()  # Create an explicit copy
print(data.head())

# Calculate technical indicators
"""
data['MA20'] = data['Close'].rolling(window=20).mean()
data['EMA20'] = data['Close'].ewm(span=20, adjust=False).mean()
data['EMA8'] = data['Close'].ewm(span=8, adjust=False).mean()
data['RSI'] = 100 - (100 / (1 + data['Close'].pct_change().rolling(window=14).apply(lambda x: (x[x > 0].sum() / -x[x < 0].sum()), raw=True)))

def calculate_psar(data, step=0.02, max_step=0.2):
    psar = data['Close'].copy()
    af = 0.02
    up_trend = True
    ep = data['Low'].iloc[0]
    psar.iloc[0] = ep

    for i in range(1, len(data)):
        if up_trend:
            psar.iloc[i] = psar.iloc[i-1] + af * (ep - psar.iloc[i-1])
        else:
            psar.iloc[i] = psar.iloc[i-1] - af * (psar.iloc[i-1] - ep)

        reverse = False
        if up_trend:
            if data['Low'].iloc[i] < psar.iloc[i]:
                up_trend = False
                reverse = True
                psar.iloc[i] = ep
                ep = data['Low'].iloc[i]
        else:
            if data['High'].iloc[i] > psar.iloc[i]:
                up_trend = True
                reverse = True
                psar.iloc[i] = ep
                ep = data['High'].iloc[i]

        if not reverse:
            if up_trend:
                ep = max(ep, data['High'].iloc[i])
            else:
                ep = min(ep, data['Low'].iloc[i])

            if (up_trend and data['Low'].iloc[i] > psar.iloc[i]) or (not up_trend and data['High'].iloc[i] < psar.iloc[i]):
                af = min(af + step, max_step)

    return psar

data['PSAR'] = calculate_psar(data)
"""
# MACD
def calculate_macd(data, fast=12, slow=26, signal=9):
    exp1 = data['Close'].ewm(span=fast, adjust=False).mean()
    exp2 = data['Close'].ewm(span=slow, adjust=False).mean()
    macd = exp1 - exp2
    signal = macd.ewm(span=signal, adjust=False).mean()
    return macd, signal

data['MACD'], data['MACD_Signal'] = calculate_macd(data)
"""
# ATR
def calculate_atr(data, period=14):
    high_low = data['High'] - data['Low']
    high_close = np.abs(data['High'] - data['Close'].shift())
    low_close = np.abs(data['Low'] - data['Close'].shift())
    ranges = pd.concat([high_low, high_close, low_close], axis=1)
    true_range = np.max(ranges, axis=1)
    return true_range.rolling(period).mean()

data['ATR'] = calculate_atr(data)

# Bollinger Bands
def calculate_bollinger_bands(data, period=20, num_std=2):
    sma = data['Close'].rolling(window=period).mean()
    std = data['Close'].rolling(window=period).std()
    upper_band = sma + (std * num_std)
    lower_band = sma - (std * num_std)
    return upper_band, sma, lower_band

data['BBUpper'], data['BBMiddle'], data['BBLower'] = calculate_bollinger_bands(data)

# Stochastic Oscillator
def calculate_stochastic(data, k_period=14, d_period=3):
    low_min = data['Low'].rolling(window=k_period).min()
    high_max = data['High'].rolling(window=k_period).max()
    k = 100 * ((data['Close'] - low_min) / (high_max - low_min))
    d = k.rolling(window=d_period).mean()
    return k, d

data['Stoch_K'], data['Stoch_D'] = calculate_stochastic(data)

# Average Directional Index (ADX)
def calculate_adx(data, period=14):
    plus_dm = data['High'].diff()
    minus_dm = data['Low'].diff()
    plus_dm[plus_dm < 0] = 0
    minus_dm[minus_dm > 0] = 0

    tr1 = pd.DataFrame(data['High'] - data['Low'])
    tr2 = pd.DataFrame(abs(data['High'] - data['Close'].shift(1)))
    tr3 = pd.DataFrame(abs(data['Low'] - data['Close'].shift(1)))
    frames = [tr1, tr2, tr3]
    tr = pd.concat(frames, axis=1, join='inner').max(axis=1)
    atr = tr.rolling(period).mean()

    plus_di = 100 * (plus_dm.ewm(alpha=1/period).mean() / atr)
    minus_di = abs(100 * (minus_dm.ewm(alpha=1/period).mean() / atr))
    dx = (abs(plus_di - minus_di) / abs(plus_di + minus_di)) * 100
    adx = ((dx.shift(1) * (period - 1)) + dx) / period
    adx_smooth = adx.ewm(alpha=1/period).mean()
    return adx_smooth

data['ADX'] = calculate_adx(data)

# Commodity Channel Index (CCI)
def calculate_cci(data, period=20):
    tp = (data['High'] + data['Low'] + data['Close']) / 3
    sma_tp = tp.rolling(window=period).mean()
    mad = tp.rolling(window=period).apply(lambda x: np.abs(x - x.mean()).mean())
    cci = (tp - sma_tp) / (0.015 * mad)
    return cci

data['CCI'] = calculate_cci(data)

# Calculate VAM
momentum_period = 14
volatility_period = 7
vam_period = 9

data['Momentum'] = data['Close'] - data['Close'].shift(momentum_period)
data['Volatility'] = data['Close'].rolling(window=volatility_period).std()
data['VAM'] = (data['Momentum'] / (data['Volatility'] * np.sqrt(momentum_period))) * 10000
"""
# Drop NaN values due to rolling calculations
data = data.dropna()

# Scale the data
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data[['Close', 'MACD', 'MACD_Signal']])

# Convert to PyTorch tensors
tensor_data = torch.tensor(scaled_data, dtype=torch.float32).to(device)

class StockDataset(Dataset):
    def __init__(self, data, window_size):
        self.data = data
        self.window_size = window_size

    def __len__(self):
        return len(self.data) - self.window_size

    def __getitem__(self, index):
        x = self.data[index:index + self.window_size, :]
        y = self.data[index + self.window_size, 0]  # Predict 'Close' price
        conditions = self.data[index + self.window_size, 1:]  # Use all other features as conditions
        return x, y, conditions

# Define window size and split data into train and test sets
window_size = 30
split_ratio = 0.8
split_index = int(len(tensor_data) * split_ratio)

train_data = StockDataset(tensor_data[:split_index], window_size)
test_data = StockDataset(tensor_data[split_index:], window_size)

train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

class ConditionalLSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, condition_dim):
        super(ConditionalLSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.condition_fc = nn.Linear(condition_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim * 2, 1)  # Doubled the input size to accommodate both LSTM output and condition
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

    def forward(self, x, condition, h0, c0):
        # Process the condition
        condition_out = self.condition_fc(condition)

        # Run LSTM
        out, (hn, cn) = self.lstm(x, (h0, c0))

        # Concatenate LSTM output with condition
        combined = torch.cat((out[:, -1, :], condition_out), dim=1)

        # Final prediction
        output = self.fc(combined)
        return output

# Function to train and save a model
def train_and_save_model(train_loader, condition_index=None, condition_name="NoGuidance"):
    model = ConditionalLSTMModel(input_dim=3, hidden_dim=128, num_layers=2, condition_dim=2).to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    num_epochs = 100
    train_losses = []
    model.train()

    for epoch in range(num_epochs):
        epoch_loss = 0
        for x_batch, y_batch, condition_batch in train_loader:
            x_batch, y_batch, condition_batch = x_batch.to(device), y_batch.to(device), condition_batch.to(device)
            optimizer.zero_grad()
            h0 = torch.zeros(model.num_layers, x_batch.size(0), model.hidden_dim).to(device)
            c0 = torch.zeros(model.num_layers, x_batch.size(0), model.hidden_dim).to(device)

            if condition_index is not None:
                # Use only the specified condition and repeat it to match the expected shape
                condition_batch = condition_batch[:, condition_index].unsqueeze(1).repeat(1, 2)

            predictions = model(x_batch, condition_batch, h0, c0)
            loss = criterion(predictions.squeeze(), y_batch)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

        avg_loss = epoch_loss / len(train_loader)
        train_losses.append(avg_loss)
        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {avg_loss:.4f}')

    # Export the model to ONNX
    dummy_input_x = torch.randn(1, window_size, 3).to(device)
    dummy_input_condition = torch.randn(1, 2).to(device)
    dummy_h0 = torch.zeros(model.num_layers, 1, model.hidden_dim).to(device)
    dummy_c0 = torch.zeros(model.num_layers, 1, model.hidden_dim).to(device)

    # Save the ONNX model to Google Drive
    onnx_path = f'/content/drive/MyDrive/stock_prediction_model_{condition_name}.onnx'
    torch.onnx.export(model,
                      (dummy_input_x, dummy_input_condition, dummy_h0, dummy_c0),
                      onnx_path,
                      export_params=True,
                      opset_version=11,
                      do_constant_folding=True,
                      input_names=['input_x', 'input_condition', 'h0', 'c0'],
                      output_names=['output'],
                      dynamic_axes={'input_x': {0: 'batch_size', 1: 'sequence'},
                                    'input_condition': {0: 'batch_size'},
                                    'h0': {1: 'batch_size'},
                                    'c0': {1: 'batch_size'},
                                    'output': {0: 'batch_size'}})

    print(f"Model for {condition_name} exported to ONNX format and saved to Google Drive: {onnx_path}")

    return model, onnx_path, train_losses

# Train and save models for each condition
conditions = ['NoGuidance',  'MACD','MACD_Signal']
trained_models = {}
onnx_paths = {}
all_train_losses = {}

for i, cond_name in enumerate(conditions):
    print(f'\nTraining model for {cond_name}:')
    model, onnx_path, train_losses = train_and_save_model(train_loader, i-1 if i > 0 else None, cond_name)
    trained_models[cond_name] = model
    onnx_paths[cond_name] = onnx_path
    all_train_losses[cond_name] = train_losses

# Verify and test each ONNX model
for cond_name, onnx_path in onnx_paths.items():
    print(f"\nVerifying and testing ONNX model for {cond_name}")

    # Verify the ONNX model
    onnx_model = onnx.load(onnx_path)
    onnx.checker.check_model(onnx_model)
    print(f"ONNX model for {cond_name} checked.")

# Test ONNX Runtime
    ort_session = onnxruntime.InferenceSession(onnx_path)

    # Prepare sample input
    sample_x = np.random.randn(1, window_size, 3).astype(np.float32)
    sample_condition = np.random.randn(1, 2).astype(np.float32)
    sample_h0 = np.zeros((2, 1, 128)).astype(np.float32)
    sample_c0 = np.zeros((2, 1, 128)).astype(np.float32)

    # Run inference
    ort_inputs = {
        'input_x': sample_x,
        'input_condition': sample_condition,
        'h0': sample_h0,
        'c0': sample_c0
    }
    ort_output = ort_session.run(None, ort_inputs)

    print(f"ONNX Runtime test for {cond_name} successful.")

# Function to evaluate model and plot results
def evaluate_and_plot(model, loader, condition_index=None, condition_name=None):
    model.eval()
    predictions = []
    actuals = []

    with torch.no_grad():
        for x_batch, y_batch, condition_batch in loader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            condition_batch = condition_batch.to(device)

            h0 = torch.zeros(model.num_layers, x_batch.size(0), model.hidden_dim).to(device)
            c0 = torch.zeros(model.num_layers, x_batch.size(0), model.hidden_dim).to(device)

            if condition_index is not None:
                condition_batch = condition_batch[:, condition_index].unsqueeze(1).repeat(1, 2)

            pred = model(x_batch, condition_batch, h0, c0)
            predictions.extend(pred.cpu().numpy())
            actuals.extend(y_batch.cpu().numpy())

    predictions = np.array(predictions).squeeze()
    actuals = np.array(actuals)

    # Inverse transform predictions and actuals
    predictions = scaler.inverse_transform(np.column_stack([predictions, np.zeros((len(predictions), 2))]))[: ,0]
    actuals = scaler.inverse_transform(np.column_stack([actuals, np.zeros((len(actuals), 2))]))[: ,0]

    # Calculate metrics
    mae = mean_absolute_error(actuals, predictions)
    mape = mean_absolute_percentage_error(actuals, predictions) * 100
    mse = mean_squared_error(actuals, predictions)
    rmse = np.sqrt(mse)
    r2 = r2_score(actuals, predictions) * 100

    # Calculate hit rate
    actual_direction = np.sign(np.diff(actuals))
    predicted_direction = np.sign(np.diff(predictions))
    hit_rate = np.mean(actual_direction == predicted_direction) * 100

    print(f'{condition_name} - '
          f'Test MAE: {mae:.4f}, Test MAPE: {mape:.4f}, '
          f'Test RMSE: {rmse:.4f}, Test R2: {r2:.4f}, '
          f'Hit Rate: {hit_rate:.4f}')

    return actuals, predictions, (mae, mape, rmse, r2, hit_rate)

# Evaluate each model
all_predictions = {}
all_metrics = {}

for i, cond_name in enumerate(conditions):
    print(f'\nEvaluating model for {cond_name}:')
    model = trained_models[cond_name]
    actuals, predictions, metrics = evaluate_and_plot(model, test_loader, i-1 if i > 0 else None, cond_name)
    all_predictions[cond_name] = predictions
    all_metrics[cond_name] = metrics

# Plot separate graphs for each metric
metrics = ['MAE', 'MAPE', 'RMSE', 'R2', 'Hit Rate']
metric_labels = ['Mean Absolute Error', 'Mean Absolute Percentage Error (%)', 'Root Mean Square Error', 'R-squared (%)', 'Hit Rate (%)']

for i, metric in enumerate(metrics):
    plt.figure(figsize=(15, 10))

    # Extract the specific metric for each condition
    metric_values = [metrics[i] for metrics in all_metrics.values()]

    # Create bar plot
    bars = plt.bar(conditions, metric_values)

    # Customize the plot
    plt.title(f'Comparison of {metric_labels[i]} Across Different Conditions')
    plt.xlabel('Conditions')
    plt.ylabel(metric_labels[i])
    plt.xticks(rotation=45, ha='right')

    # Add value labels on top of each bar
    for bar in bars:
        height = bar.get_height()
        plt.text(bar.get_x() + bar.get_width()/2., height,
                 f'{height:.2f}',
                 ha='center', va='bottom')

    plt.tight_layout()

    # Save the plot
    plt.savefig(f'/content/drive/MyDrive/{metric.lower().replace(" ", "_")}_comparison.png')
    print(f"{metric} comparison plot saved to Google Drive.")

    plt.close()

# Print a summary of the results
print("\nSummary of Results:")
print("===================")

for cond_name, metrics in all_metrics.items():
    mae, mape, rmse, r2, hit_rate = metrics
    print(f"\n{cond_name}:")
    print(f"  MAE:      {mae:.4f}")
    print(f"  MAPE:     {mape:.4f}%")
    print(f"  RMSE:     {rmse:.4f}")
    print(f"  R2:       {r2:.4f}%")
    print(f"  Hit Rate: {hit_rate:.4f}%")

# Determine the best model based on RMSE
best_model = min(all_metrics, key=lambda x: all_metrics[x][2])
print(f"\nBest performing model based on RMSE: {best_model}")

# Save the best model separately
best_model_path = f'/content/drive/MyDrive/best_stock_prediction_model.onnx'
onnx.save(onnx.load(onnx_paths[best_model]), best_model_path)
print(f"Best model saved to: {best_model_path}")

# Visualize the training loss for all models
plt.figure(figsize=(12, 8))
for cond_name, losses in all_train_losses.items():
    plt.plot(losses, label=cond_name)
plt.title('Training Loss Over Time for All Models')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.tight_layout()
plt.savefig('/content/drive/MyDrive/training_loss_plot.png')
print("Training loss plot saved to Google Drive.")
plt.close()

# Feature importance analysis for the best model
if best_model != 'NoGuidance':
    best_model_obj = trained_models[best_model]
    feature_importance = best_model_obj.condition_fc.weight.data.cpu().numpy()
    feature_names = conditions[1:]  # Exclude 'NoGuidance'

    plt.figure(figsize=(12, 6))
    plt.bar(feature_names, feature_importance[0])
    plt.title(f'Feature Importance for {best_model} Model')
    plt.xlabel('Features')
    plt.ylabel('Importance')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.savefig('/content/drive/MyDrive/feature_importance_plot.png')
    print("Feature importance plot saved to Google Drive.")
    plt.close()

# Final message
print("\nExperiment completed. All models, visualizations, and analysis results have been saved to Google Drive.")

# Optional: Clean up to free memory
import gc
gc.collect()
torch.cuda.empty_cache()

print("Script execution completed.")

[*********************100%***********************]  1 of 1 completed

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Using device: cpu
                Open      High       Low     Close  Adj Close  Volume
Date                                                                 
2014-01-01  1.374495  1.377904  1.374400  1.374495   1.374495       0
2014-01-02  1.376595  1.377467  1.363271  1.376671   1.376671       0
2014-01-03  1.366624  1.367297  1.360170  1.366662   1.366662       0
2014-01-06  1.359582  1.364610  1.357279  1.359601   1.359601       0
2014-01-07  1.363066  1.365799  1.359878  1.363196   1.363196       0
               Close      High       Low
Date                                    
2014-01-01  1.374495  1.377904  1.374400
2014-01-02  1.376671  1.377467  1.363271
2014-01-03  1.366662  1.367297  1.360170
2014-01-06  1.359601  1.364610  1.357279
2014-01-07  1.363196  1.365799  1.359878

Training model for NoGuidance:





Epoch 1/100, Loss: 0.0353
Epoch 2/100, Loss: 0.0014
Epoch 3/100, Loss: 0.0004
Epoch 4/100, Loss: 0.0003
Epoch 5/100, Loss: 0.0004
Epoch 6/100, Loss: 0.0002
Epoch 7/100, Loss: 0.0002
Epoch 8/100, Loss: 0.0002
Epoch 9/100, Loss: 0.0002
Epoch 10/100, Loss: 0.0001
Epoch 11/100, Loss: 0.0002
Epoch 12/100, Loss: 0.0002
Epoch 13/100, Loss: 0.0002
Epoch 14/100, Loss: 0.0002
Epoch 15/100, Loss: 0.0001
Epoch 16/100, Loss: 0.0001
Epoch 17/100, Loss: 0.0001
Epoch 18/100, Loss: 0.0001
Epoch 19/100, Loss: 0.0001
Epoch 20/100, Loss: 0.0002
Epoch 21/100, Loss: 0.0001
Epoch 22/100, Loss: 0.0002
Epoch 23/100, Loss: 0.0001
Epoch 24/100, Loss: 0.0002
Epoch 25/100, Loss: 0.0001
Epoch 26/100, Loss: 0.0001
Epoch 27/100, Loss: 0.0001
Epoch 28/100, Loss: 0.0001
Epoch 29/100, Loss: 0.0001
Epoch 30/100, Loss: 0.0001
Epoch 31/100, Loss: 0.0002
Epoch 32/100, Loss: 0.0001
Epoch 33/100, Loss: 0.0001
Epoch 34/100, Loss: 0.0001
Epoch 35/100, Loss: 0.0001
Epoch 36/100, Loss: 0.0001
Epoch 37/100, Loss: 0.0001
Epoch 38/1



Model for NoGuidance exported to ONNX format and saved to Google Drive: /content/drive/MyDrive/stock_prediction_model_NoGuidance.onnx

Training model for MACD:
Epoch 1/100, Loss: 0.0327
Epoch 2/100, Loss: 0.0011
Epoch 3/100, Loss: 0.0003
Epoch 4/100, Loss: 0.0003
Epoch 5/100, Loss: 0.0003
Epoch 6/100, Loss: 0.0003
Epoch 7/100, Loss: 0.0002
Epoch 8/100, Loss: 0.0003
Epoch 9/100, Loss: 0.0002
Epoch 10/100, Loss: 0.0003
Epoch 11/100, Loss: 0.0002
Epoch 12/100, Loss: 0.0002
Epoch 13/100, Loss: 0.0002
Epoch 14/100, Loss: 0.0002
Epoch 15/100, Loss: 0.0002
Epoch 16/100, Loss: 0.0002
Epoch 17/100, Loss: 0.0002
Epoch 18/100, Loss: 0.0002
Epoch 19/100, Loss: 0.0002
Epoch 20/100, Loss: 0.0002
Epoch 21/100, Loss: 0.0001
Epoch 22/100, Loss: 0.0001
Epoch 23/100, Loss: 0.0002
Epoch 24/100, Loss: 0.0001
Epoch 25/100, Loss: 0.0001
Epoch 26/100, Loss: 0.0001
Epoch 27/100, Loss: 0.0001
Epoch 28/100, Loss: 0.0001
Epoch 29/100, Loss: 0.0001
Epoch 30/100, Loss: 0.0001
Epoch 31/100, Loss: 0.0001
Epoch 32/100