import numpy as np
import pandas as pd
import random  
from datetime import datetime
import MetaTrader5 as mt5
import time
import concurrent.futures
from tqdm import tqdm
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split
import math

# GLOBALS
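# MARKUP          - approximate per-trade cost in price units (consumed by test_model, which
#                   is not invoked in this listing)
# BACKWARD        - start of the downloaded history
# FORWARD         - presumed boundary between the training and test periods
# EXAMWARD        - end of the downloaded history (no separate "exam" split is made here)
# MAX_OPEN_TRADES / risk_reward_ratio - limits used by the live-trading loop below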
MARKUP = 0.000001
BACKWARD = datetime(2010, 1, 1) 
FORWARD = datetime(2017, 1, 1)
EXAMWARD = datetime(2024, 1, 1) 
MAX_OPEN_TRADES = 6
symbol = "EURUSD"
risk_reward_ratio = 4  

def retrieve_data(symbol, retries_limit=300):
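    # Connects to the MetaTrader 5 terminal at the hard-coded terminal_path (adjust it to
    # your own installation), downloads H1 bars for the BACKWARD..EXAMWARD range with up to
    # retries_limit attempts, builds a basic feature set and returns the DataFrame
    # (or None on failure).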
    terminal_path = "C:/Program Files/RoboForex - MetaTrader 5/Arima/terminal64.exe"
    
    attempt = 0
    raw_data = None
    
    while attempt < retries_limit:
        if not mt5.initialize(path=terminal_path): 
            print("Terminal initialization error")
            return None
            
        instrument_count = mt5.symbols_total() 
        if instrument_count > 0:
            print(f"Instruments in terminal: {instrument_count}")
        else:
            print("No instruments in terminal")
            
        rates = mt5.copy_rates_range(symbol, mt5.TIMEFRAME_H1, BACKWARD, EXAMWARD)  
        mt5.shutdown()

        if rates is None or len(rates) == 0:
            print(f"No data for symbol {symbol} yet (attempt {attempt+1})")  
            attempt += 1
            time.sleep(1) 
        else:
            raw_data = pd.DataFrame(rates[:-1], columns=['time', 'open', 'high', 'low', 'close', 'tick_volume'])
            raw_data['time'] = pd.to_datetime(raw_data['time'], unit='s') 
            raw_data.set_index('time', inplace=True)
            break
            
    if raw_data is None:
        print(f"Failed after {retries_limit} attempts to retrieve data")
        return None
    
    # Add simple features
    raw_data['raw_SMA_10'] = raw_data['close'].rolling(window=10).mean()
    raw_data['raw_SMA_20'] = raw_data['close'].rolling(window=20).mean()
    raw_data['Price_Change'] = raw_data['close'].pct_change() * 100  

    # Additional features
    raw_data['raw_Std_Dev_Close'] = raw_data['close'].rolling(window=20).std() 
    raw_data['raw_Volume_Change'] = raw_data['tick_volume'].pct_change() * 100
    
    # The data is hourly, so these "day/week/month" differences are measured in H1 bars,
    # not calendar periods
    raw_data['raw_Prev_Day_Price_Change'] = raw_data['close'] - raw_data['close'].shift(1)
    raw_data['raw_Prev_Week_Price_Change'] = raw_data['close'] - raw_data['close'].shift(7)
    raw_data['raw_Prev_Month_Price_Change'] = raw_data['close'] - raw_data['close'].shift(30)
    
    # Length of the current run of consecutive positive / negative hourly price changes
    raw_data['Consecutive_Positive_Changes'] = (raw_data['Price_Change'] > 0).astype(int).groupby((raw_data['Price_Change'] > 0).astype(int).diff().ne(0).cumsum()).cumsum()
    raw_data['Consecutive_Negative_Changes'] = (raw_data['Price_Change'] < 0).astype(int).groupby((raw_data['Price_Change'] < 0).astype(int).diff().ne(0).cumsum()).cumsum()
    raw_data['Price_Density'] = raw_data['close'].rolling(window=10).apply(lambda x: len(set(x)))  
    # 1 if the latest close is the 10-bar high, -1 if it is the 10-bar low, 0 otherwise
    raw_data['Fractal_Analysis'] = raw_data['close'].rolling(window=10).apply(
        lambda x: 1 if x.iloc[-1] == x.max() else (-1 if x.iloc[-1] == x.min() else 0))
    raw_data['Price_Volume_Ratio'] = raw_data['close'] / raw_data['tick_volume']  
    raw_data['Median_Close_7'] = raw_data['close'].rolling(window=7).median()  
    raw_data['Median_Close_30'] = raw_data['close'].rolling(window=30).median()  
    raw_data['Price_Volatility'] = raw_data['close'].rolling(window=20).std() / raw_data['close'].rolling(window=20).mean()  

    print("\nOriginal columns:")
    print(raw_data[['close', 'high', 'low', 'open', 'tick_volume']].tail(100))

    print("\nList of features:")
    print(raw_data.columns.tolist())

    print("\nLast 100 features:")  
    print(raw_data.tail(100))

    # Replace NaN values with means
    raw_data.fillna(raw_data.mean(), inplace=True)
    
    return raw_data

def augment_data(raw_data, noise_level=0.01, time_shift=1, scale_range=(0.9, 1.1)):
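    # Produces four modified copies of the dataset (Gaussian noise, a time shift by
    # `time_shift` hours, a single random scaling factor, and sign inversion) and
    # concatenates them with the original, roughly quintupling the number of rows.
    # Duplicate timestamps appear in the index as a result.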
    print(f"Rows before augmentation: {len(raw_data)}")

    # Copy raw_data to augmented_data
    augmented_data = raw_data.copy()

    # Adding noise
    noisy_data = raw_data.copy()
    noisy_data += np.random.normal(0, noise_level, noisy_data.shape)

    # Replace NaN with means
    noisy_data.fillna(noisy_data.mean(), inplace=True)

    augmented_data = pd.concat([augmented_data, noisy_data])
    print(f"Added {len(noisy_data)} rows after adding noise")

    # Time shifting
    shifted_data = raw_data.copy()
    shifted_data.index += pd.DateOffset(hours=time_shift)

    # Replace NaN with means
    shifted_data.fillna(shifted_data.mean(), inplace=True)

    augmented_data = pd.concat([augmented_data, shifted_data])
    print(f"Added {len(shifted_data)} rows after time shifting")

    # Scaling
    scale = np.random.uniform(scale_range[0], scale_range[1])
    scaled_data = raw_data.copy()
    scaled_data *= scale

    # Replace NaN with means
    scaled_data.fillna(scaled_data.mean(), inplace=True)

    augmented_data = pd.concat([augmented_data, scaled_data])
    print(f"Added {len(scaled_data)} rows after scaling")

    # Inversion
    inverted_data = raw_data.copy()
    inverted_data *= -1

    # Replace NaN with means
    inverted_data.fillna(inverted_data.mean(), inplace=True)

    augmented_data = pd.concat([augmented_data, inverted_data])
    print(f"Added {len(inverted_data)} rows after inversion")

    print(f"Rows after augmentation: {len(augmented_data)}")

    # Print dates by years
    print("Print dates by years:")
    for year, group in augmented_data.groupby(augmented_data.index.year):
        print(f"Year {year}: {group.index}")
        
    return augmented_data

from sklearn.utils import class_weight
from imblearn.under_sampling import RandomUnderSampler

def markup_data(data, target_column, label_column, markup_ratio=0.00002):
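    # Binary labelling: label_column is 1 when the next bar's target_column exceeds the
    # current value by more than markup_ratio, otherwise 0.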
    # Create a new column for labels (buy/sell) based on the target column (e.g., 'close') and markup_ratio
    data.loc[:, label_column] = np.where(data.loc[:, target_column].shift(-1) > data.loc[:, target_column] + markup_ratio, 1, 0)

    # Replace NaN values with 0 (no trade)
    data.loc[data[label_column].isna(), label_column] = 0

    # Print the number of labels set
    print(f"Number of labels set for price change greater than markup ratio: {data[label_column].sum()}")

    return data

def label_data(data, symbol, min_days=2, max_days=72):
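    # For each bar, looks a random number of bars (min_days..max_days) ahead and assigns
    # 1 (growth) or 0 (decline) when price travels further than take_level without first
    # violating stop_level; ambiguous cases get None and are dropped. Classes are then
    # balanced with RandomUnderSampler. Despite the parameter names, the horizon is
    # measured in H1 bars, not days.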
    terminal_path = "C:/Program Files/RoboForex - MetaTrader 5/Arima/terminal64.exe"

    if not mt5.initialize(path=terminal_path):
        print("Terminal connection error")
        return

    symbol_info = mt5.symbol_info(symbol)
    stop_level = 300 * symbol_info.point
    take_level = 800 * symbol_info.point

    labels = []

    # Limit the loop with end_date
    for i in range(data.shape[0] - max_days):
        rand = random.randint(min_days, max_days)
        curr_pr = data['close'].iloc[i]
        future_pr = data['close'].iloc[i + rand]
        min_pr = data['low'].iloc[i:i + rand].min()
        max_pr = data['high'].iloc[i:i + rand].max()

        price_change = abs(future_pr - curr_pr)

        if price_change > take_level and future_pr > curr_pr and min_pr > curr_pr - stop_level:
            labels.append(1)  # Growth
        elif price_change > take_level and future_pr < curr_pr and max_pr < curr_pr + stop_level:
            labels.append(0)  # Decline
        else:
            labels.append(None)

    data = data.iloc[:len(labels)].copy()
    data['labels'] = labels

    # Drop rows with NaN values
    data.dropna(inplace=True)

    # Split data into features (X) and labels (y)
    X = data.drop('labels', axis=1)
    y = data['labels']

    # Class balancing
    rus = RandomUnderSampler(random_state=2)
    X_balanced, y_balanced = rus.fit_resample(X, y)

    # Concatenate balanced features and labels
    data_balanced = pd.concat([X_balanced, y_balanced], axis=1)
    print("Number of growth labels (1.0):", data_balanced['labels'].value_counts()[1.0])
    print("Number of decline labels (0.0):", data_balanced['labels'].value_counts()[0.0])
    return data_balanced
    
def generate_new_features(data, num_features=100, random_seed=1):
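    # Creates num_features synthetic features by combining randomly chosen columns with
    # random arithmetic or rolling-window operations. Defined here but not called by the
    # process_data() pipeline below.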
    random.seed(random_seed)
    new_features = {}

    for _ in range(num_features):
        # Generate random names for new features
        feature_name = f'feature_{len(new_features)}'

        # Generate random indices for selecting existing features
        col1_idx, col2_idx = random.sample(range(len(data.columns)), 2)
        col1, col2 = data.columns[col1_idx], data.columns[col2_idx]

        # Choose a random operation to create a new feature
        operation = random.choice(['add', 'subtract', 'multiply', 'divide', 'shift', 'rolling_mean', 'rolling_std', 'rolling_max', 'rolling_min', 'rolling_sum'])

        if operation == 'add':
            new_features[feature_name] = data[col1] + data[col2]
        elif operation == 'subtract':
            new_features[feature_name] = data[col1] - data[col2]
        elif operation == 'multiply':
            new_features[feature_name] = data[col1] * data[col2]
        elif operation == 'divide':
            new_features[feature_name] = data[col1] / data[col2]
        elif operation == 'shift':
            shift = random.randint(1, 10)
            new_features[feature_name] = data[col1].shift(shift)
        elif operation == 'rolling_mean':
            window = random.randint(2, 20)
            new_features[feature_name] = data[col1].rolling(window).mean()
        elif operation == 'rolling_std':
            window = random.randint(2, 20)
            new_features[feature_name] = data[col1].rolling(window).std()
        elif operation == 'rolling_max':
            window = random.randint(2, 20)
            new_features[feature_name] = data[col1].rolling(window).max()
        elif operation == 'rolling_min':
            window = random.randint(2, 20)
            new_features[feature_name] = data[col1].rolling(window).min()
        elif operation == 'rolling_sum':
            window = random.randint(2, 20)
            new_features[feature_name] = data[col1].rolling(window).sum()

    # Add new features to the original DataFrame
    new_data = pd.concat([data, pd.DataFrame(new_features)], axis=1)

    # Print the generated features as a table
    print("\nGenerated features:")
    print(new_data[list(new_features.keys())].tail(100))

    return new_data

from sklearn.mixture import GaussianMixture

def cluster_features_by_gmm(data, n_components=30):
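    # Fits a Gaussian mixture model on the feature matrix and appends the predicted
    # component index as a new 'cluster' column.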
    # Drop the 'label' column as it is not a feature
    X = data.drop(['label', 'labels'], axis=1)

    # Replace infinite and very large values with the median of the corresponding feature column
    X = X.replace([np.inf, -np.inf], np.nan)
    X = X.fillna(X.median())

    # Create GMM model
    gmm = GaussianMixture(n_components=n_components, covariance_type='full', reg_covar=0.1, random_state=1)

    # Fit the model on data
    gmm.fit(X)

    # Return DataFrame with clusters for each data row
    data['cluster'] = gmm.predict(X)

    # Print table with clusters
    print("\nFeature clusters:")
    print(data[['cluster']].tail(100))

    return data

from sklearn.feature_selection import RFECV
from sklearn.ensemble import RandomForestClassifier

def feature_engineering(data, n_features_to_select=10):
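    # Recursive feature elimination with cross-validation (RFECV) on a random forest;
    # keeps at least n_features_to_select features plus the 'label' and 'labels' columns.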
    # Drop the 'label' column as it is not a feature
    X = data.drop(['label', 'labels'], axis=1)
    y = data['labels']

    # Replace infinite and very large values with the median of the corresponding feature column
    X = X.replace([np.inf, -np.inf], np.nan)
    X = X.fillna(X.median())

    # Create RandomForestClassifier model
    clf = RandomForestClassifier(n_estimators=100, random_state=1)

    # Use RFECV to select the top n_features_to_select features
    rfecv = RFECV(estimator=clf, step=1, cv=5, scoring='accuracy', n_jobs=-1, verbose=1,
                  min_features_to_select=n_features_to_select)
    rfecv.fit(X, y)

    # Return DataFrame with the best features, 'label' column, and 'labels' column
    selected_features = X.columns[rfecv.get_support(indices=True)]
    selected_data = data[selected_features.tolist() + ['label', 'labels']]

    # Print table with the best features
    print("\nBest features:")
    print(pd.DataFrame({'Feature': selected_features}))

    return selected_data

import xgboost as xgb
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import BaggingClassifier

def train_xgboost_classifier(data, num_boost_rounds=1000):
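    # Trains a bagging ensemble of XGBoost classifiers, tuning the bagging parameters with
    # a 5-fold grid search. The num_boost_rounds argument is accepted for compatibility
    # but is not used by this implementation.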
    # Check that data is not empty
    if data.empty:
        raise ValueError("Data should not be empty")

    # Check that all required columns are present in data
    required_columns = ['label', 'labels']
    if not all(column in data.columns for column in required_columns):
        raise ValueError(f"Data is missing required columns: {required_columns}")

    # Drop the 'label' column as it is not a feature
    X = data.drop(['label', 'labels'], axis=1)
    y = data['labels']

    # Check that all features are numeric
    if not all(pd.api.types.is_numeric_dtype(X[column]) for column in X.columns):
        raise ValueError("All features should have numeric data type")

    # Create base XGBoost model
    clf = xgb.XGBClassifier(objective='binary:logistic', random_state=1,
                         max_depth=5, learning_rate=0.2, n_estimators=300,
                         subsample=0.01, colsample_bytree=0.1,
                         reg_alpha=1, reg_lambda=1)

    # Create ensemble model using Bagging ('estimator=' for scikit-learn >= 1.2;
    # older releases expect 'base_estimator=')
    bagging_clf = BaggingClassifier(estimator=clf, random_state=1)

    # Define hyperparameters for grid search
    param_grid = {
        'n_estimators': [10, 20, 30],
        'max_samples': [0.5, 0.7, 1.0],
        'max_features': [0.5, 0.7, 1.0]
    }

    # Train model on data using cross-validation and grid search for hyperparameter tuning
    grid_search = GridSearchCV(bagging_clf, param_grid, cv=5, scoring='accuracy')
    grid_search.fit(X, y)

    # Calculate mean accuracy score
    accuracy = grid_search.best_score_

    print(f"Average cross-validation accuracy: {accuracy:.2f}")

    # Return the trained model
    return grid_search.best_estimator_

def test_model(model, X_test, y_test, markup, initial_balance=10000.0, point_cost=0.00001):
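    # Toy backtest: every prediction opens a trade that is closed 10 bars later; profit and
    # loss are converted to points via point_cost. Defined for manual use and not called by
    # the pipeline below.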
    balance = initial_balance
    trades = 0
    profits = []

    # Testing the model on the test data
    predicted_labels = model.predict(X_test)
    for i in range(len(predicted_labels) - 10):
        if predicted_labels[i] == 1:
            # Opening a long position
            entry_price = X_test.iloc[i]['close']
            exit_price = X_test.iloc[i+10]['close']
            if exit_price > entry_price + markup:
                # Closing the long position with profit made
                profit = (exit_price - entry_price - markup) / point_cost
                balance += profit
                trades += 1
                profits.append(profit)
            else:
                # Closing the long position with a loss
                loss = (entry_price - exit_price + markup) / point_cost
                balance -= loss
                trades += 1
                profits.append(-loss)
        elif predicted_labels[i] == 0:
            # Opening a short position
            entry_price = X_test.iloc[i]['close']
            exit_price = X_test.iloc[i+10]['close']
            if exit_price < entry_price - markup:
                # Closing the short position with profit made
                profit = (entry_price - exit_price - markup) / point_cost
                balance += profit
                trades += 1
                profits.append(profit)
            else:
                # Closing the short position with a loss
                loss = (exit_price - entry_price + markup) / point_cost
                balance -= loss
                trades += 1
                profits.append(-loss)

    # Calculating total accumulated profit or loss
    total_profit = balance - initial_balance

    # Printing results
    print(f"Total accumulated profit or loss: {total_profit:.2f}")
    print(f"Number of trades: {trades}")
    time.sleep(100)  # pause, presumably to keep the printed results visible

def online_trading(symbol, features, model):
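    # Live-trading loop: reads the current quotes, asks the model for a signal, sizes the
    # position with a Kelly-style formula capped between min_volume and max_volume, and
    # sends a market order with fixed stop-loss/take-profit distances (300/800 points).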
    terminal_path = "C:/Program Files/RoboForex - MetaTrader 5/Arima/terminal64.exe"

    if not mt5.initialize(path=terminal_path):
        print("Error: Failed to connect to MetaTrader 5 terminal")
        return

    open_trades = 0
    attempts = 30000

    # Get the current account balance
    account_info = mt5.account_info()
    account_balance = account_info.balance

    # Set the initial volume for opening trades
    volume = 0.1

    # Set the initial peak balance
    peak_balance = account_balance

    # Wait until the instrument becomes available in the terminal
    attempt = 0
    while True:
        symbol_info = mt5.symbol_info(symbol)
        if symbol_info is not None:
            break
        attempt += 1
        print(f"Error: Instrument not found. Attempt {attempt} of {attempts}")
        time.sleep(5)

    while True:
        price_bid = mt5.symbol_info_tick(symbol).bid
        price_ask = mt5.symbol_info_tick(symbol).ask

        # Probability of the "growth" class; predict_proba keeps the Kelly sizing below
        # meaningful (plain predict() would return hard 0/1 labels)
        signal = model.predict_proba(features)[:, 1]

        positions_total = mt5.positions_total()

        # Drawdown of the current balance relative to the balance recorded at startup
        account_info = mt5.account_info()
        current_balance = account_info.balance
        daily_drop = (account_balance - current_balance) / account_balance

        # Calculate the probability of winning based on the distance between the model's prediction and 0.5
        probability_of_winning = abs(signal[-1] - 0.5) * 2

        # Calculate the optimal volume for opening trades using the Kelly criterion
        optimal_volume = (probability_of_winning - (1 - probability_of_winning) / risk_reward_ratio) / risk_reward_ratio * account_balance / price_ask

        # Set minimum and maximum volume for opening trades
        min_volume = 0.1  # minimum trade volume
        max_volume = 1.0  # maximum trade volume
        optimal_volume = max(min_volume, min(max_volume, optimal_volume))

        # Cut the trade volume by 0.1 lot (but never below the minimum) when the drawdown
        # exceeds 2%
        if daily_drop > 0.02:
            optimal_volume -= 0.1
            optimal_volume = max(optimal_volume, min_volume)
        elif current_balance > peak_balance:
            optimal_volume = (probability_of_winning - (1 - probability_of_winning) / risk_reward_ratio) / risk_reward_ratio * account_balance / price_ask
            peak_balance = current_balance

        # Set the volume for opening trades
        volume = optimal_volume

        for _ in range(attempts):
            if positions_total < MAX_OPEN_TRADES and signal[-1] > 0.5:
                request = {
                    "action": mt5.TRADE_ACTION_DEAL,
                    "symbol": symbol,
                    "volume": volume,
                    "type": mt5.ORDER_TYPE_BUY,
                    "price": price_ask,
                    "sl": price_ask - 300 * symbol_info.point,
                    "tp": price_ask + 800 * symbol_info.point,
                    "deviation": 20,
                    "magic": 123456,
                    "comment": "Test deal",
                    "type_time": mt5.ORDER_TIME_GTC,
                    "type_filling": mt5.ORDER_FILLING_FOK,
                }
            elif positions_total < MAX_OPEN_TRADES and signal[-1] < 0.5:
                request = {
                    "action": mt5.TRADE_ACTION_DEAL,
                    "symbol": symbol,
                    "volume": volume,
                    "type": mt5.ORDER_TYPE_SELL,
                    "price": price_bid,
                    "sl": price_bid + 300 * symbol_info.point,
                    "tp": price_bid - 800 * symbol_info.point,
                    "deviation": 20,
                    "magic": 123456,
                    "comment": "Test deal",
                    "type_time": mt5.ORDER_TIME_GTC,
                    "type_filling": mt5.ORDER_FILLING_FOK,
                }
            else:
                print("Maximum number of open positions reached or no clear signal")
                return None

            result = mt5.order_send(request)

            if result.retcode == mt5.TRADE_RETCODE_DONE:
                if signal[-1] > 0.5:
                    print("Buy position opened")
                    open_trades += 1
                elif signal[-1] < 0.5:
                    print("Sell position opened")
                    open_trades += 1
                return result.order
            else:
                print(f"Error: Trade request not executed, retcode={result.retcode}. Attempt {_ + 1}/{attempts}")
                time.sleep(3)

        time.sleep(4000)

import threading

def process_data(raw_data):
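    # Full data-preparation pipeline: augmentation -> markup -> labelling -> GMM
    # clustering -> feature selection. Note that it reads the module-level `symbol`
    # global rather than taking it as a parameter.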
    # Augment data
    augmented_data = augment_data(raw_data)

    # Markup data
    marked_data = markup_data(augmented_data, 'close', 'label')

    # Label data
    labeled_data = label_data(marked_data, symbol)

    # Cluster features by GMM
    labeled_data_clustered = cluster_features_by_gmm(labeled_data, n_components=30)

    # Feature engineering
    labeled_data_engineered = feature_engineering(labeled_data_clustered, n_features_to_select=11)

    return labeled_data_engineered

from sklearn.metrics import accuracy_score


# Define evaluate_xgboost_classifier function
def evaluate_xgboost_classifier(model, X_test, y_test):
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy

# Create a global variable to track if all symbols are done
all_symbols_done = False

# Define process_symbol function
def process_symbol(symbol):
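    # Per-symbol worker: downloads and processes data, trains and evaluates the model on
    # the post-FORWARD period, then keeps calling online_trading() until the shared
    # all_symbols_done flag becomes True.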
    global all_symbols_done
    try:
        # Retrieve data for the specified symbol
        raw_data = retrieve_data(symbol)
        if raw_data is None:
            print("Data not found for symbol {}".format(symbol))
            return None

        # Process data
        labeled_data_engineered = process_data(raw_data)

        # Split data into train and test sets
        train_data = labeled_data_engineered[labeled_data_engineered.index <= FORWARD]
        test_data = labeled_data_engineered[labeled_data_engineered.index > FORWARD]

        # Train XGBoost classifier
        xgb_clf = train_xgboost_classifier(train_data, num_boost_rounds=1000)

        # Evaluate XGBoost classifier
        test_features = test_data.drop(['label', 'labels'], axis=1)
        test_labels = test_data['labels']
        accuracy = evaluate_xgboost_classifier(xgb_clf, test_features, test_labels)
        print("Accuracy for symbol {}: {:.2f}%".format(symbol, accuracy * 100))

        # Feature matrix from the test period used by the online trading loop
        features = test_features.values

        # Online trading
        position_id = None
        while not all_symbols_done:
            position_id = online_trading(symbol, features, xgb_clf)
            time.sleep(6)

        # Set all_symbols_done to True when this symbol is done
        all_symbols_done = True

    except Exception as e:
        print("Error processing symbol {}: {}".format(symbol, e))
        return None

symbols = ["EURUSD", "GBPUSD", "AUDUSD", "NZDUSD", "USDCAD"]

# Create a list of threads for each symbol
threads = []
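# Note: the loop below rebinds the module-level `symbol`, which process_data() also reads,
# so concurrent threads may not all label data for their own symbol.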
for symbol in symbols:
    thread = threading.Thread(target=process_symbol, args=(symbol,))
    thread.start()
    threads.append(thread)

# Wait for all threads to complete
for thread in threads:
    thread.join()
