import pandas as pd
import numpy as np
from skyfield.api import load, utc
from datetime import datetime, timedelta
import requests
import MetaTrader5 as mt5
import seaborn as sns
import matplotlib.pyplot as plt
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder

# Part 1: Collecting astronomical data

planets = load('de421.bsp')
earth = planets['earth']
ts = load.timescale()

def get_planet_positions(date):
    print(f"Getting planet positions for {date}")
    t = ts.from_datetime(date.replace(tzinfo=utc))
    planet_positions = {}
    planet_ids = {
        'mercury': 'MERCURY BARYCENTER',
        'venus': 'VENUS BARYCENTER',
        'mars': 'MARS BARYCENTER',
        'jupiter': 'JUPITER BARYCENTER',
        'saturn': 'SATURN BARYCENTER',
        'uranus': 'URANUS BARYCENTER',
        'neptune': 'NEPTUNE BARYCENTER'
    }
    for planet, planet_id in planet_ids.items():
        planet_obj = planets[planet_id]
        astrometric = earth.at(t).observe(planet_obj)
        ra, dec, _ = astrometric.radec()
        planet_positions[planet] = {'ra': ra.hours, 'dec': dec.degrees}
    print(f"Planet positions for {date}: {planet_positions}")
    return planet_positions

def get_moon_phase(date):
    print(f"Getting moon phase for {date}")
    t = ts.from_datetime(date.replace(tzinfo=utc))
    eph = load('de421.bsp')
    moon, sun, earth = eph['moon'], eph['sun'], eph['earth']

    e = earth.at(t)
    _, m, _ = e.observe(moon).apparent().ecliptic_latlon()
    _, s, _ = e.observe(sun).apparent().ecliptic_latlon()

    phase = (m.degrees - s.degrees) % 360
    print(f"Moon phase for {date}: {phase}")
    return phase

def get_solar_activity(date):
    print(f"Getting solar activity for {date}")
    url = f"https://services.swpc.noaa.gov/json/solar-cycle/observed-solar-cycle-indices.json"
    response = requests.get(url)
    data = response.json()

    target_date = date.strftime("%Y-%m")

    closest_data = min(data, key=lambda x: abs(datetime.strptime(x['time-tag'], "%Y-%m") - datetime.strptime(target_date, "%Y-%m")))

    solar_activity = {
        'sunspot_number': closest_data.get('ssn', None),
        'f10.7_flux': closest_data.get('f10.7', None)
    }
    print(f"Solar activity for {date}: {solar_activity}")
    return solar_activity

def calculate_aspects(positions):
    print("Calculating aspects")
    aspects = {}
    planets = list(positions.keys())
    for i in range(len(planets)):
        for j in range(i+1, len(planets)):
            planet1 = planets[i]
            planet2 = planets[j]
            ra1 = positions[planet1]['ra']
            ra2 = positions[planet2]['ra']
            angle = abs(ra1 - ra2) % 24
            angle = min(angle, 24 - angle) * 15  # Convert to degrees

            if abs(angle - 0) <= 10 or abs(angle - 180) <= 10:
                aspects[f"{planet1}_{planet2}"] = "conjunction" if abs(angle - 0) <= 10 else "opposition"
            elif abs(angle - 90) <= 10:
                aspects[f"{planet1}_{planet2}"] = "square"
            elif abs(angle - 120) <= 10:
                aspects[f"{planet1}_{planet2}"] = "trine"

    print(f"Aspects calculated: {aspects}")
    return aspects

# Part 2: Getting financial data through MetaTrader5

def get_financial_data(symbol, start_date, end_date):
    print(f"Getting financial data for {symbol} from {start_date} to {end_date}")
    if not mt5.initialize():
        print("initialize() failed")
        mt5.shutdown()
        return None

    timeframe = mt5.TIMEFRAME_D1

    rates = mt5.copy_rates_range(symbol, timeframe, start_date, end_date)
    mt5.shutdown()

    financial_df = pd.DataFrame(rates)
    financial_df['time'] = pd.to_datetime(financial_df['time'], unit='s')
    print(f"Financial data retrieved: {financial_df.head()}")
    return financial_df

# Part 3: Synchronizing astronomical and financial data

def sync_data(astro_df, financial_df):
    print("Synchronizing astronomical and financial data")
    astro_df['date'] = pd.to_datetime(astro_df['date']).dt.tz_localize(None)
    financial_df['time'] = pd.to_datetime(financial_df['time'])
    merged_data = pd.merge(financial_df, astro_df, left_on='time', right_on='date', how='inner')
    print(f"Merged data: {merged_data.head()}")
    return merged_data

# Part 4: Training the model and making predictions

def train_and_predict(merged_data):
    print("Training the model and making predictions")
    # Converting aspects to numerical features
    aspect_cols = [col for col in merged_data.columns if '_' in col and col not in ['date', 'time']]
    label_encoders = {}
    for col in aspect_cols:
        label_encoders[col] = LabelEncoder()
        merged_data[col] = label_encoders[col].fit_transform(merged_data[col].astype(str))

    # Creating lags for financial data
    for col in ['open', 'high', 'low', 'close']:
        for lag in range(1, 6):
            merged_data[f'{col}_lag{lag}'] = merged_data[col].shift(lag)

    # Creating lags for astronomical data
    astro_cols = ['mercury', 'venus', 'mars', 'jupiter', 'saturn', 'uranus', 'neptune']
    for col in astro_cols:
        merged_data[f'{col}_ra'] = merged_data[col].apply(lambda x: eval(x)['ra'] if pd.notna(x) else np.nan)
        merged_data[f'{col}_dec'] = merged_data[col].apply(lambda x: eval(x)['dec'] if pd.notna(x) else np.nan)
        for lag in range(1, 6):
            merged_data[f'{col}_ra_lag{lag}'] = merged_data[f'{col}_ra'].shift(lag)
            merged_data[f'{col}_dec_lag{lag}'] = merged_data[f'{col}_dec'].shift(lag)
        merged_data.drop(columns=[col, f'{col}_ra', f'{col}_dec'], inplace=True)

    # Filling missing values with mean values for numerical columns
    numeric_cols = merged_data.select_dtypes(include=[np.number]).columns
    merged_data[numeric_cols] = merged_data[numeric_cols].fillna(merged_data[numeric_cols].mean())

    merged_data = merged_data.dropna()

    # Creating a binary target variable
    merged_data['price_change'] = (merged_data['close'].shift(-1) > merged_data['close']).astype(int)

    # Removing rows with missing values in the target variable
    merged_data = merged_data.dropna(subset=['price_change'])

    features = [col for col in merged_data.columns if col not in ['date', 'time', 'close', 'price_change']]
    X = merged_data[features]
    y = merged_data['price_change']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

    model = CatBoostClassifier(iterations=500, learning_rate=0.1, depth=9, random_state=1)
    model.fit(X_train, y_train, eval_set=(X_test, y_test), early_stopping_rounds=200, verbose=100)

    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    clf_report = classification_report(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)

    print(f"Accuracy: {accuracy}")
    print("Classification Report:")
    print(clf_report)
    print("Confusion Matrix:")
    print(conf_matrix)

    # Visualizing feature importance
    feature_importance = model.feature_importances_
    feature_names = X.columns
    sorted_idx = np.argsort(feature_importance)
    pos = np.arange(sorted_idx.shape[0]) + 0.5

    plt.figure(figsize=(12, 6))
    plt.barh(pos, feature_importance[sorted_idx], align='center')
    plt.yticks(pos, np.array(feature_names)[sorted_idx])
    plt.xlabel('Feature Importance')
    plt.title('Feature Importance')
    plt.show()

    # Predicting the next value
    def predict_next():
        last_data = merged_data.iloc[-1]
        input_features = last_data[features].values.reshape(1, -1)
        prediction = model.predict(input_features)
        print(f"Price change prediction (0: decrease, 1: increase): {prediction[0]}")

    predict_next()

# Main program
print("Starting the main program")
start_date = datetime(2023, 3, 1)
end_date = datetime(2024, 7, 30)

astro_data = []
current_date = start_date

while current_date <= end_date:
    planet_positions = get_planet_positions(current_date)
    moon_phase = get_moon_phase(current_date)
    try:
        solar_activity = get_solar_activity(current_date)
    except Exception as e:
        print(f"Error getting solar activity for {current_date}: {e}")
        solar_activity = {'sunspot_number': None, 'f10.7_flux': None}
    aspects = calculate_aspects(planet_positions)

    astro_data.append({
        'date': current_date,
        'mercury': str(planet_positions['mercury']),
        'venus': str(planet_positions['venus']),
        'mars': str(planet_positions['mars']),
        'jupiter': str(planet_positions['jupiter']),
        'saturn': str(planet_positions['saturn']),
        'uranus': str(planet_positions['uranus']),
        'neptune': str(planet_positions['neptune']),
        'moon_phase': moon_phase,
        **solar_activity,
        **aspects
    })

    current_date += timedelta(days=1)

astro_df = pd.DataFrame(astro_data)
print(f"Astronomical data collected: {astro_df.head()}")

symbol = "EURUSD"
financial_data = get_financial_data(symbol, start_date, end_date)

if financial_data is not None:
    merged_data = sync_data(astro_df, financial_data)
    train_and_predict(merged_data)
else:
    print("Failed to retrieve financial data")