import requests
import pandas as pd
import numpy as np
import json
import logging
from typing import Dict, List, Optional, Tuple
from datetime import datetime, timedelta
from pathlib import Path
import MetaTrader5 as mt5
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Configure logging once at import time: INFO level with timestamped records.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger used by every method in this file.
logger = logging.getLogger(__name__)

class ManualPPPCalculator:
    """
    Manual PPP calculator using price levels, exchange rates, and GDP data
    """
    
    def __init__(self) -> None:
        """Create the HTTP session and the static reference tables used by the calculators."""
        # Root of the IMF SDMX JSON service; request paths are appended in
        # fetch_all_available_data.
        self.base_url = "http://dataservices.imf.org/REST/SDMX_JSON.svc"
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Manual-PPP-Calculator/1.0',
            'Accept': 'application/json'
        })

        # Currency code -> area code. The area codes are the keys of every
        # table below and are matched against the REF_AREA column of fetched
        # data ('U2' is the euro area, see the price-level table).
        self.currency_country_map = {
            'USD': 'US', 'EUR': 'U2', 'GBP': 'GB', 'JPY': 'JP',
            'AUD': 'AU', 'CAD': 'CA', 'CHF': 'CH', 'NZD': 'NZ',
            'SEK': 'SE', 'NOK': 'NO', 'DKK': 'DK', 'PLN': 'PL',
            'CZK': 'CZ', 'HUF': 'HU', 'ZAR': 'ZA', 'BRL': 'BR',
            'MXN': 'MX', 'SGD': 'SG', 'HKD': 'HK', 'KRW': 'KR'
        }

        # Static market quotes keyed by pair symbol. calculate_ppp_fair_values
        # uses this table directly as its market rates.
        self.fallback_market_rates = {
            'EURUSD': 1.0850, 'GBPUSD': 1.2650, 'USDJPY': 148.50,
            'AUDUSD': 0.6750, 'USDCAD': 1.3550, 'USDCHF': 0.8850,
            'NZDUSD': 0.6150
        }

        # Known relative price levels (Big Mac Index style data for 2024)
        # These are relative to US = 100
        self.price_levels_2024 = {
            'US': 100.0,    # Base
            'U2': 88.5,     # Euro area (slightly cheaper than US)
            'GB': 85.2,     # UK (cheaper due to lower VAT on food)
            'JP': 67.4,     # Japan (significantly cheaper)
            'AU': 95.8,     # Australia (close to US)
            'CA': 91.3,     # Canada (moderately cheaper)
            'CH': 125.6,    # Switzerland (most expensive)
            'NZ': 89.7,     # New Zealand
            'ZA': 45.2,     # South Africa (much cheaper)
            'BR': 52.8,     # Brazil
            'MX': 48.6,     # Mexico
            'PL': 58.9,     # Poland
            'CZ': 62.1,     # Czech Republic
            'HU': 55.4      # Hungary
        }

        # Approximate GDP USD estimates for 2023 (in billions)
        self.gdp_usd_estimates_2023 = {
            'US': 27000,    # $27 trillion
            'U2': 17500,    # Euro area ~$17.5 trillion
            'GB': 3300,     # UK ~$3.3 trillion
            'JP': 4200,     # Japan ~$4.2 trillion
            'AU': 1700,     # Australia ~$1.7 trillion
            'CA': 2100,     # Canada ~$2.1 trillion
            'CH': 900,      # Switzerland ~$0.9 trillion
            'NZ': 250       # New Zealand ~$0.25 trillion
        }

        # Base year exchange rates (approximate 2020 levels), keyed by area code.
        # NOTE(review): the pair labels below come from the original file, but
        # several values (0.85, 0.78, 1.45, 1.52) look like local currency per
        # USD rather than the labelled EURUSD/GBPUSD/AUDUSD/NZDUSD quote, which
        # would not be comparable with fallback_market_rates - confirm the
        # intended quoting convention.
        self.base_rates_2020 = {
            'U2': 0.85,   # labelled EURUSD
            'GB': 0.78,   # labelled GBPUSD
            'JP': 106.0,  # USDJPY
            'AU': 1.45,   # labelled AUDUSD
            'CA': 1.34,   # USDCAD
            'CH': 0.92,   # USDCHF
            'NZ': 1.52    # labelled NZDUSD
        }

        # Annual inflation in percent, keyed by year and then area code. Read
        # by the fallback paths of the dynamic PPP calculation, which divide
        # each value by 100.
        self.historical_inflation = {
            2020: {'US': 1.2, 'U2': 0.3, 'GB': 0.9, 'JP': -0.1},
            2021: {'US': 4.7, 'U2': 2.6, 'GB': 2.6, 'JP': -0.2},
            2022: {'US': 8.0, 'U2': 8.4, 'GB': 9.1, 'JP': 2.5},
            2023: {'US': 4.1, 'U2': 5.4, 'GB': 7.3, 'JP': 3.3},
            2024: {'US': 3.2, 'U2': 2.4, 'GB': 2.3, 'JP': 2.8}
        }
    
    def fetch_all_available_data(self, countries: List[str], years: int = 10) -> pd.DataFrame:
        """Fetch ALL available economic data from IMF"""
        end_year = datetime.now().year
        start_year = end_year - years
        
        # Comprehensive list of indicators
        all_indicators = [
            'NGDP_XDC',       # GDP in national currency (we know this works)
            'NGDP_USD',       # GDP in US dollars
            'PCPIPCH',        # Inflation rate
            'NGDP_RPCH',      # Real GDP growth
            'ENDA_XDC_USD_RATE',  # Exchange rate
            'PCPI_IX',        # Consumer Price Index
            'LP'              # Population
        ]
        
        logger.info(f"Fetching {len(all_indicators)} indicators for {len(countries)} countries...")
        
        all_data = []
        chunk_size = 5  # Indicators per request
        
        for i in range(0, len(all_indicators), chunk_size):
            chunk = all_indicators[i:i + chunk_size]
            
            try:
                countries_string = '+'.join(countries)
                indicators_string = '+'.join(chunk)
                
                url = f"{self.base_url}/CompactData/IFS/A.{countries_string}.{indicators_string}"
                
                response = self.session.get(url, params={
                    'startPeriod': str(start_year),
                    'endPeriod': str(end_year)
                }, timeout=60)
                
                if response.status_code == 200:
                    raw_data = response.json()
                    df_chunk = self._parse_response_data(raw_data)
                    
                    if not df_chunk.empty:
                        all_data.append(df_chunk)
                        logger.info(f"Chunk {i//chunk_size + 1}: {len(df_chunk)} points")
                
            except Exception as e:
                logger.warning(f"Chunk {i//chunk_size + 1} failed: {e}")
                continue
        
        # Combine all data
        if all_data:
            combined_df = pd.concat(all_data, ignore_index=True)
            logger.info(f"Total data fetched: {len(combined_df)} points")
            
            # Show what we have
            available_indicators = combined_df['INDICATOR'].unique()
            logger.info(f"Available indicators: {list(available_indicators)}")
            
            return combined_df
        else:
            logger.error("No data fetched!")
            return pd.DataFrame()
    
    def _parse_response_data(self, data: Dict) -> pd.DataFrame:
        """Parse IMF API response"""
        records = []
        
        try:
            compact_data = data['CompactData']
            dataset = compact_data['DataSet']
            
            if 'Series' not in dataset:
                return pd.DataFrame()
            
            series_list = dataset['Series']
            if not isinstance(series_list, list):
                series_list = [series_list]
            
            for series in series_list:
                series_attrs = {k.replace('@', ''): v for k, v in series.items() 
                              if k.startswith('@')}
                
                obs_list = series.get('Obs', [])
                if not isinstance(obs_list, list):
                    obs_list = [obs_list]
                
                for obs in obs_list:
                    if isinstance(obs, dict):
                        record = series_attrs.copy()
                        record.update({
                            'year': obs.get('@TIME_PERIOD', ''),
                            'value': obs.get('@OBS_VALUE', ''),
                            'status': obs.get('@OBS_STATUS', '')
                        })
                        records.append(record)
            
            df = pd.DataFrame(records)
            
            if 'value' in df.columns:
                df['value'] = pd.to_numeric(df['value'], errors='coerce')
            
            if 'year' in df.columns:
                df['year'] = pd.to_numeric(df['year'], errors='coerce')
            
            return df
            
        except Exception as e:
            logger.error(f"Error parsing response: {e}")
            return pd.DataFrame()
    
    def calculate_manual_ppp_rates(self, economic_data: pd.DataFrame) -> Dict:
        """Calculate PPP rates manually using multiple methods"""
        
        results = {
            'method_1_price_levels': {},
            'method_2_gdp_implied': {},
            'method_3_inflation_adjusted': {},
            'method_4_big_mac_proxy': {},
            'composite_ppp_rates': {}
        }
        
        logger.info("Calculating manual PPP rates using multiple methods...")
        
        # Method 1: Price level adjustment
        results['method_1_price_levels'] = self._calculate_price_level_ppp()
        
        # Method 2: GDP-implied rates
        results['method_2_gdp_implied'] = self._calculate_gdp_implied_ppp(economic_data)
        
        # Method 3: Inflation-adjusted historical rates
        results['method_3_inflation_adjusted'] = self._calculate_inflation_adjusted_ppp(economic_data)
        
        # Method 4: Big Mac Index proxy
        results['method_4_big_mac_proxy'] = self._calculate_big_mac_proxy_ppp()
        
        # Method 5: Composite calculation
        results['composite_ppp_rates'] = self._calculate_composite_ppp(results)
        
        return results
    
    def _calculate_price_level_ppp(self) -> Dict:
        """Method 1: Calculate PPP using relative price levels"""
        logger.info("Method 1: Calculating PPP from price levels...")
        
        ppp_rates = {}
        
        for country, price_level in self.price_levels_2024.items():
            if country != 'US':
                ppp_factor = price_level / 100.0
                ppp_rates[country] = {
                    'ppp_conversion_factor': ppp_factor,
                    'price_level_index': price_level,
                    'method': 'price_level_adjustment'
                }
        
        return ppp_rates
    
    def _calculate_gdp_implied_ppp(self, economic_data: pd.DataFrame) -> Dict:
        """Method 2: Calculate GDP-implied PPP rates"""
        logger.info("Method 2: Calculating GDP-implied PPP...")
        
        if economic_data.empty:
            return {}
        
        gdp_lcu_data = economic_data[economic_data['INDICATOR'] == 'NGDP_XDC'].copy()
        
        if gdp_lcu_data.empty:
            return {}
        
        ppp_implied = {}
        
        for country, gdp_usd_2023 in self.gdp_usd_estimates_2023.items():
            country_gdp_lcu = gdp_lcu_data[gdp_lcu_data['REF_AREA'] == country]
            
            if not country_gdp_lcu.empty:
                latest_data = country_gdp_lcu.sort_values('year').iloc[-1]
                gdp_lcu = latest_data['value']
                
                if gdp_lcu > 0:
                    implied_rate = gdp_lcu / gdp_usd_2023
                    
                    ppp_implied[country] = {
                        'implied_exchange_rate': implied_rate,
                        'gdp_lcu': gdp_lcu,
                        'gdp_usd_estimate': gdp_usd_2023,
                        'method': 'gdp_ratio_estimate'
                    }
        
        return ppp_implied
    
    def _calculate_inflation_adjusted_ppp(self, economic_data: pd.DataFrame) -> Dict:
        """Method 3: Calculate inflation-adjusted PPP"""
        logger.info("Method 3: Calculating inflation-adjusted PPP...")
        
        inflation_data = economic_data[economic_data['INDICATOR'] == 'PCPIPCH'].copy()
        
        if inflation_data.empty:
            # Use approximation if no inflation data
            return self._approximate_inflation_adjustment()
        
        inflation_adjusted = {}
        
        for country, base_rate in self.base_rates_2020.items():
            country_inflation = inflation_data[inflation_data['REF_AREA'] == country]
            us_inflation = inflation_data[inflation_data['REF_AREA'] == 'US']
            
            if not country_inflation.empty and not us_inflation.empty:
                country_inflation_avg = country_inflation['value'].mean()
                us_inflation_avg = us_inflation['value'].mean()
                
                inflation_differential = (us_inflation_avg - country_inflation_avg) / 100
                adjusted_rate = base_rate * (1 + inflation_differential)
                
                inflation_adjusted[country] = {
                    'inflation_adjusted_rate': adjusted_rate,
                    'base_rate_2020': base_rate,
                    'inflation_differential': inflation_differential * 100,
                    'method': 'relative_ppp_inflation'
                }
        
        return inflation_adjusted
    
    def _approximate_inflation_adjustment(self) -> Dict:
        """Approximate inflation adjustment using typical rates"""
        # Typical inflation rates 2020-2024
        typical_inflation = {
            'US': 4.5, 'U2': 3.8, 'GB': 4.2, 'JP': 1.8,
            'AU': 4.1, 'CA': 3.9, 'CH': 2.1, 'NZ': 4.0
        }
        
        inflation_adjusted = {}
        
        for country, base_rate in self.base_rates_2020.items():
            us_inflation = typical_inflation.get('US', 4.5)
            country_inflation = typical_inflation.get(country, 3.5)
            
            inflation_differential = (us_inflation - country_inflation) / 100
            adjusted_rate = base_rate * (1 + inflation_differential)
            
            inflation_adjusted[country] = {
                'inflation_adjusted_rate': adjusted_rate,
                'base_rate_2020': base_rate,
                'inflation_differential': inflation_differential * 100,
                'method': 'approximate_inflation'
            }
        
        return inflation_adjusted
    
    def _calculate_big_mac_proxy_ppp(self) -> Dict:
        """Method 4: Big Mac Index style calculation"""
        logger.info("Method 4: Calculating Big Mac proxy PPP...")
        
        us_big_mac_price = 5.50
        big_mac_ppp = {}
        
        for country, price_level in self.price_levels_2024.items():
            if country != 'US':
                local_big_mac_price = us_big_mac_price * (price_level / 100.0)
                ppp_rate = local_big_mac_price / us_big_mac_price
                
                big_mac_ppp[country] = {
                    'big_mac_ppp_rate': ppp_rate,
                    'implied_local_price': local_big_mac_price,
                    'method': 'big_mac_proxy'
                }
        
        return big_mac_ppp
    
    def _calculate_composite_ppp(self, all_methods: Dict) -> Dict:
        """Method 5: Create composite PPP rates from all methods"""
        logger.info("Method 5: Creating composite PPP rates...")
        
        composite_rates = {}
        
        # Get all countries that appear in any method
        all_countries = set()
        for method_results in all_methods.values():
            if isinstance(method_results, dict):
                all_countries.update(method_results.keys())
        
        for country in all_countries:
            if country == 'US':
                continue
                
            rates = []
            methods_used = []
            weights = []
            
            # Method 1: Price levels (weight: 0.3)
            if country in all_methods['method_1_price_levels']:
                rate = all_methods['method_1_price_levels'][country]['ppp_conversion_factor']
                rates.append(rate)
                methods_used.append('price_levels')
                weights.append(0.3)
            
            # Method 2: GDP implied (weight: 0.25)
            if country in all_methods['method_2_gdp_implied']:
                rate = all_methods['method_2_gdp_implied'][country]['implied_exchange_rate']
                if 0.1 <= rate <= 200:  # Sanity check
                    rates.append(rate)
                    methods_used.append('gdp_implied')
                    weights.append(0.25)
            
            # Method 3: Inflation adjusted (weight: 0.25)
            if country in all_methods['method_3_inflation_adjusted']:
                rate = all_methods['method_3_inflation_adjusted'][country]['inflation_adjusted_rate']
                rates.append(rate)
                methods_used.append('inflation_adjusted')
                weights.append(0.25)
            
            # Method 4: Big Mac proxy (weight: 0.2)
            if country in all_methods['method_4_big_mac_proxy']:
                rate = all_methods['method_4_big_mac_proxy'][country]['big_mac_ppp_rate']
                rates.append(rate)
                methods_used.append('big_mac_proxy')
                weights.append(0.2)
            
            # Calculate weighted average
            if rates:
                total_weight = sum(weights)
                normalized_weights = [w / total_weight for w in weights]
                
                composite_rate = sum(rate * weight for rate, weight in zip(rates, normalized_weights))
                
                composite_rates[country] = {
                    'composite_ppp_rate': composite_rate,
                    'methods_used': methods_used,
                    'confidence': len(methods_used) / 4.0
                }
        
        return composite_rates
    
    def calculate_ppp_fair_values(self, currency_pairs: List[str]) -> Dict:
        """Calculate fair values using manual PPP calculation"""
        logger.info("Starting manual PPP fair value calculation...")
        
        # Get all countries needed
        countries = set()
        for pair in currency_pairs:
            base_currency = pair[:3]
            quote_currency = pair[3:]
            
            base_country = self.currency_country_map.get(base_currency)
            quote_country = self.currency_country_map.get(quote_currency)
            
            if base_country and quote_country:
                countries.add(base_country)
                countries.add(quote_currency)
        
        # Fetch economic data
        economic_data = self.fetch_all_available_data(list(countries))
        
        # Calculate PPP rates using multiple methods
        ppp_calculation_results = self.calculate_manual_ppp_rates(economic_data)
        
        # Get current market rates (use fallback)
        market_rates = self.fallback_market_rates
        
        # Calculate fair values and deviations
        results = {
            'ppp_calculation_methods': ppp_calculation_results,
            'fair_values': {},
            'deviations': {},
            'market_rates': market_rates,
            'summary': {}
        }
        
        composite_ppp = ppp_calculation_results.get('composite_ppp_rates', {})
        
        for pair in currency_pairs:
            base_currency = pair[:3]
            quote_currency = pair[3:]
            
            base_country = self.currency_country_map.get(base_currency)
            quote_country = self.currency_country_map.get(quote_currency)
            
            if not base_country or not quote_country:
                continue
            
            # Calculate fair value from composite PPP
            fair_value = self._calculate_pair_fair_value_from_ppp(
                composite_ppp, base_country, quote_country, pair
            )
            
            if fair_value:
                results['fair_values'][pair] = fair_value
                
                # Calculate market deviation
                market_rate = market_rates.get(pair)
                if market_rate and fair_value.get('fair_rate'):
                    deviation = ((market_rate - fair_value['fair_rate']) / fair_value['fair_rate']) * 100
                    
                    results['deviations'][pair] = {
                        'market_rate': market_rate,
                        'fair_value': fair_value['fair_rate'],
                        'deviation_pct': deviation,
                        'status': 'overvalued' if deviation > 5 else 'undervalued' if deviation < -5 else 'fair',
                        'magnitude': 'significant' if abs(deviation) > 15 else 'moderate' if abs(deviation) > 5 else 'minimal',
                        'confidence': fair_value.get('confidence', 0.5)
                    }
        
        # Generate summary
        results['summary'] = self._generate_summary(results['deviations'])
        
        return results
    
    def _calculate_pair_fair_value_from_ppp(self, composite_ppp: Dict, 
                                          base_country: str, quote_country: str, pair: str) -> Dict:
        """Calculate fair value for currency pair from PPP data"""
        
        base_ppp = composite_ppp.get(base_country, {})
        quote_ppp = composite_ppp.get(quote_country, {})
        
        # Handle USD pairs
        if quote_country == 'US':
            if base_ppp:
                fair_rate = base_ppp['composite_ppp_rate']
                confidence = base_ppp['confidence']
            else:
                return {}
        elif base_country == 'US':
            if quote_ppp:
                fair_rate = quote_ppp['composite_ppp_rate']
                confidence = quote_ppp['confidence']
            else:
                return {}
        else:
            # Cross pairs
            if base_ppp and quote_ppp:
                fair_rate = base_ppp['composite_ppp_rate'] / quote_ppp['composite_ppp_rate']
                confidence = min(base_ppp['confidence'], quote_ppp['confidence'])
            else:
                return {}
        
        return {
            'pair': pair,
            'fair_rate': fair_rate,
            'confidence': confidence,
            'base_country': base_country,
            'quote_country': quote_country
        }
    
    def _generate_summary(self, deviations: Dict) -> Dict:
        """Generate summary statistics"""
        if not deviations:
            return {}
        
        deviation_values = [data['deviation_pct'] for data in deviations.values()]
        
        return {
            'total_pairs_analyzed': len(deviations),
            'average_deviation': round(np.mean(np.abs(deviation_values)), 2),
            'max_overvaluation': round(max(deviation_values), 2),
            'max_undervaluation': round(min(deviation_values), 2),
            'pairs_near_fair_value': sum(1 for d in deviation_values if abs(d) <= 5),
            'significantly_misaligned': sum(1 for d in deviation_values if abs(d) > 15)
        }
    
    def calculate_dynamic_ppp_for_dates(self, dates: List[datetime], pair: str = 'EURUSD') -> List[float]:
        """Calculate dynamic PPP fair values for specific dates using real IMF data"""
        base_ppp_2020 = 0.85  # Base EUR/USD rate in 2020
        ppp_values = []
        
        # Get real economic data from IMF
        countries = ['US', 'U2']
        economic_data = self.fetch_all_available_data(countries, years=5)
        
        # Extract inflation and GDP data
        inflation_data = economic_data[economic_data['INDICATOR'] == 'PCPIPCH'].copy() if not economic_data.empty else pd.DataFrame()
        gdp_data = economic_data[economic_data['INDICATOR'] == 'NGDP_XDC'].copy() if not economic_data.empty else pd.DataFrame()
        
        print(f"Using {'REAL' if not economic_data.empty else 'FALLBACK'} IMF data for dynamic PPP calculation")
        
        for date in dates:
            year = date.year
            
            # Method 1: Real inflation-based PPP
            inflation_ppp = self._calculate_yearly_inflation_ppp(year, inflation_data, base_ppp_2020)
            
            # Method 2: GDP-implied PPP for the specific year
            gdp_ppp = self._calculate_yearly_gdp_ppp(year, gdp_data)
            
            # Method 3: Price level PPP (static but adjusted for year)
            price_level_ppp = self.price_levels_2024.get('U2', 88.5) / 100.0
            
            # Method 4: Structural adjustments based on year
            structural_factor = self._get_structural_adjustment(year, date)
            
            # Combine methods with dynamic weighting
            methods = []
            weights = []
            
            if inflation_ppp is not None:
                methods.append(inflation_ppp * structural_factor)
                weights.append(0.4)
            
            if gdp_ppp is not None:
                methods.append(gdp_ppp * structural_factor)
                weights.append(0.35)
            
            methods.append(price_level_ppp * structural_factor)
            weights.append(0.25)
            
            # Calculate weighted composite
            if methods:
                total_weight = sum(weights)
                normalized_weights = [w / total_weight for w in weights]
                composite_ppp = sum(method * weight for method, weight in zip(methods, normalized_weights))
            else:
                # Ultimate fallback
                composite_ppp = base_ppp_2020 * structural_factor
            
            ppp_values.append(composite_ppp)
        
        return ppp_values
    
    def _calculate_yearly_inflation_ppp(self, year: int, inflation_data: pd.DataFrame, base_rate: float) -> Optional[float]:
        """Calculate PPP for specific year using real inflation data"""
        if inflation_data.empty:
            # Fallback to historical data
            return self._fallback_inflation_ppp(year, base_rate)
        
        try:
            # Get inflation data up to the specific year
            us_inflation_data = inflation_data[
                (inflation_data['REF_AREA'] == 'US') & 
                (inflation_data['year'] <= year) & 
                (inflation_data['year'] >= 2020)
            ]
            
            eu_inflation_data = inflation_data[
                (inflation_data['REF_AREA'] == 'U2') & 
                (inflation_data['year'] <= year) & 
                (inflation_data['year'] >= 2020)
            ]
            
            if us_inflation_data.empty or eu_inflation_data.empty:
                return self._fallback_inflation_ppp(year, base_rate)
            
            # Calculate cumulative inflation from 2020 to year
            cumulative_us = 1.0
            cumulative_eu = 1.0
            
            for y in range(2021, year + 1):
                us_year_data = us_inflation_data[us_inflation_data['year'] == y]
                eu_year_data = eu_inflation_data[eu_inflation_data['year'] == y]
                
                if not us_year_data.empty and not eu_year_data.empty:
                    us_rate = us_year_data['value'].iloc[0] / 100
                    eu_rate = eu_year_data['value'].iloc[0] / 100
                    
                    cumulative_us *= (1 + us_rate)
                    cumulative_eu *= (1 + eu_rate)
                else:
                    # Use fallback rates if data missing
                    fallback_rates = self.historical_inflation.get(y, {'US': 3.0, 'U2': 2.5})
                    cumulative_us *= (1 + fallback_rates['US'] / 100)
                    cumulative_eu *= (1 + fallback_rates['U2'] / 100)
            
            # Relative PPP adjustment
            inflation_adjustment = cumulative_us / cumulative_eu
            return base_rate * inflation_adjustment
            
        except Exception as e:
            logger.warning(f"Error in yearly inflation PPP calculation: {e}")
            return self._fallback_inflation_ppp(year, base_rate)
    
    def _calculate_yearly_gdp_ppp(self, year: int, gdp_data: pd.DataFrame) -> Optional[float]:
        """Calculate GDP-implied PPP for specific year"""
        if gdp_data.empty:
            return None
        
        try:
            # Get GDP data for the specific year or closest available
            us_gdp_data = gdp_data[
                (gdp_data['REF_AREA'] == 'US') & 
                (gdp_data['year'] <= year)
            ].sort_values('year')
            
            eu_gdp_data = gdp_data[
                (gdp_data['REF_AREA'] == 'U2') & 
                (gdp_data['year'] <= year)
            ].sort_values('year')
            
            if us_gdp_data.empty or eu_gdp_data.empty:
                return None
            
            # Get latest available data
            us_gdp_lcu = us_gdp_data.iloc[-1]['value']
            eu_gdp_lcu = eu_gdp_data.iloc[-1]['value']
            
            # Use GDP USD estimates (adjust for year if needed)
            us_gdp_usd = self.gdp_usd_estimates_2023.get('US', 27000)
            eu_gdp_usd = self.gdp_usd_estimates_2023.get('U2', 17500)
            
            # Year adjustment for GDP USD estimates
            if year != 2023:
                growth_factor = 1 + (0.025 * (year - 2023))  # Assume 2.5% annual growth
                us_gdp_usd *= growth_factor
                eu_gdp_usd *= growth_factor
            
            # Calculate implied exchange rate
            if us_gdp_lcu > 0 and eu_gdp_lcu > 0:
                us_implied_rate = us_gdp_lcu / us_gdp_usd
                eu_implied_rate = eu_gdp_lcu / eu_gdp_usd
                
                # EUR/USD rate
                eurusd_implied = eu_implied_rate / us_implied_rate
                return eurusd_implied
            
            return None
            
        except Exception as e:
            logger.warning(f"Error in yearly GDP PPP calculation: {e}")
            return None
    
    def _get_structural_adjustment(self, year: int, date: datetime) -> float:
        """Get structural adjustments based on economic events"""
        adjustment = 1.0
        
        # COVID-19 impact (2020)
        if year == 2020:
            month = date.month
            if month >= 3:  # Pandemic started in March
                adjustment *= 0.95  # Euro weakened due to early severe lockdowns
        
        # Energy crisis and Ukraine war (2022+)
        if year >= 2022:
            if year == 2022 and date.month >= 2:  # Ukraine war started Feb 2022
                adjustment *= 0.97  # Euro weakened due to energy dependence
            elif year >= 2023:
                adjustment *= 0.98  # Continued energy concerns
        
        # ECB vs Fed policy divergence
        if year >= 2022:
            # Fed raised rates more aggressively than ECB
            quarters_factor = ((date.month - 1) // 3 + 1) / 4  # Quarter of year
            policy_adjustment = 0.99 - (0.02 * quarters_factor)  # Progressive weakening
            adjustment *= policy_adjustment
        
        return adjustment
    
    def _fallback_inflation_ppp(self, year: int, base_rate: float) -> float:
        """Fallback inflation PPP calculation using historical data"""
        cumulative_adjustment = 1.0
        
        for y in range(2021, min(year + 1, 2025)):
            if y in self.historical_inflation:
                us_inf = self.historical_inflation[y].get('US', 3.0)
                eu_inf = self.historical_inflation[y].get('U2', 2.5)
                annual_adjustment = (us_inf - eu_inf) / 100
                cumulative_adjustment *= (1 + annual_adjustment)
        
        return base_rate * cumulative_adjustment
    
    def visualize_historical_ppp_comparison(self, pair: str = 'EURUSD', months: int = 12) -> Optional[plt.Figure]:
        """
        Visualize historical market rates vs dynamic PPP fair values using MetaTrader5 data

        Loads daily bars for ``pair`` covering roughly ``months * 30`` days up
        to now, computes a PPP fair value per bar via
        ``calculate_dynamic_ppp_for_dates``, and plots both series together
        with ±5% and 5-15% deviation bands. A summary and a trading signal are
        printed to stdout.

        Args:
            pair: Symbol to request from MetaTrader5; its first three
                characters are used as the currency in the recommendation.
            months: Look-back window, converted to days as ``months * 30``.

        Returns:
            The matplotlib figure, or ``None`` when MT5 cannot be initialised,
            no bars are returned, the PPP series is unusable, or any other
            error occurs. The MT5 connection is shut down in every case.
        """
        # Initialize MetaTrader5
        if not mt5.initialize():
            logger.error(f"MT5 initialize() failed, error code = {mt5.last_error()}")
            return None

        # Tracked outside the try so a half-built figure can be released if
        # anything fails after it has been created.
        fig = None
        try:
            # Set time range
            # NOTE(review): these are naive local datetimes; the MetaTrader5
            # docs recommend UTC for range queries - confirm whether the
            # offset matters for daily bars.
            end_date = datetime.now()
            start_date = end_date - timedelta(days=months*30)

            # Get historical data from MT5
            rates = mt5.copy_rates_range(pair, mt5.TIMEFRAME_D1, start_date, end_date)

            if rates is None or len(rates) == 0:
                logger.error(f"Failed to get {pair} data from MetaTrader5")
                return None

            # Convert to DataFrame
            df = pd.DataFrame(rates)
            df['time'] = pd.to_datetime(df['time'], unit='s')
            df.set_index('time', inplace=True)

            market_rates = df['close']
            dates = [date.to_pydatetime() for date in market_rates.index]

            logger.info(f"Loaded {len(market_rates)} daily bars for {pair}")

            # Calculate dynamic PPP values for each date
            ppp_values = self.calculate_dynamic_ppp_for_dates(dates, pair)

            # Validate before any drawing: every deviation divides by the PPP
            # value, so it must be finite, positive and aligned with the bars.
            ppp = np.asarray(ppp_values, dtype=float)
            if ppp.shape != (len(dates),) or not np.all(np.isfinite(ppp) & (ppp > 0)):
                logger.error(f"Invalid PPP fair values computed for {pair}")
                return None

            market = market_rates.to_numpy(dtype=float)

            # Percentage deviation of market from fair value, computed once
            # and reused for the current value and all statistics.
            deviations = (market - ppp) / ppp * 100

            # Current values and deviation
            current_market = float(market[-1])
            current_ppp = float(ppp[-1])
            deviation = float(deviations[-1])

            # Statistics
            avg_deviation = float(deviations.mean())
            max_overvaluation = float(deviations.max())
            max_undervaluation = float(deviations.min())

            # Create visualization with fixed width (10 in wide; callers save
            # at 70 DPI for a nominal 700px image)
            fig, ax = plt.subplots(figsize=(10, 7))

            # Plot market rates
            ax.plot(dates, market, label=f'{pair} Market Rate (MT5)',
                    linewidth=2, color='#1f77b4', alpha=0.9)

            # Plot dynamic PPP fair values
            ax.plot(dates, ppp, label='Dynamic PPP Fair Value',
                    linewidth=2.5, color='#d62728', linestyle='--', alpha=0.9)

            # Fair value zones
            ppp_upper_5 = ppp * 1.05
            ppp_lower_5 = ppp * 0.95
            ax.fill_between(dates, ppp_lower_5, ppp_upper_5, alpha=0.15,
                            color='#2ca02c', label='Fair Value Zone (±5%)')

            ppp_upper_15 = ppp * 1.15
            ppp_lower_15 = ppp * 0.85
            ax.fill_between(dates, ppp_upper_15, ppp_upper_5, alpha=0.1,
                            color='#ff7f0e', label='Overvaluation Zone (5-15%)')
            ax.fill_between(dates, ppp_lower_5, ppp_lower_15, alpha=0.1,
                            color='#ff7f0e', label='Undervaluation Zone (5-15%)')

            # Status classification
            if deviation > 15:
                status = "Critically Overvalued"
            elif deviation > 5:
                status = "Overvalued"
            elif deviation < -15:
                status = "Critically Undervalued"
            elif deviation < -5:
                status = "Undervalued"
            else:
                status = "Fair Value"

            # Chart formatting
            title = f'{pair}: Market Rate vs Dynamic PPP Fair Value\nCurrent Deviation: {deviation:+.1f}% ({status})'
            ax.set_title(title, fontsize=16, fontweight='bold', pad=20)
            ax.set_xlabel('Date', fontsize=12, fontweight='bold')
            ax.set_ylabel('Exchange Rate', fontsize=12, fontweight='bold')
            ax.legend(fontsize=11, loc='upper left')
            ax.grid(True, alpha=0.3, linestyle='-', linewidth=0.5)

            # Format dates on x-axis
            ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
            ax.xaxis.set_major_locator(mdates.MonthLocator(interval=2))
            plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)

            # Annotation with current values
            textstr = (f'Market Rate: {current_market:.4f}\n'
                       f'PPP Fair Value: {current_ppp:.4f}\n'
                       f'Current Deviation: {deviation:+.1f}%\n'
                       f'Avg Deviation: {avg_deviation:+.1f}%')

            props = dict(boxstyle='round', facecolor='wheat', alpha=0.8)
            ax.text(0.02, 0.98, textstr, transform=ax.transAxes, fontsize=10,
                    verticalalignment='top', bbox=props)

            # Y-axis limits: pad enough to keep both series and the ±5% band
            # edges in view
            y_min = min(market.min() * 0.98, ppp.min() * 0.95)
            y_max = max(market.max() * 1.02, ppp.max() * 1.05)
            ax.set_ylim(y_min, y_max)

            plt.tight_layout()

            # Console output
            print(f"\n=== {pair} ANALYSIS FOR {months} MONTHS ===")
            print(f"Period: {dates[0].strftime('%Y-%m-%d')} - {dates[-1].strftime('%Y-%m-%d')}")
            print(f"Current Market Rate: {current_market:.4f}")
            print(f"Dynamic PPP Fair Value: {current_ppp:.4f}")
            print(f"Current Deviation: {deviation:+.1f}% ({status})")
            print(f"Average Deviation: {avg_deviation:+.1f}%")
            print(f"Max Overvaluation: {max_overvaluation:+.1f}%")
            print(f"Max Undervaluation: {max_undervaluation:+.1f}%")

            # Trading signals: strength from the size of the deviation,
            # direction from its sign (market above fair value -> sell).
            if abs(deviation) > 15:
                signal = "STRONG SIGNAL"
            elif abs(deviation) > 5:
                signal = "MODERATE SIGNAL"
            else:
                signal = "NO SIGNAL"

            if signal == "NO SIGNAL":
                action = "HOLD"
            elif deviation > 0:
                action = f"SELL {pair[:3]}"
            else:
                action = f"BUY {pair[:3]}"

            print(f"\nTrading Signal: {signal}")
            print(f"Recommendation: {action}")

            return fig

        except Exception as e:
            logger.exception(f"Error in MT5 visualization: {e}")
            # The caller never receives this figure, so unregister it from
            # pyplot instead of leaving it open.
            if fig is not None:
                plt.close(fig)
            return None

        finally:
            mt5.shutdown()
    
    def save_historical_ppp_data(self, pair: str = 'EURUSD', months: int = 12, filename: Optional[str] = None):
        """Save historical PPP analysis data to CSV

        Loads daily bars for ``pair`` covering roughly ``months * 30`` days up
        to now, computes a PPP fair value per bar via
        ``calculate_dynamic_ppp_for_dates`` and writes the columns ``time``,
        ``close``, ``ppp_fair_value``, ``deviation_pct`` and ``status``.

        Args:
            pair: Symbol to request from MetaTrader5.
            months: Look-back window, converted to days as ``months * 30``.
            filename: Output path; defaults to ``"<pair lowercased>_ppp_analysis.csv"``.

        Returns:
            None. Nothing is written when MT5 cannot be initialised or returns
            no bars. Errors raised while computing or writing propagate to the
            caller after the MT5 connection has been shut down.
        """
        if filename is None:
            filename = f"{pair.lower()}_ppp_analysis.csv"

        if not mt5.initialize():
            logger.error("MT5 initialization failed")
            return

        try:
            end_date = datetime.now()
            start_date = end_date - timedelta(days=months*30)

            rates = mt5.copy_rates_range(pair, mt5.TIMEFRAME_D1, start_date, end_date)

            # An empty result is as unusable as a failed request: without this
            # check a header-only CSV would be written and reported as saved.
            if rates is None or len(rates) == 0:
                logger.error("Failed to get market data")
                return

            df = pd.DataFrame(rates)
            df['time'] = pd.to_datetime(df['time'], unit='s')

            # Calculate PPP values
            dates = [date.to_pydatetime() for date in df['time']]
            ppp_values = self.calculate_dynamic_ppp_for_dates(dates, pair)

            # Add PPP data
            df['ppp_fair_value'] = ppp_values
            df['deviation_pct'] = ((df['close'] - df['ppp_fair_value']) / df['ppp_fair_value']) * 100

            # Classification
            def classify_deviation(dev):
                """Map a percentage deviation to its valuation label."""
                if dev > 15:
                    return "Critically Overvalued"
                if dev > 5:
                    return "Overvalued"
                if dev < -15:
                    return "Critically Undervalued"
                if dev < -5:
                    return "Undervalued"
                return "Fair Value"

            df['status'] = df['deviation_pct'].apply(classify_deviation)

            # Save to CSV
            output_cols = ['time', 'close', 'ppp_fair_value', 'deviation_pct', 'status']
            df[output_cols].to_csv(filename, index=False)

            logger.info(f"PPP analysis data saved to: {filename}")

        finally:
            mt5.shutdown()
    
    def generate_report(self, results: Dict) -> str:
        """Generate comprehensive manual PPP report"""
        
        report = []
        report.append("=" * 70)
        report.append("MANUAL PPP CALCULATION REPORT")
        report.append("=" * 70)
        
        # Methodology overview
        report.append("\n📊 CALCULATION METHODOLOGY:")
        report.append("   • Method 1: Price level adjustment (30% weight)")
        report.append("   • Method 2: GDP-implied rates (25% weight)")
        report.append("   • Method 3: Inflation-adjusted PPP (25% weight)")
        report.append("   • Method 4: Big Mac proxy using price levels (20% weight)")
        report.append("   • Final: Weighted composite of all methods")
        
        # Results summary
        if 'summary' in results and results['summary']:
            summary = results['summary']
            report.append(f"\n🎯 ANALYSIS SUMMARY:")
            report.append(f"   • Currency pairs analyzed: {summary.get('total_pairs_analyzed', 0)}")
            report.append(f"   • Average deviation from fair value: {summary.get('average_deviation', 0)}%")
            report.append(f"   • Pairs near fair value (±5%): {summary.get('pairs_near_fair_value', 0)}")
            report.append(f"   • Significantly misaligned (>15%): {summary.get('significantly_misaligned', 0)}")
        
        # Individual pair results
        if 'deviations' in results and results['deviations']:
            report.append(f"\n💱 CURRENCY PAIR ANALYSIS:")
            report.append(f"{'Pair':<8} {'Market':<8} {'PPP Fair':<8} {'Dev%':<7} {'Status':<12} {'Confidence'}")
            report.append("-" * 65)
            
            # Sort by absolute deviation
            sorted_pairs = sorted(results['deviations'].items(), 
                                key=lambda x: abs(x[1]['deviation_pct']), reverse=True)
            
            for pair, data in sorted_pairs:
                market = data['market_rate']
                fair = data['fair_value']
                dev = data['deviation_pct']
                status = data['status']
                confidence = data.get('confidence', 0.5)
                
                report.append(f"{pair:<8} {market:<8.4f} {fair:<8.4f} {dev:>6.1f}% {status:<12} {confidence:.2f}")
        
        report.append(f"\n" + "=" * 70)
        report.append(f"Report generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        report.append("=" * 70)
        
        return "\n".join(report)

def main():
    """Run the full workflow: fair values, report, EURUSD chart and CSV export.

    Any exception raised by the workflow is logged and echoed to stdout
    rather than propagated.
    """
    print("=== COMPLETE PPP SYSTEM WITH VISUALIZATION ===\n")

    calculator = ManualPPPCalculator()

    try:
        # Fair values for the major pairs, followed by the text report
        print("Calculating PPP fair values...")
        results = calculator.calculate_ppp_fair_values(
            ['EURUSD', 'GBPUSD', 'USDJPY', 'AUDUSD', 'USDCAD', 'USDCHF', 'NZDUSD']
        )

        if results['fair_values']:
            print(calculator.generate_report(results))

        # Historical chart for EURUSD
        print("\nGenerating historical PPP visualization for EURUSD...")
        chart = calculator.visualize_historical_ppp_comparison('EURUSD', months=12)

        if chart:
            plt.show()
            # 10 in wide figure at 70 DPI, i.e. a nominal 700px width
            chart.savefig(
                'eurusd_ppp_analysis.png',
                dpi=70,
                bbox_inches='tight',
                facecolor='white',
                edgecolor='none',
            )
            print("Chart saved as: eurusd_ppp_analysis.png (700px width)")

        # CSV export of the same 12-month window
        calculator.save_historical_ppp_data('EURUSD', months=12)

        print("\n✅ Complete PPP analysis finished!")

    except Exception as exc:
        logger.error(f"Error in main execution: {exc}")
        print(f"❌ Error occurred: {exc}")

# Script entry point: run the full analysis only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()

def quick_ppp_visualization(pair: str = 'EURUSD', months: int = 24):
    """Build, show and save the market-vs-PPP chart for one currency pair.

    Args:
        pair: Symbol passed to ``visualize_historical_ppp_comparison``.
        months: Look-back window passed through unchanged.

    Returns:
        The figure after it has been shown and written to
        ``"<pair lowercased>_ppp_chart.png"``, or ``None`` when no figure
        could be created.
    """
    fig = ManualPPPCalculator().visualize_historical_ppp_comparison(pair, months)

    if not fig:
        print(f"Failed to create visualization for {pair}")
        return None

    plt.show()

    # 10 in wide figure at 70 DPI, i.e. a nominal 700px width
    out_name = f"{pair.lower()}_ppp_chart.png"
    fig.savefig(
        out_name,
        dpi=70,
        bbox_inches='tight',
        facecolor='white',
        edgecolor='none',
    )
    print(f"Chart saved as: {out_name} (700px width)")
    return fig

# Usage examples:
# main()  # Full analysis
# quick_ppp_visualization('GBPUSD', 6)   # Quick GBPUSD chart
#
# The demo chart below is guarded so that importing this module does not
# start MetaTrader5 or open a blocking plot window as a side effect. Running
# the file as a script still produces the chart.
if __name__ == "__main__":
    quick_ppp_visualization('EURUSD', 24)  # Quick EURUSD chart
