import sqlite3
import duckdb
import pandas as pd
import numpy as np
import time

# 1. Generate Synthetic Financial Data (~1 million rows)
def generate_data(filename="finance_data.csv", n_rows=1_000_000, seed=None):
    """Write a synthetic tick-data CSV used as benchmark input.

    The file has one row per second starting at 2023-01-01 and the columns
    ``timestamp``, ``ticker``, ``price`` and ``volume``.

    Args:
        filename: Path of the CSV file to create.
        n_rows: Number of rows to generate; must not be negative.
        seed: Optional seed for the random generator. Pass an integer to get
            identical data on every run; ``None`` gives different data each
            time.

    Raises:
        ValueError: If ``n_rows`` is negative.
    """
    if n_rows < 0:
        raise ValueError(f"n_rows must be non-negative, got {n_rows}")

    print("Generating synthetic data...")
    tickers = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA', 'NVDA', 'META', 'NFLX']

    # A local Generator keeps the benchmark reproducible when seeded and
    # avoids touching NumPy's global random state.
    rng = np.random.default_rng(seed)

    df = pd.DataFrame({
        'timestamp': pd.date_range(start='2023-01-01', periods=n_rows, freq='s'),
        'ticker': rng.choice(tickers, n_rows),
        'price': rng.uniform(100, 500, n_rows),
        # Upper bound is exclusive, so volumes fall in 1..999.
        'volume': rng.integers(1, 1000, n_rows),
    })
    df.to_csv(filename, index=False)
    print(f"Created {filename} with {n_rows} rows.")

# 2. Benchmark SQLite
def benchmark_sqlite(filename):
    """Time a per-ticker average-price query on an in-memory SQLite database.

    The CSV is read with pandas and loaded into a ``prices`` table before the
    clock starts, so only query execution and result fetching are timed.

    Args:
        filename: Path to a CSV file containing ``ticker`` and ``price``
            columns.

    Returns:
        Elapsed seconds, measured with ``time.perf_counter``, for executing
        the query and fetching every result row.
    """
    conn = sqlite3.connect(":memory:")  # Using memory for a fair speed test
    try:
        cursor = conn.cursor()

        # Load data (outside the timed region)
        pd.read_csv(filename).to_sql("prices", conn, index=False)

        # Standard financial aggregation
        query = "SELECT ticker, AVG(price) FROM prices GROUP BY ticker"

        start_time = time.perf_counter()
        cursor.execute(query)
        # fetchall() pulls every row so the full query cost is inside the timing.
        cursor.fetchall()
        return time.perf_counter() - start_time
    finally:
        # Close the connection even if loading or querying raised.
        conn.close()

# 3. Benchmark DuckDB
def benchmark_duckdb(filename):
    """Time a per-ticker average-price query run by DuckDB directly on a CSV.

    The query names the CSV file in its FROM clause, so the measured time
    includes DuckDB reading and parsing the file as well as the aggregation.
    NOTE(review): benchmark_sqlite in this file starts its timer after the
    data is already loaded into a table, so the two timings do not cover the
    same work.

    Args:
        filename: Path to a CSV file containing ``ticker`` and ``price``
            columns.

    Returns:
        Elapsed seconds, measured with ``time.perf_counter``, for executing
        the query and fetching every result row.
    """
    conn = duckdb.connect(database=':memory:')
    try:
        # Double any single quotes so the path remains a valid SQL string literal.
        quoted_path = str(filename).replace("'", "''")

        # DuckDB can query the CSV directly using vectorized execution
        query = f"SELECT ticker, AVG(price) FROM '{quoted_path}' GROUP BY ticker"

        start_time = time.perf_counter()
        # fetchall() pulls every row so the full query cost is inside the timing.
        conn.execute(query).fetchall()
        return time.perf_counter() - start_time
    finally:
        # The original never closed this connection; release it on every path.
        conn.close()

def main():
    """Generate the dataset, run both benchmarks, and print a comparison."""
    csv_file = "finance_data.csv"
    generate_data(csv_file)

    print("\nStarting Benchmarks...")

    sqlite_time = benchmark_sqlite(csv_file)
    print(f"SQLite execution time: {sqlite_time:.4f} seconds")

    duckdb_time = benchmark_duckdb(csv_file)
    print(f"DuckDB execution time: {duckdb_time:.4f} seconds")

    if sqlite_time <= 0 or duckdb_time <= 0:
        # A zero elapsed time would make the ratio undefined.
        print("\nElapsed time too small to compute a meaningful ratio.")
    elif duckdb_time <= sqlite_time:
        speedup = sqlite_time / duckdb_time
        print(f"\nDuckDB was {speedup:.1f}x faster than SQLite for this query.")
    else:
        # Report the ratio the right way round instead of e.g. "0.5x faster".
        slowdown = duckdb_time / sqlite_time
        print(f"\nDuckDB was {slowdown:.1f}x slower than SQLite for this query.")


if __name__ == "__main__":
    main()