import pandas as pd

# Load the raw dataset; the file uses ';' as its field separator.
file_path = 'ML.csv'
data = pd.read_csv(file_path, sep=';')

# Remove incomplete rows first, then exact duplicate rows. Missing values are
# dropped before encoding so that no NaN reaches the label-encoding step.
data = data.dropna().drop_duplicates()

# Label-encode every non-numeric column by replacing each value with its
# integer category code.
# The test is "not numeric" rather than `dtype == 'object'` because text
# columns are not always object-typed: pandas 3.0 infers a dedicated `str`
# dtype by default, and `string` extension dtypes behave the same way. Such
# columns would skip the encoding, be coerced to NaN by `pd.to_numeric`
# below, and take every row with them in the final dropna.
# Boolean columns count as numeric for this check and are left untouched,
# which matches what the object-only comparison did.
for col in data.columns:
    if not pd.api.types.is_numeric_dtype(data[col]):
        data[col] = data[col].astype('category').cat.codes

# Safety net for CatBoost: coerce anything that still cannot be read as a
# number to NaN, then drop the rows that were affected by the coercion.
data = data.apply(pd.to_numeric, errors='coerce').dropna()

# Write the cleaned table without the index column.
output_file_path = 'Cleaned.csv'
data.to_csv(output_file_path, index=False)

print(f"Data cleaned and saved to {output_file_path}")
