from pathlib import Path

import pandas as pd

# Input Excel workbooks, one per dataset
files = [
    'rive_droite_2024.xlsx',
    'rive_gauche_2024.xlsx',
    'canoo_2023.xlsx',
    'canoo_2020.xlsx',
]

# Destination CSV for each workbook above, in the same order as `files`
output_paths = [
    'output/rive_droite_2024.csv',
    'output/rive_gauche_2024.csv',
    'output/canoo_2023.csv',
    'output/canoo_2020.csv',
]


def _clean_numeric(frame):
    """Return the rows of *frame* that are complete and fully numeric.

    Rows containing any missing value are dropped first. Every column is
    then passed through ``pd.to_numeric`` with ``errors='coerce'``, which
    turns values that cannot be parsed as numbers into NaN, and rows that
    picked up a NaN in that step are dropped as well.
    """
    complete = frame.dropna()
    numeric = complete.apply(pd.to_numeric, errors='coerce')
    return numeric.dropna()


dfs = []  # Cleaned dataframes, in the same order as `files`

for file, output_path in zip(files, output_paths):
    # Read the Excel file
    df = pd.read_excel(file)

    # Keep only complete, numeric rows
    df_clean = _clean_numeric(df)

    # to_csv does not create missing directories, so make sure the
    # destination folder exists before writing.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    df_clean.to_csv(output_path, index=False)

    # Keep the cleaned dataframe for later use (e.g. plotting)
    dfs.append(df_clean)