from pathlib import Path

import pandas as pd

# Excel workbooks to clean, paired by position with ``output_paths`` below.
files = [
    'rive_droite_2024.xlsx',
    'rive_gauche_2024.xlsx',
    'canoo_2023.xlsx',
    'canoo_2020.xlsx',
]

# Destination CSV for each cleaned workbook (same order as ``files``).
output_paths = [
    'output/rive_droite_2024.csv',
    'output/rive_gauche_2024.csv',
    'output/canoo_2023.csv',
    'output/canoo_2020.csv',
]


def _clean_numeric(df):
    """Return a copy of *df* holding only fully numeric, complete rows.

    Rows with any missing value are dropped, every column is converted with
    ``pd.to_numeric(errors='coerce')``, and rows in which the conversion
    produced a NaN are dropped as well.

    NOTE(review): because every column is coerced, a single non-numeric
    column (e.g. free-text labels) makes every row NaN in that column and the
    result is empty -- confirm the workbooks contain numeric columns only.
    """
    complete = df.dropna()
    numeric = complete.apply(pd.to_numeric, errors='coerce')
    return numeric.dropna()


# Cleaned dataframes, in the same order as ``files``, kept for later plotting.
dfs = []

for file, output_path in zip(files, output_paths):
    # Read the first sheet of the workbook (pandas default).
    df = pd.read_excel(file)

    df_clean = _clean_numeric(df)

    # ``DataFrame.to_csv`` does not create missing directories, so make sure
    # the destination folder exists before writing.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    df_clean.to_csv(output_path, index=False)

    dfs.append(df_clean)