Files
2026-06-19 11:15:39 +02:00

35 lines
916 B
Python

from pathlib import Path

import pandas as pd
# Excel workbooks to read, listed in processing order.
files = [
    "rive_droite_2024.xlsx",
    "rive_gauche_2024.xlsx",
    "canoo_2023.xlsx",
    "canoo_2020.xlsx",
]

# Destination of each cleaned dataset: a CSV with the same base name as its
# workbook, placed under output/ and kept in the same order as `files` so the
# two lists can be iterated in lockstep.
output_paths = [
    f"output/{workbook.rsplit('.', 1)[0]}.csv" for workbook in files
]
# Cleaned dataframes, one per input workbook, in the same order as `files`.
dfs = []
for file, output_path in zip(files, output_paths):
    # Read the Excel file.
    df = pd.read_excel(file)

    # Clean the data:
    #   1. drop rows that already have a missing value,
    #   2. convert every column to numeric, turning unparseable cells into NaN,
    #   3. drop the rows that picked up a NaN during the conversion.
    # NOTE(review): a column that holds no numeric values at all is coerced to
    # all-NaN in step 2, so step 3 would then remove every row — confirm the
    # workbooks contain only numeric columns.
    df_clean = df.dropna()
    df_clean = df_clean.apply(pd.to_numeric, errors='coerce')
    df_clean = df_clean.dropna()

    # to_csv does not create missing parent directories, so make sure the
    # target folder exists before writing; otherwise the first write fails
    # on a fresh checkout.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    # Save the cleaned dataframe without the index column.
    df_clean.to_csv(output_path, index=False)

    # Keep the cleaned dataframe around for later use (e.g. plotting).
    dfs.append(df_clean)