# Captured from a file viewer; original listing: Python, 35 lines, 916 B.
import os

import pandas as pd

# Batch-clean a fixed set of Excel workbooks.
#
# For each input workbook: read it, keep only rows that are complete and
# entirely numeric, write the result to the matching CSV path, and collect the
# cleaned frame in `dfs` for later use (e.g. plotting).

# List of file paths
files = [
    'rive_droite_2024.xlsx',
    'rive_gauche_2024.xlsx',
    'canoo_2023.xlsx',
    'canoo_2020.xlsx',
]

# Output paths for cleaned data (parallel to `files`: same order, same length)
output_paths = [
    'output/rive_droite_2024.csv',
    'output/rive_gauche_2024.csv',
    'output/canoo_2023.csv',
    'output/canoo_2020.csv',
]

# zip() stops at the shorter list, so a mismatch would silently skip inputs.
if len(files) != len(output_paths):
    raise ValueError(
        f"files ({len(files)}) and output_paths ({len(output_paths)}) "
        "must have the same number of entries"
    )

dfs = []  # List to store cleaned dataframes

for file, output_path in zip(files, output_paths):
    # Read the Excel file
    df = pd.read_excel(file)

    # Clean the data
    df_clean = df.dropna()  # Drop rows with any missing data
    # Convert every column to numeric; values that cannot be parsed become NaN.
    # NOTE: a column holding non-numeric text is coerced to NaN in every row,
    # so the dropna() below then removes every row of that file.
    df_clean = df_clean.apply(pd.to_numeric, errors='coerce')
    df_clean = df_clean.dropna()  # Drop any rows that now have NaNs

    # to_csv() does not create missing parent directories, so make sure the
    # target directory exists before writing.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save the cleaned dataframe
    df_clean.to_csv(output_path, index=False)

    # Append the clean dataframe to the list for plotting
    dfs.append(df_clean)