final commit
This commit is contained in:
@@ -0,0 +1,34 @@
|
||||
import os

import pandas as pd
|
||||
|
||||
# Excel workbooks to clean, in processing order.
files = [
    'rive_droite_2024.xlsx',
    'rive_gauche_2024.xlsx',
    'canoo_2023.xlsx',
    'canoo_2020.xlsx'
]

# Destination CSV for each workbook; entry i is paired with files[i] by the
# zip() in the loop below, so the two lists must stay the same length/order.
output_paths = [
    'output/rive_droite_2024.csv',
    'output/rive_gauche_2024.csv',
    'output/canoo_2023.csv',
    'output/canoo_2020.csv'
]

dfs = []  # Cleaned dataframes, in the same order as `files`

for file, output_path in zip(files, output_paths):
    # Read the Excel file
    df = pd.read_excel(file)

    # Clean the data in three passes:
    #   1. drop rows that already have a missing cell,
    #   2. convert every column to numeric, turning unparseable cells into NaN,
    #   3. drop the rows that step 2 just invalidated.
    # The first dropna is kept deliberately: removing NaNs before conversion
    # can affect the dtype that to_numeric infers for a column.
    df_clean = df.dropna()
    df_clean = df_clean.apply(pd.to_numeric, errors='coerce')
    df_clean = df_clean.dropna()

    # to_csv does not create missing parent directories, so make sure the
    # target folder exists before writing (no-op when it is already there).
    os.makedirs(os.path.dirname(output_path) or '.', exist_ok=True)

    # Save the cleaned dataframe without the index column
    df_clean.to_csv(output_path, index=False)

    # Keep the cleaned dataframe for later use (plotting)
    dfs.append(df_clean)
|
||||
Reference in New Issue
Block a user