final commit

2026-06-19 11:15:39 +02:00
parent e2b1b20913
commit d095840037
975 changed files with 1232523 additions and 0 deletions
@@ -0,0 +1,34 @@
"""Clean a fixed set of Excel workbooks and export each one as a numeric CSV.

For every workbook listed in ``files`` the script:

1. reads it with ``pandas.read_excel`` (default arguments),
2. drops every row that has at least one missing value,
3. converts every column to a numeric dtype, turning values that cannot be
   parsed into NaN,
4. drops the rows that gained a NaN in step 3,
5. writes the result to the matching entry of ``output_paths``.

The cleaned dataframes are also collected in ``dfs`` (same order as
``files``) so they can be reused after the loop.

Raises:
    ValueError: if ``files`` and ``output_paths`` differ in length.
    FileNotFoundError: if one of the input workbooks does not exist.
"""
from pathlib import Path

import pandas as pd

# Input workbooks, resolved relative to the current working directory.
files = [
    'rive_droite_2024.xlsx',
    'rive_gauche_2024.xlsx',
    'canoo_2023.xlsx',
    'canoo_2020.xlsx'
]

# Destination CSV for each workbook; entry i belongs to files[i].
output_paths = [
    'output/rive_droite_2024.csv',
    'output/rive_gauche_2024.csv',
    'output/canoo_2023.csv',
    'output/canoo_2020.csv'
]

dfs = []  # Cleaned dataframes, in the same order as ``files``

# zip() stops at the shorter list, so a forgotten entry would silently skip a
# workbook. Fail loudly instead.
if len(files) != len(output_paths):
    raise ValueError(
        f"files has {len(files)} entries but output_paths has "
        f"{len(output_paths)}; the two lists must be paired one-to-one"
    )

for file, output_path in zip(files, output_paths):
    # Read the Excel file
    df = pd.read_excel(file)

    # Drop incomplete rows first, so the numeric conversion below only sees
    # rows that were fully populated in the workbook.
    df_clean = df.dropna()
    # Convert column by column; errors='coerce' replaces unparseable values
    # with NaN instead of raising.
    df_clean = df_clean.apply(pd.to_numeric, errors='coerce')
    # Remove the rows in which at least one value could not be converted.
    df_clean = df_clean.dropna()

    # to_csv does not create missing parent directories, so make sure the
    # target folder exists before writing.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    df_clean.to_csv(output_path, index=False)

    # Keep the cleaned dataframe for later use (e.g. plotting)
    dfs.append(df_clean)