final commit

2026-06-19 11:15:39 +02:00
parent e2b1b20913
commit d095840037
975 changed files with 1232523 additions and 0 deletions
@@ -0,0 +1,65 @@
+import pandas as pd
+import pprint
+import matplotlib.pyplot as plt
+
def check_distance(dist1, dist2, tolerance=0.05):
    """Return True when two distances differ by strictly less than *tolerance*.

    Args:
        dist1: First distance.
        dist2: Second distance, expressed in the same unit as ``dist1``.
        tolerance: Exclusive upper bound on the absolute difference for the
            two distances to be considered matching. Defaults to 0.05, the
            value that used to be hard-coded here.

    Returns:
        ``True`` if ``abs(dist1 - dist2) < tolerance``, ``False`` otherwise
        (including when either value is NaN).

    NOTE(review): an earlier comment described this threshold as "20m", which
    does not match the value 0.05 in any obvious unit -- confirm the unit of
    the distance columns and the intended tolerance.
    """
    # bool() keeps the return type a plain Python bool even when the inputs
    # are NumPy scalars (whose comparison yields numpy.bool_).
    return bool(abs(dist1 - dist2) < tolerance)
+
def pt_src(data_df, dist_dict):
    """Match measurement rows against a set of reference distances.

    Every row of ``data_df`` is compared with every key of ``dist_dict``
    using ``check_distance``; each match yields one output row.

    Args:
        data_df: DataFrame whose first column (position 0) holds a distance
            and whose second column (position 1) holds the value stored as
            ``conductivite``. Columns are read by position, not by label.
        dist_dict: Mapping of reference distance -> name.

    Returns:
        A new DataFrame with columns ``distance`` (the matching reference
        distance, i.e. the dict key -- not the measured one),
        ``conductivite`` and ``name``. Rows appear in ``data_df`` order and,
        within one measurement, in ``dist_dict`` order. The frame is empty
        when nothing matches.
    """
    # Collect plain dicts and build the DataFrame once at the end: growing a
    # DataFrame row by row copies it on every match and relied on the
    # private ``DataFrame._append`` method.
    matches = []
    for row in data_df.itertuples(index=False, name=None):
        measured_distance, conductivity = row[0], row[1]
        for ref_distance, name in dist_dict.items():
            if check_distance(measured_distance, ref_distance):
                matches.append({
                    'distance': ref_distance,
                    'conductivite': conductivity,
                    'name': name,
                })
    return pd.DataFrame(matches, columns=['distance', 'conductivite', 'name'])
+
def remove_double_entries(data_df):
    """Keep a single row per ``name``: the one with the smallest ``distance``.

    The rows are sorted by ``name`` and then by ``distance`` (both
    ascending), after which only the first row of each ``name`` is kept.
    The input DataFrame is not modified.

    NOTE(review): the previous comment on this function said the row with
    the *median* distance is kept. That is not what the code does -- it keeps
    the minimum. Behaviour is left unchanged here; confirm which one is
    intended.

    Args:
        data_df: DataFrame with at least the columns ``name`` and
            ``distance``.

    Returns:
        A new DataFrame, ordered by ``name``, with one row per distinct
        ``name``. The original index labels of the kept rows are preserved.
    """
    ordered = data_df.sort_values(by=['name', 'distance'])
    return ordered.drop_duplicates(subset='name', keep='first')
+
# Paths to the cleaned data CSV files.
# Forward slashes are used everywhere: they are accepted on Windows as well as
# on POSIX systems, and they avoid invalid escape sequences such as "\c" or
# "\s" inside ordinary string literals.
file_rive_droite = "output/rive_droite_2024.csv"
file_rive_gauche = "output/rive_gauche_2024.csv"
file_canoo_2023 = "output/canoo_2023.csv"
file_canoo_2020 = "output/canoo_2020.csv"

# All cleaned input files, in one list (built from the names above so the
# paths are only written once).
files = [
    file_rive_droite,
    file_rive_gauche,
    file_canoo_2023,
    file_canoo_2020,
]

# Destination CSV files for the matched source points.
file_points_sources_rive_droite = "output/sources/points_sources_rive_droite.csv"
file_points_sources_rive_gauche = "output/sources/points_sources_rive_gauche.csv"
file_points_sources_canoo_2023 = "output/sources/points_sources_canoo_2023.csv"

distance_file = "distances.xlsx"

# Reference distances: the first spreadsheet row is skipped, column 0 supplies
# the names, column 1 the distances matched against the 2023 data and
# column 2 the distances matched against the 2024 data.
# Each dict maps distance -> name.
df_distances = pd.read_excel(distance_file, skiprows=1)
distances_2023 = dict(zip(df_distances.iloc[:, 1], df_distances.iloc[:, 0]))
distances_2024 = dict(zip(df_distances.iloc[:, 2], df_distances.iloc[:, 0]))

# DataFrames for the cleaned data.
df_rive_droite_2024 = pd.read_csv(file_rive_droite)
df_rive_gauche_2024 = pd.read_csv(file_rive_gauche)
df_canoo_2023 = pd.read_csv(file_canoo_2023)
# Loaded for completeness; not used further in this script.
df_canoo_2020 = pd.read_csv(file_canoo_2020)

# For each data set: match the measurements against the reference distances,
# keep one row per source name, and write the result to CSV.
pt_src_rive_droite = remove_double_entries(pt_src(df_rive_droite_2024, distances_2024))
pt_src_rive_droite.to_csv(file_points_sources_rive_droite, index=False)

pt_src_rive_gauche = remove_double_entries(pt_src(df_rive_gauche_2024, distances_2024))
pt_src_rive_gauche.to_csv(file_points_sources_rive_gauche, index=False)

pt_src_canoo_2023 = remove_double_entries(pt_src(df_canoo_2023, distances_2023))
pt_src_canoo_2023.to_csv(file_points_sources_canoo_2023, index=False)