"""Extract the measurements recorded next to the reference points of a river.

The script reads cleaned measurement CSV files and a spreadsheet of reference
distances, keeps the measurements whose distance lies within a tolerance of a
reference distance, reduces them to one row per reference name and writes the
result to ``output/sources``.
"""
import pprint

import pandas as pd

try:
    import matplotlib.pyplot as plt
except ImportError:
    # Nothing in the extraction below plots anything, so a missing matplotlib
    # must not prevent the CSV outputs from being produced.
    plt = None


def check_distance(dist1, dist2, tolerance=0.05):
    """Return True when two distances differ by strictly less than *tolerance*.

    NOTE(review): the original comment described the threshold as "20 m" while
    the code has always used 0.05; the unit of the distance values is not
    visible from this script -- confirm which of the two is intended.

    Args:
        dist1: First distance.
        dist2: Second distance.
        tolerance: Exclusive upper bound on the absolute difference.

    Returns:
        bool: ``abs(dist1 - dist2) < tolerance``.
    """
    return bool(abs(dist1 - dist2) < tolerance)


def pt_src(data_df, dist_dict, tolerance=0.05):
    """Collect the measurements located close to a reference distance.

    Every row of *data_df* is compared with every key of *dist_dict*; each
    (row, key) pair accepted by ``check_distance`` yields one output row.
    A measurement close to several keys therefore appears several times.

    Args:
        data_df: DataFrame whose first column holds the distance of a
            measurement and whose second column holds the measured value.
        dist_dict: Mapping of reference distance -> name.
        tolerance: Forwarded to ``check_distance``.

    Returns:
        DataFrame with the columns ``distance`` (the matching reference
        distance, not the measured one), ``conductivite`` (second column of
        *data_df*) and ``name`` (value of *dist_dict*), ordered by data row and
        then by key order of *dist_dict*. Empty, with the same columns, when
        nothing matches.
    """
    positions = data_df.iloc[:, 0].tolist()
    readings = data_df.iloc[:, 1].tolist()

    # Gather plain dicts and build the frame once: growing a DataFrame row by
    # row copies it on every append and relied on the private ``_append``.
    rows = [
        {'distance': reference, 'conductivite': reading, 'name': name}
        for position, reading in zip(positions, readings)
        for reference, name in dist_dict.items()
        if check_distance(position, reference, tolerance)
    ]
    return pd.DataFrame(rows, columns=['distance', 'conductivite', 'name'])


def remove_double_entries(data_df):
    """Keep a single row per value of the ``name`` column.

    Rows are sorted by ``name`` then ``distance`` and the first row of each
    name is kept, i.e. the one with the smallest ``distance``.

    NOTE(review): the original comment announced that the row with the
    *median* distance is kept; the code has always kept the first one. The
    behaviour is left unchanged here -- confirm which one is wanted.

    Args:
        data_df: DataFrame with at least the columns ``name`` and ``distance``.

    Returns:
        A new DataFrame, sorted by ``name``, without duplicated names.
    """
    ordered = data_df.sort_values(by=['name', 'distance'])
    return ordered.drop_duplicates(subset='name', keep='first')


# Paths to the cleaned data CSV files
files = [
    'output/rive_droite_2024.csv',
    'output/rive_gauche_2024.csv',
    'output/canoo_2023.csv',
    'output/canoo_2020.csv'
]

# Forward slashes work on every platform (Windows included) and avoid the
# invalid escape sequences ("\c", "\s", "\p") the backslash spellings contained.
file_rive_droite = "output/rive_droite_2024.csv"
file_rive_gauche = "output/rive_gauche_2024.csv"
file_canoo_2023 = "output/canoo_2023.csv"
file_canoo_2020 = "output/canoo_2020.csv"
file_points_sources_rive_droite = "output/sources/points_sources_rive_droite.csv"
file_points_sources_rive_gauche = "output/sources/points_sources_rive_gauche.csv"
file_points_sources_canoo_2023 = "output/sources/points_sources_canoo_2023.csv"
distance_file = "distances.xlsx"


def main():
    """Read the inputs, extract the reference-point rows and write the CSVs."""
    # Reference distances 2023/2024: the first spreadsheet row is skipped,
    # column 0 provides the dict values (used as names), columns 1 and 2
    # provide the keys (presumably the 2023 and 2024 distances -- verify
    # against the spreadsheet).
    df_distances = pd.read_excel(distance_file, skiprows=1)
    names = df_distances.iloc[:, 0]
    distances_2023 = dict(zip(df_distances.iloc[:, 1], names))
    distances_2024 = dict(zip(df_distances.iloc[:, 2], names))

    # DataFrames for the cleaned data. All inputs are read before anything is
    # written, so a missing input aborts the run without partial outputs.
    df_rive_droite_2024 = pd.read_csv(file_rive_droite)
    df_rive_gauche_2024 = pd.read_csv(file_rive_gauche)
    df_canoo_2023 = pd.read_csv(file_canoo_2023)
    # The 2020 data is loaded as before but no output is derived from it.
    df_canoo_2020 = pd.read_csv(file_canoo_2020)  # noqa: F841

    # One output file per (measurements, reference distances) pair.
    jobs = [
        (df_rive_droite_2024, distances_2024, file_points_sources_rive_droite),
        (df_rive_gauche_2024, distances_2024, file_points_sources_rive_gauche),
        (df_canoo_2023, distances_2023, file_points_sources_canoo_2023),
    ]
    for measurements, references, destination in jobs:
        sources = remove_double_entries(pt_src(measurements, references))
        sources.to_csv(destination, index=False)


if __name__ == "__main__":
    main()