Files
luce/2024_10_25/lecture_distances.py
T
2026-06-19 11:15:39 +02:00

65 lines
2.6 KiB
Python

import pprint
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
def check_distance(dist1, dist2, tolerance=0.05):
    """Return True when two distances differ by strictly less than *tolerance*.

    Args:
        dist1: First distance.
        dist2: Second distance, expressed in the same unit as ``dist1``.
        tolerance: Exclusive upper bound on the absolute difference that
            still counts as a match. Defaults to 0.05, the value that was
            previously hard-coded in this function.

    Returns:
        bool: ``True`` if ``abs(dist1 - dist2) < tolerance``, else ``False``.
    """
    # NOTE(review): the earlier comment described this threshold as "20m",
    # which does not obviously correspond to 0.05 -- confirm the unit of the
    # distance columns and adjust the default if needed.
    # bool() keeps the return type a plain Python bool even when the inputs
    # are numpy scalars coming out of a DataFrame.
    return bool(abs(dist1 - dist2) < tolerance)
def pt_src(data_df, dist_dict):
    """Match measurement rows against the reference distances of the sources.

    Every row of ``data_df`` is compared with every key of ``dist_dict``;
    each pair accepted by ``check_distance`` produces one output row.

    Args:
        data_df: DataFrame read by position: the first column is used as the
            distance and the second column as the conductivity value.
        dist_dict: Mapping from a reference distance to the name associated
            with it.

    Returns:
        pandas.DataFrame with the columns ``'distance'`` (the matching key of
        ``dist_dict``), ``'conductivite'`` (second column of the matching
        row) and ``'name'`` (value of ``dist_dict`` for that key). Rows
        follow the order of ``data_df``, then the key order of ``dist_dict``.
        The frame is empty, with the same columns, when nothing matches.
    """
    # Collect the matches in a plain list and build the frame once: this
    # avoids the private DataFrame._append API and the quadratic cost of
    # re-allocating the frame for every appended row.
    matches = []
    for row in data_df.itertuples(index=False, name=None):
        measured = row[0]
        for ref_distance, name in dist_dict.items():
            if check_distance(measured, ref_distance):
                matches.append({
                    'distance': ref_distance,
                    'conductivite': row[1],
                    'name': name,
                })
    return pd.DataFrame(matches, columns=['distance', 'conductivite', 'name'])
def remove_double_entries(data_df):
    """Reduce the rows sharing a 'name' to a single row per name.

    The rows are ordered by 'name' and then by 'distance' (both ascending),
    and only the first row of each name is kept, i.e. the one with the
    smallest 'distance'. The input frame is not modified.

    Args:
        data_df: DataFrame containing at least the columns 'name' and
            'distance'.

    Returns:
        A new DataFrame, sorted by 'name', with one row per distinct name.
    """
    # NOTE(review): an earlier comment stated that the row with the *median*
    # distance is kept; the code keeps the smallest one -- confirm which
    # behaviour is intended.
    ordered = data_df.sort_values(by=['name', 'distance'])
    return ordered.drop_duplicates(subset='name', keep='first')
# Paths to the cleaned data CSV files. Forward slashes are used everywhere:
# they are accepted on Windows as well as POSIX systems, and they avoid
# backslash sequences such as "\c" or "\s", which Python reports as invalid
# string escapes.
files = [
    'output/rive_droite_2024.csv',
    'output/rive_gauche_2024.csv',
    'output/canoo_2023.csv',
    'output/canoo_2020.csv',
]
# Named aliases for the entries of `files`, in the same order.
file_rive_droite, file_rive_gauche, file_canoo_2023, file_canoo_2020 = files

# Destination CSV files for the matched source points
file_points_sources_rive_droite = "output/sources/points_sources_rive_droite.csv"
file_points_sources_rive_gauche = "output/sources/points_sources_rive_gauche.csv"
file_points_sources_canoo_2023 = "output/sources/points_sources_canoo_2023.csv"
distance_file = "distances.xlsx"

# Reference distances for 2023/2024. The first row of the sheet is skipped;
# the first column supplies the dictionary values (used as names by pt_src),
# the second column the 2023 distance keys and the third column the 2024
# distance keys.
df_distances = pd.read_excel(distance_file, skiprows=1)
distances_2023 = dict(zip(df_distances.iloc[:, 1], df_distances.iloc[:, 0]))
distances_2024 = dict(zip(df_distances.iloc[:, 2], df_distances.iloc[:, 0]))

# DataFrames for the cleaned data
df_rive_droite_2024 = pd.read_csv(file_rive_droite)
df_rive_gauche_2024 = pd.read_csv(file_rive_gauche)
df_canoo_2023 = pd.read_csv(file_canoo_2023)
df_canoo_2020 = pd.read_csv(file_canoo_2020)

# Make sure the destination folder exists; to_csv does not create it.
Path("output/sources").mkdir(parents=True, exist_ok=True)

# Match each data set against its reference distances, keep one row per
# name, and write the result to CSV.
pt_src_rive_droite = remove_double_entries(pt_src(df_rive_droite_2024, distances_2024))
pt_src_rive_droite.to_csv(file_points_sources_rive_droite, index=False)
pt_src_rive_gauche = remove_double_entries(pt_src(df_rive_gauche_2024, distances_2024))
pt_src_rive_gauche.to_csv(file_points_sources_rive_gauche, index=False)
pt_src_canoo_2023 = remove_double_entries(pt_src(df_canoo_2023, distances_2023))
pt_src_canoo_2023.to_csv(file_points_sources_canoo_2023, index=False)