final commit
This commit is contained in:
@@ -0,0 +1,65 @@
|
||||
import pandas as pd
|
||||
import pprint
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
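# Function that checks whether the difference between two distances is less than 0.05 (NOTE(review): this comment previously said 20 m; confirm the unit and the intended threshold)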
|
||||
def check_distance(dist1, dist2, tolerance=0.05):
    """Return True when two distances differ by strictly less than *tolerance*.

    Args:
        dist1: First distance.
        dist2: Second distance, expressed in the same unit as ``dist1``.
        tolerance: Exclusive upper bound on the absolute difference.
            Defaults to 0.05, the threshold that used to be hard-coded here.

    Returns:
        A plain ``bool``: True if ``abs(dist1 - dist2) < tolerance``.
    """
    # bool() keeps the return type a built-in bool even for NumPy scalar inputs.
    return bool(abs(dist1 - dist2) < tolerance)
|
||||
|
||||
def pt_src(data_df, dist_dict):
    """Match the rows of *data_df* to reference distances.

    Every row of ``data_df`` is compared with every key of ``dist_dict``;
    each pair accepted by ``check_distance`` yields one output row.

    Args:
        data_df: DataFrame whose first column is the measured distance and
            whose second column is the value stored under ``conductivite``.
        dist_dict: Mapping of reference distance -> name.

    Returns:
        A DataFrame with the columns ``distance`` (the matching reference
        distance, not the measured one), ``conductivite`` and ``name``,
        holding one row per match in row-then-key order. When nothing
        matches, the frame is empty but still has those three columns.
    """
    columns = ['distance', 'conductivite', 'name']

    # Collect the matches in a plain list and build the frame once at the end:
    # growing a DataFrame row by row copies it on every step and relied on the
    # private ``DataFrame._append`` method.
    matches = []
    for measured, value in zip(data_df.iloc[:, 0], data_df.iloc[:, 1]):
        for ref_distance, name in dist_dict.items():
            if check_distance(measured, ref_distance):
                matches.append({
                    'distance': ref_distance,
                    'conductivite': value,
                    'name': name,
                })

    return pd.DataFrame(matches, columns=columns)
|
||||
|
||||
# Function that checks for duplicate entries based on the name column and keeps only one row per name: the first row after sorting by name and distance (i.e. the smallest distance, not the median)
|
||||
def remove_double_entries(data_df):
    """Keep a single row per value of the ``name`` column.

    The rows are ordered by ``name`` and then ``distance``; for each name the
    first row in that order is kept and the others are dropped. The input
    frame is not modified.

    Args:
        data_df: DataFrame with at least the columns ``name`` and ``distance``.

    Returns:
        A new DataFrame, sorted by name then distance, with unique names.
        The original index labels of the kept rows are preserved.
    """
    ordered = data_df.sort_values(by=['name', 'distance'])
    # Flag every repeat of a name that was already seen earlier in the order.
    is_repeat = ordered.duplicated(subset='name', keep='first')
    return ordered[~is_repeat]
|
||||
|
||||
# Paths to the cleaned data CSV files
|
||||
# All paths use forward slashes: they work on Windows as well as on POSIX
# systems, and avoid invalid escape sequences such as "\c" or "\s" that
# backslash-separated paths put into ordinary string literals.
file_rive_droite = "output/rive_droite_2024.csv"
file_rive_gauche = "output/rive_gauche_2024.csv"
file_canoo_2023 = "output/canoo_2023.csv"
file_canoo_2020 = "output/canoo_2020.csv"

# The same four cleaned-data files, gathered in one list.
files = [
    file_rive_droite,
    file_rive_gauche,
    file_canoo_2023,
    file_canoo_2020,
]

# CSV files that receive the computed source points.
file_points_sources_rive_droite = "output/sources/points_sources_rive_droite.csv"
file_points_sources_rive_gauche = "output/sources/points_sources_rive_gauche.csv"
file_points_sources_canoo_2023 = "output/sources/points_sources_canoo_2023.csv"

# Excel workbook holding the reference distances.
distance_file = "distances.xlsx"
|
||||
|
||||
# Reference distances: the first row of the workbook is skipped. Each mapping
# goes from a distance (2nd column for 2023, 3rd column for 2024) to the value
# in the first column, which pt_src stores in its 'name' column.
df_distances = pd.read_excel(distance_file, skiprows=1)
_reference_names = df_distances.iloc[:, 0]
distances_2023 = dict(zip(df_distances.iloc[:, 1], _reference_names))
distances_2024 = dict(zip(df_distances.iloc[:, 2], _reference_names))

# Cleaned measurement data, one DataFrame per input file.
df_rive_droite_2024 = pd.read_csv(file_rive_droite)
df_rive_gauche_2024 = pd.read_csv(file_rive_gauche)
df_canoo_2023 = pd.read_csv(file_canoo_2023)
df_canoo_2020 = pd.read_csv(file_canoo_2020)

# Source points per data set: match against the reference distances,
# keep one row per name, then write the result to CSV without the index.
pt_src_rive_droite = pt_src(df_rive_droite_2024, distances_2024)
pt_src_rive_droite = remove_double_entries(pt_src_rive_droite)
pt_src_rive_droite.to_csv(file_points_sources_rive_droite, index=False)

pt_src_rive_gauche = pt_src(df_rive_gauche_2024, distances_2024)
pt_src_rive_gauche = remove_double_entries(pt_src_rive_gauche)
pt_src_rive_gauche.to_csv(file_points_sources_rive_gauche, index=False)

pt_src_canoo_2023 = pt_src(df_canoo_2023, distances_2023)
pt_src_canoo_2023 = remove_double_entries(pt_src_canoo_2023)
pt_src_canoo_2023.to_csv(file_points_sources_canoo_2023, index=False)
|
||||
Reference in New Issue
Block a user