final commit
This commit is contained in:
Binary file not shown.
Binary file not shown.
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,65 @@
|
||||
import pandas as pd
|
||||
import pprint
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# Check whether two distances lie within a given tolerance of each other.
def check_distance(dist1, dist2, tolerance=0.05):
    """Return True when ``dist1`` and ``dist2`` differ by less than ``tolerance``.

    Args:
        dist1: First distance.
        dist2: Second distance, in the same unit as ``dist1``.
        tolerance: Exclusive upper bound on the absolute difference.
            Defaults to 0.05, the value that used to be hard-coded.
            NOTE(review): the previous comment described the threshold as
            "20m"; 0.05 only equals 20 m if the unit is 400 m, so the
            comment was most likely stale -- confirm the intended unit.

    Returns:
        bool: True if ``abs(dist1 - dist2) < tolerance``, otherwise False.
    """
    # bool() keeps the return type a plain Python bool even for numpy scalars.
    return bool(abs(dist1 - dist2) < tolerance)
|
||||
|
||||
def pt_src(data_df, dist_dict):
    """Pair each measurement with the reference points it lies close to.

    Every row of ``data_df`` is compared with every key of ``dist_dict``
    through ``check_distance``; one output row is produced per match.
    Output rows follow the order of ``data_df`` rows first, then the
    order of the keys in ``dist_dict``.

    Args:
        data_df: DataFrame read by position: column 0 is the value compared
            with the keys of ``dist_dict``, column 1 is copied into the
            ``conductivite`` output column.
        dist_dict: Mapping whose keys are reference distances and whose
            values are stored in the ``name`` output column.

    Returns:
        pandas.DataFrame with the columns ``distance`` (the matching key of
        ``dist_dict``, not the measured value), ``conductivite`` and
        ``name``. The frame is empty when nothing matches.
    """
    columns = ['distance', 'conductivite', 'name']

    # Collect the rows first and build the frame once: DataFrame.append was
    # removed in pandas 2.0, `_append` is a private method, and growing a
    # frame row by row copies it on every iteration.
    rows = []
    for row in data_df.itertuples(index=False, name=None):
        for ref_distance, ref_name in dist_dict.items():
            if check_distance(row[0], ref_distance):
                rows.append({
                    'distance': ref_distance,
                    'conductivite': row[1],
                    'name': ref_name,
                })

    return pd.DataFrame(rows, columns=columns)
|
||||
|
||||
# Collapse rows that share the same name into a single row per name.
def remove_double_entries(data_df):
    """Keep one row per distinct value of the ``name`` column.

    Rows are ordered by ``name`` and then by ``distance``; for each name
    only the first row in that order (its smallest distance) is kept.

    NOTE(review): an earlier comment said the row with the *median*
    distance is kept. The code keeps the first row after sorting --
    confirm which behavior is intended.

    Args:
        data_df: DataFrame with at least the columns ``name`` and
            ``distance``. It is not modified.

    Returns:
        A new DataFrame, sorted by name, with duplicate names removed.
        The original index labels of the kept rows are preserved.
    """
    ordered = data_df.sort_values(by=['name', 'distance'])
    return ordered.drop_duplicates(subset='name', keep='first')
|
||||
|
||||
# Paths to the cleaned data CSV files
files = [
    'output/rive_droite_2024.csv',
    'output/rive_gauche_2024.csv',
    'output/canoo_2023.csv',
    'output/canoo_2020.csv',
]

# Forward slashes are used for every path: backslash-separated paths only
# resolve on Windows, and sequences such as "\c" or "\s" are invalid string
# escapes that Python warns about. Forward slashes work on every platform.
file_rive_droite = "output/rive_droite_2024.csv"
file_rive_gauche = "output/rive_gauche_2024.csv"
file_canoo_2023 = "output/canoo_2023.csv"
file_canoo_2020 = "output/canoo_2020.csv"

# Destination files for the extracted source points
file_points_sources_rive_droite = "output/sources/points_sources_rive_droite.csv"
file_points_sources_rive_gauche = "output/sources/points_sources_rive_gauche.csv"
file_points_sources_canoo_2023 = "output/sources/points_sources_canoo_2023.csv"

# Workbook holding the reference distances
distance_file = "distances.xlsx"
|
||||
|
||||
# Reference distances for 2023 and 2024.
# The first row of the workbook is skipped. Column 0 supplies the dictionary
# values (pt_src stores them in its 'name' column); columns 1 and 2 supply
# the keys for 2023 and 2024 respectively.
df_distances = pd.read_excel(distance_file, skiprows=1)
_reference_names = df_distances.iloc[:, 0]
distances_2023 = dict(zip(df_distances.iloc[:, 1], _reference_names))
distances_2024 = dict(zip(df_distances.iloc[:, 2], _reference_names))

# Cleaned measurement data, one DataFrame per CSV file
df_rive_droite_2024 = pd.read_csv(file_rive_droite)
df_rive_gauche_2024 = pd.read_csv(file_rive_gauche)
df_canoo_2023 = pd.read_csv(file_canoo_2023)
df_canoo_2020 = pd.read_csv(file_canoo_2020)

# Source points of the river: match the measurements against the reference
# distances, keep one row per name, then write the result to CSV.
_matches_rive_droite = pt_src(df_rive_droite_2024, distances_2024)
pt_src_rive_droite = remove_double_entries(_matches_rive_droite)
pt_src_rive_droite.to_csv(file_points_sources_rive_droite, index=False)

_matches_rive_gauche = pt_src(df_rive_gauche_2024, distances_2024)
pt_src_rive_gauche = remove_double_entries(_matches_rive_gauche)
pt_src_rive_gauche.to_csv(file_points_sources_rive_gauche, index=False)

_matches_canoo_2023 = pt_src(df_canoo_2023, distances_2023)
pt_src_canoo_2023 = remove_double_entries(_matches_canoo_2023)
pt_src_canoo_2023.to_csv(file_points_sources_canoo_2023, index=False)
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 169 KiB |
File diff suppressed because it is too large
Load Diff
|
After Width: | Height: | Size: 4.7 MiB |
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Binary file not shown.
|
After Width: | Height: | Size: 643 KiB |
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,14 @@
|
||||
distance,conductivite,name
|
||||
35.1607891,288.3176,Anglades
|
||||
40.48775894,293.005,Anglanat
|
||||
18.30798519,227.1419,Ayrissac
|
||||
9.453041659,208.1663,Bual
|
||||
5.224241888,204.4046,Corn
|
||||
13.66500765,214.7027,Cross Renard
|
||||
12.16202511,218.6649,Diège
|
||||
42.08276949,293.4255,Liauzut
|
||||
30.174923070000002,281.1578,Marchepied
|
||||
43.47837751,309.5416,Pescalerie
|
||||
22.79824485,245.96,Pito
|
||||
26.01200193,263.7979,Ressel
|
||||
46.31162723,306.0898,Sagne
|
||||
|
@@ -0,0 +1,13 @@
|
||||
distance,conductivite,name
|
||||
33.797216829999996,211.924774169921,Anglades
|
||||
39.083181992,216.155151367187,Anglanat
|
||||
17.5891885989,173.244277954101,Ayrissac
|
||||
8.9805148019,164.101013183593,Bual
|
||||
4.842451423000001,159.90266418457,Corn
|
||||
13.34022860284,168.972229003906,Cross Renard
|
||||
11.887356203,166.809005737304,Diège
|
||||
29.06627534,204.600280761718,Marchepied
|
||||
41.181449136000005,216.37808227539,Pescalerie
|
||||
22.042666324000002,176.678253173828,Pito
|
||||
25.1455890044,193.875442504882,Ressel
|
||||
44.530155019999995,221.792526245117,Sagne
|
||||
|
@@ -0,0 +1,14 @@
|
||||
distance,conductivite,name
|
||||
33.797216829999996,209.5849609375,Anglades
|
||||
39.083181992,214.441787719726,Anglanat
|
||||
17.5891885989,171.898605346679,Ayrissac
|
||||
8.9805148019,162.625411987304,Bual
|
||||
4.842451423000001,157.083145141601,Corn
|
||||
13.34022860284,167.320678710937,Cross Renard
|
||||
11.887356203,165.127410888671,Diège
|
||||
40.65562334,215.466522216796,Liauzut
|
||||
29.06627534,203.885635375976,Marchepied
|
||||
41.181449136000005,215.419143676757,Pescalerie
|
||||
22.042666324000002,175.490203857421,Pito
|
||||
25.1455890044,192.625335693359,Ressel
|
||||
44.530155019999995,220.570251464843,Sagne
|
||||
|
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,34 @@
|
||||
import pandas as pd
|
||||
|
||||
# Excel workbooks to clean
files = [
    'rive_droite_2024.xlsx',
    'rive_gauche_2024.xlsx',
    'canoo_2023.xlsx',
    'canoo_2020.xlsx',
]

# Destination CSV for each workbook, in the same order as `files`
output_paths = [
    'output/rive_droite_2024.csv',
    'output/rive_gauche_2024.csv',
    'output/canoo_2023.csv',
    'output/canoo_2020.csv',
]

dfs = []  # Cleaned dataframes, in the order of `files`

for file, output_path in zip(files, output_paths):
    df = pd.read_excel(file)

    # Cleaning pipeline: drop rows with any missing value, convert every
    # column to numeric (unparseable values become NaN), then drop the rows
    # in which that conversion produced a NaN.
    df_clean = (
        df.dropna()
        .apply(pd.to_numeric, errors='coerce')
        .dropna()
    )

    # Write the cleaned data and keep it in memory as well
    df_clean.to_csv(output_path, index=False)
    dfs.append(df_clean)
|
||||
@@ -0,0 +1,64 @@
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# Cleaned measurement CSV files, one per dataset
files = [
    'output/rive_droite_2024.csv',
    'output/rive_gauche_2024.csv',
    'output/canoo_2023.csv',
    'output/canoo_2020.csv',
]

# Source-point CSV files whose points are highlighted and labelled
sources_files = [
    'output/sources/points_sources_rive_droite.csv',
    'output/sources/points_sources_canoo_2023.csv',
]

# Color and legend label of each dataset, in the same order as `files`
colors = ['blue', 'limegreen', 'red', 'purple']
labels = ['2024 Canoe Right Bank', '2024 Canoe Left Bank', '2023 Canoe', '2020 Canoe']

# Label-box edge color of each source file, in the same order as `sources_files`
sources_colors = ['blue', 'red']

# Load every dataset up front
dfs = [pd.read_csv(path) for path in files]
sources_dfs = [pd.read_csv(path) for path in sources_files]

plt.figure(figsize=(12, 8))  # Create a figure with a custom size

# One small-marker scatter series per dataset: column 0 on x, column 1 on y
for df, color, label in zip(dfs, colors, labels):
    plt.scatter(df.iloc[:, 0], df.iloc[:, 1], color=color, label=label, alpha=0.8, s=2)

# Highlight each source point with a large white marker and a boxed name
for df, color in zip(sources_dfs, sources_colors):
    bbox_props = dict(boxstyle="round,pad=0.3", fc="white", ec=color, alpha=0.7)
    for _, row in df.iterrows():
        x, y = row['distance'], row['conductivite']
        plt.scatter(x, y, color='white', alpha=0.8, s=60, edgecolors='black', linewidth=3)
        plt.annotate(
            row['name'],
            (x, y),
            weight='bold',
            fontsize=11,
            xytext=(0, 25),  # label sits 25 points above the marker
            bbox=bbox_props,
            textcoords='offset points',
            ha='center',
        )

plt.title('Conductivity by kilometers of river for different year of continuous measurements', y=1 , fontsize = 15, fontweight = 'bold')
plt.xlabel('Distance (kilometers)')
plt.ylabel('Conductivity (us/cm)')
plt.legend(loc='lower right', frameon=True, facecolor='white', edgecolor='black', fancybox=False, framealpha=1)
plt.grid(True)

# Save the figure before showing it
plt.savefig('output/my_conductivity_plot2.png', format='png', dpi=300, bbox_inches='tight')

plt.show()
|
||||
Reference in New Issue
Block a user