Files
2026-06-19 11:15:39 +02:00

140 lines
4.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from scipy.interpolate import make_interp_spline
from sklearn.cluster import DBSCAN
# === Load the measurements ===
chemin_fichier = "data.xlsx"
df = pd.read_excel(chemin_fichier)

# === Keep only usable "source" rows ===
# The "type" comparison is case-insensitive; rows whose "type" is missing
# compare as False and are therefore dropped too.
est_source = df["type"].str.lower().eq("source")
# A row is usable only if it has both a conductivity value and a source name.
# The conductivity column label is spelled with a trailing space.
df = df.loc[est_source].dropna(subset=["c25°C ", "Nom"])
# === Line colour of each source ===
# Keys are the source names found in the "Nom" column; values are passed
# unchanged as the `color=` argument when the curves and labels are drawn.
couleurs_sources = {
    "Bullac": "#D77D00",
    "Corn": "#A1C935",
    "Bual": "#F5D200",
    "La diège": "#9271EA",
    "Ayrissac": "#EB64C3",
    "Pito": "#D52117",
    "Ressel": "#1DC6C3",
    "Marchepied": "#3FB94D",
    "Anglades": "#0084C9",
    "Liauzu": "#431D84",
    "Pescalerie": "#D8676B",
    "Sagne": "tab:purple",
}
# === DBSCAN parameters ===
# Minimum neighbourhood size for a core point. With a value of 1 every
# measurement is its own core point, so DBSCAN never labels anything as noise.
min_samples_val = 1
# Neighbourhood radius, expressed in the units of the conductivity column:
# two values closer than this end up in the same cluster.
eps_val = 5
# === Per-source clustering of the conductivity values ===
# The measurements of each source are clustered in one dimension with DBSCAN,
# then every cluster is summarised by its min / max / mean conductivity and by
# the number of measurements it contains.
col_conductivite = "c25°C "  # column label, trailing space included
clusters_data = []
# A single groupby pass replaces one boolean-mask scan of `df` per source.
# sort=False keeps the sources in order of first appearance, exactly like
# iterating over df["Nom"].unique().
for nom, mesures in df.groupby("Nom", sort=False):
    etiquettes = (
        DBSCAN(eps=eps_val, min_samples=min_samples_val)
        .fit(mesures[[col_conductivite]].to_numpy())
        .labels_
    )
    # NOTE: DBSCAN marks noise with the label -1. That cannot happen while
    # min_samples_val == 1, but with a larger value the noise points would be
    # summarised below as if they formed one ordinary cluster.
    grouped = (
        mesures.assign(cluster=etiquettes)
        .groupby("cluster")
        .agg(
            conductivite_min=(col_conductivite, "min"),
            conductivite_max=(col_conductivite, "max"),
            conductivite_moyenne=(col_conductivite, "mean"),
            effectif=(col_conductivite, "count"),
        )
        .reset_index(drop=True)
    )
    # Share of this source's measurements that falls in each cluster.
    grouped["frequence_normalisee"] = grouped["effectif"] / grouped["effectif"].sum()
    grouped["Nom"] = nom
    clusters_data.append(grouped)

# === Merge the per-source summaries ===
# pd.concat fails with an opaque "No objects to concatenate" on an empty list,
# so report explicitly that the filters left nothing to cluster.
if not clusters_data:
    raise ValueError(
        "No usable 'source' row (with a conductivity and a name) was found: "
        "nothing to cluster."
    )
df_clusters = pd.concat(clusters_data, ignore_index=True)
# === Display order of the sources ===
# Only the names listed here can be plotted: a source present in the data but
# absent from this list is left out of the figure.
noms_ordonnes = [
    "Sagne",
    "Ressel",
    "Pito",
    "Pescalerie",
    "Marchepied",
    "Liauzu",
    "La diège",
    "Corn",
    "Bullac",
    "Bual",
    "Ayrissac",
    "Anglades",
]
# Keep the listed order, dropping the names for which no cluster was computed.
sources_disponibles = set(df_clusters["Nom"])
noms_valides = [
    candidat for candidat in noms_ordonnes if candidat in sources_disponibles
]
# === 3D plot: one polyline per source ===
fig = plt.figure(figsize=(20, 12))
ax = fig.add_subplot(111, projection='3d')
y_spacing = 4.0  # depth (y) gap between two consecutive sources

# Left anchor of every curve, at height 0. The previous comment announced
# 252 µS/cm, but the code has always used 525, which is also the lower x-axis
# limit set after this loop; the code value is kept.
x_ancrage = 525
# Loop invariants, computed once: right end of every curve and x position of
# the source labels (both derived from the whole cluster table).
xlim_max = df_clusters["conductivite_max"].max() + 10
x_etiquette = df_clusters["conductivite_moyenne"].min() - 30

# Sources are drawn from the last one to the first; the depth of a source is
# its rank in noms_valides times the spacing.
for rang, nom in reversed(list(enumerate(noms_valides))):
    df_nom = df_clusters[df_clusters["Nom"] == nom].sort_values("conductivite_moyenne")
    if df_nom.empty:
        continue
    color = couleurs_sources.get(nom, "gray")
    y_val = rang * y_spacing

    # One three-vertex peak per cluster, in increasing mean conductivity:
    # (min, 0) -> (mean, normalised frequency) -> (max, 0).
    x_pics = df_nom[
        ["conductivite_min", "conductivite_moyenne", "conductivite_max"]
    ].to_numpy().ravel()
    frequences = df_nom["frequence_normalisee"].to_numpy()
    base = np.zeros_like(frequences)
    z_pics = np.column_stack((base, frequences, base)).ravel()
    # No explicit bridge between neighbouring peaks is needed: each peak ends
    # and the next one starts at height 0, so the polyline already joins them
    # along the baseline. The former "close clusters" branch only appended
    # duplicates of those two baseline points.

    # Extend the baseline to the left anchor and to the right end of the axis.
    x_curve = np.concatenate(([x_ancrage], x_pics, [xlim_max]))
    z_curve = np.concatenate(([0.0], z_pics, [0.0]))

    # The whole curve lies in the plane y = y_val.
    ax.plot(
        x_curve,
        np.full(x_curve.shape, y_val),
        z_curve,
        color=color,
        linewidth=2,
        alpha=0.8,
    )
    # Source name, right-aligned at the shared label position.
    ax.text(
        x=x_etiquette,
        y=y_val,
        z=0,
        s=nom,
        color=color,
        fontsize=10,
        ha='right',
        va='center',
    )
# === Final configuration of the figure ===
# The depth (y) axis only separates the sources: no ticks, no label.
ax.set_yticks([])
ax.set_ylabel("")
ax.set_xlabel("Conductivité (c25°C, µS/cm)", labelpad=15)
ax.set_zlabel("Fréquence normalisée", labelpad=10)
ax.set_title("Courbes 3D type histogrammes par source (pics trapézoïdaux DBSCAN)", pad=20)

# Axis ranges: x runs from 525 to just past the largest cluster maximum,
# y leaves one spacing of margin on each side of the curves, and z covers
# the 0..1 range of the normalised frequencies.
borne_x_droite = df_clusters["conductivite_max"].max() + 10
profondeur_totale = len(noms_valides) * y_spacing
ax.set_xlim(525, borne_x_droite)
ax.set_ylim(-y_spacing, profondeur_totale)
ax.set_zlim(0, 1)

# Camera elevation and azimuth, then layout and display.
ax.view_init(elev=10, azim=-120)
plt.tight_layout()
plt.show()