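# Per-source conductivity distributions drawn as stacked curves in a 3D plot.
# Values are read from data.xlsx, clustered per source with DBSCAN, and each
# cluster becomes one peak whose height is its share of the source's samples.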
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Axes3D is not referenced by name in this script; older Matplotlib releases
# needed this import to register the '3d' projection, so it is kept.
from mpl_toolkits.mplot3d import Axes3D
from scipy.interpolate import make_interp_spline
from sklearn.cluster import DBSCAN

# === Load the data ===
# The path is relative to the current working directory.
chemin_fichier = "data.xlsx"
df = pd.read_excel(chemin_fichier)

# === Keep only the rows whose type is "source" ===
# The comparison is case-insensitive; rows with a missing "type" compare
# unequal and are therefore filtered out as well.
df = df[df["type"].str.lower() == "source"]
# Rows without a conductivity value or without a name cannot be clustered.
# NOTE(review): the column key "c25°C " ends with a space; presumably it
# mirrors the spreadsheet header exactly. Confirm before normalising it.
df = df.dropna(subset=["c25°C ", "Nom"])

# === Colour per source ===
# Maps a source name (value of the "Nom" column) to the colour string used
# when plotting it. Names missing from this mapping fall back to "gray" at
# lookup time in the plotting code.
couleurs_sources = {
    "Bullac": "#D77D00",
    "Corn": "#A1C935",
    "Bual": "#F5D200",
    "La diège": "#9271EA",
    "Ayrissac": "#EB64C3",
    "Pito": "#D52117",
    "Ressel": "#1DC6C3",
    "Marchepied": "#3FB94D",
    "Anglades": "#0084C9",
    "Liauzu": "#431D84",
    "Pescalerie": "#D8676B",
    "Sagne": "tab:purple",
}

# === DBSCAN parameters ===
# eps: maximum distance between two values for them to be neighbours,
# expressed in the same unit as the "c25°C " column.
eps_val = 5
# min_samples=1 makes every sample a core point, so DBSCAN never labels a
# sample as noise (-1): each value ends up in some cluster.
min_samples_val = 1

# === Per-source clustering ===
# One summary DataFrame per source is collected here (one row per cluster).
clusters_data = []

for nom in df["Nom"].unique():
    # Work on a copy so that adding the "cluster" column does not touch df.
    sub_df = df[df["Nom"] == nom].copy()
    # DBSCAN expects a 2-D array: one row per sample, a single feature column.
    X = sub_df[["c25°C "]].to_numpy()

    db = DBSCAN(eps=eps_val, min_samples=min_samples_val).fit(X)
    sub_df["cluster"] = db.labels_

    # Summarise each cluster by its value range, mean, and sample count.
    # The cluster label itself is not needed afterwards, hence drop=True.
    grouped = (
        sub_df.groupby("cluster")
        .agg(
            conductivite_min=("c25°C ", "min"),
            conductivite_max=("c25°C ", "max"),
            conductivite_moyenne=("c25°C ", "mean"),
            effectif=("c25°C ", "count"),
        )
        .reset_index(drop=True)
    )

    # Share of this source's samples that fall in each cluster (sums to 1).
    grouped["frequence_normalisee"] = grouped["effectif"] / grouped["effectif"].sum()
    grouped["Nom"] = nom
    clusters_data.append(grouped)

# === Merge the per-source results ===
# pd.concat raises a terse ValueError on an empty list; fail with an explicit
# message instead (same exception type) when no source produced any cluster.
if not clusters_data:
    raise ValueError(
        "Aucun cluster à fusionner : aucune source exploitable dans les données."
    )
df_clusters = pd.concat(clusters_data, ignore_index=True)

# === Display order of the sources ===
noms_ordonnes = [
    "Sagne", "Ressel", "Pito", "Pescalerie", "Marchepied",
    "Liauzu", "La diège", "Corn", "Bullac", "Bual", "Ayrissac", "Anglades",
]
# Keep the order above, restricted to the sources actually present in the
# data. Sources present in the data but absent from noms_ordonnes are not
# plotted.
sources_disponibles = set(df_clusters["Nom"].unique())
noms_valides = [nom for nom in noms_ordonnes if nom in sources_disponibles]

# === 3D plot ===
fig = plt.figure(figsize=(20, 12))
ax = fig.add_subplot(111, projection='3d')

# Distance between two consecutive sources along the Y axis.
y_spacing = 4.0

# X-axis bounds, shared by the curve anchors and by set_xlim below so that
# every curve spans exactly the visible axis.
xlim_min = 525
xlim_max = df_clusters["conductivite_max"].max() + 10
# X position of the source labels (text is right-aligned on this point).
label_x = df_clusters["conductivite_moyenne"].min() - 30

for idx, nom in enumerate(reversed(noms_valides)):
    # Clusters of this source, ordered left to right along the X axis.
    df_nom = df_clusters[df_clusters["Nom"] == nom].sort_values("conductivite_moyenne")
    if df_nom.empty:
        continue

    xs_min = df_nom["conductivite_min"].to_numpy()
    xs_max = df_nom["conductivite_max"].to_numpy()
    xs_center = df_nom["conductivite_moyenne"].to_numpy()
    ys = df_nom["frequence_normalisee"].to_numpy()
    color = couleurs_sources.get(nom, "gray")
    # Sources are drawn in reverse order, but each keeps the Y slot given by
    # its position in noms_valides.
    y_val = (len(noms_valides) - 1 - idx) * y_spacing

    # Left anchor at the axis minimum (525 µS/cm), at height 0.
    x_curve = [xlim_min]
    y_curve = [0]

    last = len(xs_center) - 1
    for i, (x_lo, x_mid, x_hi, freq) in enumerate(zip(xs_min, xs_center, xs_max, ys)):
        # One peak per cluster: 0 at the cluster minimum, the normalised
        # frequency at the cluster mean, back to 0 at the cluster maximum.
        x_curve.extend([x_lo, x_mid, x_hi])
        y_curve.extend([0, freq, 0])

        # Explicit baseline link to the next cluster when the two are very
        # close. The added points repeat the end of this peak and the start
        # of the next one, both at height 0.
        if i < last and xs_min[i + 1] - x_hi <= 1.5:
            x_curve.extend([x_hi, xs_min[i + 1]])
            y_curve.extend([0, 0])

    # Right anchor at the axis maximum, at height 0.
    x_curve.append(xlim_max)
    y_curve.append(0)

    # The frequency is drawn on the Z axis; Y only separates the sources.
    ax.plot(x_curve, [y_val] * len(x_curve), y_curve, color=color, linewidth=2, alpha=0.8)

    # Source name next to its curve.
    ax.text(
        x=label_x,
        y=y_val,
        z=0,
        s=nom,
        color=color,
        fontsize=10,
        ha='right',
        va='center',
    )

# === Final figure configuration ===
# Y carries no quantity (one slot per source), so its ticks and label are hidden.
ax.set_yticks([])
ax.set_xlabel("Conductivité (c25°C, µS/cm)", labelpad=15)
ax.set_ylabel("")
ax.set_zlabel("Fréquence normalisée", labelpad=10)
ax.set_title("Courbes 3D type histogrammes par source (pics trapézoïdaux DBSCAN)", pad=20)

ax.set_xlim(xlim_min, xlim_max)
ax.set_ylim(-y_spacing, len(noms_valides) * y_spacing)
ax.set_zlim(0, 1)
ax.view_init(elev=10, azim=-120)

plt.tight_layout()
plt.show()