Files
luce/2025-08-22/main.py
T
2025-09-17 15:51:01 +02:00

225 lines
8.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
# ==========================
# CONFIG
# ==========================
# Input workbook and the sheet that is read from it.
CHEM_PATH = "data_valider_Pierre_toutes_dates_hydro.xlsx"
CHEM_SHEET = "tri_points"

# Column names used by the script.
TYPE_COL = "type"  # 'rivière' / 'source'
HYDRO_COL = "hydrodynamique"  # 'hautes-eaux' / 'moyennes-eaux' / 'étiage'
KM_COL = "km"
CAMP_COL = " nb campagne"  # the column header in the file starts with a space

# Export directory (created right away if it does not exist yet).
EXPORT_DIR = Path("out")
EXPORT_DIR.mkdir(parents=True, exist_ok=True)

# X-axis limits (km) applied to every subplot.
# Set to None for automatic limits; otherwise a (min, max) tuple.
KM_XLIM = (0, 50)

# === COLOURS PER HYDRODYNAMIC STATE ===
hautes_eaux_colors = [
    "#EA66FF",
    "#E01EFF",
    "#C942FF",
    "#B300F9",
    "#8000B2",
    "#591D71",
    "#2C0E38",
]
moyennes_eaux_colors = [
    "#D8E6F2",
    "#B1CEE6",
    "#8BB5D9",
    "#649DCD",
    "#3E85C0",
    "#316A9A",
    "#254F73",
    "#5671F7",
    "#0000FF",
    "#18354D",
]
etiage_colors = [
    "#FFE4B5",
    "#FFA500",
    "#FF0000",
    "#8B0000",
]

# State values EXACTLY as they appear in the file, each paired with its palette.
HYDRO_STATES = [
    ("hautes-eaux", hautes_eaux_colors),
    ("moyennes-eaux", moyennes_eaux_colors),
    ("étiage", etiage_colors),
]
# ==========================
# PARAMETERS TO PLOT
# Each entry needs "variable" (column name in the sheet) and "ylabel"
# (axis label). Optional "y_riv" / "y_src" entries force the y-limits;
# when omitted the limits are computed automatically.
# ==========================
parametres = [
    {"variable": "Fe2+", "ylabel": "Fe2+ (mg/L)"},
    {"variable": "Alcalinité", "ylabel": "HCO3- (mg/L)"},
    {"variable": "Na+", "ylabel": "Na+ (mg/L)"},
    {"variable": "Mg2+", "ylabel": "Mg2+ (mg/L)"},
    {"variable": "T°C", "ylabel": "Température (°C)"},
    {"variable": "pH", "ylabel": "pH"},
    {"variable": "Cl-", "ylabel": "Cl- (mg/L)"},
    # NOTE: this column name ends with a space.
    {"variable": "c25°C ", "ylabel": "Conductivité (μS/cm)"},
    {"variable": "DOC", "ylabel": "DOC (mg/L)"},
    {"variable": "SO42-", "ylabel": "SO42- (mg/L)"},
    {"variable": "SiO2", "ylabel": "SiO2 (mg/L)"},
    {"variable": "PO43-", "ylabel": "PO43- (mg/L)"},
    {"variable": "NO3-", "ylabel": "NO3- (mg/L)"},
    {"variable": "Ca recalculé", "ylabel": "Ca2+ (mg/L)"},
    {"variable": "O2 mg/L", "ylabel": "O2 (mg/L)"},
    # The fluoride ion is written F- (the symbol "Fl" is flerovium).
    {"variable": "Fluorure", "ylabel": "F- (mg/L)"},
    {"variable": "NO2-", "ylabel": "NO2- (mg/L)"},
    {"variable": "NH4+", "ylabel": "NH4+ (mg/L)"},
    {"variable": "δ13C VPDB", "ylabel": "δ13C"},
]
# ==========================
# UTILITAIRES
# ==========================
def sanitize_filename(s: str) -> str:
    """Return *s* turned into a string that is safe to use as a file name.

    ``s`` is converted with ``str()``. Each of the characters
    ``<``, ``>``, ``:``, ``"``, ``/``, backslash, ``|``, ``?`` and ``*`` becomes
    an underscore. Leading and trailing whitespace is removed and every inner
    run of whitespace (newlines included) collapses to a single underscore.
    """
    forbidden_to_underscore = str.maketrans({ch: "_" for ch in '<>:"/\\|?*'})
    cleaned = str(s).translate(forbidden_to_underscore)
    # split() without arguments drops surrounding whitespace and treats any
    # whitespace run as one separator, so no separate strip/newline pass is
    # needed before joining.
    return "_".join(cleaned.split())
def nice_camp_label(c):
    """Build the legend label ``"camp. <id>"`` for a campaign identifier.

    When ``c`` converts to a float holding a whole number, the label shows it
    as an integer (``3.0`` -> ``"camp. 3"``). In every other case, including
    values that cannot be converted to float, ``c`` is shown as-is.
    """
    try:
        as_float = float(c)
        is_whole = as_float.is_integer()
    except Exception:
        # Best effort: anything that cannot be read as a number keeps its
        # raw representation in the label.
        is_whole = False
    return f"camp. {int(as_float)}" if is_whole else f"camp. {c}"
def get_ylim_for(param, df_sources, df_riviere, var, qlo=0.05, qhi=0.95, pad=0.05):
    """
    Return the y-axis limits ``(y_src, y_riv)`` for the column ``var``.

    - If ``param`` contains 'y_src' / 'y_riv', that value is returned as a tuple.
    - Otherwise the limit is derived from the data: the ``qlo``..``qhi``
      quantile range of ``var`` (NaN dropped), widened on each side by
      ``pad`` times its span. If the quantiles are not finite or are equal,
      the min/max of the data is used instead.
    - ``y_riv`` is computed over every row of ``df_riviere`` (all states), so
      that the river subplots can share one scale.
    - With no river values the river limits are ``(0, 1)``; with no source
      values the source limits reuse the river limits.
    """

    def limits_from(frame, if_empty):
        # Limits computed from the non-NaN values of `var` in `frame`.
        values = frame[[var]].dropna()[var]
        if values.empty:
            return if_empty
        low, high = values.quantile([qlo, qhi]).values
        usable = np.isfinite(low) and np.isfinite(high) and low != high
        if not usable:
            low, high = float(values.min()), float(values.max())
        width = max(high - low, 1e-12)  # keeps the span strictly positive
        return (low - pad*width, high + pad*width)

    # River limits come first: they are the fallback for the sources.
    if "y_riv" in param:
        y_riv = tuple(param["y_riv"])
    else:
        y_riv = limits_from(df_riviere, (0, 1))

    if "y_src" in param:
        y_src = tuple(param["y_src"])
    else:
        y_src = limits_from(df_sources, y_riv)

    return y_src, y_riv
def build_campaign_color_map(df_riv, hydro_col, camp_col):
    """Return ``{state: {campaign_id: colour}}`` for every entry of HYDRO_STATES.

    For each state, the distinct non-NaN values of ``camp_col`` among the rows
    whose ``hydro_col`` equals that state are sorted and paired, in order,
    with the state's palette. The palette is reused from its start when there
    are more campaigns than colours. A state with no campaign maps to ``{}``.
    """
    color_by_state = {}
    for state, palette in HYDRO_STATES:
        in_state = df_riv[hydro_col] == state
        campaign_ids = np.sort(pd.unique(df_riv.loc[in_state, camp_col].dropna()))
        color_by_state[state] = {
            camp: palette[rank % len(palette)]
            for rank, camp in enumerate(campaign_ids)
        }
    return color_by_state
# ==========================
# LOADING & PREPARATION
# ==========================
chem = pd.read_excel(CHEM_PATH, sheet_name=CHEM_SHEET)

# Columns the river plots cannot do without. The check runs before the
# columns are first used, so a missing one is reported with this explicit
# message rather than with a bare KeyError raised by pandas further down.
for c in [HYDRO_COL, KM_COL, CAMP_COL]:
    if c not in chem.columns:
        raise KeyError(f"Colonne manquante pour 'rivière': {c}")

# Normalise 'type' (text, trimmed, lower case) so the comparisons below match.
chem[TYPE_COL] = chem[TYPE_COL].astype(str).str.strip().str.lower()

# Numeric columns; values that cannot be parsed become NaN.
chem[KM_COL] = pd.to_numeric(chem[KM_COL], errors="coerce")
chem[CAMP_COL] = pd.to_numeric(chem[CAMP_COL], errors="coerce")

# Logical ordering: km first, then campaign.
chem = chem.sort_values([KM_COL, CAMP_COL])

# Subsets by sample type.
df_sources = chem.loc[chem[TYPE_COL] == "source"].copy()
df_riviere = chem.loc[chem[TYPE_COL] == "rivière"].copy()

# Colours per (state, campaign).
camp_color_map = build_campaign_color_map(df_riviere, HYDRO_COL, CAMP_COL)
# ==========================
# PLOTTING
# One figure per parameter: a sources scatter on top, then one river subplot
# per hydrodynamic state, all against km. Each figure is saved as a PNG in
# EXPORT_DIR.
# ==========================
for p in parametres:
    var, ylabel = p["variable"], p["ylabel"]
    if var not in chem.columns:
        print(f"[AVERTISSEMENT] '{var}' absent → figure ignorée.")
        continue

    # qlo=0 / qhi=1: the automatic limits span the full data range.
    y_src_limits, y_riv_limits = get_ylim_for(p, df_sources, df_riviere, var, qlo=0, qhi=1)

    # One row for the sources plus one row per hydrodynamic state, so the
    # grid always matches the axes[i] indexing in the state loop below.
    fig, axes = plt.subplots(nrows=1 + len(HYDRO_STATES), ncols=1, sharex=False, figsize=(12, 9))
    # Keep room on the right for the legend.
    fig.subplots_adjust(hspace=0.18, right=0.80)
    fig.suptitle(f"{var} — X = km (campagnes en courbes pour la rivière)", fontsize=12, x=0.01, ha="left")

    # 1) SOURCES: X = km (grey scatter)
    ax_src = axes[0]
    if not df_sources.empty:
        ds = df_sources[[KM_COL, var]].dropna()
        ax_src.scatter(ds[KM_COL], ds[var], s=24, alpha=0.7,
                       edgecolors="white", linewidths=0.4, color="#666666")
    ax_src.set_ylabel(ylabel + "\n(sources)")
    ax_src.set_xlabel("km")
    if KM_XLIM:
        ax_src.set_xlim(*KM_XLIM)
    ax_src.set_ylim(*y_src_limits)
    ax_src.grid(True, linestyle=":", alpha=0.35)

    # 2..N) RIVER, one subplot per STATE: X = km; one curve per 'nb campagne'
    legend_handles, legend_labels = [], []
    for i, (state, palette) in enumerate(HYDRO_STATES, start=1):
        ax = axes[i]
        sub = df_riviere.loc[df_riviere[HYDRO_COL] == state, [KM_COL, CAMP_COL, var]]
        sub = sub.dropna(subset=[KM_COL, var])
        if not sub.empty:
            for camp, grp in sub.groupby(CAMP_COL):
                grp = grp.sort_values(KM_COL)
                # Fall back to the first palette colour (or "C0") when the
                # campaign has no entry in the colour map.
                color = camp_color_map.get(state, {}).get(camp, (palette[0] if palette else "C0"))
                h = ax.plot(grp[KM_COL], grp[var], marker="o", ms=4, lw=1.6, color=color)[0]
                lab = nice_camp_label(camp)
                # Each campaign label is listed only once in the legend.
                if lab not in legend_labels:
                    legend_handles.append(h)
                    legend_labels.append(lab)
        ax.set_ylabel(f"{ylabel}\n({state})")
        ax.set_xlabel("km")
        if KM_XLIM:
            ax.set_xlim(*KM_XLIM)
        ax.set_ylim(*y_riv_limits)
        ax.grid(True, linestyle=":", alpha=0.35)

    # Single legend (campaigns) on the right, outside the plotting area.
    if legend_handles:
        axes[1].legend(
            legend_handles, legend_labels,
            loc="center left", bbox_to_anchor=(1.02, 0.5),
            frameon=False, ncol=1, title="Campagnes", borderaxespad=0.0
        )

    # Export
    fname = sanitize_filename(f"{var}_rivieres_sources.png")
    outpath = EXPORT_DIR / fname
    fig.savefig(outpath, dpi=200, bbox_inches="tight")
    plt.close(fig)
    print(f"[OK] Export: {outpath.resolve()}")