"""Fill in missing point coordinates by interpolating along a reference line.

Positional command-line arguments:
    1. input CSV path (``;``-delimited, UTF-8)
    2. shapefile path holding the reference line(s); all geometries are merged
    3. output CSV path (``;``-delimited)
    4. name of the X column in the input CSV
    5. name of the Y column in the input CSV
    6. name of the column that receives the resulting X coordinate
    7. name of the column that receives the resulting Y coordinate

Values in the X/Y columns that cannot be parsed as numbers are treated as
missing. Every row is located on the line by its curvilinear distance; rows
without coordinates get a distance linearly interpolated from their
neighbours (in row order). The helper columns ``geometry`` and
``distance_on_line`` are written to the output CSV as well.
"""
import math
import sys

import geopandas as gpd
import numpy as np
import pandas as pd
import pyproj
from shapely.geometry import Point

USAGE = (
    "usage: python <script> INPUT_CSV LINE_SHAPEFILE OUTPUT_CSV "
    "X_COL Y_COL X_INTER_COL Y_INTER_COL"
)


def convert_to_lambert93(row, transf):
    """Reproject the result coordinates of *row* when its originals are missing.

    Relies on the module-level column names ``x_col``, ``y_col``,
    ``x_inter_col`` and ``y_inter_col``.

    Args:
        row: a DataFrame row (or any mapping) indexed by column name.
        transf: object exposing ``transform(x, y) -> (x, y)``; the script
            passes a pyproj WGS84 -> Lambert 93 transformer.

    Returns:
        The same ``row`` object; its result columns are overwritten only when
        the original X or Y value is NaN.
    """
    # NOTE(review): rows that already have coordinates are left untouched,
    # although the original comment spoke of converting "the entire
    # DataFrame" -- confirm the CRS of the known rows matches the output CRS.
    if math.isnan(row[x_col]) or math.isnan(row[y_col]):
        x, y = transf.transform(row[x_inter_col], row[y_inter_col])
        row[x_inter_col] = x
        row[y_inter_col] = y
    return row


def point_position_on_line(pt):
    """Return the distance along the module-level ``line`` closest to *pt*."""
    return line.project(pt)


if len(sys.argv) < 8:
    # Fail with a readable message instead of a bare IndexError further down.
    sys.exit(USAGE)

input_csv = sys.argv[1]
line_path = sys.argv[2]
output_csv = sys.argv[3]
x_col = sys.argv[4]
y_col = sys.argv[5]
x_inter_col = sys.argv[6]
y_inter_col = sys.argv[7]

# Load the CSV; malformed lines are skipped and unparsable coordinates
# become NaN so they are handled like missing values.
df = pd.read_csv(input_csv, encoding='UTF-8', on_bad_lines='skip', delimiter=';')
df[x_col] = pd.to_numeric(df[x_col], errors='coerce')
df[y_col] = pd.to_numeric(df[y_col], errors='coerce')

# Load the shapefile (must contain LineString geometries) and merge
# multiple lines into a single geometry if needed.
line = gpd.read_file(line_path).union_all()

# Build a point for every row where BOTH coordinates are known.
known_points = [
    Point(x, y) if pd.notna(x) and pd.notna(y) else None
    for x, y in zip(df[x_col], df[y_col])
]

# Position of the known points along the line; NaN where unknown.
distances = pd.Series(
    [line.project(p) if p is not None else np.nan for p in known_points],
    index=df.index,
    dtype=float,
)

# Interpolate the missing distances; limit_direction='both' also fills
# leading and trailing gaps so every row ends up with a position whenever
# at least one row has known coordinates.
df['distance_on_line'] = distances.interpolate(
    method='linear', limit_direction='both'
)

# Turn every distance back into a point on the reference line.
snapped_points = [
    line.interpolate(d) if pd.notna(d) else None
    for d in df['distance_on_line']
]
df['geometry'] = pd.Series(snapped_points, index=df.index, dtype=object)

# NOTE(review): rows with known coordinates also receive their position
# snapped onto the line here; if their original values should be kept
# instead, assign only where x_col / y_col is NaN -- confirm intent.
df[x_inter_col] = [p.x if p else np.nan for p in snapped_points]
df[y_inter_col] = [p.y if p else np.nan for p in snapped_points]

# Reproject (WGS84 -> Lambert 93) the rows selected by convert_to_lambert93.
wgs84 = pyproj.CRS("EPSG:4326")
lambert93 = pyproj.CRS("EPSG:2154")
transformer = pyproj.Transformer.from_crs(wgs84, lambert93, always_xy=True)
df = df.apply(lambda row: convert_to_lambert93(row, transformer), axis=1)

# Save the fixed CSV.
df.to_csv(output_csv, sep=';', index=False)