In [1]:
pip install pandas matplotlib openpyxl
Defaulting to user installation because normal site-packages is not writeableNote: you may need to restart the kernel to use updated packages. Requirement already satisfied: pandas in c:\programdata\anaconda3\lib\site-packages (2.2.2) Requirement already satisfied: matplotlib in c:\programdata\anaconda3\lib\site-packages (3.8.4) Requirement already satisfied: openpyxl in c:\programdata\anaconda3\lib\site-packages (3.1.2) Requirement already satisfied: numpy>=1.26.0 in c:\programdata\anaconda3\lib\site-packages (from pandas) (1.26.4) Requirement already satisfied: python-dateutil>=2.8.2 in c:\programdata\anaconda3\lib\site-packages (from pandas) (2.9.0.post0) Requirement already satisfied: pytz>=2020.1 in c:\programdata\anaconda3\lib\site-packages (from pandas) (2024.1) Requirement already satisfied: tzdata>=2022.7 in c:\programdata\anaconda3\lib\site-packages (from pandas) (2023.3) Requirement already satisfied: contourpy>=1.0.1 in c:\programdata\anaconda3\lib\site-packages (from matplotlib) (1.2.0) Requirement already satisfied: cycler>=0.10 in c:\programdata\anaconda3\lib\site-packages (from matplotlib) (0.11.0) Requirement already satisfied: fonttools>=4.22.0 in c:\programdata\anaconda3\lib\site-packages (from matplotlib) (4.51.0) Requirement already satisfied: kiwisolver>=1.3.1 in c:\programdata\anaconda3\lib\site-packages (from matplotlib) (1.4.4) Requirement already satisfied: packaging>=20.0 in c:\programdata\anaconda3\lib\site-packages (from matplotlib) (23.2) Requirement already satisfied: pillow>=8 in c:\programdata\anaconda3\lib\site-packages (from matplotlib) (10.3.0) Requirement already satisfied: pyparsing>=2.3.1 in c:\programdata\anaconda3\lib\site-packages (from matplotlib) (3.0.9) Requirement already satisfied: et-xmlfile in c:\programdata\anaconda3\lib\site-packages (from openpyxl) (1.1.0) Requirement already satisfied: six>=1.5 in c:\programdata\anaconda3\lib\site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)
In [1]:
import pandas as pd
# Convert each source Excel workbook into a cleaned, fully numeric CSV file.
#
# Both path lists are derived from the same dataset names so that every input
# is always paired with the matching output. The drive root is written as
# 'Y:/' for every entry: a path such as 'Y:MISSIONS/...' (no slash after the
# colon) is drive-relative on Windows and resolves against the current
# directory of drive Y: instead of its root.
SOURCE_DIR = 'Y:/MISSIONS/Eau/8 - Projet recherche Célé/Lucie/Science/Canoo'
OUTPUT_DIR = 'C:/Users/lnoguera.PNR/Desktop/canoo_2020'
dataset_names = ['rive_droite_2024', 'rive_gauche_2024', 'canoo_2023', 'canoo_2020']

# List of file paths (Excel inputs)
files = [f'{SOURCE_DIR}/{name}.xlsx' for name in dataset_names]
# Output paths for cleaned data (CSV, same order as `files`)
output_paths = [f'{OUTPUT_DIR}/{name}.csv' for name in dataset_names]

dfs = []  # Cleaned dataframes, in the same order as `files`
for file, output_path in zip(files, output_paths):
    # Read the first sheet of the Excel file
    df = pd.read_excel(file)

    # Clean the data: drop rows with missing cells, force every column to a
    # numeric dtype (unparseable values become NaN), then drop the rows that
    # the coercion just invalidated.
    df_clean = df.dropna()
    df_clean = df_clean.apply(pd.to_numeric, errors='coerce')
    df_clean = df_clean.dropna()

    # Save the cleaned dataframe without the index column
    df_clean.to_csv(output_path, index=False)

    # Keep the cleaned dataframe in memory as well
    dfs.append(df_clean)
In [12]:
import pandas as pd
import matplotlib.pyplot as plt
# Overlay every cleaned dataset on a single scatter plot
# (first CSV column on the x-axis, second CSV column on the y-axis).
files = [
    'C:/Users/lnoguera.PNR/Desktop/canoo_2020/rive_droite_2024.csv',
    'C:/Users/lnoguera.PNR/Desktop/canoo_2020/rive_gauche_2024.csv',
    'C:/Users/lnoguera.PNR/Desktop/canoo_2020/canoo_2023.csv',
    'C:/Users/lnoguera.PNR/Desktop/canoo_2020/canoo_2020.csv'
]
colors = ['blue', 'green', 'red', 'purple']  # One marker colour per dataset
labels = ['2024 Rive Droite', '2024 Rive Gauche', '2023 Canoo', '2020 Canoo']  # Legend entries, same order as `files`

# Load all cleaned CSV files up front
dfs = [pd.read_csv(csv_path) for csv_path in files]

# One figure with a custom size, one axes shared by all datasets
fig, ax = plt.subplots(figsize=(12, 8))
for frame, marker_color, legend_label in zip(dfs, colors, labels):
    ax.scatter(frame.iloc[:, 0], frame.iloc[:, 1],
               color=marker_color, label=legend_label, alpha=0.6)

# NOTE(review): the x-axis is labelled in meters here, while the later plotting
# cell labels the same CSV column in kilometers — confirm the actual unit.
ax.set_title('Conductivity by Distance for Different Years')
ax.set_xlabel('Distance (meters)')
ax.set_ylabel('Conductivity')
ax.legend()
ax.grid(True)
plt.show()
In [3]:
import pandas as pd
import matplotlib.pyplot as plt
# Final figure: scatter of the first two columns of every cleaned CSV,
# saved as a PNG and then displayed.
files = [
    'C:/Users/lnoguera.PNR/Desktop/canoo_2020/rive_droite_2024.csv',
    'C:/Users/lnoguera.PNR/Desktop/canoo_2020/rive_gauche_2024.csv',
    'C:/Users/lnoguera.PNR/Desktop/canoo_2020/canoo_2023.csv',
    'C:/Users/lnoguera.PNR/Desktop/canoo_2020/canoo_2020.csv'
]
colors = ['blue', 'limegreen', 'red', 'purple']  # One marker colour per dataset
labels = ['2024 Canoe Right Bank', '2024 Canoe Left Bank', '2023 Canoe', '2020 Canoe']  # Legend entries, same order as `files`

# Load all cleaned CSV files up front
dfs = [pd.read_csv(csv_path) for csv_path in files]

# Points of interest (distance along the x-axis, name) intended for vertical
# marker lines. The table is built here but is not drawn: the annotation code
# further down is disabled.
poi_records = [
    (2.624552714, 'Bullac'),
    (5.344023139, 'Corn'),
    (3.99491, 'Bual'),
    (12.14835075, 'Diege'),
    (14.082, 'Cross du renard'),
    (18.2607284858491, 'Ayrissac'),
    (22.79516878, 'Pito'),
    (25.95420761, 'Ressel'),
    (30.19047331, 'Marchepied'),
    (35.14127516, 'Anglades'),
    (42.08013205, 'Liauzu'),
    (43.45151653, 'Pescalerie'),
    (46.29024454, 'Sagne'),
]
points_of_interest = pd.DataFrame(poi_records, columns=['distance', 'source'])

# One figure with a custom size; small, nearly opaque markers for each dataset
fig, ax = plt.subplots(figsize=(12, 8))
for frame, marker_color, legend_label in zip(dfs, colors, labels):
    ax.scatter(frame.iloc[:, 0], frame.iloc[:, 1],
               color=marker_color, label=legend_label, alpha=0.8, s=2)

# Disabled: dashed vertical line plus rotated name label per point of interest.
# NOTE(review): the "- 200" text offset looks like it was written for an axis
# in meters; it would need adjusting for a kilometer axis — confirm before
# re-enabling.
# for index, row in points_of_interest.iterrows():
#     plt.axvline(x=row['distance'], color='black', linestyle='--', alpha=0.8)
#     plt.text(row['distance'] - 200, plt.ylim()[1]*1.01, row['source'], rotation=90,
#              verticalalignment='bottom', fontsize=12, color='black', fontweight='bold')

ax.set_title('Conductivity by kilometers of river for different year of continuous measurements',
             y=1, fontsize=15, fontweight='bold')
ax.set_xlabel('Distance (kilometers)')
# NOTE(review): conductivity is normally reported in µS/cm; "us/cm" may be an
# ASCII stand-in — confirm the intended unit label.
ax.set_ylabel('Conductivity (us/cm)')
# Opaque white legend box with a square black border, so markers do not show through
ax.legend(loc='lower right', frameon=True, facecolor='white', edgecolor='black',
          fancybox=False, framealpha=1)
ax.grid(True)

# Save the figure to disk before showing it
fig.savefig('C:/Users/lnoguera.PNR/Desktop/canoo_2020/my_conductivity_plot2.png',
            format='png', dpi=300, bbox_inches='tight')
plt.show()
In [ ]: