Newer
Older
import pandas as pd
import numpy as np
from scipy.stats import gaussian_kde
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.compose import TransformedTargetRegressor
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import os
import pdb
def monthly_climatology(daily_input,t_unit):
if isinstance(daily_input, str):
daily_input = pd.read_csv(daily_input, index_col=0, parse_dates=True)
monthly_mean_columns = [c for c in daily_input.columns if c[0] in ['Q', 'T']]
monthly_mean = daily_input.loc[:, monthly_mean_columns].groupby(by=daily_input.index.month).mean()
#remember to add ['E', 'P']
monthly_sum_columns = [c for c in daily_input.columns if c[0] in ['P','E']]
monthly_sum = daily_input.loc[:, monthly_sum_columns].groupby(by=daily_input.index.month).mean() * t_unit
#pdb.set_trace()
return pd.concat([monthly_mean, monthly_sum], axis=1)#[monthly_mean_columns,monthly_sum_columns]
def daily_climatology_p_et_ensemble2(daily_input,t_unit,radius=2/3):
# This function takes as input the daily temperature, precipitation and runoff and generates the input-target matrix
runoff = daily_input[['Q']]
temp = daily_input[[c for c in daily_input.columns if c[0] == 'T']]
prec = daily_input[[c for c in daily_input.columns if c[0] == 'P']]
evap = daily_input[[c for c in daily_input.columns if c[0] == 'E']]
snow = daily_input[[c for c in daily_input.columns if c[0] == 'S']]
runoff_t_unit = runoff.rolling(30, min_periods=30).mean()
# Compute the t_unit days average temperature
temp_t_unit = temp.rolling(t_unit, min_periods=t_unit).mean()
# Compute the t_unit days average snow water equivalent
if not snow.empty:
snow_t_unit = snow.rolling(t_unit, min_periods=t_unit).mean()
# Compute the t_unit days sum evapotranspiration
evap_t_unit = evap.rolling(t_unit, min_periods=t_unit).sum()
# Compute the t_unit days sum precipitation
prec_t_unit = prec.rolling(t_unit, min_periods=t_unit).sum()
daily_t_unit = pd.concat([runoff_t_unit ,prec_t_unit, temp_t_unit, evap_t_unit, snow_t_unit], axis=1)
daily = daily_t_unit.groupby(by=daily_t_unit.index.dayofyear).mean()
prec_mean=prec_t_unit.groupby(by=prec_t_unit.index.dayofyear).mean()
prec_q75=prec_t_unit.groupby(by=prec_t_unit.index.dayofyear).quantile(q=0.75)
prec_q25=prec_t_unit.groupby(by=prec_t_unit.index.dayofyear).quantile(q=0.25)
prec_q25=prec_q25.add_suffix('_Q25')
temp_q75=pd.DataFrame(data=None, columns=temp_t_unit.columns)
evap_q75=pd.DataFrame(data=None, columns=evap_t_unit.columns)
snow_q75=pd.DataFrame(data=None, columns=snow_t_unit.columns)
temp_q25=pd.DataFrame(data=None, columns=temp_t_unit.columns)
evap_q25=pd.DataFrame(data=None, columns=evap_t_unit.columns)
snow_q25=pd.DataFrame(data=None, columns=snow_t_unit.columns)
#get the range as 2/3 of the mean difference between the quantile and the mean precipitation over the year.
range75= radius*(prec_q75.mean().mean()-prec_t_unit).mean().mean()
range25= radius*(-prec_q25.mean().mean()+prec_t_unit).mean().mean()
for i in range(1,367):
#get dates where the dayofyear is ==1 for the 3 variables
prec_t_unit_i=prec_t_unit[day_i]
temp_t_unit_i=temp_t_unit[day_i]
evap_t_unit_i=evap_t_unit[day_i]
snow_t_unit_i=snow_t_unit[day_i]
#compute the mean and interesting quantiles for the precipitation
prec_t_unit_i_mean=prec_t_unit_i.mean(axis=1)
#prec_t_unit_i_mean=prec_t_unit_i.drop(columns=prec_t_unit.columns[-4:]).mean(axis=1)
prec_q75_i=prec_q75[prec_q75.index==i].mean(axis=1)
prec_q25_i=prec_q25[prec_q25.index==i].mean(axis=1)
#select the situations where the precipitation is similar (closer than a range) to the quantile
situations75=(np.abs(np.array(prec_t_unit_i_mean)-np.array(prec_q75_i))<range75)
situations25=(np.abs(np.array(prec_t_unit_i_mean)-np.array(prec_q25_i))<range25)
#average the variables values happened int the selected situations and append it
s.name=i
temp_q75=temp_q75.append(s)
s.name=i
evap_q75=evap_q75.append(s)
s=snow_t_unit_i[situations75].mean()
s.name=i
snow_q75=snow_q75.append(s)
s.name=i
temp_q25=temp_q25.append(s)
s.name=i
evap_q25=evap_q25.append(s)
s=snow_t_unit_i[situations25].mean()
s.name=i
snow_q25=snow_q25.append(s)
temp_q75=temp_q75.add_suffix('_Q75')
evap_q75=evap_q75.add_suffix('_Q75')
snow_q75=snow_q75.add_suffix('_Q75')
temp_q25=temp_q25.add_suffix('_Q25')
evap_q25=evap_q25.add_suffix('_Q25')
snow_q25=snow_q25.add_suffix('_Q25')
temp_q75, prec_q75, evap_q75, snow_q75,
temp_q25, prec_q25, evap_q25, snow_q25], axis=1).dropna()
daily_clim.loc[366]=daily_clim.loc[365]
def daily_climatology_p_et_ensemble(daily_input,t_unit,radius=2/3):
# This function takes as input the daily temperature, precipitation and runoff and generates the input-target matrix
runoff = daily_input[['Q']]
prec = daily_input[[c for c in daily_input.columns if c[0] == 'P']]
other= daily_input[[c for c in daily_input.columns if (c[0] != 'Q' and c[0] != 'P')]]
# Compute the t_unit days average runoff
runoff_t_unit = runoff.rolling(30, min_periods=30).mean()
# Compute the t_unit days average temperature, snow water equivalent and the sum for the evapotranspiration.
if not other.empty:
other_t_unit = other.rolling(t_unit, min_periods=t_unit).mean()
other_t_unit[[c for c in other_t_unit.columns if c[0]=='E']] = other_t_unit[[c for c in other_t_unit.columns if c[0]=='E']]*t_unit
# Compute the t_unit days sum precipitation
if not prec.empty:
prec_t_unit = prec.rolling(t_unit, min_periods=t_unit).sum()
daily_t_unit = pd.concat([runoff_t_unit ,prec_t_unit, other_t_unit], axis=1)
daily = daily_t_unit.groupby(by=daily_t_unit.index.dayofyear).mean()
prec_mean=prec_t_unit.groupby(by=prec_t_unit.index.dayofyear).mean()
prec_q75=prec_t_unit.groupby(by=prec_t_unit.index.dayofyear).quantile(q=0.75)
prec_q75=prec_q75.add_suffix('_Q75')
prec_q25=prec_t_unit.groupby(by=prec_t_unit.index.dayofyear).quantile(q=0.25)
prec_q25=prec_q25.add_suffix('_Q25')
other_q75=pd.DataFrame(data=None, columns=other_t_unit.columns)
other_q25=pd.DataFrame(data=None, columns=other_t_unit.columns)
#get the range as 2/3 of the mean difference between the quantile and the mean precipitation over the year.
range75= radius*(prec_q75.mean().mean()-prec_t_unit).mean().mean()
range25= radius*(-prec_q25.mean().mean()+prec_t_unit).mean().mean()
for i in range(1,367):
#get dates where the dayofyear is ==i for the 3 variables
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
prec_t_unit_i=prec_t_unit[day_i]
other_t_unit_i=other_t_unit[day_i]
#compute the mean and interesting quantiles for the precipitation
prec_t_unit_i_mean=prec_t_unit_i.mean(axis=1)
prec_q75_i=prec_q75[prec_q75.index==i].mean(axis=1)
prec_q25_i=prec_q25[prec_q25.index==i].mean(axis=1)
#select the situations where the precipitation is similar (closer than a range) to the quantile
situations75=(np.abs(np.array(prec_t_unit_i_mean)-np.array(prec_q75_i))<range75)
situations25=(np.abs(np.array(prec_t_unit_i_mean)-np.array(prec_q25_i))<range25)
s=other_t_unit_i.mean()
if situations75.sum() != 0:
if situations75.sum() == 1:
s=other_t_unit_i[situations75]
s=pd.DataFrame(data=s.values, index=range(i,i+1),columns=s.columns)
else:
s=other_t_unit_i[situations75].mean()
s.name=i
other_q75=other_q75.append(s)
p=other_t_unit_i.mean()
if situations25.sum() != 0:
if situations25.sum() == 1:
p=other_t_unit_i[situations25]
p=pd.DataFrame(data=p.values, index=range(i,i+1),columns=p.columns)
else:
p=other_t_unit_i[situations25].mean()
p.name=i
other_q25=other_q25.append(p)
other_q75=other_q75.add_suffix('_Q75')
other_q75.index=list(range(1,367))
other_q25=other_q25.add_suffix('_Q25')
other_q25.index=list(range(1,367))
#pdb.set_trace()
daily_clim=pd.concat([daily,
prec_q75, other_q75,
prec_q25, other_q25], axis=1).dropna()
if daily_clim.index.max() == 365:
daily_clim.loc[366]=daily_clim.loc[365]
return daily_clim
def daily_climatology_p_ensemble(daily_input, t_unit):
runoff = daily_input[['Q']]
temp = daily_input[[c for c in daily_input.columns if c[0] == 'T']]
prec = daily_input[[c for c in daily_input.columns if c[0] == 'P']]
evap = daily_input[[c for c in daily_input.columns if c[0] == 'E']]
runoff_t_unit = runoff.rolling(30, min_periods=30).mean()
# Compute the t_unit days average temperature
temp_t_unit = temp.rolling(t_unit, min_periods=t_unit).mean()
#temp_t_unit = pd.concat([shift_series_t_unitdays(temp_t_unit.loc[:, col], (-t_length + 1, 1)) for col in temp_t_unit], axis=1)
# Compute the t_unit days sum evapotranspiration
evap_t_unit = evap.rolling(t_unit, min_periods=t_unit).sum()
#evap_t_unit = pd.concat([shift_series_t_unitdays(evap_t_unit.loc[:, col], (-t_length + 1, 1)) for col in evap_t_unit], axis=1)
# Compute the t_unit days sum precipitation
prec_t_unit = prec.rolling(t_unit, min_periods=t_unit).sum()
#prec_t_unit = pd.concat([shift_series_t_unitdays(prec_t_unit.loc[:, col], (-t_length + 1, 1)) for col in prec_t_unit], axis=1)
daily_t_unit = pd.concat([runoff_t_unit, temp_t_unit, evap_t_unit], axis=1)
daily = daily_t_unit.groupby(by=daily_t_unit.index.dayofyear).mean()
prec_mean=prec_t_unit.groupby(by=prec_t_unit.index.dayofyear).mean()
prec_q75 =prec_t_unit.groupby(by=prec_t_unit.index.dayofyear).quantile(q=0.75)
prec_q75=prec_q75.add_suffix('_Q75')
prec_q25 =prec_t_unit.groupby(by=prec_t_unit.index.dayofyear).quantile(q=0.25)
prec_q25=prec_q25.add_suffix('_Q25')
daily = pd.concat([daily,prec_mean,prec_q75,prec_q25], axis=1)
return daily