diff --git a/Notebooks/Figures/bootstrap/tasmin_members_vs_ensemble_extremes.png b/Notebooks/Figures/bootstrap/tasmin_members_vs_ensemble_extremes.png new file mode 100644 index 0000000000000000000000000000000000000000..3e7d6a71e1bb9125f3aae97a9fd0b146ec67a778 Binary files /dev/null and b/Notebooks/Figures/bootstrap/tasmin_members_vs_ensemble_extremes.png differ diff --git a/Notebooks/Figures/bootstrap/tasmin_members_vs_ensemble_mean.png b/Notebooks/Figures/bootstrap/tasmin_members_vs_ensemble_mean.png new file mode 100644 index 0000000000000000000000000000000000000000..b61ee66f42b1f2f72324d8713bfd836a027d34f0 Binary files /dev/null and b/Notebooks/Figures/bootstrap/tasmin_members_vs_ensemble_mean.png differ diff --git a/Notebooks/Figures/sensitivity/precipitation_pr_sensitivity_regularization.png b/Notebooks/Figures/sensitivity/precipitation_pr_sensitivity_regularization.png new file mode 100644 index 0000000000000000000000000000000000000000..88348ef551c2f703f8ec34b20d7b4c052e0ac2b4 Binary files /dev/null and b/Notebooks/Figures/sensitivity/precipitation_pr_sensitivity_regularization.png differ diff --git a/Notebooks/Figures/sensitivity/precipitation_ztuvq_p_dem_doy_sensitivity_regularization.png b/Notebooks/Figures/sensitivity/precipitation_ztuvq_p_dem_doy_sensitivity_regularization.png new file mode 100644 index 0000000000000000000000000000000000000000..132705b905bd46140a25717287e539d4ea2c87ab Binary files /dev/null and b/Notebooks/Figures/sensitivity/precipitation_ztuvq_p_dem_doy_sensitivity_regularization.png differ diff --git a/Notebooks/Figures/sensitivity/tasmax_ztuvq_p_dem_doy_sensitivity_regularization.png b/Notebooks/Figures/sensitivity/tasmax_ztuvq_p_dem_doy_sensitivity_regularization.png new file mode 100644 index 0000000000000000000000000000000000000000..0f9984a0f5aae8d44952e09af35d08b6d6b864ea Binary files /dev/null and b/Notebooks/Figures/sensitivity/tasmax_ztuvq_p_dem_doy_sensitivity_regularization.png differ diff --git 
a/Notebooks/Figures/sensitivity/tasmin_ztuvq_p_dem_doy_sensitivity_regularization.png b/Notebooks/Figures/sensitivity/tasmin_ztuvq_p_dem_doy_sensitivity_regularization.png new file mode 100644 index 0000000000000000000000000000000000000000..f5da91ad415045932e27ce0650b02976e390ed47 Binary files /dev/null and b/Notebooks/Figures/sensitivity/tasmin_ztuvq_p_dem_doy_sensitivity_regularization.png differ diff --git a/Notebooks/eval_bootstrap.ipynb b/Notebooks/eval_bootstrap.ipynb index 2e2d9a147b9efcee9230c5f8631d1c8c1c1e2c2f..c84074d75a9f373ffea1779a79a394564abd7ec9 100644 --- a/Notebooks/eval_bootstrap.ipynb +++ b/Notebooks/eval_bootstrap.ipynb @@ -27,6 +27,7 @@ "source": [ "# builtins\n", "import pathlib\n", + "import warnings\n", "\n", "# externals\n", "import numpy as np\n", @@ -81,7 +82,18 @@ "outputs": [], "source": [ "# predictand to evaluate\n", - "PREDICTAND = 'pr'" + "PREDICTAND = 'tasmin'" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "49e03dc8-e709-4877-922a-4914e61d7636", + "metadata": {}, + "outputs": [], + "source": [ + "# whether only precipitation was used as predictor\n", + "PR_ONLY = False" ] }, { @@ -104,8 +116,12 @@ "outputs": [], "source": [ "# model to evaluate\n", - "models = ['USegNet_{}_ztuvq_500_850_p_dem_doy_1mm_{}_{}'.format(PREDICTAND, loss, OPTIM) if loss == 'BernoulliGammaLoss' else\n", - " 'USegNet_{}_ztuvq_500_850_p_dem_doy_{}_{}'.format(PREDICTAND, loss, OPTIM) for loss in LOSS]" + "if PREDICTAND == 'pr' and PR_ONLY:\n", + " models = ['USegNet_pr_pr_1mm_{}_{}'.format(loss, OPTIM) if loss == 'BernoulliGammaLoss' else\n", + " 'USegNet_pr_pr_{}_{}'.format(loss, OPTIM) for loss in LOSS]\n", + "else:\n", + " models = ['USegNet_{}_ztuvq_500_850_p_dem_doy_1mm_{}_{}'.format(PREDICTAND, loss, OPTIM) if loss == 'BernoulliGammaLoss' else\n", + " 'USegNet_{}_ztuvq_500_850_p_dem_doy_{}_{}'.format(PREDICTAND, loss, OPTIM) for loss in LOSS]" ] }, { @@ -349,11 +365,14 @@ ] }, { - "cell_type": 
"markdown", - "id": "c4114092-81ef-4547-9485-f54a12ac2a16", + "cell_type": "code", + "execution_count": null, + "id": "e8adcb5e-c7b4-4156-85b3-4751020160e6", "metadata": {}, + "outputs": [], "source": [ - "### Coefficient of determination" + "# extreme quantile of interest\n", + "quantile = 0.02 if PREDICTAND == 'tasmin' else 0.98" ] }, { @@ -401,6 +420,39 @@ " return r2_mm, r2_anom" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "074d7405-a01b-4368-b98b-06d8d46f1ce6", + "metadata": {}, + "outputs": [], + "source": [ + "def bias(y_pred, y_true, relative=False):\n", + " return (((y_pred - y_true) / y_true) * 100).mean().values.item() if relative else (y_pred - y_true).mean().values.item()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc13939-e517-47bd-aa7d-0addd6715538", + "metadata": {}, + "outputs": [], + "source": [ + "def mae(y_pred, y_true):\n", + " return np.abs(y_pred - y_true).mean().values.item()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c93f497e-760e-4484-aeb6-ce54f561a7f6", + "metadata": {}, + "outputs": [], + "source": [ + "def rmse(y_pred, y_true):\n", + " return np.sqrt(((y_pred - y_true) ** 2).mean().values.item())" + ] + }, { "cell_type": "markdown", "id": "3e6ecc98-f32f-42f7-9971-64b270aa5453", @@ -416,7 +468,7 @@ "id": "671cd3c0-8d6c-41c1-bf8e-93f5943bf9aa", "metadata": {}, "source": [ - "Calculate yearly average bias, MAE, and RMSE over entire reference period for model predictions, ERA-5, and QM-adjusted ERA-5." 
+ "### Metrics for mean values" ] }, { @@ -441,9 +493,9 @@ "source": [ "# yearly average r2, bias, mae, and rmse for ERA-5\n", "r2_refe_mm, r2_refe_anom = r2(y_refe, y_true)\n", - "bias_refe = ((y_refe_yearly_avg - y_true_yearly_avg) / y_true_yearly_avg) * 100 if PREDICTAND == 'pr' else y_refe_yearly_avg - y_true_yearly_avg\n", - "mae_refe = np.abs(y_refe_yearly_avg - y_true_yearly_avg)\n", - "rmse_refe = (y_refe_yearly_avg - y_true_yearly_avg) ** 2" + "bias_refe = bias(y_refe_yearly_avg, y_true_yearly_avg, relative=True if PREDICTAND == 'pr' else False)\n", + "mae_refe = mae(y_refe_yearly_avg, y_true_yearly_avg)\n", + "rmse_refe = rmse(y_refe_yearly_avg, y_true_yearly_avg)" ] }, { @@ -455,9 +507,68 @@ "source": [ "# yearly average r2, bias, mae, and rmse for QM-Adjusted ERA-5\n", "r2_refe_qm_mm, r2_refe_qm_anom = r2(y_refe_qm, y_true)\n", - "bias_refe_qm = ((y_refe_qm_yearly_avg - y_true_yearly_avg) / y_true_yearly_avg) * 100 if PREDICTAND == 'pr' else y_refe_qm_yearly_avg - y_true_yearly_avg\n", - "mae_refe_qm = np.abs(y_refe_qm_yearly_avg - y_true_yearly_avg)\n", - "rmse_refe_qm = (y_refe_qm_yearly_avg - y_true_yearly_avg) ** 2" + "bias_refe_qm = bias(y_refe_qm_yearly_avg, y_true_yearly_avg, relative=True if PREDICTAND == 'pr' else False)\n", + "mae_refe_qm = mae(y_refe_qm_yearly_avg, y_true_yearly_avg)\n", + "rmse_refe_qm = rmse(y_refe_qm_yearly_avg, y_true_yearly_avg)" + ] + }, + { + "cell_type": "markdown", + "id": "c07684d1-76c0-4088-bdd7-7e1a6ccc4716", + "metadata": {}, + "source": [ + "### Metrics for extreme values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "343aad59-4b0a-4eec-9ac3-86e5f9d06fc6", + "metadata": {}, + "outputs": [], + "source": [ + "# calculate extreme quantile for each year\n", + "with warnings.catch_warnings():\n", + " warnings.simplefilter('ignore', category=RuntimeWarning)\n", + " y_true_ex = y_true.chunk(dict(time=-1)).groupby('time.year').quantile(quantile, dim='time')\n", + " y_refe_ex = 
y_refe.chunk(dict(time=-1)).groupby('time.year').quantile(quantile, dim='time')\n", + " y_refe_qm_ex = y_refe_qm.chunk(dict(time=-1)).groupby('time.year').quantile(quantile, dim='time')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fbbc648a-82a7-4137-b8ea-5dccb56a65c7", + "metadata": {}, + "outputs": [], + "source": [ + "# bias in extreme quantile\n", + "bias_ex_refe = bias(y_refe_ex, y_true_ex, relative=True if PREDICTAND == 'pr' else False)\n", + "bias_ex_refe_qm = bias(y_refe_qm_ex, y_true_ex, relative=True if PREDICTAND == 'pr' else False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "44a3a0e7-ca39-49ce-b569-51b0022161ed", + "metadata": {}, + "outputs": [], + "source": [ + "# mean absolute error in extreme quantile\n", + "mae_ex_refe = mae(y_refe_ex, y_true_ex)\n", + "mae_ex_refe_qm = mae(y_refe_qm_ex, y_true_ex)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a90ce1dc-cf94-4081-9add-1d26195f2302", + "metadata": {}, + "outputs": [], + "source": [ + "# root mean squared error in extreme quantile\n", + "rmse_ex_refe = rmse(y_refe_ex, y_true_ex)\n", + "rmse_ex_refe_qm = rmse(y_refe_qm_ex, y_true_ex)" ] }, { @@ -474,14 +585,13 @@ " df_refe = pd.read_csv(filename)\n", "else:\n", " # compute validation metrics\n", - " df_refe = pd.DataFrame([], columns=['r2_mm', 'r2_anom', 'bias', 'mae', 'rmse', 'product'])\n", - " for product, metrics in zip(['Era-5', 'Era-5 QM'], [[bias_refe, mae_refe, rmse_refe], [bias_refe_qm, mae_refe_qm, rmse_refe_qm]]):\n", - " values = pd.DataFrame([[np.sqrt(m.mean().values.item()) if name == 'rmse' else m.mean().values.item() for\n", - " name, m in zip(['bias', 'mae', 'rmse'], metrics)] + [product]], columns=df_refe.columns[2:])\n", - " values['r2_mm'] = r2_refe_mm if product == 'Era-5' else r2_refe_qm_mm\n", - " values['r2_anom'] = r2_refe_anom if product == 'Era-5' else r2_refe_qm_anom\n", - " df_refe = df_refe.append(values, ignore_index=True)\n", - " \n", + " 
df_refe = pd.DataFrame([], columns=['r2_mm', 'r2_anom', 'bias', 'mae', 'rmse', 'bias_ex', 'mae_ex', 'rmse_ex', 'product'])\n", + " for product, metrics in zip(['Era-5', 'Era-5 QM'],\n", + " [[r2_refe_mm, r2_refe_anom, bias_refe, mae_refe, rmse_refe, bias_ex_refe, mae_ex_refe, rmse_ex_refe],\n", + " [r2_refe_qm_mm, r2_refe_qm_anom, bias_refe_qm, mae_refe_qm, rmse_refe_qm, bias_ex_refe_qm, mae_ex_refe_qm,\n", + " rmse_ex_refe_qm]]):\n", + " df_refe = df_refe.append(pd.DataFrame([metrics + [product]], columns=df_refe.columns), ignore_index=True)\n", + "\n", " # save metrics to disk\n", " df_refe.to_csv(filename, index=False)" ] @@ -501,7 +611,7 @@ "id": "630ce1c5-b018-437f-a7cf-8c8d99cd8f84", "metadata": {}, "source": [ - "Calculate yearly average bias, MAE, and RMSE over entire reference period for model predictions." + "### Metrics for mean values" ] }, { @@ -513,10 +623,43 @@ "source": [ "# yearly average bias, mae, and rmse for each ensemble member\n", "y_pred_yearly_avg = {k: v.groupby('time.year').mean(dim='time') for k, v in ensemble.items()}\n", - "bias_pred = {k: ((v - y_true_yearly_avg) / y_true_yearly_avg) * 100 if PREDICTAND == 'pr'\n", - " else v - y_true_yearly_avg for k, v in y_pred_yearly_avg.items()}\n", - "mae_pred = {k: np.abs(v - y_true_yearly_avg) for k, v in y_pred_yearly_avg.items()}\n", - "rmse_pred = {k: (v - y_true_yearly_avg) ** 2 for k, v in y_pred_yearly_avg.items()}" + "bias_pred = {k: [bias(v[i], y_true_yearly_avg, relative=True if PREDICTAND == 'pr' else False) for i in range(len(ensemble[k]))] for k, v in y_pred_yearly_avg.items()}\n", + "mae_pred = {k: [mae(v[i], y_true_yearly_avg) for i in range(len(ensemble[k]))] for k, v in y_pred_yearly_avg.items()}\n", + "rmse_pred = {k: [rmse(v[i], y_true_yearly_avg) for i in range(len(ensemble[k]))] for k, v in y_pred_yearly_avg.items()}" + ] + }, + { + "cell_type": "markdown", + "id": "122e84f8-211d-4816-9b03-2e1abc24eb9e", + "metadata": {}, + "source": [ + "### Metrics for extreme values" + ] 
+ }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b48a065-1a0b-4457-a97a-642c26d56c51", + "metadata": {}, + "outputs": [], + "source": [ + "# calculate extreme quantile for each year\n", + "with warnings.catch_warnings():\n", + " warnings.simplefilter('ignore', category=RuntimeWarning)\n", + " y_pred_ex = {k: v.chunk(dict(time=-1)).groupby('time.year').quantile(quantile, dim='time') for k, v in ensemble.items()}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2e6893da-271b-4b0e-bbc1-46b5eb9ecee3", + "metadata": {}, + "outputs": [], + "source": [ + "# yearly average bias, mae, and rmse for each ensemble member\n", + "bias_pred_ex = {k: [bias(v[i], y_true_ex, relative=True if PREDICTAND == 'pr' else False) for i in range(len(ensemble[k]))] for k, v in y_pred_ex.items()}\n", + "mae_pred_ex = {k: [mae(v[i], y_true_ex) for i in range(len(ensemble[k]))] for k, v in y_pred_ex.items()}\n", + "rmse_pred_ex = {k: [rmse(v[i], y_true_ex) for i in range(len(ensemble[k]))] for k, v in y_pred_ex.items()}" ] }, { @@ -529,19 +672,19 @@ "outputs": [], "source": [ "# compute validation metrics for model predictions\n", - "filename = RESULTS.joinpath(PREDICTAND, 'prediction.csv')\n", + "filename = (RESULTS.joinpath(PREDICTAND, 'prediction_pr-only.csv') if PREDICTAND == 'pr' and PR_ONLY else\n", + " RESULTS.joinpath(PREDICTAND, 'prediction.csv'))\n", "if filename.exists():\n", " # check if validation metrics for predictions already exist\n", " df_pred = pd.read_csv(filename)\n", "else:\n", " # validation metrics for each ensemble member\n", - " df_pred = pd.DataFrame([], columns=['r2_mm', 'r2_anom', 'bias', 'mae', 'rmse', 'product', 'loss'])\n", + " df_pred = pd.DataFrame([], columns=['r2_mm', 'r2_anom', 'bias', 'mae', 'rmse', 'bias_ex', 'mae_ex', 'rmse_ex', 'product', 'loss'])\n", " for k in y_pred_yearly_avg.keys():\n", " for i in range(len(ensemble[k])):\n", " # bias, mae, and rmse\n", - " values = 
pd.DataFrame([[np.sqrt(m.mean().values.item()) if name == 'rmse' else m.mean().values.item()\n", - " for name, m in zip(['bias', 'mae', 'rmse'], [bias_pred[k][i], mae_pred[k][i], rmse_pred[k][i]])] +\n", - " [bias_pred[k][i].members.item()] + [k]],\n", + " values = pd.DataFrame([[bias_pred[k][i], mae_pred[k][i], rmse_pred[k][i], bias_pred_ex[k][i],\n", + " mae_pred_ex[k][i], rmse_pred_ex[k][i], 'Member-{:d}'.format(i), k]],\n", " columns=df_pred.columns[2:])\n", " \n", " # r2 scores\n", @@ -550,15 +693,24 @@ " \n", " # validation metrics for ensemble\n", " for k, v in ensemble_mean_full.items():\n", - " yearly_avg = v.groupby('time.year').mean(dim='time')\n", - " bias = ((((yearly_avg - y_true_yearly_avg) / y_true_yearly_avg) * 100).mean().values.item() if PREDICTAND == 'pr' else \n", - " (yearly_avg - y_true_yearly_avg).mean().values.item())\n", - " mae = np.abs(yearly_avg - y_true_yearly_avg).mean().values.item()\n", - " rmse = np.sqrt(((yearly_avg - y_true_yearly_avg) ** 2).mean().values.item())\n", + " # metrics for mean values\n", + " means = v.groupby('time.year').mean(dim='time')\n", + " bias_mean = bias(means, y_true_yearly_avg, relative=True if PREDICTAND == 'pr' else False)\n", + " mae_mean = mae(means, y_true_yearly_avg)\n", + " rmse_mean = rmse(means, y_true_yearly_avg)\n", + " \n", + " # metrics for extreme values\n", + " with warnings.catch_warnings():\n", + " warnings.simplefilter('ignore', category=RuntimeWarning)\n", + " extremes = v.chunk(dict(time=-1)).groupby('time.year').quantile(quantile, dim='time')\n", + " bias_ex = bias(extremes, y_true_ex, relative=True if PREDICTAND == 'pr' else False)\n", + " mae_ex = mae(extremes, y_true_ex)\n", + " rmse_ex = rmse(extremes, y_true_ex)\n", + " \n", + " # r2 scores\n", " r2_mm, r2_anom = r2(v, y_true, precipitation=True if PREDICTAND == 'pr' else False)\n", - " values = pd.DataFrame([[r2_mm, r2_anom, bias, mae, rmse, 'Ensemble-{:d}'.format(len(ensemble[k])), k]],\n", - " columns=df_pred.columns)\n", - " 
df_pred = df_pred.append(values, ignore_index=True)\n", + " df_pred = df_pred.append(pd.DataFrame([[r2_mm, r2_anom, bias_mean, mae_mean, rmse_mean, bias_ex, mae_ex, rmse_ex, 'Ensemble-{:d}'.format(len(ensemble[k])), k]],\n", + " columns=df_pred.columns), ignore_index=True)\n", "\n", " # save metrics to disk\n", " df_pred.to_csv(filename, index=False)" @@ -614,22 +766,26 @@ " ensemble_mean_prob = ensemble_prob.mean(dim='members')\n", " \n", " # filename for probability metrics\n", - " filename = RESULTS.joinpath(PREDICTAND, 'probability.csv')\n", - " \n", - " # AUC and ROCSS for each ensemble member\n", - " df_prob = pd.DataFrame([], columns=['auc', 'rocss', 'product', 'loss'])\n", - " for i in range(len(ensemble_prob)):\n", - " auc_score, rocss = auc_rocss(ensemble_prob[i], y_true, wet_day_threshold=WET_DAY_THRESHOLD)\n", - " df_prob = df_prob.append(pd.DataFrame([[auc_score, rocss, ensemble_prob[i].members.item(), 'BernoulliGammaLoss']],\n", + " filename = (RESULTS.joinpath(PREDICTAND, 'probability_pr-only.csv') if PREDICTAND == 'pr' and PR_ONLY else\n", + " RESULTS.joinpath(PREDICTAND, 'probability.csv'))\n", + " if filename.exists():\n", + " # check if validation metrics for probabilities already exist\n", + " df_prob = pd.read_csv(filename)\n", + " else:\n", + " # AUC and ROCSS for each ensemble member\n", + " df_prob = pd.DataFrame([], columns=['auc', 'rocss', 'product', 'loss'])\n", + " for i in range(len(ensemble_prob)):\n", + " auc_score, rocss = auc_rocss(ensemble_prob[i], y_true, wet_day_threshold=WET_DAY_THRESHOLD)\n", + " df_prob = df_prob.append(pd.DataFrame([[auc_score, rocss, ensemble_prob[i].members.item(), 'BernoulliGammaLoss']],\n", + " columns=df_prob.columns), ignore_index=True)\n", + "\n", + " # AUC and ROCSS for ensemble mean\n", + " auc_score, rocss = auc_rocss(ensemble_mean_prob, y_true, wet_day_threshold=WET_DAY_THRESHOLD)\n", + " df_prob = df_prob.append(pd.DataFrame([[auc_score, rocss, 'Ensemble-{:d}'.format(len(ensemble_prob)), 
'BernoulliGammaLoss']],\n", " columns=df_prob.columns), ignore_index=True)\n", - " \n", - " # AUC and ROCSS for ensemble mean\n", - " auc_score, rocss = auc_rocss(ensemble_mean_prob, y_true, wet_day_threshold=WET_DAY_THRESHOLD)\n", - " df_prob = df_prob.append(pd.DataFrame([[auc_score, rocss, 'Ensemble-{:d}'.format(len(ensemble_prob)), 'BernoulliGammaLoss']],\n", - " columns=df_prob.columns), ignore_index=True)\n", - " \n", - " # save metrics to disk\n", - " df_prob.to_csv(filename, index=False)" + "\n", + " # save metrics to disk\n", + " df_prob.to_csv(filename, index=False)" ] } ], diff --git a/Notebooks/eval_sensitivity.ipynb b/Notebooks/eval_sensitivity.ipynb index e10eeb33342760807098de1d45ebcddcc0ee9508..893da3e5573c8ea43f9f0644d0295c99a1c92612 100644 --- a/Notebooks/eval_sensitivity.ipynb +++ b/Notebooks/eval_sensitivity.ipynb @@ -55,16 +55,16 @@ "# define the model parameters\n", "PREDICTAND = 'pr' # precipitation, tasmin, tasmax\n", "MODEL = 'USegNet'\n", - "PPREDICTORS = 'ztuvq'\n", - "# PPREDICTORS = ''\n", + "# PPREDICTORS = 'ztuvq'\n", + "PPREDICTORS = ''\n", "PLEVELS = ['500', '850']\n", - "SPREDICTORS = 'p'\n", - "# SPREDICTORS = 'pr'\n", - "DEM = 'dem'\n", - "# DEM = ''\n", + "# SPREDICTORS = 'p'\n", + "SPREDICTORS = 'pr'\n", + "# DEM = 'dem'\n", + "DEM = ''\n", "DEM_FEATURES = ''\n", - "# DOY = ''\n", - "DOY = 'doy'\n", + "DOY = ''\n", + "# DOY = 'doy'\n", "OPTIMS = ['Adam', 'SGD', 'SGDCLR']" ] }, @@ -122,17 +122,6 @@ "# PARAMETER = {'Wet day threshold': WET_DAY_THRESHOLD}" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "b7a61547-9887-4068-8fbf-9a799922ff8d", - "metadata": {}, - "outputs": [], - "source": [ - "# extreme quantile of interest\n", - "quantile = 0.98" - ] - }, { "cell_type": "markdown", "id": "cda0cf8b-adce-4513-af69-31df6eb5542f", @@ -522,16 +511,8 @@ "\n", "# adjust subplots\n", "fig.subplots_adjust(wspace=0.05, hspace=0.15)\n", - "fig.savefig('./Figures/{}'.format(df_name.name.replace('csv', 'png')), 
bbox_inches='tight', dpi=300)" + "fig.savefig('./Figures/sensitivity/{}'.format(df_name.name.replace('csv', 'png')), bbox_inches='tight', dpi=300)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "18b21e6e-711c-4afd-91de-9d7e4403c6ce", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/Notebooks/plot_figures_paper.ipynb b/Notebooks/plot_figures_paper.ipynb index d359b806de7c5f2a16a800dfcc1fff66cac7790b..40d5cb256772c70bf7311bf0306688dd253ed574 100644 --- a/Notebooks/plot_figures_paper.ipynb +++ b/Notebooks/plot_figures_paper.ipynb @@ -47,7 +47,18 @@ "outputs": [], "source": [ "# predictand\n", - "PREDICTAND = 'tasmax'" + "PREDICTAND = 'tasmin'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e5e7a63c-3d3c-4e29-94af-c21f0f63ba6c", + "metadata": {}, + "outputs": [], + "source": [ + "# whether only precipitation was used as predictor\n", + "PR_ONLY = False" ] }, { @@ -67,7 +78,20 @@ "source": [ "# model results computed by notebook eval_bootstrap.ipynb\n", "df_refe = pd.read_csv(RESULTS.joinpath(PREDICTAND, 'reference.csv'))\n", - "df_pred = pd.read_csv(RESULTS.joinpath(PREDICTAND, 'prediction.csv'))" + "df_pred = pd.read_csv(RESULTS.joinpath(PREDICTAND, 'prediction_pr-only.csv' if PREDICTAND == 'pr' and PR_ONLY else\n", + " 'prediction.csv'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6f2b9ea-3ede-40f8-a448-de2833b5d180", + "metadata": {}, + "outputs": [], + "source": [ + "# dataframe of single members and ensembles only\n", + "members = df_pred[np.isin(df_pred['product'], ['Member-{}'.format(i) for i in range(10)])]\n", + "ensemble = df_pred[~np.isin(df_pred['product'], ['Member-{}'.format(i) for i in range(10)])]" ] }, { @@ -89,25 +113,91 @@ "tags": [] }, "source": [ - "### Absolute values: single members vs. ensemble" + "### Mean values: single members vs. 
ensemble" ] }, { "cell_type": "code", "execution_count": null, - "id": "a6f2b9ea-3ede-40f8-a448-de2833b5d180", + "id": "c7bd9c53-4a96-41e7-b8bd-7d438d4dbe73", "metadata": {}, "outputs": [], "source": [ - "# dataframe of single members and ensembles only\n", - "members = df_pred[np.isin(df_pred['product'], ['Member-{}'.format(i) for i in range(10)])]\n", - "ensemble = df_pred[~np.isin(df_pred['product'], ['Member-{}'.format(i) for i in range(10)])]" + "# initialize figure\n", + "fig = plt.figure(figsize=(16, 5))\n", + "\n", + "# create grid for different subplots\n", + "grid = gridspec.GridSpec(ncols=5, nrows=1, width_ratios=[3, 1, 1, 3, 1], wspace=0.05, hspace=0)\n", + "\n", + "# add subplots\n", + "ax1 = fig.add_subplot(grid[0])\n", + "ax2 = fig.add_subplot(grid[1], sharey=ax1)\n", + "ax3 = fig.add_subplot(grid[3])\n", + "ax4 = fig.add_subplot(grid[4], sharey=ax3)\n", + "axes = [ax1, ax2, ax3, ax4]\n", + "\n", + "# plot bias: single members vs. ensemble\n", + "sns.barplot(x='product', y='bias', hue='loss', data=members, palette=palette, ax=ax1);\n", + "sns.barplot(x='product', y='bias', hue='loss', data=ensemble, palette=palette, ax=ax2);\n", + "\n", + "# plot mae: single members vs. 
ensemble\n", + "sns.barplot(x='product', y='mae', hue='loss', data=members, palette=palette, ax=ax3);\n", + "sns.barplot(x='product', y='mae', hue='loss', data=ensemble, palette=palette, ax=ax4);\n", + "\n", + "# axes limits and ticks\n", + "y_lim_bias = (-50, 50) if PREDICTAND == 'pr' else (-1, 1)\n", + "y_lim_mae = (0, 2) if PREDICTAND == 'pr' else (0, 1)\n", + "y_ticks_bias = (np.arange(y_lim_bias[0], y_lim_bias[1] + 10, 10) if PREDICTAND == 'pr' else\n", + " np.arange(y_lim_bias[0], y_lim_bias[1] + 0.2, 0.2))\n", + "y_ticks_mae = np.arange(y_lim_mae[0], y_lim_mae[1] + 0.2, 0.2)\n", + "\n", + "# axis for bias\n", + "ax1.set_ylabel('Bias (%)' if PREDICTAND == 'pr' else 'Bias (°C)')\n", + "ax1.set_ylim(y_lim_bias)\n", + "ax1.set_yticks(y_ticks_bias)\n", + "\n", + "# axis for mae\n", + "ax3.set_ylabel('Mean absolute error (mm)' if PREDICTAND == 'pr' else 'Mean absolute error (°C)')\n", + "ax3.set_ylim(y_lim_mae)\n", + "ax3.set_yticks(y_ticks_mae)\n", + "\n", + "# adjust axis for ensemble predictions\n", + "for ax in [ax2, ax4]:\n", + " ax.yaxis.tick_right()\n", + " ax.set_ylabel('')\n", + "\n", + "# axis fontsize and legend\n", + "for ax in axes:\n", + " ax.tick_params('both', labelsize=14)\n", + " ax.set_xticklabels(ax.get_xticklabels(), rotation=90)\n", + " ax.yaxis.label.set_size(14)\n", + " ax.set_xlabel('')\n", + " \n", + " # adjust legend\n", + " h, _ = ax.get_legend_handles_labels()\n", + " ax.get_legend().remove()\n", + "\n", + "# show single legend\n", + "ax4.legend(bbox_to_anchor=(1.3, 1.05), loc=2, frameon=False, fontsize=14);\n", + "\n", + "# save figure\n", + "fig.savefig('./Figures/bootstrap/{}_members_vs_ensemble_mean.png'.format(PREDICTAND), dpi=300, bbox_inches='tight')" + ] + }, + { + "cell_type": "markdown", + "id": "233f4239-1cca-4ad5-8581-782e42948503", + "metadata": { + "tags": [] + }, + "source": [ + "### Extreme values: single members vs. 
ensemble" ] }, { "cell_type": "code", "execution_count": null, - "id": "c7bd9c53-4a96-41e7-b8bd-7d438d4dbe73", + "id": "72aadc32-a1c8-40a1-8cbe-e7166e18b6e0", "metadata": {}, "outputs": [], "source": [ @@ -125,16 +215,16 @@ "axes = [ax1, ax2, ax3, ax4]\n", "\n", "# plot bias: single members vs. ensemble\n", - "sns.barplot(x='product', y='bias', hue='loss', data=members, palette=palette, ax=ax1);\n", - "sns.barplot(x='product', y='bias', hue='loss', data=ensemble, palette=palette, ax=ax2);\n", + "sns.barplot(x='product', y='bias_ex', hue='loss', data=members, palette=palette, ax=ax1);\n", + "sns.barplot(x='product', y='bias_ex', hue='loss', data=ensemble, palette=palette, ax=ax2);\n", "\n", "# plot mae: single members vs. ensemble\n", - "sns.barplot(x='product', y='mae', hue='loss', data=members, palette=palette, ax=ax3);\n", - "sns.barplot(x='product', y='mae', hue='loss', data=ensemble, palette=palette, ax=ax4);\n", + "sns.barplot(x='product', y='mae_ex', hue='loss', data=members, palette=palette, ax=ax3);\n", + "sns.barplot(x='product', y='mae_ex', hue='loss', data=ensemble, palette=palette, ax=ax4);\n", "\n", "# axes limits and ticks\n", "y_lim_bias = (-50, 50) if PREDICTAND == 'pr' else (-1, 1)\n", - "y_lim_mae = (0, 2) if PREDICTAND == 'pr' else (0, 1)\n", + "y_lim_mae = (0, 2) if PREDICTAND == 'pr' else (0, 2)\n", "y_ticks_bias = (np.arange(y_lim_bias[0], y_lim_bias[1] + 10, 10) if PREDICTAND == 'pr' else\n", " np.arange(y_lim_bias[0], y_lim_bias[1] + 0.2, 0.2))\n", "y_ticks_mae = np.arange(y_lim_mae[0], y_lim_mae[1] + 0.2, 0.2)\n", @@ -169,13 +259,21 @@ "ax4.legend(bbox_to_anchor=(1.3, 1.05), loc=2, frameon=False, fontsize=14);\n", "\n", "# save figure\n", - "fig.savefig('./Figures/bootstrap/{}_members_vs_ensemble.pdf'.format(PREDICTAND), bbox_inches='tight')" + "fig.savefig('./Figures/bootstrap/{}_members_vs_ensemble_extremes.png'.format(PREDICTAND), dpi=300, bbox_inches='tight')" + ] + }, + { + "cell_type": "markdown", + "id": 
"f13dce92-e895-48de-97ef-17b3d7b7553d", + "metadata": {}, + "source": [ + "### Predictions vs. reference" ] }, { "cell_type": "code", "execution_count": null, - "id": "6ebfee41-03a1-470b-8b61-698f3422cb9c", + "id": "3b63c13d-b32a-4a24-8290-e209db490b88", "metadata": {}, "outputs": [], "source": [ @@ -185,7 +283,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4170e37d-ca9f-4d9b-81c2-945699ab9b39", + "id": "a5fea8e6-48f6-42b4-b72e-739e5eb9bf5c", "metadata": {}, "outputs": [], "source": [