From 3ca5a973b9817a094783f594029955253c966f53 Mon Sep 17 00:00:00 2001
From: "Daniel.Frisinghelli" <daniel.frisinghelli@eurac.edu>
Date: Wed, 17 Nov 2021 16:30:32 +0000
Subject: [PATCH] Further improvements.

---
 Notebooks/eval_bootstrap.ipynb     | 393 ++++++++++++++++++++---------
 Notebooks/eval_precipitation.ipynb |   2 +-
 Notebooks/eval_sensitivity.ipynb   |   2 +-
 Notebooks/eval_temperature.ipynb   |   2 +-
 4 files changed, 277 insertions(+), 122 deletions(-)

diff --git a/Notebooks/eval_bootstrap.ipynb b/Notebooks/eval_bootstrap.ipynb
index abe6fc7..bf829d0 100644
--- a/Notebooks/eval_bootstrap.ipynb
+++ b/Notebooks/eval_bootstrap.ipynb
@@ -11,9 +11,11 @@
   {
    "cell_type": "markdown",
    "id": "969d063b-5262-4324-901f-0a48630c4f27",
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "source": [
-    "### Imports and constants"
+    "## Imports and constants"
    ]
   },
   {
@@ -64,7 +66,9 @@
   {
    "cell_type": "markdown",
    "id": "7eae545b-4d8a-4689-a6c0-4aba2cb9104e",
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "source": [
     "## Search model configuration"
    ]
@@ -106,6 +110,16 @@
     "models"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "5a64795a-6e5c-409a-8b3b-c738a96fa255",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "## Load datasets"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "e790ed9f-451c-4368-849d-06d9c50f797c",
@@ -249,7 +263,8 @@
    "source": [
     "y_pred = [xr.open_dataset(sim, chunks={'time': 365}) for sim in models]\n",
     "if PREDICTAND == 'pr':\n",
-    "    y_pred = [y_p.rename({NAMES[PREDICTAND]: PREDICTAND}) for y_p in y_pred]"
+    "    y_pred = [y_p.rename({NAMES[PREDICTAND]: PREDICTAND}) for y_p in y_pred]\n",
+    "    y_pred = [y_p.transpose('time', 'y', 'x') for y_p in y_pred]"
    ]
   },
   {
@@ -278,125 +293,86 @@
   },
   {
    "cell_type": "markdown",
-   "id": "775a3c92-1027-49d2-9681-dd53e0af70ac",
-   "metadata": {},
+   "id": "6a718ea3-54d3-400a-8c89-76d04347de2d",
+   "metadata": {
+    "tags": []
+   },
    "source": [
-    "## Mean time series"
+    "## Ensemble predictions"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "dbe5db42-c31c-493b-a3b8-42c794cde6d9",
+   "id": "5a6c0bfe-c1d2-4e43-9f8e-35c63c46bb10",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# whether to compute rolling or hard mean\n",
-    "ROLLING = False"
+    "# create ensemble dataset\n",
+    "ensemble = xr.Dataset({'member_{}'.format(i): member for i, member in enumerate(y_pred)}).to_array('members')"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "fae4d70c-276c-4ba6-b6b6-ba6eb1793e0c",
+   "id": "0e526227-cd4c-4a1c-ab72-51b72a4f821f",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# define scale of mean time series\n",
-    "# scale = '1M'  # monthly\n",
-    "scale = '1Y'  # yearly"
+    "# full ensemble mean prediction and standard deviation\n",
+    "ensemble_mean_full = ensemble.mean(dim='members')\n",
+    "ensemble_std_full = ensemble.std(dim='members')"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "5eaaaf2f-d4c4-4f30-b124-66d04d6db2b9",
+   "id": "d4a70701-2823-4106-ad6a-3272b678d0f9",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# mean time series over entire grid and validation period\n",
-    "if ROLLING:\n",
-    "    y_pred_ts = [y_p.rolling(time=365, center=True).mean().mean(dim=('y', 'x')).dropna('time').compute() for y_p in y_pred]\n",
-    "    y_true_ts = y_true.rolling(time=365, center=True).mean().mean(dim=('y', 'x')).dropna('time').compute()\n",
-    "    y_refe_ts = y_refe.rolling(time=365, center=True).mean().mean(dim=('y', 'x')).dropna('time').compute()\n",
-    "    y_refe_qm_ts = y_refe_qm.rolling(time=365, center=True).mean().mean(dim=('y', 'x')).dropna('time').compute()\n",
-    "else:\n",
-    "    y_pred_ts = [y_p.resample(time=scale).mean(dim=('time', 'y', 'x')).compute() for y_p in y_pred]\n",
-    "    y_true_ts = y_true.resample(time=scale).mean(dim=('time', 'y', 'x')).compute()\n",
-    "    y_refe_ts = y_refe.resample(time=scale).mean(dim=('time', 'y', 'x')).compute()\n",
-    "    y_refe_qm_ts = y_refe_qm.resample(time=scale).mean(dim=('time', 'y', 'x')).compute()"
+    "# ensemble mean prediction using three random members\n",
+    "ensemble_3 = np.random.randint(0, len(ensemble.members), size=3)\n",
+    "ensemble_mean_3 = ensemble[ensemble_3, ...].mean(dim='members')\n",
+    "ensemble_std_3 = ensemble[ensemble_3, ...].std(dim='members')"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "b5cab101-3e33-48a9-b071-5b32b1084eb1",
+   "id": "c4d18814-1340-4ed4-8102-2ccd6f0c2664",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# convert model predictions to numpy array\n",
-    "y_pred_ts = np.asarray([y_p for y_p in y_pred_ts])"
+    "# ensemble mean prediction using five random members\n",
+    "ensemble_5 = np.random.randint(0, len(ensemble.members), size=5)\n",
+    "ensemble_mean_5 = ensemble[ensemble_5, ...].mean(dim='members')\n",
+    "ensemble_std_5 = ensemble[ensemble_5, ...].std(dim='members')"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b65387b7-ee4b-4d9d-b925-6973b8040cb2",
-   "metadata": {},
-   "outputs": [],
+   "cell_type": "markdown",
+   "id": "f8b31e39-d4b9-4347-953f-87af04c0dd7a",
+   "metadata": {
+    "tags": []
+   },
    "source": [
-    "# calculate quantiles for ensemble of bootstrapped models\n",
-    "y_pred_q = np.quantile(y_pred_ts, q=[0.25, 0.5, 0.75], axis=0)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "8ca32179-66ed-4f9d-a8f6-92cb547afe4a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# initialize figure\n",
-    "palette = sns.color_palette('viridis', 3)\n",
-    "fig, ax = plt.subplots(1, 1, figsize=(16, 9))\n",
-    "\n",
-    "# time to plot on x-axis\n",
-    "time = y_true_ts.time if ROLLING else [t.astype('datetime64[{}]'.format(scale.lstrip('1'))) for t in y_true_ts.time.values] \n",
-    "xticks = [t.astype('datetime64[Y]') for t in list(y_true_ts.time.resample(time='1Y').groups.keys())]\n",
-    "\n",
-    "# plot reference: observations, ERA-5, ERA-5 QM-adjusted\n",
-    "ax.plot(time, y_true_ts, label='Observed', ls='-', color='k');\n",
-    "ax.plot(time, y_refe_ts, label='ERA-5', ls='-', color=palette[0]);\n",
-    "ax.plot(time, y_refe_qm_ts, label='ERA-5 QM-adjusted', ls='-', color=palette[1]);\n",
-    "\n",
-    "# plot model predictions: median and IQR\n",
-    "ax.plot(time, y_pred_q[1, :], label='Prediction: Median', color=palette[-1])\n",
-    "ax.fill_between(x=time, y1=y_pred_q[0, :], y2=y_pred_q[-1, :], alpha=0.3, label='Prediction: IQR', color=palette[-1]);\n",
-    "\n",
-    "# add legend\n",
-    "ax.legend(frameon=False, loc='lower right', fontsize=12)\n",
-    "\n",
-    "# axis limits and ticks\n",
-    "ax.set_xticks(xticks)\n",
-    "ax.set_xticklabels(xticks)\n",
-    "ax.tick_params(axis='both', labelsize=12)\n",
-    "\n",
-    "# save figure\n",
-    "fig.savefig('./Figures/{}_{}_{}_bootstrap_time_series_{}.png'.format(PREDICTAND, LOSS, OPTIM, scale if not ROLLING else 'rolling'),\n",
-    "            bbox_inches='tight', dpi=300)"
+    "# Model validation"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "7effbf83-7977-4a47-aa6d-d57c4c4c22c6",
-   "metadata": {},
+   "id": "3e6ecc98-f32f-42f7-9971-64b270aa5453",
+   "metadata": {
+    "tags": []
+   },
    "source": [
-    "### Bias, MAE, and RMSE"
+    "## Bias, MAE, and RMSE for reference data"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "b8c23b7b-ccdf-412a-a30d-ac686af03d9f",
+   "id": "671cd3c0-8d6c-41c1-bf8e-93f5943bf9aa",
    "metadata": {},
    "source": [
     "Calculate yearly average bias, MAE, and RMSE over entire reference period for model predictions, ERA-5, and QM-adjusted ERA-5."
@@ -410,25 +386,11 @@
    "outputs": [],
    "source": [
     "# yearly average values over validation period\n",
-    "y_pred_yearly_avg = [y_p.groupby('time.year').mean(dim='time') for y_p in y_pred]\n",
     "y_refe_yearly_avg = y_refe.groupby('time.year').mean(dim='time')\n",
     "y_refe_qm_yearly_avg = y_refe_qm.groupby('time.year').mean(dim='time')\n",
     "y_true_yearly_avg = y_true.groupby('time.year').mean(dim='time')"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "cce7ffbf-7e16-45f1-a2b6-5bc688595ee7",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# yearly average bias, mae, and rmse for model predictions\n",
-    "bias_pred = [y_p - y_true_yearly_avg for y_p in y_pred_yearly_avg]\n",
-    "mae_pred = [np.abs(y_p - y_true_yearly_avg) for y_p in y_pred_yearly_avg]\n",
-    "rmse_pred = [(y_p - y_true_yearly_avg) ** 2 for y_p in y_pred_yearly_avg]"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -455,14 +417,6 @@
     "rmse_refe_qm = (y_refe_qm_yearly_avg - y_true_yearly_avg) ** 2"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "id": "e0b2811e-ca0a-488b-911f-531526980ef0",
-   "metadata": {},
-   "source": [
-    "#### Calculate absolute values"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -471,7 +425,7 @@
    "outputs": [],
    "source": [
     "# create dataframe for mean bias, mae, and rmse\n",
-    "df = pd.DataFrame([], columns=['bias', 'mae', 'rmse', 'product'])"
+    "df_ref = pd.DataFrame([], columns=['bias', 'mae', 'rmse', 'product'])"
    ]
   },
   {
@@ -484,65 +438,266 @@
     "# absolute values for the reference datasets\n",
     "for product, metrics in zip(['Era-5', 'Era-5 QM'], [[bias_refe, mae_refe, rmse_refe], [bias_refe_qm, mae_refe_qm, rmse_refe_qm]]):\n",
     "    values = pd.DataFrame([[np.sqrt(m.mean().values.item()) if name == 'rmse' else m.mean().values.item() for name, m in zip(['bias', 'mae', 'rmse'], metrics)] + [product]],\n",
-    "                          columns=df.columns)\n",
-    "    df = df.append(values, ignore_index=True)"
+    "                          columns=df_ref.columns)\n",
+    "    df_ref = df_ref.append(values, ignore_index=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "258cb3c6-c2fc-457d-885e-28eaf48f1d5b",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "## Bias, MAE, and RMSE for model predictions"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3f8d75ad-1da2-4f80-aef9-996c0463d1a2",
+   "metadata": {},
+   "source": [
+    "### Absolute values for each ensemble member"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "64f7a0b9-a772-4a03-9160-7839a48e56cd",
+   "id": "6980833a-3848-43ca-bcca-d759b4fd9f69",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# absolute values for the model predictions\n",
+    "# yearly average bias, mae, and rmse for each ensemble member\n",
+    "y_pred_yearly_avg = ensemble.groupby('time.year').mean(dim='time')\n",
+    "bias_pred = y_pred_yearly_avg - y_true_yearly_avg\n",
+    "mae_pred = np.abs(y_pred_yearly_avg - y_true_yearly_avg)\n",
+    "rmse_pred = (y_pred_yearly_avg - y_true_yearly_avg) ** 2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "64f7a0b9-a772-4a03-9160-7839a48e56cd",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# absolute values for each ensemble member\n",
     "df_pred = pd.DataFrame([], columns=['bias', 'mae', 'rmse', 'product'])\n",
     "for i in range(len(bias_pred)):\n",
     "    values = pd.DataFrame([[np.sqrt(m.mean().values.item()) if name == 'rmse' else m.mean().values.item()\n",
-    "                            for name, m in zip(['bias', 'mae', 'rmse'], [bias_pred[i], mae_pred[i], rmse_pred[i]])] + ['Prediction']], columns=df.columns)\n",
+    "                            for name, m in zip(['bias', 'mae', 'rmse'], [bias_pred[i], mae_pred[i], rmse_pred[i]])] + [bias_pred[i].members.item()]],\n",
+    "                          columns=df_pred.columns)\n",
     "    df_pred = df_pred.append(values, ignore_index=True)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "a1fdb3bd-84cd-4ebf-80f4-a0275e372315",
+   "metadata": {},
+   "source": [
+    "### Absolute values for ensemble predictions"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "a8dc34b1-f7dd-4d1c-8288-314aba444b86",
+   "id": "5a5755de-c000-43aa-9d16-637f021691ae",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# compute mean and standard deviation of ensemble members\n",
-    "mean = df_pred.mean(axis=0).to_frame().transpose()\n",
-    "std = df_pred.std(axis=0).to_frame().transpose()\n",
-    "mean['product'] = 'Prediction'"
+    "for name, ens in zip(['Ensemble-3', 'Ensemble-5', 'Ensemble-{:d}'.format(len(ensemble))], [ensemble_mean_3, ensemble_mean_5, ensemble_mean_full]):\n",
+    "    yearly_avg = ens.groupby('time.year').mean(dim='time')\n",
+    "    bias = (yearly_avg - y_true_yearly_avg).mean().values.item()\n",
+    "    mae = np.abs(yearly_avg - y_true_yearly_avg).mean().values.item()\n",
+    "    rmse = np.sqrt(((yearly_avg - y_true_yearly_avg) ** 2).mean().values.item())\n",
+    "    values = pd.DataFrame([[bias, mae, rmse, name]], columns=df_pred.columns)\n",
+    "    df_pred = df_pred.append(values, ignore_index=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "902e299c-a927-41b1-b2ae-987c30dee8cf",
+   "metadata": {},
+   "source": [
+    "## Plot results"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3cfcd2de-cd37-42d5-b53d-e8abfd21e242",
+   "metadata": {},
+   "source": [
+    "### Absolute values"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "62339f06-5973-4df1-bdc2-aeb961f15403",
+   "id": "bdca9b54-3e05-49c8-b1b2-b8c782017306",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# add ensemble mean to reference dataframe\n",
-    "df = df.append(mean, ignore_index=True)"
+    "# create a sequential colormap: for reference data, single ensemble members, and ensemble mean predictions\n",
+    "palette = sns.color_palette('YlOrRd_r', 10) + sns.color_palette('Greens', 3)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "557e6b0e-7a54-4bbe-bd39-1fbc66e2c9af",
+   "id": "32f1a652-a4a2-4738-ba6f-93fe3ac43658",
    "metadata": {},
    "outputs": [],
    "source": [
+    "# absolute values for metrics for both reference and model predictions\n",
+    "df = pd.concat([df_ref, df_pred], ignore_index=True)\n",
     "df"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d7c8e987-0257-4263-ac4b-718a614c458f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# initialize figure\n",
+    "fig, axes = plt.subplots(1, 2, figsize=(16, 5))\n",
+    "sns.barplot(x='product', y='bias', data=df_pred, palette=palette, ax=axes[0])\n",
+    "sns.barplot(x='product', y='mae', data=df_pred, palette=palette, ax=axes[1])\n",
+    "# sns.barplot(x='product', y='rmse', data=df, palette=palette, ax=axes[2])\n",
+    "\n",
+    "# axes limits and ticks\n",
+    "axes[0].set_ylim(-0.5, 0.5)\n",
+    "axes[1].set_ylim(0, 1)\n",
+    "# axes[2].set_ylim(0, 1)\n",
+    "for ax in axes:\n",
+    "    ax.tick_params('both', labelsize=14)\n",
+    "    ax.set_xticklabels(ax.get_xticklabels(), rotation=90)\n",
+    "    ax.yaxis.label.set_size(14)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "775a3c92-1027-49d2-9681-dd53e0af70ac",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "### Regional time series"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dbe5db42-c31c-493b-a3b8-42c794cde6d9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# whether to compute rolling or hard mean\n",
+    "ROLLING = True"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fae4d70c-276c-4ba6-b6b6-ba6eb1793e0c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# define scale of mean time series\n",
+    "# scale = '1M'  # monthly\n",
+    "scale = '1Y'  # yearly"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5eaaaf2f-d4c4-4f30-b124-66d04d6db2b9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# mean time series over entire grid and validation period\n",
+    "if ROLLING:\n",
+    "    y_pred_ts = ensemble_mean_full.rolling(time=365, center=True).mean().mean(dim=('y', 'x')).dropna('time').compute()\n",
+    "    y_pred_ts_var = ensemble_std_full.rolling(time=365, center=True).mean().mean(dim=('y', 'x')).dropna('time').compute()\n",
+    "    y_true_ts = y_true.rolling(time=365, center=True).mean().mean(dim=('y', 'x')).dropna('time').compute()\n",
+    "    y_refe_ts = y_refe.rolling(time=365, center=True).mean().mean(dim=('y', 'x')).dropna('time').compute()\n",
+    "    y_refe_qm_ts = y_refe_qm.rolling(time=365, center=True).mean().mean(dim=('y', 'x')).dropna('time').compute()\n",
+    "else:\n",
+    "    y_pred_ts = ensemble_mean_full.resample(time=scale).mean(dim=('time', 'y', 'x')).compute()\n",
+    "    y_pred_ts_var = ensemble_std_full.resample(time=scale).mean(dim=('time', 'y', 'x')).compute()\n",
+    "    y_true_ts = y_true.resample(time=scale).mean(dim=('time', 'y', 'x')).compute()\n",
+    "    y_refe_ts = y_refe.resample(time=scale).mean(dim=('time', 'y', 'x')).compute()\n",
+    "    y_refe_qm_ts = y_refe_qm.resample(time=scale).mean(dim=('time', 'y', 'x')).compute()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "28bc3177-b6a0-4938-9e74-59be2491fa56",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# color palette\n",
+    "palette = sns.color_palette('viridis', 3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "07375015-4205-4dfb-9bd2-0f37d5e56672",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# factor of standard deviation to plot as uncertainty around ensemble mean prediction\n",
+    "std_factor = 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8ca32179-66ed-4f9d-a8f6-92cb547afe4a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# initialize figure\n",
+    "fig, ax = plt.subplots(1, 1, figsize=(16, 9))\n",
+    "\n",
+    "# time to plot on x-axis\n",
+    "time = y_true_ts.time if ROLLING else [t.astype('datetime64[{}]'.format(scale.lstrip('1'))) for t in y_true_ts.time.values] \n",
+    "xticks = [t.astype('datetime64[Y]') for t in list(y_true_ts.time.resample(time='1Y').groups.keys())]\n",
+    "\n",
+    "# plot reference: observations, ERA-5, ERA-5 QM-adjusted\n",
+    "ax.plot(time, y_true_ts, label='Observed', ls='-', color='k');\n",
+    "ax.plot(time, y_refe_ts, label='ERA-5', ls='-', color=palette[0]);\n",
+    "ax.plot(time, y_refe_qm_ts, label='ERA-5 QM-adjusted', ls='-', color=palette[1]);\n",
+    "\n",
+    "# plot model predictions: median and IQR\n",
+    "ax.plot(time, y_pred_ts, label='Prediction: Ensemble mean', color=palette[-1])\n",
+    "ax.fill_between(x=time, y1=y_pred_ts - std_factor * y_pred_ts_var, y2=y_pred_ts + std_factor * y_pred_ts_var,\n",
+    "                alpha=0.3, label='Prediction: Ensemble std', color=palette[-1]);\n",
+    "\n",
+    "# add legend\n",
+    "ax.legend(frameon=False, loc='lower right', fontsize=12)\n",
+    "\n",
+    "# axis limits and ticks\n",
+    "ax.set_xticks(xticks)\n",
+    "ax.set_xticklabels(xticks)\n",
+    "ax.tick_params(axis='both', labelsize=12)\n",
+    "\n",
+    "# save figure\n",
+    "fig.savefig('./Figures/{}_{}_{}_bootstrap_time_series_{}.png'.format(PREDICTAND, LOSS, OPTIM, scale if not ROLLING else 'rolling'),\n",
+    "            bbox_inches='tight', dpi=300)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "923762ca-6ebc-4ffa-9b65-2faaf816fc05",
    "metadata": {},
    "source": [
-    "#### Plot spatial distributions"
+    "### Spatial distributions"
    ]
   },
   {
@@ -552,8 +707,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# compute ensemble median for yearly mean bias of each grid point\n",
-    "pred = np.median(np.stack([y_p.mean(dim='year') for y_p in bias_pred], axis=0), axis=0)"
+    "# compute ensemble mean yearly mean bias of each grid point\n",
+    "pred = (ensemble_mean_full.groupby('time.year').mean(dim='time') - y_true_yearly_avg).mean(dim='year').compute()"
    ]
   },
   {
@@ -581,7 +736,7 @@
     "# set titles\n",
     "axes[0].set_title('Era-5', fontsize=14, pad=10);\n",
     "axes[1].set_title('Era-5: QM-adjusted', fontsize=14, pad=10);\n",
-    "axes[2].set_title('Predictions: Median', fontsize=14, pad=10)\n",
+    "axes[2].set_title('Predictions: Ensemble mean', fontsize=14, pad=10)\n",
     "\n",
     "# adjust axes\n",
     "for ax in axes.flat:\n",
@@ -626,7 +781,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.10"
+   "version": "3.7.12"
   }
  },
  "nbformat": 4,
diff --git a/Notebooks/eval_precipitation.ipynb b/Notebooks/eval_precipitation.ipynb
index e1f85e5..75a37e2 100644
--- a/Notebooks/eval_precipitation.ipynb
+++ b/Notebooks/eval_precipitation.ipynb
@@ -1505,7 +1505,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.10"
+   "version": "3.7.12"
   }
  },
  "nbformat": 4,
diff --git a/Notebooks/eval_sensitivity.ipynb b/Notebooks/eval_sensitivity.ipynb
index 6ab07d1..bd834be 100644
--- a/Notebooks/eval_sensitivity.ipynb
+++ b/Notebooks/eval_sensitivity.ipynb
@@ -550,7 +550,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.10"
+   "version": "3.7.12"
   }
  },
  "nbformat": 4,
diff --git a/Notebooks/eval_temperature.ipynb b/Notebooks/eval_temperature.ipynb
index 6422cb5..ffa0142 100644
--- a/Notebooks/eval_temperature.ipynb
+++ b/Notebooks/eval_temperature.ipynb
@@ -1219,7 +1219,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.10"
+   "version": "3.7.12"
   }
  },
  "nbformat": 4,
-- 
GitLab