Preparing figures for InformA seminar.

971e181e · Frisinghelli Daniel · a0a232e0 · 971e181e
Commit 971e181e authored 3 years ago by Frisinghelli Daniel
--- a/Notebooks/eval_temperature.ipynb
+++ b/Notebooks/eval_temperature.ipynb
@@ -36,11 +36,22 @@
    "MODEL = 'USegNet'\n",
    "PPREDICTORS = 'ztuvq'\n",
    "PLEVELS = ['500', '850']\n",
-    "SPREDICTORS = 'pt2'\n",
+    "SPREDICTORS = 'p'\n",
    "DEM = 'dem'\n",
    "DOY = 'doy'"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "686a7c1d-e71d-4954-8d79-26b9a84f648e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# mapping from predictands to variable names\n",
+    "NAMES = {'tasmin': 'minimum temperature', 'tasmax': 'maximum temperature', 'pr': 'precipitation'}"
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "4d5d12c5-50fd-4c5c-9240-c3df78e49b44",
@@ -145,7 +156,7 @@
   "id": "ddebdf9f-862c-461e-aa57-cd344d54eee9",
   "metadata": {},
   "source": [
-    "## Model validation"
+    "## Model validation: temperature"
   ]
  },
  {
@@ -153,7 +164,7 @@
   "id": "5e8be24e-8ca2-4582-98c0-b56c6db289d2",
   "metadata": {},
   "source": [
-    "### Overall bias"
+    "### Bias"
   ]
  },
  {
@@ -161,7 +172,7 @@
   "id": "a4f7177c-7d09-401f-957b-0e493b9ef5d0",
   "metadata": {},
   "source": [
-    "Calculate average bias over entire reference period:"
+    "Calculate yearly average bias over entire reference period:"
   ]
  },
  {
@@ -171,13 +182,12 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "# average bias over reference period\n",
-    "y_pred_avg = y_pred.mean(dim='time')\n",
-    "y_true_avg = y_true.mean(dim='time')\n",
-    "bias = y_pred_avg - y_true_avg\n",
-    "# bias = (y_pred - y_true).mean()\n",
+    "# yearly average bias over reference period\n",
+    "y_pred_yearly_avg = y_pred.groupby('time.year').mean(dim='time')\n",
+    "y_true_yearly_avg = y_true.groupby('time.year').mean(dim='time')\n",
+    "bias_yearly_avg = y_pred_yearly_avg - y_true_yearly_avg\n",
    "for var in bias:\n",
-    "    print('Overall average bias {}: {:.2f}'.format(var, bias[var].mean().item()))"
+    "    print('Yearly average bias {}: {:.2f}'.format(var, bias_yearly_avg[var].mean().item()))"
   ]
  },
  {
@@ -188,10 +198,9 @@
   "outputs": [],
   "source": [
    "# mean absolute error over reference period\n",
-    "mae = np.abs(y_pred_avg - y_true_avg).mean()\n",
-    "# mae = np.abs(y_pred - y_true).mean()\n",
+    "mae_avg = np.abs(y_pred_yearly_avg - y_true_yearly_avg).mean()\n",
    "for var in mae:\n",
-    "    print('Mean absolute error {}: {:.2f}'.format(var, mae[var].item()))"
+    "    print('Yearly average mean absolute error {}: {:.2f}'.format(var, mae[var].item()))"
   ]
  },
  {
@@ -202,8 +211,7 @@
   "outputs": [],
   "source": [
    "# root mean squared error over reference period\n",
-    "rmse = ((y_pred_avg - y_true_avg) ** 2).mean()\n",
-    "# rmse = ((y_pred - y_true) ** 2).mean()\n",
+    "rmse_avg = ((y_pred_yearly_avg - y_true_yearly_avg) ** 2).mean()\n",
    "for var in rmse:\n",
    "    print('Root mean squared error {}: {:.2f}'.format(var, rmse[var].item()))"
   ]
@@ -216,13 +224,14 @@
   "outputs": [],
   "source": [
    "# Pearson's correlation coefficient over reference period\n",
-    "for var in y_pred_avg:\n",
-    "    y_p = y_pred_avg[var].values[~np.isnan(y_pred_avg[var])]\n",
-    "    # y_p = y_pred[var].values[~np.isnan(y_pred[var])]\n",
-    "    y_t = y_true_avg[var].values[~np.isnan(y_true_avg[var])]\n",
-    "    # y_t = y_true[var].values[~np.isnan(y_true[var])]\n",
-    "    r, _ = stats.pearsonr(y_p, y_t)\n",
-    "    print('Pearson correlation for {}: {:.2f}'.format(var, r))"
+    "for var in y_pred_yearly_avg:\n",
+    "    correlations = []\n",
+    "    for year in y_pred_yearly_avg.year:\n",
+    "        y_p = y_pred_yearly_avg[var].sel(year=year).values        \n",
+    "        y_t = y_true_yearly_avg[var].sel(year=year).values\n",
+    "        r, _ = stats.pearsonr(y_p[~np.isnan(y_p)], y_t[~np.isnan(y_t)])\n",
+    "        correlations.append(r)\n",
+    "print('Yearly average Pearson correlation coefficient for {}: {:.2f}'.format(var, np.asarray(r).mean()))"
   ]
  },
  {
@@ -233,16 +242,47 @@
   "outputs": [],
   "source": [
    "# plot average of observation, prediction, and bias\n",
-    "fig, axes = plt.subplots(len(y_pred_avg.data_vars), 3, figsize=(24, len(y_pred_avg.data_vars) * 6), sharex=True)\n",
-    "axes = axes.reshape(len(y_pred_avg.data_vars), -1)\n",
-    "for i, var in enumerate(y_pred_avg):\n",
-    "    for ds, ax in zip([y_true_avg, y_pred_avg, bias], axes[i, ...]):\n",
-    "        ds[var].plot(ax=ax)\n",
-    "\n",
+    "fig, axes = plt.subplots(len(y_pred_yearly_avg.data_vars), 3, figsize=(24, len(y_pred_yearly_avg.data_vars) * 6),\n",
+    "                         sharex=True, sharey=True)\n",
+    "axes = axes.reshape(len(y_pred_yearly_avg.data_vars), -1)\n",
+    "for i, var in enumerate(y_pred_yearly_avg):\n",
+    "    for ds, ax in zip([y_true_yearly_avg, y_pred_yearly_avg, bias_yearly_avg], axes[i, ...]):\n",
+    "        if ds is bias_yearly_avg:\n",
+    "            ds = ds[var].mean(dim='year')\n",
+    "            im2 = ax.imshow(ds.values, origin='lower', cmap='RdBu_r', vmin=-2, vmax=2)\n",
+    "            ax.text(x=ds.shape[0] - 2, y=2, s='Average: {:.2f}°C'.format(ds.mean().item()), fontsize=14, ha='right')\n",
+    "        else:\n",
+    "            im1 = ax.imshow(ds[var].mean(dim='year').values, origin='lower', cmap='RdYlBu_r', vmin=-15, vmax=15)\n",
+    "        \n",
    "# set titles\n",
-    "axes[0, 0].set_title('Observed');\n",
-    "axes[0, 1].set_title('Predicted');\n",
-    "axes[0, 2].set_title('Difference');"
+    "axes[0, 0].set_title('Observed', fontsize=16);\n",
+    "axes[0, 1].set_title('Predicted', fontsize=16);\n",
+    "axes[0, 2].set_title('Bias', fontsize=16);\n",
+    "\n",
+    "# adjust axes\n",
+    "for ax in axes.flat:\n",
+    "    ax.axes.get_xaxis().set_ticklabels([])\n",
+    "    ax.axes.get_xaxis().set_ticks([])\n",
+    "    ax.axes.get_yaxis().set_ticklabels([])\n",
+    "    ax.axes.get_yaxis().set_ticks([])\n",
+    "    ax.axes.axis('tight')\n",
+    "    ax.set_xlabel('')\n",
+    "    ax.set_ylabel('')\n",
+    "\n",
+    "# adjust figure\n",
+    "fig.suptitle('Average {}: 1991 - 2010'.format(NAMES[PREDICTAND]), fontsize=20);\n",
+    "fig.subplots_adjust(hspace=0, wspace=0, top=0.85)\n",
+    "\n",
+    "# add colorbar for bias\n",
+    "axes = axes.flatten()\n",
+    "cbar_ax = fig.add_axes([axes[-1].get_position().x1 + 0.01, axes[-1].get_position().y0,\n",
+    "                        0.01, axes[-1].get_position().y1 - axes[-1].get_position().y0])\n",
+    "cbar = fig.colorbar(im2, cax=cbar_ax)\n",
+    "cbar.set_label(label='Bias / (°C)', fontsize=16)\n",
+    "cbar.ax.tick_params(labelsize=14)\n",
+    "\n",
+    "# save figure\n",
+    "fig.savefig('../Notebooks/Figures/{}_average_bias.png'.format(PREDICTAND), dpi=300, bbox_inches='tight')"
   ]
  },
  {
@@ -274,6 +314,19 @@
    "bias_snl = y_pred_snl - y_true_snl"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2d0d5a46-0652-4289-9e1c-6d47aafcfef0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# print average bias per season\n",
+    "for var in bias_snl.data_vars:\n",
+    "    for season in bias_snl.tasmin.season:\n",
+    "        print('Average bias of {} for season {}: {:.2f}'.format(var, season.values.item(), bias_snl[var].sel(season=season).mean().item()))"
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "c4232ae9-a557-4d61-8ab3-d7eda6201f98",
@@ -290,43 +343,47 @@
   "outputs": [],
   "source": [
    "# plot seasonal differences\n",
-    "fig, axes = plt.subplots(nrows=4, ncols=3, figsize=(14,12))\n",
-    "for i, season in enumerate(('DJF', 'MAM', 'JJA', 'SON')):\n",
-    "    y_true_snl[PREDICTAND].sel(season=season).plot.pcolormesh(\n",
-    "        ax=axes[i, 0], add_colorbar=True, extend='both')\n",
-    "\n",
-    "    y_pred_snl[PREDICTAND].sel(season=season).plot.pcolormesh(\n",
-    "        ax=axes[i, 1], add_colorbar=True, extend='both')\n",
+    "seasons = ('DJF', 'JJA')\n",
+    "fig, axes = plt.subplots(nrows=1, ncols=len(seasons) + 1, figsize=(24,8), sharex=True, sharey=True)\n",
+    "axes = axes.flatten()\n",
    "\n",
-    "    bias_snl[PREDICTAND].sel(season=season).plot.pcolormesh(\n",
-    "        ax=axes[i, 2], vmin=-1, vmax=1, add_colorbar=True,\n",
-    "        extend='both')\n",
+    "# plot annual average bias\n",
+    "ds = bias_yearly_avg[PREDICTAND].mean(dim='year')\n",
+    "axes[0].imshow(ds.values, origin='lower', cmap='RdBu_r', vmin=-2, vmax=2)\n",
+    "axes[0].set_title('Annual', fontsize=16);\n",
+    "axes[0].text(x=ds.shape[0] - 2, y=2, s='Average: {:.2f}°C'.format(ds.mean().item()), fontsize=14, ha='right')\n",
    "\n",
-    "    axes[i, 0].set_ylabel(season)\n",
-    "    axes[i, 1].set_ylabel('')\n",
-    "    axes[i, 2].set_ylabel('')\n",
+    "# plot seasonal average bias\n",
+    "for ax, season in zip(axes[1:], seasons):\n",
+    "    ds = bias_snl[PREDICTAND].sel(season=season)\n",
+    "    ax.imshow(ds.values, origin='lower', cmap='RdBu_r', vmin=-2, vmax=2)\n",
+    "    ax.set_title(season, fontsize=16);\n",
+    "    ax.text(x=ds.shape[0] - 2, y=2, s='Average: {:.2f}°C'.format(ds.mean().item()), fontsize=14, ha='right')\n",
    "\n",
+    "# adjust axes\n",
    "for ax in axes.flat:\n",
    "    ax.axes.get_xaxis().set_ticklabels([])\n",
+    "    ax.axes.get_xaxis().set_ticks([])\n",
    "    ax.axes.get_yaxis().set_ticklabels([])\n",
+    "    ax.axes.get_yaxis().set_ticks([])\n",
    "    ax.axes.axis('tight')\n",
    "    ax.set_xlabel('')\n",
+    "    ax.set_ylabel('')\n",
    "\n",
-    "axes[0, 0].set_title('Observed')\n",
-    "axes[0, 1].set_title('Predicted')\n",
-    "axes[0, 2].set_title('Difference')\n",
+    "# adjust figure\n",
+    "fig.suptitle('Average bias of {}: 1991 - 2010'.format(NAMES[PREDICTAND]), fontsize=20);\n",
+    "fig.subplots_adjust(hspace=0, wspace=0, top=0.85)\n",
    "\n",
-    "plt.tight_layout()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "ec2f6293-384e-41b6-9e79-75bd852df0c0",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# TODO: Annual cylce"
+    "# add colorbar for bias\n",
+    "axes = axes.flatten()\n",
+    "cbar_ax = fig.add_axes([axes[-1].get_position().x1 + 0.01, axes[-1].get_position().y0,\n",
+    "                        0.01, axes[-1].get_position().y1 - axes[-1].get_position().y0])\n",
+    "cbar = fig.colorbar(im2, cax=cbar_ax)\n",
+    "cbar.set_label(label='Bias / (°C)', fontsize=16)\n",
+    "cbar.ax.tick_params(labelsize=14)\n",
+    "\n",
+    "# save figure\n",
+    "fig.savefig('../Notebooks/Figures/{}_average_bias_seasonal.png'.format(PREDICTAND), dpi=300, bbox_inches='tight')"
   ]
  },
  {

 %% Cell type:markdown id:f15afea1-9ea4-4201-bdd7-32ae377db6a9 tags:

 # Evaluate ERA-5 downscaling

 %% Cell type:markdown id:7d2c4c86-18e4-44c4-bdf7-0e8249614749 tags:

 We used **1981-1991 as training** period and **1991-2010 as reference** period. The results shown in this notebook are based on the model predictions on the reference period.

 %% Cell type:markdown id:4186c89c-b55b-4559-a818-8b712baaf44e tags:

 Define the predictand and the model to evaluate:

 %% Cell type:code id:bb24b6ed-2d0a-44e0-b9a9-abdcb2a8294d tags:

 ``` python
 # define the model parameters
 PREDICTAND = 'tasmin'
 MODEL = 'USegNet'
 PPREDICTORS = 'ztuvq'
 PLEVELS = ['500', '850']
-SPREDICTORS = 'pt2'
+SPREDICTORS = 'p'
 DEM = 'dem'
 DOY = 'doy'
 ```

+%% Cell type:code id:686a7c1d-e71d-4954-8d79-26b9a84f648e tags:
+
+``` python
+# mapping from predictands to variable names
+NAMES = {'tasmin': 'minimum temperature', 'tasmax': 'maximum temperature', 'pr': 'precipitation'}
+```
+
 %% Cell type:markdown id:4d5d12c5-50fd-4c5c-9240-c3df78e49b44 tags:

 ### Imports

 %% Cell type:code id:1afb0fab-5d2a-4875-9032-29b99c6dec89 tags:

 ``` python
 # builtins
 import datetime
 import warnings

 # externals
 import xarray as xr
 import numpy as np
 import matplotlib.pyplot as plt
 import scipy.stats as stats
 from IPython.display import Image

 # locals
 from climax.main.io import ERA5_PATH, OBS_PATH, TARGET_PATH
 from pysegcnn.core.utils import search_files
 ```

 %% Cell type:markdown id:2c2d0b1d-9630-4cfe-ae9d-35b05b94a1a5 tags:

 ### Model architecture

 %% Cell type:code id:f98e1c9f-0581-43ea-a569-fd05fdaf36c1 tags:

 ``` python
 Image("./Figures/architecture.png", width=900, height=400)
 ```

 %% Cell type:markdown id:f47caa41-9380-4c02-8785-4febcf2cb2d0 tags:

 ### Load datasets

 %% Cell type:code id:020dfe33-ce3c-467f-ad0a-295cc338b1a9 tags:

 ``` python
 # model predictions and observations NetCDF
 y_pred = TARGET_PATH.joinpath(PREDICTAND, '_'.join([MODEL, PREDICTAND, PPREDICTORS, *PLEVELS, SPREDICTORS, DEM, DOY]) + '.nc')
 if PREDICTAND == 'tas':
    # read both tasmax and tasmin
    tasmax = xr.open_dataset(search_files(OBS_PATH.joinpath('tasmax'), '.nc$').pop())
    tasmin = xr.open_dataset(search_files(OBS_PATH.joinpath('tasmin'), '.nc$').pop())
    y_true = xr.merge([tasmax, tasmin])
 else:
    y_true = xr.open_dataset(search_files(OBS_PATH.joinpath(PREDICTAND), '.nc$').pop())
 ```

 %% Cell type:code id:1dc2a386-d63b-4c6a-8e63-00365927559d tags:

 ``` python
 # load datasets
 y_pred = xr.open_dataset(y_pred)
 y_true = y_true.sel(time=y_pred.time)  # subset to time period covered by predictions
 ```

 %% Cell type:code id:966d85fb-9185-408f-ac2b-1e4ca829ccd1 tags:

 ``` python
 # align datasets and mask missing values in model predictions
 y_true, y_pred = xr.align(y_true, y_pred, join='override')
 y_pred = y_pred.where(~np.isnan(y_true), other=np.nan)
 ```

 %% Cell type:markdown id:ddebdf9f-862c-461e-aa57-cd344d54eee9 tags:

-## Model validation
+## Model validation: temperature

 %% Cell type:markdown id:5e8be24e-8ca2-4582-98c0-b56c6db289d2 tags:

-### Overall bias
+### Bias

 %% Cell type:markdown id:a4f7177c-7d09-401f-957b-0e493b9ef5d0 tags:

-Calculate average bias over entire reference period:
+Calculate yearly average bias over entire reference period:

 %% Cell type:code id:746bf95f-a78b-4da8-a063-1fa48e3c5da8 tags:

 ``` python
-# average bias over reference period
-y_pred_avg = y_pred.mean(dim='time')
-y_true_avg = y_true.mean(dim='time')
-bias = y_pred_avg - y_true_avg
-# bias = (y_pred - y_true).mean()
+# yearly average bias over reference period
+y_pred_yearly_avg = y_pred.groupby('time.year').mean(dim='time')
+y_true_yearly_avg = y_true.groupby('time.year').mean(dim='time')
+bias_yearly_avg = y_pred_yearly_avg - y_true_yearly_avg
 for var in bias:
-    print('Overall average bias {}: {:.2f}'.format(var, bias[var].mean().item()))
+    print('Yearly average bias {}: {:.2f}'.format(var, bias_yearly_avg[var].mean().item()))
 ```

 %% Cell type:code id:2ef19dce-29ac-4f6f-9999-e67745a4afd1 tags:

 ``` python
 # mean absolute error over reference period
-mae = np.abs(y_pred_avg - y_true_avg).mean()
-# mae = np.abs(y_pred - y_true).mean()
+mae_avg = np.abs(y_pred_yearly_avg - y_true_yearly_avg).mean()
 for var in mae:
-    print('Mean absolute error {}: {:.2f}'.format(var, mae[var].item()))
+    print('Yearly average mean absolute error {}: {:.2f}'.format(var, mae[var].item()))
 ```

 %% Cell type:code id:4c4dd156-f763-482c-84e6-c4329bfd3fe4 tags:

 ``` python
 # root mean squared error over reference period
-rmse = ((y_pred_avg - y_true_avg) ** 2).mean()
-# rmse = ((y_pred - y_true) ** 2).mean()
+rmse_avg = ((y_pred_yearly_avg - y_true_yearly_avg) ** 2).mean()
 for var in rmse:
    print('Root mean squared error {}: {:.2f}'.format(var, rmse[var].item()))
 ```

 %% Cell type:code id:ed54a50e-41c2-4a7f-9839-05357996f0c4 tags:

 ``` python
 # Pearson's correlation coefficient over reference period
-for var in y_pred_avg:
-    y_p = y_pred_avg[var].values[~np.isnan(y_pred_avg[var])]
-    # y_p = y_pred[var].values[~np.isnan(y_pred[var])]
-    y_t = y_true_avg[var].values[~np.isnan(y_true_avg[var])]
-    # y_t = y_true[var].values[~np.isnan(y_true[var])]
-    r, _ = stats.pearsonr(y_p, y_t)
-    print('Pearson correlation for {}: {:.2f}'.format(var, r))
+for var in y_pred_yearly_avg:
+    correlations = []
+    for year in y_pred_yearly_avg.year:
+        y_p = y_pred_yearly_avg[var].sel(year=year).values
+        y_t = y_true_yearly_avg[var].sel(year=year).values
+        r, _ = stats.pearsonr(y_p[~np.isnan(y_p)], y_t[~np.isnan(y_t)])
+        correlations.append(r)
+print('Yearly average Pearson correlation coefficient for {}: {:.2f}'.format(var, np.asarray(r).mean()))
 ```

 %% Cell type:code id:760f86ce-9e04-4938-b24f-d2819fbf622e tags:

 ``` python
 # plot average of observation, prediction, and bias
-fig, axes = plt.subplots(len(y_pred_avg.data_vars), 3, figsize=(24, len(y_pred_avg.data_vars) * 6), sharex=True)
-axes = axes.reshape(len(y_pred_avg.data_vars), -1)
-for i, var in enumerate(y_pred_avg):
-    for ds, ax in zip([y_true_avg, y_pred_avg, bias], axes[i, ...]):
-        ds[var].plot(ax=ax)
+fig, axes = plt.subplots(len(y_pred_yearly_avg.data_vars), 3, figsize=(24, len(y_pred_yearly_avg.data_vars) * 6),
+                         sharex=True, sharey=True)
+axes = axes.reshape(len(y_pred_yearly_avg.data_vars), -1)
+for i, var in enumerate(y_pred_yearly_avg):
+    for ds, ax in zip([y_true_yearly_avg, y_pred_yearly_avg, bias_yearly_avg], axes[i, ...]):
+        if ds is bias_yearly_avg:
+            ds = ds[var].mean(dim='year')
+            im2 = ax.imshow(ds.values, origin='lower', cmap='RdBu_r', vmin=-2, vmax=2)
+            ax.text(x=ds.shape[0] - 2, y=2, s='Average: {:.2f}°C'.format(ds.mean().item()), fontsize=14, ha='right')
+        else:
+            im1 = ax.imshow(ds[var].mean(dim='year').values, origin='lower', cmap='RdYlBu_r', vmin=-15, vmax=15)

 # set titles
-axes[0, 0].set_title('Observed');
-axes[0, 1].set_title('Predicted');
-axes[0, 2].set_title('Difference');
+axes[0, 0].set_title('Observed', fontsize=16);
+axes[0, 1].set_title('Predicted', fontsize=16);
+axes[0, 2].set_title('Bias', fontsize=16);
+
+# adjust axes
+for ax in axes.flat:
+    ax.axes.get_xaxis().set_ticklabels([])
+    ax.axes.get_xaxis().set_ticks([])
+    ax.axes.get_yaxis().set_ticklabels([])
+    ax.axes.get_yaxis().set_ticks([])
+    ax.axes.axis('tight')
+    ax.set_xlabel('')
+    ax.set_ylabel('')
+
+# adjust figure
+fig.suptitle('Average {}: 1991 - 2010'.format(NAMES[PREDICTAND]), fontsize=20);
+fig.subplots_adjust(hspace=0, wspace=0, top=0.85)
+
+# add colorbar for bias
+axes = axes.flatten()
+cbar_ax = fig.add_axes([axes[-1].get_position().x1 + 0.01, axes[-1].get_position().y0,
+                        0.01, axes[-1].get_position().y1 - axes[-1].get_position().y0])
+cbar = fig.colorbar(im2, cax=cbar_ax)
+cbar.set_label(label='Bias / (°C)', fontsize=16)
+cbar.ax.tick_params(labelsize=14)
+
+# save figure
+fig.savefig('../Notebooks/Figures/{}_average_bias.png'.format(PREDICTAND), dpi=300, bbox_inches='tight')
 ```

 %% Cell type:markdown id:aa4ef730-5a9d-40dc-a318-2f43a4cf1cd2 tags:

 ### Seasonal bias

 %% Cell type:markdown id:eda455a2-e8ee-4644-bb85-b0cf76acd11a tags:

 Calculate seasonal bias:

 %% Cell type:code id:24aadff5-b19d-4f4b-a4c5-32ee656e64cd tags:

 ``` python
 # group data by season: (DJF, MAM, JJA, SON)
 y_true_snl = y_true.groupby('time.season').mean(dim='time')
 y_pred_snl = y_pred.groupby('time.season').mean(dim='time')
 bias_snl = y_pred_snl - y_true_snl
 ```

+%% Cell type:code id:2d0d5a46-0652-4289-9e1c-6d47aafcfef0 tags:
+
+``` python
+# print average bias per season
+for var in bias_snl.data_vars:
+    for season in bias_snl.tasmin.season:
+        print('Average bias of {} for season {}: {:.2f}'.format(var, season.values.item(), bias_snl[var].sel(season=season).mean().item()))
+```
+
 %% Cell type:markdown id:c4232ae9-a557-4d61-8ab3-d7eda6201f98 tags:

 Plot seasonal differences, taken from the [xarray documentation](xarray.pydata.org/en/stable/examples/monthly-means.html).

 %% Cell type:code id:b39a6cc0-614c-452d-bb85-bc10e5179948 tags:

 ``` python
 # plot seasonal differences
-fig, axes = plt.subplots(nrows=4, ncols=3, figsize=(14,12))
-for i, season in enumerate(('DJF', 'MAM', 'JJA', 'SON')):
-    y_true_snl[PREDICTAND].sel(season=season).plot.pcolormesh(
-        ax=axes[i, 0], add_colorbar=True, extend='both')
-
-    y_pred_snl[PREDICTAND].sel(season=season).plot.pcolormesh(
-        ax=axes[i, 1], add_colorbar=True, extend='both')
-
-    bias_snl[PREDICTAND].sel(season=season).plot.pcolormesh(
-        ax=axes[i, 2], vmin=-1, vmax=1, add_colorbar=True,
-        extend='both')
-
-    axes[i, 0].set_ylabel(season)
-    axes[i, 1].set_ylabel('')
-    axes[i, 2].set_ylabel('')
+seasons = ('DJF', 'JJA')
+fig, axes = plt.subplots(nrows=1, ncols=len(seasons) + 1, figsize=(24,8), sharex=True, sharey=True)
+axes = axes.flatten()
+
+# plot annual average bias
+ds = bias_yearly_avg[PREDICTAND].mean(dim='year')
+axes[0].imshow(ds.values, origin='lower', cmap='RdBu_r', vmin=-2, vmax=2)
+axes[0].set_title('Annual', fontsize=16);
+axes[0].text(x=ds.shape[0] - 2, y=2, s='Average: {:.2f}°C'.format(ds.mean().item()), fontsize=14, ha='right')
+
+# plot seasonal average bias
+for ax, season in zip(axes[1:], seasons):
+    ds = bias_snl[PREDICTAND].sel(season=season)
+    ax.imshow(ds.values, origin='lower', cmap='RdBu_r', vmin=-2, vmax=2)
+    ax.set_title(season, fontsize=16);
+    ax.text(x=ds.shape[0] - 2, y=2, s='Average: {:.2f}°C'.format(ds.mean().item()), fontsize=14, ha='right')

+# adjust axes
 for ax in axes.flat:
    ax.axes.get_xaxis().set_ticklabels([])
+    ax.axes.get_xaxis().set_ticks([])
    ax.axes.get_yaxis().set_ticklabels([])
+    ax.axes.get_yaxis().set_ticks([])
    ax.axes.axis('tight')
    ax.set_xlabel('')
+    ax.set_ylabel('')

-axes[0, 0].set_title('Observed')
-axes[0, 1].set_title('Predicted')
-axes[0, 2].set_title('Difference')
-
-plt.tight_layout()
-```
+# adjust figure
+fig.suptitle('Average bias of {}: 1991 - 2010'.format(NAMES[PREDICTAND]), fontsize=20);
+fig.subplots_adjust(hspace=0, wspace=0, top=0.85)
+
+# add colorbar for bias
+axes = axes.flatten()
+cbar_ax = fig.add_axes([axes[-1].get_position().x1 + 0.01, axes[-1].get_position().y0,
+                        0.01, axes[-1].get_position().y1 - axes[-1].get_position().y0])
+cbar = fig.colorbar(im2, cax=cbar_ax)
+cbar.set_label(label='Bias / (°C)', fontsize=16)
+cbar.ax.tick_params(labelsize=14)

-%% Cell type:code id:ec2f6293-384e-41b6-9e79-75bd852df0c0 tags:
-
-``` python
-# TODO: Annual cylce
+# save figure
+fig.savefig('../Notebooks/Figures/{}_average_bias_seasonal.png'.format(PREDICTAND), dpi=300, bbox_inches='tight')
 ```

 %% Cell type:markdown id:c70b369d-2d16-42e3-9300-4a18757ad1b2 tags:

 ### Bias of extreme values

 %% Cell type:code id:4acfc3f2-20ed-498c-ab35-f392ae0e64f9 tags:

 ``` python
 # TODO: smooth quantiles
 ```

 %% Cell type:code id:33198267-8eb6-4b84-bb6d-c903b27ccbc5 tags:

 ``` python
 # percentiles of interest
 percentiles = [0.01, 0.02, 0.98, 0.99]
 ```

 %% Cell type:code id:51137d23-a380-4d48-a005-fd1edaf554eb tags:

 ``` python
 # calculate percentiles over reference period
 with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    y_pred_dist = y_pred.quantile(q=percentiles, dim='time')
    y_true_dist = y_true.quantile(q=percentiles, dim='time')
 ```

 %% Cell type:code id:b461c774-c5fc-4a50-8609-34a7e7674a34 tags:

 ``` python
 # calculate bias in each percentile over entire reference period
 bias_dist = y_pred_dist - y_true_dist
 ```

 %% Cell type:code id:bd2ec314-87ed-407f-af8b-5e2c6785e9cc tags:

 ``` python
 # calculate correlation coefficient for extreme values
 for var in y_pred_dist:
    for q in percentiles:
        y_p = y_pred_dist[var].sel(quantile=q).values[~np.isnan(y_pred_dist[var].sel(quantile=q))]
        y_t = y_true_dist[var].sel(quantile=q).values[~np.isnan(y_true_dist[var].sel(quantile=q))]
        r, _ = stats.pearsonr(y_p, y_t)
        print('Pearson correlation for {}, q={:.2f}: R={:.2f}'.format(var, q, r))
 ```

 %% Cell type:code id:04a1b4fc-8fc0-4e1e-adbe-bf1eaafeac5d tags:

 ``` python
 # plot bias in each percentile
 fig, axes = plt.subplots(len(y_pred_dist.data_vars), len(percentiles), sharex=True, sharey=True, figsize=(32, 6))
 axes = axes.reshape(len(y_pred_dist.data_vars), -1)
 for ax, var in zip(axes, y_pred_dist):
    # iterate over percentiles
    for axis, q in zip(ax, percentiles):
        ds = bias_dist.sel(quantile=q).to_array()
        ds.plot(ax=axis, vmin=-2, vmax=2, cmap='RdBu_r')
        axis.text(x=bias_dist.x[-1], y=bias_dist.y[0], s='Avg: {:.2f}'.format(ds.mean().item()), ha='right', va='bottom')
 ```