Skip to content
Snippets Groups Projects
Commit 971e181e authored by Frisinghelli Daniel's avatar Frisinghelli Daniel
Browse files

Preparing figures for InformA seminar.

parent a0a232e0
No related branches found
No related tags found
No related merge requests found
%% Cell type:markdown id:f15afea1-9ea4-4201-bdd7-32ae377db6a9 tags:
# Evaluate ERA-5 downscaling
%% Cell type:markdown id:7d2c4c86-18e4-44c4-bdf7-0e8249614749 tags:
We used **1981-1991 as training** period and **1991-2010 as reference** period. The results shown in this notebook are based on the model predictions on the reference period.
%% Cell type:markdown id:4186c89c-b55b-4559-a818-8b712baaf44e tags:
Define the predictand and the model to evaluate:
%% Cell type:code id:bb24b6ed-2d0a-44e0-b9a9-abdcb2a8294d tags:
``` python
# define the model parameters
PREDICTAND = 'tasmin'
MODEL = 'USegNet'
PPREDICTORS = 'ztuvq'
PLEVELS = ['500', '850']
SPREDICTORS = 'pt2'
SPREDICTORS = 'p'
DEM = 'dem'
DOY = 'doy'
```
%% Cell type:code id:686a7c1d-e71d-4954-8d79-26b9a84f648e tags:
``` python
# mapping from predictands to variable names
NAMES = {'tasmin': 'minimum temperature', 'tasmax': 'maximum temperature', 'pr': 'precipitation'}
```
%% Cell type:markdown id:4d5d12c5-50fd-4c5c-9240-c3df78e49b44 tags:
### Imports
%% Cell type:code id:1afb0fab-5d2a-4875-9032-29b99c6dec89 tags:
``` python
# builtins
import datetime
import warnings
# externals
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from IPython.display import Image
# locals
from climax.main.io import ERA5_PATH, OBS_PATH, TARGET_PATH
from pysegcnn.core.utils import search_files
```
%% Cell type:markdown id:2c2d0b1d-9630-4cfe-ae9d-35b05b94a1a5 tags:
### Model architecture
%% Cell type:code id:f98e1c9f-0581-43ea-a569-fd05fdaf36c1 tags:
``` python
Image("./Figures/architecture.png", width=900, height=400)
```
%% Cell type:markdown id:f47caa41-9380-4c02-8785-4febcf2cb2d0 tags:
### Load datasets
%% Cell type:code id:020dfe33-ce3c-467f-ad0a-295cc338b1a9 tags:
``` python
# model predictions and observations NetCDF
y_pred = TARGET_PATH.joinpath(PREDICTAND, '_'.join([MODEL, PREDICTAND, PPREDICTORS, *PLEVELS, SPREDICTORS, DEM, DOY]) + '.nc')
if PREDICTAND == 'tas':
# read both tasmax and tasmin
tasmax = xr.open_dataset(search_files(OBS_PATH.joinpath('tasmax'), '.nc$').pop())
tasmin = xr.open_dataset(search_files(OBS_PATH.joinpath('tasmin'), '.nc$').pop())
y_true = xr.merge([tasmax, tasmin])
else:
y_true = xr.open_dataset(search_files(OBS_PATH.joinpath(PREDICTAND), '.nc$').pop())
```
%% Cell type:code id:1dc2a386-d63b-4c6a-8e63-00365927559d tags:
``` python
# load datasets
y_pred = xr.open_dataset(y_pred)
y_true = y_true.sel(time=y_pred.time) # subset to time period covered by predictions
```
%% Cell type:code id:966d85fb-9185-408f-ac2b-1e4ca829ccd1 tags:
``` python
# align datasets and mask missing values in model predictions
y_true, y_pred = xr.align(y_true, y_pred, join='override')
y_pred = y_pred.where(~np.isnan(y_true), other=np.nan)
```
%% Cell type:markdown id:ddebdf9f-862c-461e-aa57-cd344d54eee9 tags:
## Model validation
## Model validation: temperature
%% Cell type:markdown id:5e8be24e-8ca2-4582-98c0-b56c6db289d2 tags:
### Overall bias
### Bias
%% Cell type:markdown id:a4f7177c-7d09-401f-957b-0e493b9ef5d0 tags:
Calculate average bias over entire reference period:
Calculate yearly average bias over entire reference period:
%% Cell type:code id:746bf95f-a78b-4da8-a063-1fa48e3c5da8 tags:
``` python
# average bias over reference period
y_pred_avg = y_pred.mean(dim='time')
y_true_avg = y_true.mean(dim='time')
bias = y_pred_avg - y_true_avg
# bias = (y_pred - y_true).mean()
# yearly average bias over reference period
y_pred_yearly_avg = y_pred.groupby('time.year').mean(dim='time')
y_true_yearly_avg = y_true.groupby('time.year').mean(dim='time')
bias_yearly_avg = y_pred_yearly_avg - y_true_yearly_avg
for var in bias:
print('Overall average bias {}: {:.2f}'.format(var, bias[var].mean().item()))
print('Yearly average bias {}: {:.2f}'.format(var, bias_yearly_avg[var].mean().item()))
```
%% Cell type:code id:2ef19dce-29ac-4f6f-9999-e67745a4afd1 tags:
``` python
# mean absolute error over reference period
mae = np.abs(y_pred_avg - y_true_avg).mean()
# mae = np.abs(y_pred - y_true).mean()
mae_avg = np.abs(y_pred_yearly_avg - y_true_yearly_avg).mean()
for var in mae:
print('Mean absolute error {}: {:.2f}'.format(var, mae[var].item()))
print('Yearly average mean absolute error {}: {:.2f}'.format(var, mae[var].item()))
```
%% Cell type:code id:4c4dd156-f763-482c-84e6-c4329bfd3fe4 tags:
``` python
# root mean squared error over reference period
rmse = ((y_pred_avg - y_true_avg) ** 2).mean()
# rmse = ((y_pred - y_true) ** 2).mean()
rmse_avg = ((y_pred_yearly_avg - y_true_yearly_avg) ** 2).mean()
for var in rmse:
print('Root mean squared error {}: {:.2f}'.format(var, rmse[var].item()))
```
%% Cell type:code id:ed54a50e-41c2-4a7f-9839-05357996f0c4 tags:
``` python
# Pearson's correlation coefficient over reference period
for var in y_pred_avg:
y_p = y_pred_avg[var].values[~np.isnan(y_pred_avg[var])]
# y_p = y_pred[var].values[~np.isnan(y_pred[var])]
y_t = y_true_avg[var].values[~np.isnan(y_true_avg[var])]
# y_t = y_true[var].values[~np.isnan(y_true[var])]
r, _ = stats.pearsonr(y_p, y_t)
print('Pearson correlation for {}: {:.2f}'.format(var, r))
for var in y_pred_yearly_avg:
correlations = []
for year in y_pred_yearly_avg.year:
y_p = y_pred_yearly_avg[var].sel(year=year).values
y_t = y_true_yearly_avg[var].sel(year=year).values
r, _ = stats.pearsonr(y_p[~np.isnan(y_p)], y_t[~np.isnan(y_t)])
correlations.append(r)
print('Yearly average Pearson correlation coefficient for {}: {:.2f}'.format(var, np.asarray(r).mean()))
```
%% Cell type:code id:760f86ce-9e04-4938-b24f-d2819fbf622e tags:
``` python
# plot average of observation, prediction, and bias
fig, axes = plt.subplots(len(y_pred_avg.data_vars), 3, figsize=(24, len(y_pred_avg.data_vars) * 6), sharex=True)
axes = axes.reshape(len(y_pred_avg.data_vars), -1)
for i, var in enumerate(y_pred_avg):
for ds, ax in zip([y_true_avg, y_pred_avg, bias], axes[i, ...]):
ds[var].plot(ax=ax)
fig, axes = plt.subplots(len(y_pred_yearly_avg.data_vars), 3, figsize=(24, len(y_pred_yearly_avg.data_vars) * 6),
sharex=True, sharey=True)
axes = axes.reshape(len(y_pred_yearly_avg.data_vars), -1)
for i, var in enumerate(y_pred_yearly_avg):
for ds, ax in zip([y_true_yearly_avg, y_pred_yearly_avg, bias_yearly_avg], axes[i, ...]):
if ds is bias_yearly_avg:
ds = ds[var].mean(dim='year')
im2 = ax.imshow(ds.values, origin='lower', cmap='RdBu_r', vmin=-2, vmax=2)
ax.text(x=ds.shape[0] - 2, y=2, s='Average: {:.2f}°C'.format(ds.mean().item()), fontsize=14, ha='right')
else:
im1 = ax.imshow(ds[var].mean(dim='year').values, origin='lower', cmap='RdYlBu_r', vmin=-15, vmax=15)
# set titles
axes[0, 0].set_title('Observed');
axes[0, 1].set_title('Predicted');
axes[0, 2].set_title('Difference');
axes[0, 0].set_title('Observed', fontsize=16);
axes[0, 1].set_title('Predicted', fontsize=16);
axes[0, 2].set_title('Bias', fontsize=16);
# adjust axes
for ax in axes.flat:
ax.axes.get_xaxis().set_ticklabels([])
ax.axes.get_xaxis().set_ticks([])
ax.axes.get_yaxis().set_ticklabels([])
ax.axes.get_yaxis().set_ticks([])
ax.axes.axis('tight')
ax.set_xlabel('')
ax.set_ylabel('')
# adjust figure
fig.suptitle('Average {}: 1991 - 2010'.format(NAMES[PREDICTAND]), fontsize=20);
fig.subplots_adjust(hspace=0, wspace=0, top=0.85)
# add colorbar for bias
axes = axes.flatten()
cbar_ax = fig.add_axes([axes[-1].get_position().x1 + 0.01, axes[-1].get_position().y0,
0.01, axes[-1].get_position().y1 - axes[-1].get_position().y0])
cbar = fig.colorbar(im2, cax=cbar_ax)
cbar.set_label(label='Bias / (°C)', fontsize=16)
cbar.ax.tick_params(labelsize=14)
# save figure
fig.savefig('../Notebooks/Figures/{}_average_bias.png'.format(PREDICTAND), dpi=300, bbox_inches='tight')
```
%% Cell type:markdown id:aa4ef730-5a9d-40dc-a318-2f43a4cf1cd2 tags:
### Seasonal bias
%% Cell type:markdown id:eda455a2-e8ee-4644-bb85-b0cf76acd11a tags:
Calculate seasonal bias:
%% Cell type:code id:24aadff5-b19d-4f4b-a4c5-32ee656e64cd tags:
``` python
# group data by season: (DJF, MAM, JJA, SON)
y_true_snl = y_true.groupby('time.season').mean(dim='time')
y_pred_snl = y_pred.groupby('time.season').mean(dim='time')
bias_snl = y_pred_snl - y_true_snl
```
%% Cell type:code id:2d0d5a46-0652-4289-9e1c-6d47aafcfef0 tags:
``` python
# print average bias per season
for var in bias_snl.data_vars:
for season in bias_snl.tasmin.season:
print('Average bias of {} for season {}: {:.2f}'.format(var, season.values.item(), bias_snl[var].sel(season=season).mean().item()))
```
%% Cell type:markdown id:c4232ae9-a557-4d61-8ab3-d7eda6201f98 tags:
Plot seasonal differences, taken from the [xarray documentation](xarray.pydata.org/en/stable/examples/monthly-means.html).
%% Cell type:code id:b39a6cc0-614c-452d-bb85-bc10e5179948 tags:
``` python
# plot seasonal differences
fig, axes = plt.subplots(nrows=4, ncols=3, figsize=(14,12))
for i, season in enumerate(('DJF', 'MAM', 'JJA', 'SON')):
y_true_snl[PREDICTAND].sel(season=season).plot.pcolormesh(
ax=axes[i, 0], add_colorbar=True, extend='both')
y_pred_snl[PREDICTAND].sel(season=season).plot.pcolormesh(
ax=axes[i, 1], add_colorbar=True, extend='both')
bias_snl[PREDICTAND].sel(season=season).plot.pcolormesh(
ax=axes[i, 2], vmin=-1, vmax=1, add_colorbar=True,
extend='both')
axes[i, 0].set_ylabel(season)
axes[i, 1].set_ylabel('')
axes[i, 2].set_ylabel('')
seasons = ('DJF', 'JJA')
fig, axes = plt.subplots(nrows=1, ncols=len(seasons) + 1, figsize=(24,8), sharex=True, sharey=True)
axes = axes.flatten()
# plot annual average bias
ds = bias_yearly_avg[PREDICTAND].mean(dim='year')
axes[0].imshow(ds.values, origin='lower', cmap='RdBu_r', vmin=-2, vmax=2)
axes[0].set_title('Annual', fontsize=16);
axes[0].text(x=ds.shape[0] - 2, y=2, s='Average: {:.2f}°C'.format(ds.mean().item()), fontsize=14, ha='right')
# plot seasonal average bias
for ax, season in zip(axes[1:], seasons):
ds = bias_snl[PREDICTAND].sel(season=season)
ax.imshow(ds.values, origin='lower', cmap='RdBu_r', vmin=-2, vmax=2)
ax.set_title(season, fontsize=16);
ax.text(x=ds.shape[0] - 2, y=2, s='Average: {:.2f}°C'.format(ds.mean().item()), fontsize=14, ha='right')
# adjust axes
for ax in axes.flat:
ax.axes.get_xaxis().set_ticklabels([])
ax.axes.get_xaxis().set_ticks([])
ax.axes.get_yaxis().set_ticklabels([])
ax.axes.get_yaxis().set_ticks([])
ax.axes.axis('tight')
ax.set_xlabel('')
ax.set_ylabel('')
axes[0, 0].set_title('Observed')
axes[0, 1].set_title('Predicted')
axes[0, 2].set_title('Difference')
plt.tight_layout()
```
# adjust figure
fig.suptitle('Average bias of {}: 1991 - 2010'.format(NAMES[PREDICTAND]), fontsize=20);
fig.subplots_adjust(hspace=0, wspace=0, top=0.85)
# add colorbar for bias
axes = axes.flatten()
cbar_ax = fig.add_axes([axes[-1].get_position().x1 + 0.01, axes[-1].get_position().y0,
0.01, axes[-1].get_position().y1 - axes[-1].get_position().y0])
cbar = fig.colorbar(im2, cax=cbar_ax)
cbar.set_label(label='Bias / (°C)', fontsize=16)
cbar.ax.tick_params(labelsize=14)
%% Cell type:code id:ec2f6293-384e-41b6-9e79-75bd852df0c0 tags:
``` python
# TODO: Annual cylce
# save figure
fig.savefig('../Notebooks/Figures/{}_average_bias_seasonal.png'.format(PREDICTAND), dpi=300, bbox_inches='tight')
```
%% Cell type:markdown id:c70b369d-2d16-42e3-9300-4a18757ad1b2 tags:
### Bias of extreme values
%% Cell type:code id:4acfc3f2-20ed-498c-ab35-f392ae0e64f9 tags:
``` python
# TODO: smooth quantiles
```
%% Cell type:code id:33198267-8eb6-4b84-bb6d-c903b27ccbc5 tags:
``` python
# percentiles of interest
percentiles = [0.01, 0.02, 0.98, 0.99]
```
%% Cell type:code id:51137d23-a380-4d48-a005-fd1edaf554eb tags:
``` python
# calculate percentiles over reference period
with warnings.catch_warnings():
warnings.simplefilter('ignore')
y_pred_dist = y_pred.quantile(q=percentiles, dim='time')
y_true_dist = y_true.quantile(q=percentiles, dim='time')
```
%% Cell type:code id:b461c774-c5fc-4a50-8609-34a7e7674a34 tags:
``` python
# calculate bias in each percentile over entire reference period
bias_dist = y_pred_dist - y_true_dist
```
%% Cell type:code id:bd2ec314-87ed-407f-af8b-5e2c6785e9cc tags:
``` python
# calculate correlation coefficient for extreme values
for var in y_pred_dist:
for q in percentiles:
y_p = y_pred_dist[var].sel(quantile=q).values[~np.isnan(y_pred_dist[var].sel(quantile=q))]
y_t = y_true_dist[var].sel(quantile=q).values[~np.isnan(y_true_dist[var].sel(quantile=q))]
r, _ = stats.pearsonr(y_p, y_t)
print('Pearson correlation for {}, q={:.2f}: R={:.2f}'.format(var, q, r))
```
%% Cell type:code id:04a1b4fc-8fc0-4e1e-adbe-bf1eaafeac5d tags:
``` python
# plot bias in each percentile
fig, axes = plt.subplots(len(y_pred_dist.data_vars), len(percentiles), sharex=True, sharey=True, figsize=(32, 6))
axes = axes.reshape(len(y_pred_dist.data_vars), -1)
for ax, var in zip(axes, y_pred_dist):
# iterate over percentiles
for axis, q in zip(ax, percentiles):
ds = bias_dist.sel(quantile=q).to_array()
ds.plot(ax=axis, vmin=-2, vmax=2, cmap='RdBu_r')
axis.text(x=bias_dist.x[-1], y=bias_dist.y[0], s='Avg: {:.2f}'.format(ds.mean().item()), ha='right', va='bottom')
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment