{ "cells": [ { "cell_type": "markdown", "id": "fde8874d-299f-4f48-a10a-9fb6a00b43b9", "metadata": {}, "source": [ "# Evaluate bootstrapped model results" ] }, { "cell_type": "markdown", "id": "969d063b-5262-4324-901f-0a48630c4f27", "metadata": {}, "source": [ "### Imports and constants" ] }, { "cell_type": "code", "execution_count": null, "id": "8af00ae4-4aeb-4ff8-a46a-65966b28c440", "metadata": {}, "outputs": [], "source": [ "# builtins\n", "import pathlib\n", "\n", "# externals\n", "import numpy as np\n", "import xarray as xr\n", "\n", "# locals\n", "from climax.main.io import OBS_PATH, ERA5_PATH\n", "from climax.main.config import VALID_PERIOD\n", "from pysegcnn.core.utils import search_files" ] }, { "cell_type": "code", "execution_count": null, "id": "5bc74835-dc59-46ed-849b-3ff614e53eee", "metadata": {}, "outputs": [], "source": [ "# mapping from predictands to variable names\n", "NAMES = {'tasmin': 'minimum temperature', 'tasmax': 'maximum temperature', 'pr': 'precipitation'}" ] }, { "cell_type": "code", "execution_count": null, "id": "c8a63ef3-35ef-4ffa-b1f3-5c2986eb7eb1", "metadata": {}, "outputs": [], "source": [ "# path to bootstrapped model results\n", "RESULTS = pathlib.Path('/mnt/CEPH_PROJECTS/FACT_CLIMAX/ERA5_PRED/bootstrap')" ] }, { "cell_type": "markdown", "id": "7eae545b-4d8a-4689-a6c0-4aba2cb9104e", "metadata": {}, "source": [ "## Search model configuration" ] }, { "cell_type": "code", "execution_count": null, "id": "3e856f80-14fd-405f-a44e-cc77863f8e5b", "metadata": {}, "outputs": [], "source": [ "# predictand to evaluate\n", "PREDICTAND = 'tasmin'\n", "LOSS = 'L1Loss'\n", "OPTIM = 'Adam'" ] }, { "cell_type": "code", "execution_count": null, "id": "011b792d-7349-44ad-997d-11f236472a11", "metadata": {}, "outputs": [], "source": [ "# model to evaluate\n", "model = 'USegNet_{}_ztuvq_500_850_p_dem_doy_{}_{}'.format(PREDICTAND, LOSS, OPTIM)" ] }, { "cell_type": "code", "execution_count": null, "id": "dc4ca6f0-5490-4522-8661-e36bd1be11b7", "metadata": {}, "outputs": [], "source": [ "# get bootstrapped models\n", "models = sorted(search_files(RESULTS.joinpath(PREDICTAND), model + '(.*).nc$'),\n", " key=lambda x: int(x.stem.split('_')[-1]))\n", "models" ] }, { "cell_type": "markdown", "id": "e790ed9f-451c-4368-849d-06d9c50f797c", "metadata": {}, "source": [ "### Load observations" ] }, { "cell_type": "code", "execution_count": null, "id": "0862e0c8-06df-45d6-bc1b-002ffb6e9915", "metadata": {}, "outputs": [], "source": [ "# load observations\n", "y_true = xr.open_dataset(OBS_PATH.joinpath(PREDICTAND, 'OBS_{}_1980_2018.nc'.format(PREDICTAND)),\n", " chunks={'time': 365})\n", "y_true = y_true.sel(time=VALID_PERIOD) # subset to time period covered by predictions\n", "y_true = y_true.rename({NAMES[PREDICTAND]: PREDICTAND}) if PREDICTAND == 'pr' else y_true" ] }, { "cell_type": "code", "execution_count": null, "id": "aba38642-85d1-404a-81f3-65d23985fb7a", "metadata": {}, "outputs": [], "source": [ "# mask of missing values\n", "missing = np.isnan(y_true[PREDICTAND])" ] }, { "cell_type": "markdown", "id": "d4512ed2-d503-4bc1-ae76-84560c101a14", "metadata": {}, "source": [ "### Load reference data" ] }, { "cell_type": "code", "execution_count": null, "id": "f90f6abf-5fd6-49c0-a1ad-f62242b3d3a0", "metadata": {}, "outputs": [], "source": [ "# ERA-5 reference dataset\n", "if PREDICTAND == 'pr':\n", " y_refe = xr.open_dataset(search_files(ERA5_PATH.joinpath('ERA5', 'total_precipitation'), '.nc$').pop(),\n", " chunks={'time': 365})\n", " y_refe = y_refe.rename({'tp': 'pr'})\n", "else:\n", " y_refe = xr.open_dataset(search_files(ERA5_PATH.joinpath('ERA5', '2m_{}_temperature'.format(PREDICTAND.lstrip('tas'))), '.nc$').pop(),\n", " chunks={'time': 365})\n", " y_refe = y_refe - 273.15 # convert to °C\n", " y_refe = y_refe.rename({'t2m': PREDICTAND})" ] }, { "cell_type": "code", "execution_count": null, "id": "ea6d5f56-4f39-4e9a-976d-00ff28fce95c", "metadata": {}, "outputs": [], "source": [ "# subset to time period covered by predictions\n", "y_refe = y_refe.sel(time=VALID_PERIOD).drop_vars('lambert_azimuthal_equal_area')\n", "y_refe = y_refe.transpose('time', 'y', 'x') # change order of dimensions" ] }, { "cell_type": "markdown", "id": "d37702de-da5f-4306-acc1-e569471c1f12", "metadata": {}, "source": [ "### Load QM-adjusted reference data" ] }, { "cell_type": "code", "execution_count": null, "id": "fffbd267-d08b-44f4-869c-7056c4f19c28", "metadata": {}, "outputs": [], "source": [ "y_refe_qm = xr.open_dataset(ERA5_PATH.joinpath('QM_ERA5_{}_day_19912010.nc'.format(PREDICTAND)), chunks={'time': 365})\n", "y_refe_qm = y_refe_qm.transpose('time', 'x', 'y') # change order of dimensions" ] }, { "cell_type": "code", "execution_count": null, "id": "16fa580e-27a7-4758-9164-7f607df7179d", "metadata": {}, "outputs": [], "source": [ "# center hours at 00:00:00 rather than 12:00:00\n", "y_refe_qm['time'] = np.asarray([t.astype('datetime64[D]') for t in y_refe_qm.time.values])" ] }, { "cell_type": "code", "execution_count": null, "id": "6789791f-006b-49b3-aa04-34e4ed8e1571", "metadata": {}, "outputs": [], "source": [ "# subset to time period covered by predictions\n", "y_refe_qm = y_refe_qm.sel(time=VALID_PERIOD).drop_vars('lambert_azimuthal_equal_area')" ] }, { "cell_type": "code", "execution_count": null, "id": "b51cfb3f-caa8-413e-a12d-47bbafcef1df", "metadata": {}, "outputs": [], "source": [ "# align datasets and mask missing values\n", "y_true, y_refe, y_refe_qm = xr.align(y_true[PREDICTAND], y_refe[PREDICTAND], y_refe_qm[PREDICTAND], join='override')\n", "y_refe = y_refe.where(~missing, other=np.nan)\n", "y_refe_qm = y_refe_qm.where(~missing, other=np.nan)" ] }, { "cell_type": "markdown", "id": "b4a6c286-6b88-487d-866c-3cb633686dac", "metadata": {}, "source": [ "### Load model predictions" ] }, { "cell_type": "code", "execution_count": null, "id": "ccaf0118-da51-43b0-a2b6-56ba4b252999", "metadata": {}, "outputs": [], "source": [ "y_pred = [xr.open_dataset(sim, chunks={'time': 365}) for sim in models]\n", "if PREDICTAND == 'pr':\n", " y_pred = [y_p.rename({NAMES[PREDICTAND]: PREDICTAND}) for y_p in y_pred]" ] }, { "cell_type": "code", "execution_count": null, "id": "df3f018e-4723-4878-b72a-0586b68e6dfd", "metadata": {}, "outputs": [], "source": [ "# align datasets and mask missing values\n", "y_prob = []\n", "for i, y_p in enumerate(y_pred):\n", " \n", " # check whether evaluating precipitation or temperatures\n", " if len(y_p.data_vars) > 1:\n", " _, _, y_p, y_p_prob = xr.align(y_true, y_refe, y_p[PREDICTAND], y_p.prob, join='override')\n", " y_p_prob = y_p_prob.where(~missing, other=np.nan) # mask missing values\n", " y_prob.append(y_p_prob)\n", " else:\n", " _, _, y_p = xr.align(y_true, y_refe, y_p[PREDICTAND], join='override')\n", " \n", " # mask missing values\n", " y_p = y_p.where(~missing, other=np.nan)\n", " y_pred[i] = y_p" ] }, { "cell_type": "markdown", "id": "7effbf83-7977-4a47-aa6d-d57c4c4c22c6", "metadata": {}, "source": [ "## Bias, MAE, and RMSE" ] }, { "cell_type": "markdown", "id": "b8c23b7b-ccdf-412a-a30d-ac686af03d9f", "metadata": {}, "source": [ "Calculate yearly average bias, MAE, and RMSE over entire reference period for model predictions, ERA-5, and QM-adjusted ERA-5." ] }, { "cell_type": "code", "execution_count": null, "id": "7939a4d2-4eff-4507-86f8-dba7c0b635df", "metadata": {}, "outputs": [], "source": [ "# yearly average values over validation period\n", "y_pred_yearly_avg = [y_p.groupby('time.year').mean(dim='time') for y_p in y_pred]\n", "y_refe_yearly_avg = y_refe.groupby('time.year').mean(dim='time')\n", "y_refe_qm_yearly_avg = y_refe_qm.groupby('time.year').mean(dim='time')\n", "y_true_yearly_avg = y_true.groupby('time.year').mean(dim='time')" ] }, { "cell_type": "code", "execution_count": null, "id": "cce7ffbf-7e16-45f1-a2b6-5bc688595ee7", "metadata": {}, "outputs": [], "source": [ "# yearly average bias, mae, and rmse for model predictions\n", "bias_pred = [y_p - y_true_yearly_avg for y_p in y_pred_yearly_avg]\n", "mae_pred = [np.abs(y_p - y_true_yearly_avg) for y_p in y_pred_yearly_avg]\n", "rmse_pred = [np.sqrt((y_p - y_true_yearly_avg) ** 2) for y_p in y_pred_yearly_avg]" ] }, { "cell_type": "code", "execution_count": null, "id": "64e29db7-998d-4952-84b0-1c79016ab9a9", "metadata": {}, "outputs": [], "source": [ "# yearly average bias, mae, and rmse for ERA-5\n", "bias_refe = y_refe_yearly_avg - y_true_yearly_avg\n", "mae_refe = np.abs(y_refe_yearly_avg - y_true_yearly_avg)\n", "rmse_refe = np.sqrt((y_refe_yearly_avg - y_true_yearly_avg) ** 2)" ] }, { "cell_type": "code", "execution_count": null, "id": "bb7dc5c2-c518-4251-8d27-5b52e20e0397", "metadata": {}, "outputs": [], "source": [ "# yearly average bias, mae, and rmse for QM-Adjusted ERA-5\n", "bias_refe_qm = y_refe_qm_yearly_avg - y_true_yearly_avg\n", "mae_refe_qm = np.abs(y_refe_qm_yearly_avg - y_true_yearly_avg)\n", "rmse_refe_qm = np.sqrt((y_refe_qm_yearly_avg - y_true_yearly_avg) ** 2)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.10" } }, "nbformat": 4, "nbformat_minor": 5 }