From 73b60483c5a74d650b9fb0e79e8ed564ed70dd5e Mon Sep 17 00:00:00 2001 From: "Daniel.Frisinghelli" <daniel.frisinghelli@eurac.edu> Date: Fri, 1 Oct 2021 14:51:44 +0000 Subject: [PATCH] Notebook to assess different distributions. --- Notebooks/pr_distribution.ipynb | 258 ++++++++++++++++++++++++++++++++ 1 file changed, 258 insertions(+) create mode 100644 Notebooks/pr_distribution.ipynb diff --git a/Notebooks/pr_distribution.ipynb b/Notebooks/pr_distribution.ipynb new file mode 100644 index 0000000..59db30e --- /dev/null +++ b/Notebooks/pr_distribution.ipynb @@ -0,0 +1,258 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "63805b4a-b30e-4c10-a948-bc59651ca7a6", + "metadata": {}, + "source": [ + "### Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28982ce9-bf0c-4eb1-8b9e-bec118359966", + "metadata": {}, + "outputs": [], + "source": [ + "# builtins\n", + "import datetime\n", + "import warnings\n", + "import calendar\n", + "\n", + "# externals\n", + "import xarray as xr\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import pandas as pd\n", + "import scipy.stats as stats\n", + "from mpl_toolkits.axes_grid1.inset_locator import inset_axes\n", + "import scipy.stats as stats\n", + "from IPython.display import Image\n", + "from sklearn.metrics import r2_score, roc_curve, auc, classification_report\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "# locals\n", + "from climax.main.io import ERA5_PATH, OBS_PATH, TARGET_PATH, DEM_PATH\n", + "from climax.main.config import CALIB_PERIOD, VALID_PERIOD\n", + "from pysegcnn.core.utils import search_files\n", + "from pysegcnn.core.graphics import plot_classification_report" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2373d894-e252-4f16-826b-88731e195259", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# model predictions and observations NetCDF \n", + "y_true = xr.open_dataset(search_files(OBS_PATH.joinpath('pr'), 'OBS_pr(.*).nc$').pop())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0c2c1912-a947-4afe-84a7-895726be5cfd", + "metadata": {}, + "outputs": [], + "source": [ + "# subset to calibration and validation period\n", + "y_calib = y_true.sel(time=CALIB_PERIOD).precipitation.values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ed7d1686-968e-49e9-ba34-d03658ba3b32", + "metadata": {}, + "outputs": [], + "source": [ + "# mask missing values\n", + "y_calib = y_calib[~np.isnan(y_calib)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1b570e39-f242-49ef-8aef-eff8fbcf7c4d", + "metadata": {}, + "outputs": [], + "source": [ + "# only use values greater 0\n", + "y_calib = y_calib[y_calib > 0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5de4933a-ef9d-4afe-8af6-ff68d91860ce", + "metadata": {}, + "outputs": [], + "source": [ + "# fit gamma distribution to data\n", + "alpha, loc, beta = stats.gamma.fit(y_calib, loc=0.1)\n", + "gamma_calib = stats.gamma(alpha, loc=loc, scale=beta)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75b74f7c-c9d7-4d52-b140-e0ad9de17b69", + "metadata": {}, + "outputs": [], + "source": [ + "# fit generalized pareto distribution to data\n", + "alpha, loc, beta = stats.genpareto.fit(y_calib, loc=0.1)\n", + "genpareto_calib = stats.genpareto(alpha, loc=loc, scale=beta)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14ade547-443a-457a-bccd-d88d049b9d81", + "metadata": {}, + "outputs": [], + "source": [ + "# compute empirical quantiles\n", + "quantiles = np.arange(0.01, 1, 0.005)\n", + "\n", + "# empirical quantiles and theoretical quantiles\n", + "eq = np.quantile(y_calib, quantiles)\n", + "tq_gamma = gamma_calib.ppf(quantiles)\n", + "tq_genpareto = genpareto_calib.ppf(quantiles)\n", + "\n", + "# Q-Q plot \n", + "RANGE = 40\n", + "fig, ax = plt.subplots(1, 1, figsize=(6, 6))\n", + "ax.scatter(eq, tq_gamma, color='grey', label='Gamma')\n", + "ax.scatter(eq, tq_genpareto, color='k', label='GenPareto')\n", + "ax.plot(np.arange(0, RANGE), np.arange(0, RANGE), '--k')\n", + "ax.set_xlim(0, RANGE)\n", + "ax.set_ylim(0, RANGE)\n", + "ax.set_ylabel('Theoretical quantiles');\n", + "ax.set_xlabel('Empirical quantiles');\n", + "ax.legend(frameon=False, fontsize=12);" + ] + }, + { + "cell_type": "markdown", + "id": "c0fea8ac-bac0-4096-bc81-90d799f8ab94", + "metadata": {}, + "source": [ + "### Empirical quantiles per grid point" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4dcb3348-5d22-4324-b840-2c305983e826", + "metadata": {}, + "outputs": [], + "source": [ + "quantiles = np.arange(0.01, 1, 0.01)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18ce44b4-d6c0-4950-9cd2-7a3af1095b24", + "metadata": {}, + "outputs": [], + "source": [ + "# subset to calibration period\n", + "y_calib_p = y_true.sel(time=CALIB_PERIOD).precipitation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a02c42e0-591c-4630-89b8-5dd8ef71a4a0", + "metadata": {}, + "outputs": [], + "source": [ + "# compute empirical quantiles over time\n", + "equantiles = y_calib_p.quantile(quantiles, dim='time')\n", + "equantiles = equantiles.rename({'quantile': 'q'})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "966d2724-2628-4842-abc9-695711945347", + "metadata": {}, + "outputs": [], + "source": [ + "# iterate over the grid points\n", + "gammaq = np.ones(shape=(len(y_calib_p.q), len(y_calib_p.y), len(y_calib_p.x))) * np.nan\n", + "for i, _ in enumerate(y_calib_p.x):\n", + " print('Rows: {}/{}'.format(i + 1, len(y_calib_p.x)))\n", + " for j, _ in enumerate(y_calib_p.y):\n", + " \n", + " # current grid point: xarray.Dataset, dimensions=(time)\n", + " point = y_calib_p.isel(x=i, y=j).values\n", + " \n", + " # mask missing values\n", + " point = point[~np.isnan(point)]\n", + " \n", + " # check if the grid point is valid\n", + " if point.size < 1:\n", + " # move on to next grid point\n", + " continue\n", + " \n", + " # consider only values > 0\n", + " point = point[point > 0]\n", + " \n", + " # fit Gamma distribution to grid point\n", + " alpha, loc, beta = stats.gamma.fit(point)\n", + " gamma = stats.gamma(alpha, loc=loc, scale=beta)\n", + " \n", + " # compute theoretical quantiles of fitted gamma distribution\n", + " tq = gamma.ppf(quantiles)\n", + " \n", + " # store theoretical quantiles for current grid point\n", + " gammaq[:, j, i] = tq\n", + "\n", + "# store theoretical quantiles in xarray.DataArray\n", + "tquantiles = xr.DataArray(data=gammaq, dims=['q', 'y', 'x'],\n", + " coords=dict(q=quantiles, lat=y_calib_p.y, lon=y_calib_p.x),\n", + " name='precipitation')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "601de7cb-35f4-40e1-9b51-2dab23102659", + "metadata": {}, + "outputs": [], + "source": [ + "# compute bias in theoretical quantiles\n", + "biasq = tquantiles - equantiles" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} -- GitLab