Skip to content
Snippets Groups Projects
Commit 73b60483 authored by Frisinghelli Daniel's avatar Frisinghelli Daniel
Browse files

Notebook to assess different distributions.

parent bd79fde1
No related branches found
No related tags found
No related merge requests found
%% Cell type:markdown id:63805b4a-b30e-4c10-a948-bc59651ca7a6 tags:
### Imports
%% Cell type:code id:28982ce9-bf0c-4eb1-8b9e-bec118359966 tags:
``` python
# builtins
import datetime
import warnings
import calendar
# externals
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import scipy.stats as stats
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import scipy.stats as stats
from IPython.display import Image
from sklearn.metrics import r2_score, roc_curve, auc, classification_report
from sklearn.model_selection import train_test_split
# locals
from climax.main.io import ERA5_PATH, OBS_PATH, TARGET_PATH, DEM_PATH
from climax.main.config import CALIB_PERIOD, VALID_PERIOD
from pysegcnn.core.utils import search_files
from pysegcnn.core.graphics import plot_classification_report
```
%% Cell type:code id:2373d894-e252-4f16-826b-88731e195259 tags:
``` python
# model predictions and observations NetCDF
y_true = xr.open_dataset(search_files(OBS_PATH.joinpath('pr'), 'OBS_pr(.*).nc$').pop())
```
%% Cell type:code id:0c2c1912-a947-4afe-84a7-895726be5cfd tags:
``` python
# subset to calibration and validation period
y_calib = y_true.sel(time=CALIB_PERIOD).precipitation.values
```
%% Cell type:code id:ed7d1686-968e-49e9-ba34-d03658ba3b32 tags:
``` python
# mask missing values
y_calib = y_calib[~np.isnan(y_calib)]
```
%% Cell type:code id:1b570e39-f242-49ef-8aef-eff8fbcf7c4d tags:
``` python
# only use values greater 0
y_calib = y_calib[y_calib > 0]
```
%% Cell type:code id:5de4933a-ef9d-4afe-8af6-ff68d91860ce tags:
``` python
# fit gamma distribution to data
alpha, loc, beta = stats.gamma.fit(y_calib, loc=0.1)
gamma_calib = stats.gamma(alpha, loc=loc, scale=beta)
```
%% Cell type:code id:75b74f7c-c9d7-4d52-b140-e0ad9de17b69 tags:
``` python
# fit generalized pareto distribution to data
alpha, loc, beta = stats.genpareto.fit(y_calib, loc=0.1)
genpareto_calib = stats.genpareto(alpha, loc=loc, scale=beta)
```
%% Cell type:code id:14ade547-443a-457a-bccd-d88d049b9d81 tags:
``` python
# compute empirical quantiles
quantiles = np.arange(0.01, 1, 0.005)
# empirical quantiles and theoretical quantiles
eq = np.quantile(y_calib, quantiles)
tq_gamma = gamma_calib.ppf(quantiles)
tq_genpareto = genpareto_calib.ppf(quantiles)
# Q-Q plot
RANGE = 40
fig, ax = plt.subplots(1, 1, figsize=(6, 6))
ax.scatter(eq, tq_gamma, color='grey', label='Gamma')
ax.scatter(eq, tq_genpareto, color='k', label='GenPareto')
ax.plot(np.arange(0, RANGE), np.arange(0, RANGE), '--k')
ax.set_xlim(0, RANGE)
ax.set_ylim(0, RANGE)
ax.set_ylabel('Theoretical quantiles');
ax.set_xlabel('Empirical quantiles');
ax.legend(frameon=False, fontsize=12);
```
%% Cell type:markdown id:c0fea8ac-bac0-4096-bc81-90d799f8ab94 tags:
### Empirical quantiles per grid point
%% Cell type:code id:4dcb3348-5d22-4324-b840-2c305983e826 tags:
``` python
quantiles = np.arange(0.01, 1, 0.01)
```
%% Cell type:code id:18ce44b4-d6c0-4950-9cd2-7a3af1095b24 tags:
``` python
# subset to calibration period
y_calib_p = y_true.sel(time=CALIB_PERIOD).precipitation
```
%% Cell type:code id:a02c42e0-591c-4630-89b8-5dd8ef71a4a0 tags:
``` python
# compute empirical quantiles over time
equantiles = y_calib_p.quantile(quantiles, dim='time')
equantiles = equantiles.rename({'quantile': 'q'})
```
%% Cell type:code id:966d2724-2628-4842-abc9-695711945347 tags:
``` python
# iterate over the grid points
gammaq = np.ones(shape=(len(y_calib_p.q), len(y_calib_p.y), len(y_calib_p.x))) * np.nan
for i, _ in enumerate(y_calib_p.x):
print('Rows: {}/{}'.format(i + 1, len(y_calib_p.x)))
for j, _ in enumerate(y_calib_p.y):
# current grid point: xarray.Dataset, dimensions=(time)
point = y_calib_p.isel(x=i, y=j).values
# mask missing values
point = point[~np.isnan(point)]
# check if the grid point is valid
if point.size < 1:
# move on to next grid point
continue
# consider only values > 0
point = point[point > 0]
# fit Gamma distribution to grid point
alpha, loc, beta = stats.gamma.fit(point)
gamma = stats.gamma(alpha, loc=loc, scale=beta)
# compute theoretical quantiles of fitted gamma distribution
tq = gamma.ppf(quantiles)
# store theoretical quantiles for current grid point
gammaq[:, j, i] = tq
# store theoretical quantiles in xarray.DataArray
tquantiles = xr.DataArray(data=gammaq, dims=['q', 'y', 'x'],
coords=dict(q=quantiles, lat=y_calib_p.y, lon=y_calib_p.x),
name='precipitation')
```
%% Cell type:code id:601de7cb-35f4-40e1-9b51-2dab23102659 tags:
``` python
# compute bias in theoretical quantiles
biasq = tquantiles - equantiles
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment