Skip to content
Snippets Groups Projects
Commit 9bcb0d5a authored by Frisinghelli Daniel's avatar Frisinghelli Daniel
Browse files

Notebook visualizing stratified sampling.

parent 07448b4e
No related branches found
No related tags found
No related merge requests found
%% Cell type:markdown id:d6b83379-c5a8-48c3-bb85-d00a341a37f4 tags:
### Imports
%% Cell type:code id:eeba7f9b-066a-4843-bd64-5b6326c0b536 tags:
``` python
# builtins
import datetime
import warnings
import calendar
# externals
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import scipy.stats as stats
from IPython.display import Image
from sklearn.metrics import r2_score, roc_curve, auc, classification_report
from sklearn.model_selection import train_test_split
# locals
from climax.main.io import ERA5_PATH, OBS_PATH, TARGET_PATH, DEM_PATH
from climax.main.config import CALIB_PERIOD
from pysegcnn.core.utils import search_files
from pysegcnn.core.graphics import plot_classification_report
```
%% Cell type:code id:e75b3217-26f7-4a4a-ae2a-4fbb92a9f2a2 tags:
``` python
# open the NetCDF file with the precipitation ('pr') observations; pop() keeps
# a single match out of those returned by search_files
obs_files = search_files(OBS_PATH.joinpath('pr'), 'OBS_pr(.*).nc$')
y_true = xr.open_dataset(obs_files.pop())
```
%% Cell type:code id:3aa8466e-84a9-4c2e-ae19-403b6246e27f tags:
``` python
# restrict the observations to the time steps of the calibration period
y_true = y_true.sel({'time': CALIB_PERIOD})
```
%% Cell type:code id:4f1a58a2-8c4c-4d73-a116-e64e68fdd507 tags:
``` python
# precipitation threshold (in mm) defining a wet day: a day counts as wet when
# the spatially averaged precipitation is greater than or equal to this value
# NOTE: keep this an integer, the figure title formats it with '{:0d}'
WET_DAY_THRESHOLD = 1
```
%% Cell type:code id:5e6696df-8660-4083-9a32-0dd282112948 tags:
``` python
# flag each time step of the calibration period as wet (1) or dry (0): a day
# is wet when the precipitation averaged over the spatial dimensions
# ('y', 'x') reaches the wet day threshold
spatial_mean = y_true.mean(dim=('y', 'x'))
wet_days = (spatial_mean >= WET_DAY_THRESHOLD).astype(np.int16)
# plain NumPy array of the wet/dry flags with singleton dimensions dropped
# (presumably one value per time step, i.e. a single data variable -- TODO confirm)
nwet_days = np.squeeze(wet_days.to_array().values)
```
%% Cell type:code id:b87accd6-d5e4-4dc6-9532-3ef8aa162d24 tags:
``` python
# chronological split: without shuffling, the first 75% of the calibration
# period is used for training and the remaining 25% for validation
train, valid = train_test_split(CALIB_PERIOD, test_size=0.25, shuffle=False)
```
%% Cell type:code id:559d1450-09db-4b2f-844a-d572485973e0 tags:
``` python
# split training/validation set stratified by the wet/dry flag of each day, so
# that both subsets keep the wet-to-dry ratio of the calibration period
# NOTE: the stratified split shuffles the time steps; a fixed seed makes the
# split, and hence the figure derived from it, reproducible across runs
# NOTE(review): this split holds out 50% of the period, whereas the
# chronological split holds out 25% -- confirm the differing test_size is
# intended before comparing absolute counts between the two strategies
train_st, valid_st = train_test_split(
    CALIB_PERIOD, stratify=nwet_days, test_size=0.5, random_state=0)
# shuffling destroyed the temporal order: sort chronologically
train_st, valid_st = np.asarray(sorted(train_st)), np.asarray(sorted(valid_st))
```
%% Cell type:code id:7fd013f9-77d0-48de-8d5f-2c6a1cb3ed17 tags:
``` python
# compare the number of dry and wet days of both splits in a 2x2 grid:
# left column = not stratified, right column = stratified,
# top row = training set, bottom row = validation set
fig, grid = plt.subplots(2, 2, figsize=(10, 10), sharex=True, sharey=True)
axes = grid.flatten()

# time steps of each subset paired with the panel it is drawn on
panels = [
    (train, axes[0]),     # not stratified, training
    (valid, axes[2]),     # not stratified, validation
    (train_st, axes[1]),  # stratified, training
    (valid_st, axes[3]),  # stratified, validation
]
for period, ax in panels:
    flags = wet_days.sel(time=period).to_array().values.squeeze()
    sns.countplot(x=flags, ax=ax)

# axes properties
for ax in axes:
    ax.set_ylabel('')
for ax in axes[2:]:
    # name the two categories on the bottom row: flag 0 is dry, flag 1 is wet
    ax.set_xticklabels(['Dry', 'Wet'])

# annotate each panel with the subset it shows, 5 counts below the upper
# y-axis limit
subsets = ('Training', 'Training', 'Validation', 'Validation')
for ax, subset in zip(axes, subsets):
    ax.text(1, ax.get_ylim()[-1] - 5, subset, ha='left', va='top', fontsize=12)

# column titles
axes[0].set_title('Not stratified')
axes[1].set_title('Stratified')

# adjust subplot spacing and add the figure title; the trailing semicolon
# suppresses the echo of the returned text object in the notebook
fig.subplots_adjust(hspace=0.1, wspace=0.1)
fig.suptitle('Stratified sampling: wet day threshold {:0d} mm'.format(WET_DAY_THRESHOLD));
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment