Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import pandas as pd
import numpy as np
import xarray as xr
import geopandas as gpd
from shapely.geometry import Point
import matplotlib.pyplot as plt
import pdb
def check_data_gap(df):
df.dropna(axis='columns', how='all', inplace=True)
missing_dates = pd.date_range(df.index.min(), df.index.max()).difference(df.index)
print(f'Date start: {df.index.min().strftime("%Y-%m-%d")}, date end: {df.index.max().strftime("%Y-%m-%d")}')
if len(missing_dates) > 0:
print(f"Missing dates: {', '.join(missing_dates.strftime('%Y-%m-%d'))}")
else:
print('No missing dates')
# return missing_dates
def interpolate_df(df):
df = df.reindex(pd.date_range(df.index.min(), df.index.max()), fill_value=np.nan)
return df.interpolate()
def readnetcdf_in_shp_mattia(nc_fileName, shp_fileName, res=0.25, plot=False):
# Opent the netcdf file
ds = xr.open_dataset(nc_fileName)
# Open the shape file and reproject it to lat lon WGS84
shp = gpd.read_file(shp_fileName)
shp = shp.to_crs('epsg:4326')
# Crop ds with the shapefile bounding box (bb)
bb = shp.bounds.iloc[0]
ds = ds.sel(lon=slice(bb['minx']-res, bb['maxx']+res), lat=slice(bb['maxy']+res, bb['miny']-res))
# Mask all the points in ds where the grid box do not intersect or is in the shapefile
for x in ds.longitude.values:
for y in ds.latitude.values:
gridbox = Point(x, y).buffer(res/2, cap_style=3)
if not gridbox.intersects(shp.loc[0, 'geometry']):
for k in ds.data_vars.keys():
ds[k].loc[dict(longitude=x, latitude=y)] = np.nan
ds = ds.dropna(dim='longitude', how='all')
ds = ds.dropna(dim='latitude', how='all')
# Plot the era5 gridbox and the shapefile if plot=True
if plot:
for x in ds.longitude.values:
for y in ds.latitude.values:
gridbox = Point(x, y).buffer(res / 2, cap_style=3)
gridbox_x, gridbox_y = gridbox.exterior.xy
plt.plot(gridbox_x, gridbox_y, color='blue')
plt.plot(x, y, marker='o', color='red')
shp_x, shp_y = shp.loc[0, 'geometry'].exterior.xy
plt.plot(shp_x, shp_y, color='black')
plt.axis('equal')
return ds
def readnetcdf_in_shp(nc_fileName, shp_fileName, res=5500, plot=False):
# Open the netcdf file
ds = xr.open_dataset(nc_fileName)
# Open the shape file and reproject it to the MESCAN-Surfex grid (unit=meters)
shp = gpd.read_file(shp_fileName)
shp_reproj = shp.to_crs('+proj=lcc +lat_1=50 +lat_2=50 +lat_0=50 +lon_0=8 +x_0=2937018.5829291 +y_0=2937031.41074803 +a=6371229 +b=6371229')
# Crop ds with the shapefile bounding box (bb)
bb = shp_reproj.bounds.iloc[0]
ds = ds.sel(x=slice(bb['minx']-res, bb['maxx']+res),
y=slice(bb['miny']-res, bb['maxy']+res))
#0000 Mask all the points in ds where the grid box do not intersect or is in the shapefile
for i in ds.x.values:
for j in ds.y.values:
gridbox = Point(i, j).buffer(res/2, cap_style=3)
if not (gridbox.intersects(shp_reproj.loc[0, 'geometry'])):
for k in ds.data_vars.keys():
if not (k =='Lambert_Conformal'):
ds[k].loc[dict(x=i, y=j)] = np.nan
ds = ds.dropna(dim='x', how='all')
ds = ds.dropna(dim='y', how='all')
# Plot the era5 gridbox and the shapefile if plot=True
if plot:
for x in ds.x.values:
for y in ds.y.values:
gridbox = Point(x, y).buffer(res / 2, cap_style=3)
gridbox_x, gridbox_y = gridbox.exterior.xy
plt.plot(gridbox_x, gridbox_y, color='blue')
for k in ds.data_vars.keys():
if (k !='Lambert_Conformal'):
if not(ds[k].loc[dict(x=x, y=y)].isnull().all()):
plt.plot(x, y, marker='o', color='red')
shp_x, shp_y = shp_reproj.loc[0, 'geometry'].exterior.xy
plt.plot(shp_x, shp_y, color='black')
plt.axis('equal')
print(f'n of pixels{counter}')
return ds
def xarray2df(xa, varnamedest,varnameor=False):
if not varnameor:
df = {}
for i in range(xa.y.size):
for j in range(xa.x.size):
df[f'{varnamedest}{i*xa.x.size+j}'] = xa.isel(y=i, x=j).to_dataframe().iloc[:, 2]
#pdb.set_trace()
else:
df = {}
for i in range(xa.y.size):
for j in range(xa.x.size):
df[f'{varnamedest}{i*xa.x.size+j}'] = xa.isel(y=i, x=j).to_dataframe().loc[:,varnameor]
#pdb.set_trace()
frame=pd.DataFrame(df)
return frame
# ------------------------------------------------------------------------------------------------------------------