Skip to content
Snippets Groups Projects
create_daily_csv.py 4.89 KiB
Newer Older
import pandas as pd
import numpy as np
import xarray as xr
import geopandas as gpd
from shapely.geometry import Point
import matplotlib.pyplot as plt

import pdb

def check_data_gap(df):

    df.dropna(axis='columns', how='all', inplace=True)
    missing_dates = pd.date_range(df.index.min(), df.index.max()).difference(df.index)
    
    print(f'Date start: {df.index.min().strftime("%Y-%m-%d")}, date end: {df.index.max().strftime("%Y-%m-%d")}')

    if len(missing_dates) > 0:
        print(f"Missing dates: {', '.join(missing_dates.strftime('%Y-%m-%d'))}")
    else:
        print('No missing dates')
    # return missing_dates


def interpolate_df(df):

    df = df.reindex(pd.date_range(df.index.min(), df.index.max()), fill_value=np.nan)
    return df.interpolate()


def readnetcdf_in_shp_mattia(nc_fileName, shp_fileName, res=0.25, plot=False):

    # Opent the netcdf file
    ds = xr.open_dataset(nc_fileName)

    # Open the shape file and reproject it to lat lon WGS84
    shp = gpd.read_file(shp_fileName)
    shp = shp.to_crs('epsg:4326')

    # Crop ds with the shapefile bounding box (bb)
    bb = shp.bounds.iloc[0]
    ds = ds.sel(lon=slice(bb['minx']-res, bb['maxx']+res), lat=slice(bb['maxy']+res, bb['miny']-res))

    # Mask all the points in ds where the grid box do not intersect or is in the shapefile
    for x in ds.longitude.values:
        for y in ds.latitude.values:
            gridbox = Point(x, y).buffer(res/2, cap_style=3)
            if not gridbox.intersects(shp.loc[0, 'geometry']):
                for k in ds.data_vars.keys():
                    ds[k].loc[dict(longitude=x, latitude=y)] = np.nan
    ds = ds.dropna(dim='longitude', how='all')
    ds = ds.dropna(dim='latitude', how='all')

    # Plot the era5 gridbox and the shapefile if plot=True
    if plot:
        for x in ds.longitude.values:
            for y in ds.latitude.values:
                gridbox = Point(x, y).buffer(res / 2, cap_style=3)
                gridbox_x, gridbox_y = gridbox.exterior.xy
                plt.plot(gridbox_x, gridbox_y, color='blue')
                plt.plot(x, y, marker='o', color='red')
        shp_x, shp_y = shp.loc[0, 'geometry'].exterior.xy
        plt.plot(shp_x, shp_y, color='black')
        plt.axis('equal')

    return ds


def readnetcdf_in_shp(nc_fileName, shp_fileName, res=5500, plot=False):
    
    # Open the netcdf file
    ds = xr.open_dataset(nc_fileName)
    
    # Open the shape file and reproject it to the MESCAN-Surfex grid (unit=meters)
    shp = gpd.read_file(shp_fileName)
    shp_reproj = shp.to_crs('+proj=lcc +lat_1=50 +lat_2=50 +lat_0=50 +lon_0=8 +x_0=2937018.5829291 +y_0=2937031.41074803 +a=6371229 +b=6371229')
    
    # Crop ds with the shapefile bounding box (bb)
    bb = shp_reproj.bounds.iloc[0]
    ds = ds.sel(x=slice(bb['minx']-res, bb['maxx']+res), 
                y=slice(bb['miny']-res, bb['maxy']+res))
    
    
    #0000 Mask all the points in ds where the grid box do not intersect or is in the shapefile
    for i in ds.x.values:
        for j in ds.y.values:
            gridbox = Point(i, j).buffer(res/2, cap_style=3)
            if not (gridbox.intersects(shp_reproj.loc[0, 'geometry'])):
                for k in ds.data_vars.keys():
                    if not (k =='Lambert_Conformal'):
                        ds[k].loc[dict(x=i, y=j)] = np.nan
    ds = ds.dropna(dim='x', how='all')
    ds = ds.dropna(dim='y', how='all')

    # Plot the era5 gridbox and the shapefile if plot=True
    if plot:
        for x in ds.x.values:
            for y in ds.y.values:
                gridbox = Point(x, y).buffer(res / 2, cap_style=3)
                gridbox_x, gridbox_y = gridbox.exterior.xy
                plt.plot(gridbox_x, gridbox_y, color='blue')
                for k in ds.data_vars.keys():
                    if (k !='Lambert_Conformal'):
                        if not(ds[k].loc[dict(x=x, y=y)].isnull().all()):
                            plt.plot(x, y, marker='o', color='red')
        shp_x, shp_y = shp_reproj.loc[0, 'geometry'].exterior.xy
        plt.plot(shp_x, shp_y, color='black')
        plt.axis('equal')                        
    print(f'n of pixels{counter}')                  
    return ds


def xarray2df(xa, varnamedest,varnameor=False):
    if not varnameor:
        df = {}
        for i in range(xa.y.size):
            for j in range(xa.x.size):
                df[f'{varnamedest}{i*xa.x.size+j}'] = xa.isel(y=i, x=j).to_dataframe().iloc[:, 2]
                #pdb.set_trace()
             
    else:
        df = {}
        for i in range(xa.y.size):
            for j in range(xa.x.size):
                df[f'{varnamedest}{i*xa.x.size+j}'] = xa.isel(y=i, x=j).to_dataframe().loc[:,varnameor]
                #pdb.set_trace()
    
    frame=pd.DataFrame(df)
    return frame


# ------------------------------------------------------------------------------------------------------------------