# -*- coding: utf-8 -*-
"""
Created on Thu 16 Dec 2021
@author: Alexandre Kenshilik Coche
@contact: alexandre.co@hotmail.fr
This module is a collection of tools for manipulating hydrological space-time
data, especially NetCDF data. It was originally developed to provide
preprocessing tools for CWatM (https://cwatm.iiasa.ac.at/) and HydroModPy
(https://gitlab.com/Alex-Gauvain/HydroModPy), but most functions have been
designed to be of general use.
"""
#%% Imports:
import xarray as xr
xr.set_options(keep_attrs = True)
import rioxarray as rio #Not necessary, the rio module from xarray is enough
import pandas as pd
import geopandas as gpd
import numpy as np
import rasterio
import rasterio.features
from affine import Affine
from shapely.geometry import Point
from shapely.geometry import Polygon
from shapely.geometry import mapping
import os
import re
import sys
import gc # garbage collector
import pathlib
from osgeo import gdal
from osgeo import osr
from osgeo import ogr
from osgeo import gdalconst
import matplotlib.pyplot as plt
import fiona
import datetime
import math
from pysheds.grid import Grid # to compute river network
# import whitebox
# wbt = whitebox.WhiteboxTools()
# wbt.verbose = False
#%% LEGEND:
# ---- ° = to keep but to update
# ---- * = to merge into another function or to remove
#%% LOADING & INITIALIZING DATASETS
###############################################################################
def use_valid_time(data_ds):
"""
Use 'valid_time' as the temporal coordinate.
Standardize its name to 'time'. If it is not the main time coordinate,
swap it with the main time coordinate.
Parameters
----------
data_ds : xarray.Dataset
Dataset whose temporal coordinate should be renamed.
Returns
-------
data_ds : xarray.Dataset
Dataset with the modified name for the temporal coordinate.
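Example
-------
Illustrative usage, assuming data_ds holds a 'valid_time' coordinate
(as in some ERA5 exports):
import geoconvert as gc
data_ds = gc.use_valid_time(data_ds)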
"""
# Rename 'valid_time' into 'time' (if necessary)
if ('valid_time' in data_ds.coords) | ('valid_time' in data_ds.data_vars):
data_ds = data_ds.rename({'valid_time': 'time'})
# Swap the coordinate (if necessary)
time_coord = main_time_coord(data_ds)
if time_coord != 'time':
data_ds = data_ds.swap_dims({time_coord: 'time'})
return data_ds
###############################################################################
def load_any(data, name=None, infer_time=False, **xr_kwargs):
r"""
This function loads any common spatio-temporal file or variable, without
the need to think about the file or variable type.
import geoconvert as gc
data_ds = gc.load_any(r'D:\data.nc', decode_times = True, decode_coords = 'all')
Parameters
----------
data : str, pathlib.Path, xarray.Dataset, xarray.DataArray or geopandas.GeoDataFrame
Data to load: filepath (.nc, .tif, .asc or .shp) or in-memory variable.
name : str, optional
Name to give to the main variable when it cannot be inferred (DataArray
or raster input). The default is None (the user is prompted).
infer_time : bool, optional
If True, rebuild the time axis from the 'units ... since ...' attribute
of the time coordinate. The default is False.
**xr_kwargs: keyword args
Arguments passed to the xarray.open_dataset function call.
May contain:
. decode_coords
. decode_times
. decode_cf
> help(xr.open_dataset)
Returns
-------
data_ds : xarray.Dataset or geopandas.GeoDataFrame
Loaded data.
"""
# data is a variable
if isinstance(data, xr.Dataset):
data_ds = data.copy()
# Rename time coord
if (not 'time' in data_ds.coords) & (not 'time' in data_ds.data_vars):
data_ds = use_valid_time(data_ds)
elif isinstance(data, xr.DataArray):
if name is None:
name = input('Name of main variable: ')
data_ds = data.to_dataset(name = name, promote_attrs = True)
# Rename time coord
if (not 'time' in data_ds.coords) & (not 'time' in data_ds.data_vars):
data_ds = use_valid_time(data_ds)
elif isinstance(data, gpd.GeoDataFrame):
data_ds = data.copy()
# data is a string/path
elif isinstance(data, (str, pathlib.Path)):
print("\nLoading data...")
if not os.path.isfile(data):
print(" Err: the path provided is not a file")
return
if os.path.splitext(data)[-1] == '.shp':
data_ds = gpd.read_file(data)
elif os.path.splitext(data)[-1] == '.nc':
try:
with xr.open_dataset(data, **xr_kwargs) as data_ds:
data_ds.load() # to unlock the resource
# Rename time coord
if (not 'time' in data_ds.coords) & (not 'time' in data_ds.data_vars):
data_ds = use_valid_time(data_ds)
except:
xr_kwargs['decode_times'] = False
if not infer_time:
print(" _ decode_times = False")
try:
with xr.open_dataset(data, **xr_kwargs) as data_ds:
data_ds.load() # to unlock the resource
except:
xr_kwargs['decode_coords'] = False
print(" _ decode_coords = False")
with xr.open_dataset(data, **xr_kwargs) as data_ds:
data_ds.load() # to unlock the resource
# Rename time coord
if (not 'time' in data_ds.coords) & (not 'time' in data_ds.data_vars):
data_ds = use_valid_time(data_ds)
if infer_time:
print(" _ inferring time axis...")
units, reference_date = data_ds.time.attrs['units'].split('since')
if units.replace(' ', '').casefold() in ['month', 'months', 'm']:
freq = 'M'
elif units.replace(' ', '').casefold() in ['day', 'days', 'd']:
freq = 'D'
start_date = pd.date_range(start = reference_date,
periods = int(data_ds.time[0].values)+1,
freq = freq)
try:
data_ds['time'] = pd.date_range(start = start_date[-1],
periods = data_ds.sizes['time'],
freq = freq)
except: # SPECIAL CASE to handle truncated output files from failed CWatM simulations
data_ds = data_ds.where(data_ds['time']<1e5, drop = True)
data_ds['time'] = pd.date_range(start = start_date[-1],
periods = data_ds.sizes['time'],
freq = freq)
print(f" . initial time = {data_ds.time[0].values.strftime('%Y-%m-%d')} | final time = {data_ds.time[-1].values.strftime('%Y-%m-%d')} | units = {units}")
elif os.path.splitext(data)[-1] in ['.tif', '.asc']:
xr_kwargs['decode_coords'] = None
with xr.open_dataset(data, **xr_kwargs) as data_ds:
data_ds.load() # to unlock the resource
# Rename time coord
if (not 'time' in data_ds.coords) & (not 'time' in data_ds.data_vars):
data_ds = use_valid_time(data_ds)
if name is None:
name = input('Name of main variable: ')
data_ds = data_ds.squeeze('band')
data_ds = data_ds.drop('band')
data_ds = data_ds.rename(band_data = name)
# Return
return data_ds
###############################################################################
def main_var(data_ds):
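"""
Return the name of the main variable of a dataset, i.e. the data_vars
entry that is not one of the known auxiliary variables (x, y, lat, lon,
spatial_ref, time bounds...). If several candidates remain, the list of
candidate names is returned instead.
"""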
var = list(set(list(data_ds.data_vars)) - set(['x', 'y', 'X','Y', 'i', 'j',
'lat', 'lon',
'spatial_ref',
'LambertParisII',
'bnds', 'time_bnds',
'valid_time',
'forecast_reference_time',
'forecast_period']))
if len(var) == 1:
var = var[0]
return var
###############################################################################
def main_space_coords(data_ds):
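"""
Return the names of the main spatial coordinates of a dataset, among
('x', 'X', 'lon', 'longitude') and ('y', 'Y', 'lat', 'latitude').
Each returned value is a single name when unambiguous, otherwise a list.
"""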
x_var = list(set(list(data_ds.coords)).intersection(set(['x', 'X', 'lon', 'longitude'])))
y_var = list(set(list(data_ds.coords)).intersection(set(['y', 'Y', 'lat', 'latitude'])))
if len(x_var) == 1:
x_var = x_var[0]
if len(y_var) == 1:
y_var = y_var[0]
return x_var, y_var
###############################################################################
def main_time_coord(data_ds):
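"""
Return the name of the main temporal coordinate of a dataset, among
'time', 'valid_time' and 'forecast_period'. When several are present,
the one with the most elements is returned.
"""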
var = list(set(list(data_ds.coords)).intersection(set(['time', 'valid_time',
'forecast_period',
# 'time_bnds',
# 'forecast_reference_time',
])))
if len(var) == 1:
var = var[0]
elif len(var) > 1:
coords_length = {data_ds[v].size:v for v in var}
var = coords_length[max(coords_length.keys())]
return var
###############################################################################
def get_filelist(data, filetype = '.nc'):
"""
This function converts a folder (or a file) into a list of relevant files.
Parameters
----------
data: str or iterable
Folder, filepath or iterable of filepaths
filetype: str
File extension used to filter the folder content. The default is '.nc'.
Returns
-------
data_folder : str
Root of the files.
filelist : list of str
List of files.
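Example
-------
Illustrative call (the folder path is hypothetical):
import geoconvert as gc
data_folder, filelist = gc.get_filelist(r"D:\data_folder", filetype = '.nc')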
"""
# ---- Data is a single element
# if data is a single string/path
if isinstance(data, (str, pathlib.Path)):
# if this string points to a folder
if os.path.isdir(data):
data_folder = data
filelist = [f for f in os.listdir(data_folder)
if (os.path.isfile(os.path.join(data_folder, f)) \
& (os.path.splitext(os.path.join(data_folder, f))[-1] == filetype))]
# if this string points to a file
else:
data_folder = os.path.split(data)[0] # root of the file
filelist = [data]
# ---- Data is an iterable
elif isinstance(data, (list, tuple)):
# [Safeguard] It is assumed that data contains an iterable of files
if not os.path.isfile(data[0]):
print("Err: Argument should be a folder, a filepath or a list of filepath")
return
data_folder = os.path.split(data[0])[0] # root of the first element of the list
filelist = list(data)
return data_folder, filelist
###############################################################################
#%%% ° pick_dates_fields
def pick_dates_fields(*, input_file, output_format = 'NetCDF', **kwargs):
"""
% DESCRIPTION:
This function extracts the specified dates or fields from NetCDF files that
contain multiple dates or fields, and exports them as a single file.
% EXAMPLE:
import geoconvert as gc
gc.pick_dates_fields(input_file = r"D:/path/test.nc",
dates = ['2020-10-15', '2021-10-15'])
% OPTIONAL ARGUMENTS:
> output_format = 'NetCDF' (default) | 'GeoTIFF'
> kwargs:
> dates = ['2021-10-15', '2021-10-19']
> fields = ['T2M', 'PRECIP', ...]
"""
with xr.open_dataset(input_file) as _dataset:
_dataset.load() # to unlock the resource
#% Get arguments (and build output_name):
# ---------------------------------------
_basename = os.path.splitext(input_file)[0]
# Get fields:
if 'fields' in kwargs:
fields = kwargs['fields']
if isinstance(fields, str): fields = [fields]
else: fields = list(fields) # in case fields are string or tuple
else:
fields = list(_dataset.data_vars) # if not input_arg, fields = all
# Get dates:
if 'dates' in kwargs:
dates = kwargs['dates']
if isinstance(dates, str):
output_file = '_'.join([_basename, dates, '_'.join(fields)])
dates = [dates]
else:
dates = list(dates) # in case dates are tuple
output_file = '_'.join([_basename, dates[0], 'to',
dates[-1], '_'.join(fields)])
else:
dates = ['alldates'] # if not input_arg, dates = all
output_file = '_'.join([_basename, '_'.join(fields)])
#% Standardize terms:
# -------------------
if 't' in list(_dataset.dims):
print('Renaming time coordinate')
_dataset = _dataset.rename(t = 'time')
if 'lon' in list(_dataset.dims) or 'lat' in list(_dataset.dims):
print('Renaming lat/lon coordinates')
_dataset = _dataset.rename(lat = 'latitude', lon = 'longitude')
# Change the order of coordinates to match QGIS standards:
_dataset = _dataset.transpose('time', 'latitude', 'longitude')
# Insert georeferencing metadata to match QGIS standards:
_dataset.rio.write_crs("epsg:4326", inplace = True)
# Insert metadata to match Panoply standards:
_dataset.longitude.attrs = {'units': 'degrees_east',
'long_name': 'longitude'}
_dataset.latitude.attrs = {'units': 'degrees_north',
'long_name': 'latitude'}
if 'X' in list(_dataset.dims) or 'Y' in list(_dataset.dims):
print('Renaming X/Y coordinates')
_dataset = _dataset.rename(X = 'x', Y = 'y')
# Change the order of coordinates to match QGIS standards:
_dataset = _dataset.transpose('time', 'y', 'x')
# Insert metadata to match Panoply standards:
_dataset.x.attrs = {'standard_name': 'projection_x_coordinate',
'long_name': 'x coordinate of projection',
'units': 'Meter'}
_dataset.y.attrs = {'standard_name': 'projection_y_coordinate',
'long_name': 'y coordinate of projection',
'units': 'Meter'}
# =============================================================================
# # Rename coordinates (old version):
# try:
# _dataset.longitude
# except AttributeError:
# _dataset = _dataset.rename({'lon':'longitude'})
# try:
# _dataset.latitude
# except AttributeError:
# _dataset = _dataset.rename({'lat':'latitude'})
# try:
# _dataset.time
# except AttributeError:
# _dataset = _dataset.rename({'t':'time'})
# =============================================================================
#% Extraction and export:
# -----------------------
# Extraction of fields:
_datasubset = _dataset[fields]
# Extraction of dates:
if dates != ['alldates']:
_datasubset = _datasubset.sel(time = dates)
if output_format == 'NetCDF':
_datasubset.attrs = {'Conventions': 'CF-1.6'} # I am not sure...
# Export:
_datasubset.to_netcdf(output_file + '.nc')
elif output_format == 'GeoTIFF':
_datasubset.rio.to_raster(output_file + '.tiff')
#%% GEOREFERENCING
###############################################################################
# Georef (ex-decorate_NetCDF_for_QGIS)
def georef(*, data, data_type = 'other', include_crs = None, export_opt = False, crs = None):
r"""
Description
-----------
Data from external sources often comes with formatting flaws (missing
CRS, non-standard coordinates, incompatibility with QGIS...).
This function generates a standardized raster or shapefile, in particular
regarding its metadata, which makes geoprocessing operations easier and
allows a proper visualization in QGIS.
Example
-------
import geoconvert as gc
gc.georef(data = r"D:\CWatM\raw_results\test1\modflow_watertable_monthavg.nc",
data_type = 'CWatM')
Parameters
----------
data : str or xr.Dataset (or xr.DataArray)
Path of the file to modify
(the original file is not altered; a new file with 'georef' appended
to its name is created.)
data_type : str
Type of data:
'modflow' | 'DRIAS-Climat 2020' | 'DRIAS-Eau 2021' | 'SIM 2021' |
'DRIAS-Climat 2022' | 'Climat 2022' | 'DRIAS-Eau 2024' | 'SIM 2024' |
'CWatM' | 'autre' | 'other'
(case insensitive)
include_crs : bool, optional
Whether to write the CRS into the output. The default is None
(the CRS is included when the crs argument is provided).
export_opt : bool, optional
Whether to export the result to a new file, saved in the same folder
as the original file, with 'georef' appended to its name.
The default is False.
crs : int, optional
Destination CRS, only necessary when data_type == 'other'. The default is None.
Returns
-------
xarray.Dataset or geopandas.GeoDataFrame.
"""
if include_crs is None:
if crs is not None:
include_crs = True
# ---- ModFlow NetCDF files
# --------------------------
if data_type.casefold() == 'modflow':
# Load
data_ds = load_any(data, decode_coords = 'all', decode_times = False)
print("\nFormatting data...")
# Add standard attributes for coordinates (mandatory for QGIS to correctly read data)
data_ds.x.attrs = {'standard_name': 'projection_x_coordinate',
'long_name': 'x coordinate of projection',
'units': 'Meter'}
data_ds.y.attrs = {'standard_name': 'projection_y_coordinate',
'long_name': 'y coordinate of projection',
'units': 'Meter'}
print(" _ Standard attributes added for coordinates x and y")
# Add CRS
data_epsg = 2154 # Lambert 93
crs_suffix = ''
if include_crs:
data_ds.rio.write_crs(data_epsg, inplace = True)
print(f' _ Coordinates Reference System (epsg:{data_epsg}) included.')
else:
print(f' _ Coordinates Reference System not included. {data_epsg} has to be manually specified in QGIS')
crs_suffix = 'nocrs'
# ========== USELESS ==========================================================
# data_ds = data_ds.transpose('time', 'y', 'x')
# =============================================================================
# ---- CWatM NetCDF files
# CRS inclusion
# ------------------
elif data_type.casefold() == 'cwatm'.casefold():
# Load
data_ds = load_any(data, decode_coords = 'all', decode_times = False)
print("\nFormatting data...")
# Add CRS
data_epsg = 2154 # Lambert 93
crs_suffix = ''
if include_crs:
data_ds.rio.write_crs(data_epsg, inplace = True)
print(f' _ Coordinates Reference System (epsg:{data_epsg}) included.')
else:
print(f' _ Coordinates Reference System not included. {data_epsg} has to be manually specified in QGIS')
crs_suffix = 'nocrs'
# ---- DRIAS-Climat 2020 and DRIAS-Eau 2021 NetCDF files
# SAFRAN data (tas, prtot, radiation, wind...) "DRIAS-2020"
# as well as:
# SURFEX data (evapc, drainc, runoffc, swe, swi...) "EXPLORE2-SIM2 2021"
# ------------------------------------------------------------
elif data_type.replace(" ", "").casefold() in ['drias2020', 'drias-climat2020',
'sim2021', 'drias-eau2021']:
# Load
data_ds = load_any(data, decode_coords = 'all')
print("\nFormatting data...")
# Create X and Y coordinates if necessary, from i and j
list_var_lowcase = [v.casefold() for v in list(data_ds.coords) + list(data_ds.data_vars)]
if 'x' not in list_var_lowcase:
data_ds = data_ds.assign_coords(
X = ('i', 52000 + data_ds.i.values*8000))
print(" _ X values created from i")
if 'y' not in list_var_lowcase:
data_ds = data_ds.assign_coords(
Y = ('j', 1609000 + data_ds.j.values*8000))
print(" _ Y values created from j")
# Replace X and Y as coordinates, and rename them
data_ds = data_ds.swap_dims(i = 'X', j = 'Y')
print(" _ Coordinates i and j replaced with X and Y")
data_ds = data_ds.rename(X = 'x', Y = 'y')
print(" _ Coordinates renamed as lowcase x and y [optional]")
# Get main variable
var = main_var(data_ds)
print(f" _ Main variable is: {var}")
# Ensure that lat, lon, i and j will be further loaded by xarray as coords
data_ds[var].attrs['coordinates'] = 'x y i j lat lon'
print(" _ x, y, i, j, lat, lon ensured to be read as coordinates")
# ============== USELESS ======================================================
# # Reorder data, to ensure QGIS Mesh detects the correct data set
# data_ds = data_ds[[var, 'x', 'y', 'time']]
# print(" _ Data reordered [safety]")
# =============================================================================
# ============== USELESS ======================================================
# # To avoid conflicts with missing values
# data_ds[var].encoding.pop('missing_value')
# data_ds['lat'].encoding.pop('missing_value')
# # data_ds['lat'].encoding['_FillValue'] = np.nan
# data_ds['lon'].encoding.pop('missing_value')
# # data_ds['lon'].encoding['_FillValue'] = np.nan
# =============================================================================
# Add standard attributes for coordinates (mandatory for QGIS to correctly read data)
data_ds.x.attrs = {'standard_name': 'projection_x_coordinate',
'long_name': 'x coordinate of projection',
'units': 'Meter'}
data_ds.y.attrs = {'standard_name': 'projection_y_coordinate',
'long_name': 'y coordinate of projection',
'units': 'Meter'}
print(" _ Standard attributes added for coordinates x and y")
# Add CRS
data_epsg = 27572 # Lambert zone II
crs_suffix = ''
if include_crs:
data_ds.rio.write_crs(data_epsg, inplace = True)
print(f' _ Coordinates Reference System (epsg:{data_epsg}) included. (NB: This might alter Panoply georeferenced visualization)')
else:
print(f' _ Coordinates Reference System not included. {data_epsg} has to be manually specified in QGIS')
crs_suffix = 'nocrs'
# QGIS - Panoply incompatibility:
# For an unknown reason, including the CRS 27572 ("Lambert
# Zone II" / "NTF (Paris)") causes a georeferencing issue in
# Panoply (as if Panoply took {lat = 0 ; lon = 0} as the origin
# of the projection). Without 'spatial_ref' included in the
# netCDF, Panoply georeferences the data correctly, probably
# by using the 'lat' and 'lon' variables directly.
# ---- DRIAS-Climat 2022 NetCDF files
# SAFRAN data (tas, prtot, radiation, wind...) "EXPLORE2-Climat 2022"
# -------------------------------------------------------------------------
elif data_type.replace(" ", "").casefold() in ['drias2022', 'climat2022', 'drias-climat2022']:
# Load
data_ds = load_any(data, decode_cf = False)
print("\nFormating data...")
# Correcting the spatial_ref
# =============================================================================
# data_ds['LambertParisII'] = xr.DataArray(
# data = np.array(-2147220352.0),
# coords = {'LambertParisII': -2147220352.0},
# attrs = {'grid_mapping_name': 'lambert_conformal_conic_1SP',
# 'latitude_of_origin': 52.0,
# 'central_meridian': 0.0,
# 'false_easting': 600000.0,
# 'false_northing': 2200000.0,
# 'epsg': 27572,
# 'references': 'http://www.umr-cnrm.fr/spip.php?article125&lang=en'})
# crs_suffix = 'georef'
# =============================================================================
data_epsg = 27572 # Lambert zone II
if include_crs:
# data_ds = data_ds.drop('LambertParisII')
# data_ds.rio.write_crs(f'epsg:{data_epsg}', inplace = True)
data_ds = standard_grid_mapping(data_ds, data_epsg)
print(f' _ Coordinates Reference System (epsg:{data_epsg}) included. (NB: This might alter Panoply georeferenced visualization)')
crs_suffix = ''
else:
print(f' _ Coordinates Reference System not included. {data_epsg} has to be manually specified in QGIS')
crs_suffix = 'nocrs'
# Get main variable
var = main_var(data_ds)
print(f" _ Main variable is: {var}")
# Ensure that lat, and lon will be further loaded by xarray as coords
data_ds[var].encoding['coordinates'] = 'x y lat lon'
if 'coordinates' in data_ds[var].attrs:
data_ds[var].attrs.pop('coordinates')
data_ds.lat.encoding['grid_mapping'] = data_ds[var].encoding['grid_mapping']
data_ds.lon.encoding['grid_mapping'] = data_ds[var].encoding['grid_mapping']
print(" _ x, y, lat, lon ensured to be read as coordinates [safety]")
# Remove grid_mapping in attributes (it is already in encoding)
if 'grid_mapping' in data_ds[var].attrs:
data_ds[var].attrs.pop('grid_mapping')
# ---- DRIAS-Eau 2024 NetCDF files
# SURFEX data (evapc, drainc, runoffc, swe, swi...) "EXPLORE2-SIM2 2024"
# -------------------------------------------------------------------------
elif data_type.replace(" ", "").replace("-", "").casefold() in [
'sim2024', 'driaseau2024']:
# Load
data_ds = load_any(data, decode_cf = False)
print("\nFormating data...")
# Correcting the spatial_ref
# =============================================================================
# data_ds['LambertParisII'] = xr.DataArray(
# data = np.array(-2147220352.0),
# coords = {'LambertParisII': -2147220352.0},
# attrs = {'grid_mapping_name': 'lambert_conformal_conic_1SP',
# 'latitude_of_origin': 52.0,
# 'central_meridian': 0.0,
# 'false_easting': 600000.0,
# 'false_northing': 2200000.0,
# 'epsg': 27572,
# 'references': 'http://www.umr-cnrm.fr/spip.php?article125&lang=en'})
# crs_suffix = ''
# =============================================================================
data_epsg = 27572 # Lambert zone II
if include_crs:
# if ('LambertParisII' in data_ds.coords) | ('LambertParisII' in data_ds.data_vars):
# data_ds = data_ds.drop('LambertParisII')
# data_ds.rio.write_crs(data_epsg, inplace = True)
data_ds = standard_grid_mapping(data_ds, data_epsg)
print(f' _ Coordinates Reference System (epsg:{data_epsg}) included. (NB: This might alter Panoply georeferenced visualization)')
crs_suffix = ''
else:
print(f' _ Coordinates Reference System not included. {data_epsg} has to be manually specified in QGIS')
crs_suffix = 'nocrs'
# Create X and Y coordinates if necessary, from i and j
list_var_lowcase = [v.casefold() for v in list(data_ds.coords) + list(data_ds.data_vars)]
data_ds = data_ds.assign_coords(
X = ('x', 52000 + data_ds.x.values*8000))
print(" _ X values corrected from erroneous x")
data_ds = data_ds.assign_coords(
Y = ('y', 1609000 + data_ds.y.values*8000))
print(" _ Y values corrected from erroneous y")
# Replace X and Y as coordinates, and rename them
data_ds = data_ds.swap_dims(x = 'X', y = 'Y')
print(" _ Coordinates x and y replaced with X and Y")
data_ds = data_ds.drop(['x', 'y'])
print(" _ Previous coordinates x and y removed")
data_ds = data_ds.rename(X = 'x', Y = 'y')
print(" _ Coordinates renamed as lowcase x and y [optional]")
# Get main variable
var = main_var(data_ds)
print(f" _ Main variable is: {var}")
# Ensure that lat, and lon will be further loaded by xarray as coords
data_ds[var].encoding['coordinates'] = 'x y lat lon'
if 'coordinates' in data_ds[var].attrs:
data_ds[var].attrs.pop('coordinates')
# Reporting grid_mapping to coords/vars that should not be displayed in QGIS:
for c in ['lat', 'lon']:
if (c in data_ds.coords) | (c in data_ds.data_vars):
data_ds[c].encoding['grid_mapping'] = data_ds[var].encoding['grid_mapping']
print(" _ x, y, lat, lon ensured to be read as coordinates [safety]")
# ======== USELESS ============================================================
# for c in ['lat', 'lon', 'time_bnds']:
# if c in data_ds.data_vars:
# data_ds = data_ds.set_coords([c])
# =============================================================================
# Remove grid_mapping in attributes (it is already in encoding)
if 'grid_mapping' in data_ds[var].attrs:
data_ds[var].attrs.pop('grid_mapping')
# ============== USELESS ======================================================
# # Reorder data, to ensure QGIS Mesh detects the correct data set
# data_ds = data_ds[[var, 'x', 'y', 'time']]
# print(" _ Data reordered [safety]")
# =============================================================================
# ============== USELESS ======================================================
# # To avoid conflicts with missing values
# data_ds[var].encoding.pop('missing_value')
# data_ds['lat'].encoding.pop('missing_value')
# # data_ds['lat'].encoding['_FillValue'] = np.nan
# data_ds['lon'].encoding.pop('missing_value')
# # data_ds['lon'].encoding['_FillValue'] = np.nan
# =============================================================================
# Add standard attributes for coordinates (mandatory for QGIS to correctly read data)
data_ds.x.attrs = {'standard_name': 'projection_x_coordinate',
'long_name': 'x coordinate of projection',
'units': 'metre'}
data_ds.y.attrs = {'standard_name': 'projection_y_coordinate',
'long_name': 'y coordinate of projection',
'units': 'metre'}
print(" _ Standard attributes added for coordintes x and y")
# ---- Other files or variables
# CRS inclusion
# ------------------
# =============================================================================
# elif data_type.casefold() in ['autre', 'other']:
# =============================================================================
else:
# Load
data_ds = load_any(data, decode_times = True, decode_coords = 'all')
x_var, y_var = main_space_coords(data_ds)
time_coord = main_time_coord(data_ds)
# Standardize spatial coords
# =============================================================================
# if 'X' in data_ds.coords:
# data_ds = data_ds.rename({'X': 'x'})
# if 'Y' in data_ds.coords:
# data_ds = data_ds.rename({'Y': 'y'})
# if 'latitude' in data_ds.coords:
# data_ds = data_ds.rename({'latitude': 'lat'})
# if 'longitude' in data_ds.coords:
# data_ds = data_ds.rename({'longitude': 'lon'})
# =============================================================================
if x_var == 'X':
data_ds = data_ds.rename({'X': 'x'})
if y_var == 'Y':
data_ds = data_ds.rename({'Y': 'y'})
if y_var == 'latitude':
data_ds = data_ds.rename({'latitude': 'lat'})
if x_var == 'longitude':
data_ds = data_ds.rename({'longitude': 'lon'})
# Standardize time coord
data_ds = data_ds.rename({time_coord: 'time'})
if isinstance(data_ds, gpd.GeoDataFrame):
print("\nFormatting data...")
# Add CRS
crs_suffix = ''
if include_crs:
if crs is not None:
data_epsg = crs
else:
print(" _ Err: it is required to pass the crs argument")
return
data_ds.set_crs(epsg = data_epsg,
inplace = True,
allow_override = True)
print(f' _ Coordinates Reference System (epsg:{data_epsg}) included.')
else:
print(' _ Coordinates Reference System not included.')
crs_suffix = 'nocrs'
elif isinstance(data_ds, xr.Dataset):
print("\nFormatting data...")
# Add CRS
crs_suffix = ''
if include_crs:
if crs is not None:
data_epsg = crs
else:
print(" _ Err: it is required to pass the crs argument")
return
data_ds.rio.write_crs(data_epsg, inplace = True)
print(f' _ Coordinates Reference System (epsg:{data_epsg}) included.')
else:
print(' _ Coordinates Reference System not included.')
crs_suffix = 'nocrs'
# Add standard attributes for coordinates (mandatory for QGIS to correctly read data)
if ('x' in list(data_ds.coords)) & ('y' in list(data_ds.coords)):
data_ds.x.attrs = {'standard_name': 'projection_x_coordinate',
'long_name': 'x coordinate of projection',
'units': 'metre'}
data_ds.y.attrs = {'standard_name': 'projection_y_coordinate',
'long_name': 'y coordinate of projection',
'units': 'metre'}
print(" _ Standard attributes added for coordinates x and y")
elif ('lon' in list(data_ds.coords)) & ('lat' in list(data_ds.coords)):
data_ds.lon.attrs = {'standard_name': 'longitude',
'long_name': 'longitude',
'units': 'degrees_east'}
data_ds.lat.attrs = {'standard_name': 'latitude',
'long_name': 'latitude',
'units': 'degrees_north'}
print(" _ Standard attributes added for coordinates lat and lon")
# ---- Export
# ---------
if export_opt == True:
print('\nExporting...')
# Output filepath
if isinstance(data, (str, pathlib.Path)):
(folder_name, _basename) = os.path.split(data)
(file_name, file_extension) = os.path.splitext(_basename)
output_file = os.path.join(folder_name, f"{'_'.join([file_name, 'georef', crs_suffix])}{file_extension}")
else:
print(" _ As data input is not a file, the result is exported to a standard directory")
output_file = os.path.join(os.getcwd(), f"{'_'.join(['data', 'georef', crs_suffix])}.nc")
# Export
export(data_ds, output_file)
# ---- Return variable
return data_ds
# =========================================================================
#% Notes / Bug fixes
# =========================================================================
# If a variable ends up as a 'data_var' instead of a 'coord':
# data_ds = data_ds.set_coords('i')
# If there is a 'missing_value' / '_FillValue' incompatibility issue:
# data_ds['lon'] = data_ds.lon.fillna(np.nan)
# data_ds['lat'] = data_ds.lat.fillna(np.nan)
# If a non-essential variable causes trouble at export time:
# data_ds = data_ds.drop('lon')
# data_ds = data_ds.drop('lat')
# To find the positions of the nan values:
# np.argwhere(np.isnan(data_ds.lon.values))
# To convert the dates back:
# units, reference_date = ds.time.attrs['units'].split('since')
# ds['time'] = pd.date_range(start = reference_date,
# periods = ds.sizes['time'], freq = 'MS')
# =========================================================================
# Create the 'x' and 'y' coordinates...
# =============================================================================
# # ... from the lon/lat values:
# # ABANDONED, BECAUSE THEIR LATITUDE VALUES ARE WRONG [!]
# coords_xy = rasterio.warp.transform(rasterio.crs.CRS.from_epsg(4326),
# rasterio.crs.CRS.from_epsg(27572),
# np.array(data_ds.lon).reshape((data_ds.lon.size), order = 'C'),
# np.array(data_ds.lat).reshape((data_ds.lat.size), order = 'C'))
#
# # data_ds['x'] = np.round(coords_xy[0][0:data_ds.lon.shape[1]], -1)
#
# # Rounded to the nearest ten because of the initial approximation on lat/lon:
# x = np.round(coords_xy[0], -1).reshape(data_ds.lon.shape[0],
# data_ds.lon.shape[1],
# order = 'C')
# # gives a pattern repeated as many times as there are latitudes
# y = np.round(coords_xy[1], -1).reshape(data_ds.lon.shape[1],
# data_ds.lon.shape[0],
# order = 'F')
# # gives a pattern repeated as many times as there are longitudes
#
# # data_ds['x'] = x[0,:]
# # data_ds['y'] = y[0,:]
# data_ds = data_ds.assign_coords(x = ('i', x[0,:]))
# data_ds = data_ds.assign_coords(y = ('j', y[0,:]))
# =============================================================================
###############################################################################
def standard_grid_mapping(data, epsg = None):
"""
QGIS needs a standard structure for grid_mapping information:
- grid_mapping info should be in encodings and not in attrs
- grid_mapping info should be stored in a coordinate named 'spatial_ref'
- ...
In MeteoFrance data, these QGIS standards are not met.
This function standardizes grid_mapping handling, so that it is
compatible with QGIS.
Parameters
----------
data : str, pathlib.Path or xarray.Dataset
Data whose grid_mapping information should be standardized.
epsg : int, optional
EPSG code of the CRS to write as 'spatial_ref'. If None, the previous
grid_mapping variable is reused. The default is None.
Returns
-------
data_ds : xarray.Dataset
Dataset with standardized grid_mapping information.
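Example
-------
Illustrative call (the path and EPSG code are only examples):
import geoconvert as gc
data_ds = gc.standard_grid_mapping(r"D:\data.nc", epsg = 27572)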
"""
# ---- Load
data_ds = load_any(data)
# Get main variable
var = main_var(data_ds)
# ---- Get the potential names of grid_mapping variable and clean all
# grid_mapping information
# Remove all the metadata about grid_mapping, and save grid_mapping names
names = set()
if 'grid_mapping' in data_ds.attrs:
names.add(data_ds.attrs['grid_mapping'])
data_ds.attrs.pop('grid_mapping')
if "grid_mapping" in data_ds.encoding:
names.add(data_ds.encoding['grid_mapping'])
data_ds.encoding.pop('grid_mapping')
if 'grid_mapping' in data_ds[var].attrs:
names.add(data_ds[var].attrs['grid_mapping'])
data_ds[var].attrs.pop('grid_mapping')
if "grid_mapping" in data_ds[var].encoding:
names.add(data_ds[var].encoding['grid_mapping'])
data_ds[var].encoding.pop('grid_mapping')
# Drop all variables or coords corresponding to the previously found
# grid_mapping names
temp = None
for n in list(names):
if n in data_ds.data_vars:
temp = data_ds[n]
data_ds = data_ds.drop(n)
if n in data_ds.coords:
temp = data_ds[n]
data_ds = data_ds.reset_coords(n, drop = True)
if epsg is None:
# Use the last grid_mapping value as the standard spatial_ref
dummy_epsg = 2154
data_ds.rio.write_crs(dummy_epsg, inplace = True) # creates the spatial_ref structure and mapping
if temp is not None:
data_ds['spatial_ref'] = temp
else:
data_ds.rio.write_crs(epsg, inplace = True)
return data_ds
###############################################################################
def standard_fill_value(*, data_ds, attrs, encod):
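"""
Harmonize the _FillValue of the main variable of data_ds: _FillValue and
missing_value are removed from the attributes, and a single _FillValue is
set in the encoding, taken in order of priority from encod['_FillValue'],
attrs['_FillValue'], encod['missing_value'], attrs['missing_value'],
or np.nan.
"""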
var = main_var(data_ds)
# Clean all fill_value info
if '_FillValue' in data_ds[var].attrs:
data_ds[var].attrs.pop('_FillValue')
if 'missing_value' in data_ds[var].attrs:
data_ds[var].attrs.pop('missing_value')
# Set the fill_value, according to a hierarchical rule
if '_FillValue' in encod:
data_ds[var].encoding['_FillValue'] = encod['_FillValue']
elif '_FillValue' in attrs:
data_ds[var].encoding['_FillValue'] = attrs['_FillValue']
elif 'missing_value' in encod:
data_ds[var].encoding['_FillValue'] = encod['missing_value']
elif 'missing_value' in attrs:
data_ds[var].encoding['_FillValue'] = attrs['missing_value']
else:
data_ds[var].encoding['_FillValue'] = np.nan
return data_ds
#%% FILE MANAGEMENT
###############################################################################
def merge_folder(data):
"""
This function merges all NetCDF files inside a folder into a single dataset.
Parameters
----------
data: str or iterable
Folder, filepath or iterable of filepaths.
Returns
-------
Merged xarray.Dataset.
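Example
-------
Illustrative call (the folder path is hypothetical):
import geoconvert as gc
merged_ds = gc.merge_folder(r"D:\cwatm_outputs")
gc.export(merged_ds, r"D:\cwatm_outputs\merged.nc")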
"""
# ---- Load file list
data_folder, filelist = get_filelist(data, filetype = '.nc')
c = 1
ds_list = []
if len(filelist) > 1:
print("Merging files...")
# ---- Append all xr.Datasets in a list
for f in filelist:
ds_list.append(load_any(os.path.join(data_folder, f),
decode_coords = 'all',
decode_times = True))
print(f" . {f} ({c}/{len(filelist)})")
c += 1
# ---- Backup of attributes and encodings
var = main_var(ds_list[0])
# ========== useless ==========================================================
# attrs = ds_list[0][var].attrs.copy()
# =============================================================================
encod = ds_list[0][var].encoding.copy()
# ---- Merge
merged_ds = xr.merge(ds_list) # Note: works only when duplicates are identical
# ========== wrong ============================================================
# merged_ds = xr.concat(ds_list, dim = 'time')
# =============================================================================
# ========== useless ==========================================================
# merged_ds = merged_ds.sortby('time') # In case the files are not loaded in time order
# =============================================================================
# ---- Transferring encodings (_FillValue, compression...)
merged_ds[var].encoding = encod
return merged_ds
else:
print("Err: Only one NetCDF file was found.")
return
###############################################################################
# Shorten names
def shortname(data, data_type, ext = 'nc'):
"""
Shorten file or folder names, according to a data_type-specific rule.
Parameters
----------
data : str
Folder or filepath to process.
data_type : str
Renaming rule: 'indicateurs DRIAS-Eau 2024'-like values shorten the
DRIAS-Eau indicator folder names, 'remove date' removes the custom
date-time suffixes from file names.
ext : str
File extension of the files to process (used by the 'remove date' rule).
The default is 'nc'.
"""
if ext is not None:
ext = ext.replace('.', '')
if os.path.isfile(data):
root_folder = os.path.split(data)[0]
else:
root_folder = data
if os.path.isdir(data):
content_list = [os.path.join(data, f)
for f in os.listdir(data)]
else:
content_list = [data]
# ---- DRIAS-Eau 2024 indicators (netcdf)
# SAFRAN indicators (SWIAV, SSWI-3...) "EXPLORE2-SIM2 2024"
if data_type.casefold().replace(' ', '').replace('-', '') in [
'indicateursim2024', 'indicateurssim2024','indicateurdriaseau2024',
'indicateursdriaseau2024']:
folder_pattern = re.compile('Indicateurs_(.*Annuel|.*Saisonnier)_(.*)_MF_ADAMONT_(.*)_SIM2')
for elem in content_list:
# Shorten the name
if os.path.isdir(elem):
res = folder_pattern.findall(elem)
if len(res) > 0:
outpath = '_'.join(res[0])
os.rename(elem, os.path.join(root_folder, outpath))
# ---- Remove custom added date-time
elif data_type.casefold().replace(' ', '').replace('-', '') in [
'removedate']:
# This option enables to remove the '2024-11-06_12h53'-like suffixes
# that I have added to file names (in order to trace the date of creation,
# but it is unnecessary and causes file names to be too long for Windows).
datetime_pattern = re.compile(r'(.*)_\d{4,4}-\d{2,2}-\d{2,2}_\d{2,2}h\d{2,2}\.' + f'{ext}')
for elem in content_list:
if len(datetime_pattern.findall(elem)) > 0:
os.rename(os.path.join(root_folder, elem),
os.path.join(root_folder, datetime_pattern.findall(elem)[0] + f'.{ext}'))
elif os.path.isdir(os.path.join(root_folder, elem)):
shortname(os.path.join(root_folder, elem), data_type = data_type, ext = ext)
###############################################################################
def remove_double(data_folder, data_type):
"""
The data downloaded from DRIAS website contains *some* data sets
available for different periods (it is the same data, but one version
goes from 1950 to 2005, and another from 1970 to 2005). For some models,
there are no duplicates of that sort.
These duplicates are unnecessary. This function moves them to a
subfolder named "doublons".
Example
-------
folder_list = [r"Eau-SWIAV_Saisonnier_EXPLORE2-2024_historical",
r"Eau-SWIAV_Saisonnier_EXPLORE2-2024_rcp45",
r"Eau-SWIAV_Saisonnier_EXPLORE2-2024_rcp85"]
root_folder = r"D:\2- Postdoc\2- Travaux\1- Veille\4- Donnees\8- Meteo\DRIAS\DRIAS-Eau\EXPLORE2-SIM2 2024\Pays Basque\Indicateurs"
for folder in folder_list:
base_folder = os.path.join(root_folder, folder)
subfolder_list = [os.path.join(base_folder, f)
for f in os.listdir(base_folder)
if os.path.isdir(os.path.join(base_folder, f))]
for subfolder in subfolder_list:
gc.remove_double(subfolder, data_type = "indicateurs drias eau 2024")
"""
# ---- DRIAS-Eau 2024 indicators (netcdf)
# SAFRAN indicators (SWIAV, SSWI-3...) "EXPLORE2-SIM2 2024"
if data_type.casefold().replace(' ', '').replace('-', '') in [
'indicateursim2024', 'indicateurssim2024','indicateurdriaseau2024',
'indicateursdriaseau2024']:
content_list = [os.path.join(data_folder, f)
for f in os.listdir(data_folder)
if (os.path.isfile(os.path.join(data_folder, f)) \
& (os.path.splitext(os.path.join(data_folder, f))[-1] == '.nc'))]
model_pattern = re.compile(r"(.*)_(\d{4,4})-(\d{4,4})_(historical|rcp45|rcp85)_(.*)_(SIM2.*)")
model_dict = {}
# List all models
for elem in content_list:
filename = os.path.split(elem)[-1]
var, date, datef, scenario, model, creation_date = model_pattern.findall(filename)[0]
date = int(date)
if model in model_dict:
model_dict[model].append(date)
else:
model_dict[model] = [date]
# Move duplicates
if not os.path.exists(os.path.join(data_folder, "doublons")):
os.makedirs(os.path.join(data_folder, "doublons"))
for model in model_dict:
if len(model_dict[model]) > 1:
filename = '_'.join([var, f"{max(model_dict[model])}-{datef}", scenario, model, creation_date])
os.rename(
os.path.join(data_folder, filename),
os.path.join(data_folder, 'doublons', filename)
)
###############################################################################
def name_xml_attributes(*, output_file, fields):
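"""
Write a GDAL '.aux.xml' sidecar file next to output_file, describing each
raster band with the corresponding field name (so that band names are
correctly displayed, for instance in QGIS). The CRS metadata is hard-coded
to epsg:27572.
"""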
band_str = '\t<PAMRasterBand band="{}">\t\t<Description>{}</Description>\t</PAMRasterBand>\n'
base_str = ''.join(['<PAMDataset>\n',
'\t<Metadata>\n',
'\t\t<MDI key="CRS">epsg:27572</MDI>\n',
'\t</Metadata>\n',
len(fields) * band_str,
'</PAMDataset>'])
band_ids = [str(item) for item in list(range(1, len(fields)+1))]
# Create a string list of indices: '1', '2', '3' ...
addstr = [item for pair in zip(band_ids, fields) for item in pair]
# Create a list alternating indices and names: '1', 'CLDC', '2', 'T2M' ...
file_object = open(output_file + '.aux.xml', 'w')
file_object.write(base_str.format(*addstr))
file_object.close()
#%% REPROJECTION
###############################################################################
def reproject(data, *, src_crs=None, base_template=None, bounds=None,
x0=None, y0=None, mask=None, **rio_kwargs):
r"""
Reproject space-time data, using rioxarray.reproject().
Parameters
----------
data : str, pathlib.Path, xarray.Dataset or xarray.DataArray
Data to reproject. Supported file formats are .tif and .nc.
src_crs : int or str or rasterio.crs.CRS, optional, default None
Source coordinate reference system.
For *integers*, src_crs refers to the EPSG code.
For *strings*, src_crs can be OGC WKT string or Proj.4 string.
base_template : str, pathlib.Path, xarray.DataArray or geopandas.GeoDataFrame, optional, default None
Filepath, used as a template for spatial profile. Supported file formats
are .tif, .nc and .shp.
bounds : tuple (float, float, float, float), optional, default None
Boundaries of the target domain: (x_min, y_min, x_max, y_max)
x0: number, optional, default None
Origin of the X-axis, used to align the reprojection grid.
y0: number, optional, default None
Origin of the Y-axis, used to align the reprojection grid.
mask : str, pathlib.Path or geopandas.GeoDataFrame, optional, default None
Filepath or geopandas.GeoDataFrame of the mask.
**rio_kwargs : keyword args, optional, defaults are None
Argument passed to the ``xarray.Dataset.rio.reproject()`` function call.
**Note**: These arguments take priority over ``base_template`` attributes.
May contain:
- dst_crs : str
- resolution : float or tuple
- shape : tuple (int, int)
- transform : Affine
- resampling
- nodata : float or None
- see ``help(xarray.Dataset.rio.reproject)``
Returns
-------
Reprojected xarray.Dataset.
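Examples
--------
Illustrative calls (paths, EPSG code and resolution are only examples):
import geoconvert as gc
# Reproject to Lambert-93 with a 75 m resolution
data_reprj = gc.reproject(r"D:\data.nc", dst_crs = 'epsg:2154', resolution = 75)
# Reproject onto the grid of an existing NetCDF template
data_reprj = gc.reproject(r"D:\data.nc", base_template = r"D:\template.nc")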
"""
#%%%% Load data, base and mask
# ===========================
data_ds = load_any(data, decode_times = True, decode_coords = 'all')
base_ds = None
if base_template is not None:
base_ds = load_any(base_template, decode_times = True, decode_coords = 'all')
if mask is not None:
mask_ds = load_any(mask, decode_times = True, decode_coords = 'all')
if src_crs is not None:
data_ds.rio.write_crs(src_crs, inplace = True)
# Identify spatial coord names
for yname in ['latitude', 'lat', 'y', 'Y']:
if yname in data_ds.coords:
yvar = yname
for xname in ['longitude', 'lon', 'x', 'X']:
if xname in data_ds.coords:
xvar = xname
# Initialize x0 and y0 if None
x_res, y_res = data_ds.rio.resolution()
if x0 is None:
x0 = data_ds[xvar][0].item() + x_res/2
if y0 is None:
y0 = data_ds[yvar][0].item() + y_res/2
#%%%% Compute parameters
# =====================
print("\nComputing parameters...")
# ---- Safeguards against redundant arguments
# -------------------------------------------
if ('transform' in rio_kwargs) & ('shape' in rio_kwargs) & (bounds is not None):
print(" _ Err: bounds cannot be passed alongside with both transform and shape")
return
if 'resolution' in rio_kwargs:
if ('shape' in rio_kwargs) | ('transform' in rio_kwargs):
# safeguard to avoid RioXarrayError
print(" _ Err: resolution cannot be used with shape or transform.")
return
if (bounds is not None) & (mask is not None):
print(" _ Err: bounds and mask cannot be passed together")
return
# ---- Backup of rio_kwargs
# -------------------------
rio_kwargs0 = rio_kwargs.copy()
# Info message
if ('transform' in rio_kwargs) & (bounds is not None):
if (bounds[0] != rio_kwargs['transform'][2]) | (bounds[3] != rio_kwargs['transform'][5]):
print(" _ ...")
# ---- No base_template
# ---------------------
if base_ds is None:
### Retrieve <dst_crs> (required parameter)
if 'dst_crs' not in rio_kwargs:
rio_kwargs['dst_crs'] = data_ds.rio.crs.to_string()
# ---- Base_template
# ------------------
# if there is a base, it will be used after being updated with passed parameters
else:
base_kwargs = {}
### 1. Retrieve all the available info from base:
if isinstance(base_ds, xr.Dataset):
# A- Retrieve <dst_crs> (required parameter)
if 'dst_crs' not in rio_kwargs:
try:
rio_kwargs['dst_crs'] = base_ds.rio.crs.to_string()
except:
rio_kwargs['dst_crs'] = data_ds.rio.crs.to_string()
# B- Retrieve <shape> and <transform>
base_kwargs['shape'] = base_ds.rio.shape
base_kwargs['transform'] = base_ds.rio.transform()
# Note that <resolution> is ignored from base_ds
elif isinstance(base_ds, gpd.GeoDataFrame):
# A- Retrieve <dst_crs> (required parameter)
if 'dst_crs' not in rio_kwargs:
try:
rio_kwargs['dst_crs'] = base_ds.crs.to_string()
except:
rio_kwargs['dst_crs'] = data_ds.rio.crs.to_string()
# B- Retrieve <shape> and <transform>
if 'resolution' in rio_kwargs:
# The bounds of gpd base are used with the user-defined resolution
# in order to compute 'transform' and 'shape' parameters:
x_res, y_res = format_xy_resolution(
resolution = rio_kwargs['resolution'])
shape, x_min, y_max = get_shape(
x_res, y_res, base_ds.total_bounds, x0, y0)
base_kwargs['transform'] = Affine(x_res, 0.0, x_min,
0.0, y_res, y_max)
base_kwargs['shape'] = shape
else:
print(" _ Err: resolution needs to be passed when using a vector base")
return
### 2. Update <base_kwargs> with <rio_kwargs>
for k in rio_kwargs:
base_kwargs[k] = rio_kwargs[k]
# Replace rio_kwargs with the updated base_kwargs
rio_kwargs = base_kwargs
# ---- Mask
# ---------
# <mask> has priority over bounds or rio_kwargs
if mask is not None:
# Reproject the mask
if isinstance(mask_ds, gpd.GeoDataFrame):
mask_ds.to_crs(crs = rio_kwargs['dst_crs'], inplace = True)
bounds_mask = mask_ds.total_bounds
elif isinstance(mask_ds, xr.Dataset):
mask_ds = mask_ds.rio.reproject(dst_crs = rio_kwargs['dst_crs'])
bounds_mask = (mask_ds.rio.bounds()[0], mask_ds.rio.bounds()[3],
mask_ds.rio.bounds()[2], mask_ds.rio.bounds()[1])
else:
bounds_mask = None
# ---- Bounds
# -----------
# <bounds> has priority over rio_kwargs
if (bounds is not None) | (bounds_mask is not None):
if bounds is not None:
print(" _ Note that bounds should be in the format (x_min, y_min, x_max, y_max)")
elif bounds_mask is not None:
bounds = bounds_mask
### Apply <bounds> values to rio arguments
if ('shape' in rio_kwargs0):
# resolution will be defined from shape and bounds
x_res, y_res = format_xy_resolution(bounds = bounds,
shape = rio_kwargs['shape'])
rio_kwargs['transform'] = Affine(x_res, 0.0, bounds[0],
0.0, y_res, bounds[3])
elif ('resolution' in rio_kwargs0):
# shape will be defined from resolution and bounds
x_res, y_res = format_xy_resolution(
resolution = rio_kwargs['resolution'])
shape, x_min, y_max = get_shape(
x_res, y_res, bounds, x0, y0)
rio_kwargs['transform'] = Affine(x_res, 0.0, x_min,
0.0, y_res, y_max)
rio_kwargs['shape'] = shape
# elif ('transform' in rio_kwargs0):
else:
# shape will be defined from transform and bounds
if not 'transform' in rio_kwargs:
rio_kwargs['transform'] = data_ds.rio.transform()
x_res, y_res = format_xy_resolution(
resolution = (rio_kwargs['transform'][0],
rio_kwargs['transform'][4]))
shape, x_min, y_max = get_shape(
x_res, y_res, bounds, x0, y0)
rio_kwargs['transform'] = Affine(x_res, 0.0, x_min,
0.0, y_res, y_max)
rio_kwargs['shape'] = shape
# ---- Resolution
# ---------------
if ('resolution' in rio_kwargs) and ('transform' in rio_kwargs):
x_res, y_res = format_xy_resolution(
resolution = rio_kwargs['resolution'])
transform = list(rio_kwargs['transform'])
transform[0] = x_res
transform[4] = y_res
rio_kwargs['transform'] = Affine(*transform[0:6])
rio_kwargs.pop('resolution')
# ---- Resampling
# ---------------
if 'resampling' not in rio_kwargs:
# by default, resampling is 5 (average) instead of 0 (nearest)
rio_kwargs['resampling'] = rasterio.enums.Resampling(5)
#%%%% Reproject
# ===========
print("\nReprojecting...")
var = main_var(data_ds)
# Backup of attributes and encodings
attrs = data_ds[var].attrs.copy()
encod = data_ds[var].encoding.copy()
# Handle timedelta, as they are not currently supported (https://github.com/corteva/rioxarray/discussions/459)
NaT = False
if isinstance(data_ds[var].values[0, 0, 0], (pd.Timedelta, np.timedelta64)):
NaT = True
data_ds[var] = data_ds[var].dt.days
data_ds[var].encoding = encod
if ('x' in list(data_ds.coords)) & ('y' in list(data_ds.coords)) & \
(('lat' in list(data_ds.coords)) | ('lon' in list(data_ds.coords))):
# if lat and lon are among coordinates, they should be temporarily moved
# to variables to be reprojected
data_ds = data_ds.reset_coords(['lat', 'lon'])
data_reprj = data_ds.rio.reproject(**rio_kwargs)
data_reprj = data_reprj.set_coords(['lat', 'lon'])
else:
data_reprj = data_ds.rio.reproject(**rio_kwargs)
# ======= NOT FINISHED ========================================================
# # Handle timedelta
# if NaT:
# val = pd.to_timedelta(data_reprj[var].values.flatten(), unit='D').copy()
# data_reprj[var] = val.to_numpy().reshape(data_reprj[var].shape)
# # It is still required to precise the dimensions...
# =============================================================================
# Correct _FillValues
data_reprj = standard_fill_value(
data_ds = data_reprj, encod = encod, attrs = attrs)
return data_reprj
###############################################################################
#%%% * Clip
# =============================================================================
# def clip(data, mask, src_crs=None, dst_crs=None):
# """
# The difference between clipping with clip() and with reproject() is that
# clip() keeps the same grid as input, whereas reproject() aligns on the
# passed x0 and y0 values (by default x0=0 and y0=0).
#
# Parameters
# ----------
# data : TYPE
# DESCRIPTION.
# mask : TYPE
# DESCRIPTION.
# src_crs : TYPE, optional
# DESCRIPTION. The default is None.
# dst_crs : TYPE, optional
# DESCRIPTION. The default is None.
#
# Returns
# -------
# TYPE
# DESCRIPTION.
#
# """
# kwargs = {}
# if dst_crs is not None:
# kwargs['dst_crs'] = dst_crs
#
# x_res, y_res = data.rio.resolution()
# x0 = data.x[0].item() + x_res/2
# y0 = data.y[0].item() + y_res/2
#
# return reproject(data, src_crs = src_crs, mask = mask,
# x0 = x0, y0 = y0, **kwargs)
# =============================================================================
###############################################################################
#%%% Align on the closest value
def nearest(x = None, y = None, x0 = 700012.5, y0 = 6600037.5, res = 75):
"""
Example
-------
import geoconvert as gc
gc.nearest(x = 210054)
gc.nearest(y = 6761020)
Parameters
----------
x : float, optional
Value of the x coordinate (or longitude). The default is None.
y : float, optional
Value of the y coordinate (or latitude). The default is None.
x0 : float, optional
Origin of the x-axis of the reference grid. The default is 700012.5.
y0 : float, optional
Origin of the y-axis of the reference grid. The default is 6600037.5.
res : float, optional
Resolution of the reference grid. The default is 75.
Returns
-------
By default, this function returns the closest value (of x or y) aligned
on the grid of the IGN BD ALTI topographic maps.
The x0, y0 and res values can be changed to align on other grids.
"""
# ---- Alignment parameters:
# ---------------------------
# =============================================================================
# # Lambert-93 documentation
# print('\n--- Alignment based on the Lambert-93 documentation ---\n')
# x0 = 700000 # X origin
# y0 = 6600000 # Y origin
# res = 75 # resolution
# =============================================================================
# Coordinates of the IGN BD ALTI v2 maps
if (x0 == 700012.5 or y0 == 6600037.5) and res == 75:
print('\n--- Alignment on the IGN BD ALTI v2 grid (default) ---')
closest = []
if x is not None and y is None:
# print('closest x = ')
if (x0-x)%res <= res/2:
closest = x0 - (x0-x)//res*res
elif (x0-x)%res > res/2:
closest = x0 - ((x0-x)//res + 1)*res
elif y is not None and x is None:
# print('closest y = ')
if (y0-y)%res <= res/2:
closest = y0 - (y0-y)//res*res
elif (y0-y)%res > res/2:
closest = y0 - ((y0-y)//res + 1)*res
else:
print('Err: exactly one of the x and y parameters should be passed')
return
return closest
###############################################################################
#%%% Format x_res and y_res
###############################################################################
#%%% Get shape
def get_shape(x_res, y_res, bounds, x0=0, y0=0):
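"""
Compute the raster shape (height, width) covering the given bounds at the
(x_res, y_res) resolution, with the grid aligned on the (x0, y0) origin.
The bounds are extended outward to the nearest grid nodes. Returns the
shape together with the aligned x_min and y_max.
"""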
# bounds should be (x_min, y_min, x_max, y_max)
# aligns the bounds on the (x0, y0) grid, rounds them outward, etc.
(x_min, y_min, x_max, y_max) = bounds
x_min2 = nearest(x = x_min, res = x_res, x0 = x0)
if x_min2 > x_min:
x_min2 = x_min2 - x_res
y_min2 = nearest(y = y_min, res = y_res, y0 = y0)
if y_min2 > y_min:
y_min2 = y_min2 - abs(y_res)
x_max2 = nearest(x = x_max, res = x_res, x0 = x0)
if x_max2 < x_max:
x_max2 = x_max2 + x_res
y_max2 = nearest(y = y_max, res = y_res, y0 = y0)
if y_max2 < y_max:
y_max2 = y_max2 + abs(y_res)
width = (x_max2 - x_min2)/x_res
height = -(y_max2 - y_min2)/y_res
if (int(width) == width) & (int(height) == height):
shape = (int(height), int(width))
else:
print("Err: shape values are not integers")
return
return shape, x_min2, y_max2
#%% COMPRESS & UNCOMPRESS
###############################################################################
def unzip(data):
"""
In some cases, especially for loading in QGIS, it is much quicker to load
uncompressed netcdf than compressed netcdf.
This function only applies to non-destructive compression.
Parameters
----------
data : str, pathlib.Path, xarray.Dataset or xarray.DataArray
Data to uncompress.
Returns
-------
xarray.Dataset with zlib compression deactivated in its encoding.
"""
# Load
data_ds = load_any(data, decode_times = True, decode_coords = 'all')
# Get main variable
var = main_var(data_ds)
# Deactivate zlib
data_ds[var].encoding['zlib'] = False
# Return
return data_ds
# Export
# =============================================================================
# outpath = '_'.join([
# os.path.splitext(data)[0],
# 'unzip.nc',
# ])
# export(data_ds, outpath)
# =============================================================================
###############################################################################
def gzip(data, complevel = 3, shuffle = False):
"""
Quick tool to apply lossless compression on a NetCDF file using gzip.
examples
--------
gc.gzip(filepath_comp99.8, complevel = 4, shuffle = True)
gc.gzip(filepath_drias2022like, complevel = 5)
Parameters
----------
data : str, pathlib.Path, xarray.Dataset or xarray.DataArray
Data to compress.
complevel : int, optional
zlib compression level (from 1 to 9). The default is 3.
shuffle : bool, optional
Whether to activate the shuffle filter. The default is False.
Returns
-------
xarray.Dataset with zlib compression activated in its encoding.
"""
# Load
data_ds = load_any(data, decode_times = True, decode_coords = 'all')
# Get main variable
var = main_var(data_ds)
# Activate zlib
data_ds[var].encoding['zlib'] = True
data_ds[var].encoding['complevel'] = complevel
data_ds[var].encoding['shuffle'] = shuffle
data_ds[var].encoding['contiguous'] = False
# Return
return data_ds
# Export
# =============================================================================
# outpath = '_'.join([
# os.path.splitext(data)[0],
# 'gzip.nc',
# ])
# export(data_ds, outpath)
# =============================================================================
###############################################################################
def pack(data, nbits = 16):
"""
examples
--------
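Illustrative calls (the file paths are hypothetical):
import geoconvert as gc
data_packed = gc.pack(r"D:\data.nc", nbits = 16)
gc.export(data_packed, r"D:\data_packed.nc")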
Parameters
----------
data : str, pathlib.Path, xarray.Dataset or xarray.DataArray
Data to pack.
nbits : int, optional
Number of bits to pack into (8 or 16). The default is 16.
Returns
-------
xarray.Dataset with packing parameters (scale_factor, add_offset, dtype,
_FillValue) set in its encoding.
"""
if (nbits != 16) & (nbits != 8):
print("Err: nbits should be 8 or 16")
return
nval = {8: 256,
16: 65536}
# Load
data_ds = load_any(data, decode_times = True, decode_coords = 'all')
# Get main variable
var = main_var(data_ds)
# Determine if there are negative and positive values or not
if ((data_ds[var]>0).sum().item() > 0) & ((data_ds[var]<0).sum().item() > 0):
rel_val = True
else:
rel_val = False
# Compress
bound_min = data_ds[var].min().item()
bound_max = data_ds[var].max().item()
# Add an increased max bound, that will be used for _FillValue
bound_max = bound_max + (bound_max - bound_min + 1)/nval[nbits]
scale_factor, add_offset = compute_scale_and_offset(
bound_min, bound_max, nbits)
data_ds[var].encoding['scale_factor'] = scale_factor
if rel_val:
data_ds[var].encoding['dtype'] = f'int{nbits}'
data_ds[var].encoding['_FillValue'] = nval[nbits]//2-1
data_ds[var].encoding['add_offset'] = add_offset
else:
data_ds[var].encoding['dtype'] = f'uint{nbits}'
data_ds[var].encoding['_FillValue'] = nval[nbits]-1
data_ds[var].encoding['add_offset'] = 0
print(" Compression (lossy)")
# Prevent _FillValue issues
if ('missing_value' in data_ds[var].encoding) & ('_FillValue' in data_ds[var].encoding):
data_ds[var].encoding.pop('missing_value')
# Return
return data_ds
# Export
# =============================================================================
# outpath = '_'.join([
# os.path.splitext(data)[0],
# 'pack' + os.path.splitext(data)[-1],
# ])
# export(data_ds, outpath)
# =============================================================================
#%%% Packing netcdf (previously packnetcdf.py)
"""
Created on Wed Aug 24 16:48:29 2022
@author: script based on James Hiebert's work (2015):
http://james.hiebert.name/blog/work/2015/04/18/NetCDF-Scale-Factors.html
Reminder of dtypes:
uint8 (unsigned int.) 0 to 255
uint16 (unsigned int.) 0 to 65535
uint32 (unsigned int.) 0 to 4294967295
uint64 (unsigned int.) 0 to 18446744073709551615
int8 (Bytes) -128 to 127
int16 (short integer) -32768 to 32767
int32 (integer) -2147483648 to 2147483647
int64 (integer) -9223372036854775808 to 9223372036854775807
float16 (half precision float) 10 bits mantissa, 5 bits exponent (~ 4 cs ?)
float32 (single precision float) 23 bits mantissa, 8 bits exponent (~ 8 cs ?)
float64 (double precision float) 52 bits mantissa, 11 bits exponent (~ 16 cs ?)
"""
###############################################################################
[docs]def compute_scale_and_offset(min, max, n):
"""
    Computes the scale factor and offset necessary to pack a float32 (or float64)
    set of values into an int16 or int8 set of values.
Parameters
----------
min : float
Minimum value from the data
max : float
Maximum value from the data
n : int
Number of bits into which you wish to pack (8 or 16)
Returns
-------
scale_factor : float
Parameter for netCDF's encoding
add_offset : float
Parameter for netCDF's encoding
"""
# stretch/compress data to the available packed range
scale_factor = (max - min) / (2 ** n - 1)
# translate the range to be symmetric about zero
add_offset = min + 2 ** (n - 1) * scale_factor
return (scale_factor, add_offset)
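# Worked example (kept commented): for values spanning 0 to 300 packed on 16 bits,
# scale_factor = 300/65535 ~ 0.004578 and add_offset = 0 + 32768*scale_factor ~ 150.0.
# =============================================================================
# scale_factor, add_offset = compute_scale_and_offset(0.0, 300.0, 16)
# =============================================================================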
###############################################################################
[docs]def pack_value(unpacked_value, scale_factor, add_offset):
"""
Compute the packed value from the original value, a scale factor and an
offset.
Parameters
----------
unpacked_value : numeric
Original value.
    scale_factor : numeric
        Scale factor used for the packing (packed = (unpacked - add_offset) / scale_factor).
    add_offset : numeric
        Offset subtracted from the original value before scaling.
Returns
-------
numeric
Packed value.
"""
    # NB: the integer actually stored in the NetCDF file is the rounded value;
    # the floored value is only printed for information, the raw quotient is returned.
    print(f'math.floor: {math.floor((unpacked_value - add_offset) / scale_factor)}')
    return (unpacked_value - add_offset) / scale_factor
###############################################################################
[docs]def unpack_value(packed_value, scale_factor, add_offset):
"""
Retrieve the original value from a packed value, a scale factor and an
offset.
Parameters
----------
packed_value : numeric
Value to unpack.
    scale_factor : numeric
        Scale factor that was used to pack the original value.
    add_offset : numeric
        Offset that was used to pack the original value.
Returns
-------
numeric
Original unpacked value.
"""
return packed_value * scale_factor + add_offset
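# Round-trip sketch (kept commented): packing then unpacking recovers the
# original value to within one scale_factor.
# =============================================================================
# sf, off = compute_scale_and_offset(0.0, 300.0, 16)
# packed = pack_value(150.0, sf, off)
# unpack_value(round(packed), sf, off)   # ~150.0 (within one sf of the input)
# =============================================================================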
#%% EXPORT
###############################################################################
[docs]def export(data, output_filepath):
    """
    Export data (any input accepted by load_any) to the format inferred from
    the extension of output_filepath: .shp, .nc, .tif or .asc.
    """
    extension_dst = os.path.splitext(output_filepath)[-1]
data_ds = load_any(data, decode_times = True, decode_coords = 'all')
if isinstance(data_ds, gpd.GeoDataFrame):
if extension_dst == '.shp':
print("\nExporting...")
data_ds.to_file(output_filepath)
print(f" _ Success: The data has been exported to the file '{output_filepath}'")
elif extension_dst in ['.nc', '.tif']:
print("Err: To convert vector to raster, use geoconvert.reproject() instead")
return
if isinstance(data_ds, xr.Dataset):
print("\nExporting...")
if extension_dst == '.nc':
data_ds.to_netcdf(output_filepath)
elif extension_dst in ['.tif', '.asc']:
data_ds.rio.to_raster(output_filepath)
print(f" _ Success: The data has been exported to the file '{output_filepath}'")
#%% OPERATIONS
###############################################################################
[docs]def hourly_to_daily(data, mode = 'sum'):
    """
    Resample hourly data to daily values, using 'sum', 'mean', 'max' or 'min'
    depending on whether the quantity is extensive or intensive.
    """
    # ---- Process data
#% Load data:
data_ds = load_any(data, decode_coords = 'all', decode_times = True)
var = main_var(data_ds)
time_coord = main_time_coord(data_ds)
#% Resample:
print(" _ Resampling time...")
if mode == 'mean':
datarsmpl = data_ds.resample({time_coord: '1D'}).mean(dim = time_coord,
keep_attrs = True)
elif mode == 'max':
datarsmpl = data_ds.resample({time_coord: '1D'}).max(dim = time_coord,
keep_attrs = True)
elif mode == 'min':
datarsmpl = data_ds.resample({time_coord: '1D'}).min(dim = time_coord,
keep_attrs = True)
elif mode == 'sum':
datarsmpl = data_ds.resample({time_coord: '1D'}).sum(dim = time_coord,
skipna = False,
keep_attrs = True)
# ---- Preparing export
# Transfer encodings
for c in list(datarsmpl.coords):
datarsmpl[c].encoding = data_ds[c].encoding
datarsmpl[c].attrs = data_ds[c].attrs
datarsmpl[time_coord].encoding['units'] = datarsmpl[time_coord].encoding['units'].replace('hours', 'days')
datarsmpl[var].encoding = data_ds[var].encoding
# Case of packing
if ('scale_factor' in datarsmpl[var].encoding) | ('add_offset' in datarsmpl[var].encoding):
# Packing (lossy compression) induces a loss of precision of
# apprx. 1/1000 of unit, for a quantity with an interval of 150
# units. The packing is initially used in some original ERA5-Land data.
if mode == 'sum':
print(" Correcting packing encodings...")
datarsmpl[var].encoding['scale_factor'] = datarsmpl[var].encoding['scale_factor']*24
datarsmpl[var].encoding['add_offset'] = datarsmpl[var].encoding['add_offset']*24
return datarsmpl
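# A minimal usage sketch (kept commented): aggregate hourly ERA5-Land total
# precipitation into daily sums; paths are illustrative.
# =============================================================================
# daily_ds = hourly_to_daily(r"D:\era5land_tp_hourly.nc", mode = 'sum')
# export(daily_ds, r"D:\era5land_tp_daily.nc")
# =============================================================================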
###############################################################################
[docs]def to_instant(data, derivative = False):
    """
    Convert cumulative (period-aggregated) values into instantaneous values by
    differencing along the time coordinate (divided by the time step if
    derivative = True).
    """
    data_ds = load_any(data, decode_coords = 'all', decode_times = True)
time_coord = main_time_coord(data_ds)
if derivative:
inst_ds = data_ds.diff(dim = time_coord)/data_ds[time_coord].diff(dim = time_coord)
else:
inst_ds = data_ds.diff(dim = time_coord)
return inst_ds
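# A minimal usage sketch (kept commented): de-accumulate a cumulative variable;
# the path is illustrative.
# =============================================================================
# inst_ds = to_instant(r"D:\era5land_tp_cumul.nc", derivative = False)
# =============================================================================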
###############################################################################
# Convert to CWatM (ex Prepare CWatM inputs)
[docs]def convert_to_cwatm(data, data_type, reso_m = None, EPSG_out=None,
EPSG_in = None, coords_extent = 'Bretagne'):
# =============================================================================
# previously prepare_CWatM_input(*, data, data_type, reso_m = None,
# EPSG_out, EPSG_in = None,
# coords_extent = 'Bretagne'):
# =============================================================================
"""
Example
-------
# Standard:
ds = gc.prepare_CWatM_input(data = r"",
data_type = 'DRIAS',
reso_m = 8000,
EPSG_out = 3035)
# Crop coefficients:
gc.prepare_CWatM_input(data = r"D:\2- Postdoc\2- Travaux\3_CWatM_EBR\data\input_1km_LeMeu\landcover\grassland\cropCoefficientGrassland_10days_refined.nc",
data_type = "crop coeff",
reso_m = 1000,
EPSG_out = 3035,
EPSG_in = 4326)
Parameters
----------
data : str
Fichier à convertir.
data_type : str
Type de données : 'ERA5' | 'mask' | 'soil depth' | 'DRIAS'
reso_m : float
Résolution de sortie [m] : 75 | 1000 | 5000 | 8000
EPSG_out : int
Système de coordonnées de référence de sortie : 2154 | 3035 | 4326...
EPSG_in : int
Système de coordonnées de référence d'entrée
coords_extent : list, or str
Emprise spatiale : [x_min, x_max, y_min, y_max]
(for Elias: [3202405.9658273868262768,
4314478.7219533622264862,
2668023.2067883270792663,
2972043.0969522628001869]) -> shape = (35, 22)
or keywords: "Bretagne", "from_input"
Returns
-------
None.
"""
    # Parse the input path (several branches below need these names to build
    # their output file paths)
    if isinstance(data, (str, pathlib.Path)):
        (input_folder, _file_name) = os.path.split(str(data))
        (file_name, file_extension) = os.path.splitext(_file_name)
#%%% ERA5
if data_type.casefold().replace('-', '') in ["era5", "era5land", "era"]:
# ---- Merge files
# Usually ERA5-Land files are too big to be downloaded for the full period.
# Here the first step is to merge the available ERA5.nc files.
        merged_ds = merge_folder(data)
        var = main_var(merged_ds)
# ---- Convert hourly to daily
print(" _ Converting hourly to daily...")
modelist = { # to differentiate extensive from intensive quantities
'ro': 'sum',
'sro': 'sum',
'ssro': 'sum',
'evavt': 'sum',
'evatc': 'sum',
'pev': 'sum',
'tp': 'sum',
'fal': 'mean',
'rh': 'mean',
'ssrd': 'mean',
'strd': 'mean',
'sp': 'mean',
't2m': 'mean',
'd2m': 'mean',
'u10': 'mean',
'v10': 'mean',
'wind_speed': 'mean'}
daily_ds = hourly_to_daily(merged_ds, mode = modelist[var])
# ---- Convert units when needed
if var in ['ssrd', 'strd']:
print(" _ Converting radiation units from J/m²/h to W/m²")
daily_ds = convert_downwards_radiation(daily_ds)
elif var == 'pev':
print(" _ Converting negative potential evapotranspiration to positive")
# Backup of attributes and encodings
attrs = daily_ds[var].attrs.copy()
encod = daily_ds[var].encoding.copy()
daily_ds[var] = daily_ds[var]*-1
# Transferring attributes and encodings
daily_ds[var].encoding = encod
daily_ds[var].attrs = attrs
# Case of packing
if ('scale_factor' in daily_ds[var].encoding) | ('add_offset' in daily_ds[var].encoding):
# Packing (lossy compression) induces a loss of precision of
# apprx. 1/1000 of unit, for a quantity with an interval of 150
# units. The packing is initially used in some original ERA5-Land data.
print(" . Correcting packing encodings...")
daily_ds[var].encoding['add_offset'] = daily_ds[var].encoding['add_offset']*-1
return daily_ds
#%%% IGN BD ALTI 25m (DEM)
elif data_type.casefold().replace(' ', '') in ["alti", "bdalti", "ignalti"]:
data_folder, filelist = get_filelist(data, filetype = '.asc')
c = 0
ds_list = []
for f in filelist:
ds_list.append(load_any(os.path.join(data_folder, f), name = 'elev'))
print(c)
c += 1
            # Merge every 10 tiles to keep memory usage reasonable
            if c == 10:
ds_list = [xr.merge(ds_list)]
c = 0
merged_ds = xr.merge(ds_list)
# Replace 0 with NaN
merged_ds['elev'] = merged_ds['elev'].where(merged_ds['elev'] != 0)
# Encodings (_FillValue, compression...)
        temp = load_any(os.path.join(data_folder, filelist[0]), name = 'elev')
merged_ds['elev'].encoding = temp['elev'].encoding
return merged_ds
# export(merged_ds, os.path.join(data_folder, 'MNT_fusion.asc'))
#%%% ° Maskmap
elif data_type.casefold() in ["mask", "areamaps"]:
print("\nNB : Conversion à titre illustratif. Pour convertir proprement le masque, il convient d'utiliser le LDD\n")
with xr.open_dataset(data) as mask_ds:
mask_ds.load() # to unlock the resource
mask_ds = mask_ds.squeeze().drop("band")
if 'spatial_ref' in mask_ds.coords or 'spatial_ref' in mask_ds.data_vars:
# _motif = re.compile('"EPSG","\d+"')
# _substr = _motif.findall(mask_ds.spatial_ref.crs_wkt)[-1]
# _epsg_motif = re.compile('\d+')
# _mask_epsg = int(_epsg_motif.search(_substr).group())
_mask_epsg = int(mask_ds.rio.crs.to_epsg())
else:
print('\nFor transforming, mask needs to have a CRS associated.')
return
#% Reprojection of the mask in the CRS and the resolution of the data:
print('\nReprojecting...')
print(' from epsg:{} to epsg:{}'.format(str(_mask_epsg), str(EPSG_out)))
if reso_m//1000 == 0:
print(' resolution: {} m'.format(reso_m))
else:
print(' resolution: {} km'.format(reso_m/1000))
mask_coords_conv = rasterio.warp.transform(rasterio.crs.CRS.from_epsg(_mask_epsg),
rasterio.crs.CRS.from_epsg(EPSG_out),
[mask_ds.x.values.min()], [mask_ds.y.values.max()])
reso_input = abs(mask_ds.x.values[1] - mask_ds.x.values[0])
print(' resolution input: {} m'.format(reso_input))
if reso_m == 75:
x_min = nearest(x = mask_coords_conv[0][0] - reso_input)
y_max = nearest(x = mask_coords_conv[1][0] + reso_input)
else:
x_min = (mask_coords_conv[0][0] - reso_input) // reso_m * reso_m - reso_m/2
y_max = (mask_coords_conv[1][0] + reso_input) // reso_m * reso_m + reso_m/2
# mask_coords_conv = rasterio.warp.transform(rasterio.crs.CRS.from_epsg(_mask_epsg),
# rasterio.crs.CRS.from_epsg(EPSG_out),
# [mask_ds.x.values.min()], [mask_ds.y.values.min()])
# y_min = mask_coords_conv[1][0]//reso_m*reso_m - reso_m/2
transform_ = Affine(reso_m, 0.0, x_min,
0.0, -reso_m, y_max)
width_ = int(mask_ds.x.shape[0] * reso_input / reso_m //1 +1)
height_ = int(mask_ds.y.shape[0] * reso_input / reso_m //1 +1)
mask_reprj = mask_ds.rio.reproject(dst_crs = 'epsg:'+str(EPSG_out),
transform = transform_,
resampling = rasterio.enums.Resampling(5),
shape = (height_, width_),
)
# mask_reprj.rio.write_crs("epsg:"+str(EPSG_out), inplace = True)
#% Export :
print('\nCompressing and saving...')
print(' output file type: *{}'.format(file_extension))
output_file = os.path.join(input_folder,
' '.join([file_name,
'res='+str(reso_m)+'m',
'CRS='+str(EPSG_out),
]) + file_extension)
if file_extension == '.nc':
var = main_var(mask_reprj)
# mask_reprj[var].encoding['_FillValue'] = None # ou np.nan ?
# ERREUR : mask_reprj.to_netcdf(output_file, encoding = {var: {'zlib': True, 'dtype': 'float32', 'scale_factor': 0.00000001, '_FillValue': np.nan}})
mask_reprj.to_netcdf(output_file)
elif file_extension == '.tif':
### Version courte ###
mask_reprj.rio.to_raster(output_file)
### Version longue ###
# =============================================================================
# # Ouvrir le fichier :
# _data_tmp = rasterio.open(data, 'r')
# # Récupérer le profil :
# profile_ = _data_tmp.profile
# # Mettre à jour le profil
# profile_.update(transform = transform_,
# dtype = str(mask_reprj.band_data.dtype),
# crs = 'epsg:'+str(EPSG_out),
# width = width_,
# height = height_,
# )
# _data_tmp.close()
#
# with rasterio.open(output_file, 'w', **profile_) as out_f:
# out_f.write_band(1, mask_reprj.band_data)
# =============================================================================
return mask_reprj
#%%% ° Soil depth
elif data_type.casefold() in ["soil depth", "soildepth", "ucs_pp_bzh"]:
data_df = gpd.read_file(data) # data_df = soil depth
data_epsg = data_df.crs.to_epsg()
        #% Compute local weighted mean depths
        print('\nComputing local weighted depth averages...')
        soil_depth_limit = 140 # semi-arbitrary value
class_depth_convert = {'0':0,
'1':soil_depth_limit - (soil_depth_limit-100)/2,
'2':90,
'3':70,
'4':50,
'5':30,
'6':10,
}
data_df['EPAIS_1_NUM'] = [class_depth_convert[val] for val in data_df['EPAIS_DOM'].values]
data_df['EPAIS_2_NUM'] = [class_depth_convert[val] for val in data_df['EPAIS_2'].values]
data_df['EPAIS_1_2'] = (data_df['EPAIS_1_NUM'] * data_df['P_EPAISDOM'] + data_df['EPAIS_2_NUM'] * data_df['P_EPAIS2']) / (data_df['P_EPAISDOM'] + data_df['P_EPAIS2'])
        data_df['EPAIS_1_2'] = data_df['EPAIS_1_2']/100 # Convert from cm to m
        #% Reproject to the target CRS
print('\nReprojecting...')
print(' from epsg:{} to epsg:{}'.format(str(data_epsg), str(EPSG_out)))
data_df.to_crs('epsg:'+str(EPSG_out), inplace = True)
#% Rasterisation
print('\nRasterizing...')
x_min = 74000
y_max = 6901000
bounds_conv = rasterio.warp.transform(rasterio.crs.CRS.from_epsg(2154),
rasterio.crs.CRS.from_epsg(EPSG_out),
[74000], [6901000]) # Limites (larges) de la Bretagne en L93
x_min = bounds_conv[0][0] // reso_m * reso_m
y_max = bounds_conv[1][0] // reso_m * reso_m
        # Half-pixel shift
if reso_m == 75:
x_min = nearest(x = x_min) - 75/2
y_max = nearest(x = y_max) + 75/2
else:
x_min = x_min - reso_m/2
y_max = y_max + reso_m/2
transform_ = Affine(reso_m, 0.0, x_min,
0.0, -reso_m, y_max)
# =============================================================================
# transform_ = Affine(reso_m, 0.0, x_min,
# 0.0, reso_m, 670500)
# =============================================================================
width_ = 377000 // reso_m + 1
height_ = 227000 // reso_m + 1
raster_data = rasterio.features.rasterize(
[(val['geometry'], val['EPAIS_1_2']) for i, val in data_df.iterrows()],
out_shape = (height_, width_),
transform = transform_,
fill = np.NaN,
all_touched = True)
#% Convertir en DataSet
print('\nConverting into xarray.Dataset...')
# Converting to xarray dataarray:
dataarray_ = xr.DataArray(raster_data,
coords = [np.arange(y_max,
y_max - reso_m * height_,
-reso_m).astype(np.float32),
np.arange(x_min,
x_min + reso_m * width_,
reso_m).astype(np.float32)],
dims = ['y', 'x'])
        # NB: 'range()' is avoided because it returns integers;
        # QGIS fails to detect the CRS when the axis coordinates are integers
# =============================================================================
# dataarray_ = xr.DataArray(raster_data.transpose(),
# coords = [range(x_min,
# x_min + reso_m * width_,
# reso_m),
# range(y_max,
# y_max - reso_m * height_,
# -reso_m)],
# dims = ['x', 'y'])
# =============================================================================
# Converting to xarray dataset:
dataset_ = dataarray_.to_dataset(name = 'soildepth')
# Adding attributes:
dataset_.rio.write_crs("epsg:"+str(EPSG_out), inplace = True)
dataset_.x.attrs = {'standard_name': 'projection_x_coordinate',
'long_name': 'x coordinate of projection',
'units': 'Meter'}
dataset_.y.attrs = {'standard_name': 'projection_y_coordinate',
'long_name': 'y coordinate of projection',
'units': 'Meter'}
        dataset_.soildepth.attrs = {'grid_mapping': "spatial_ref"} # To be removed???
# dataset_.assign_coords(spatial_ref = dataset_.spatial_ref)
# dataset_.rio.write_crs("epsg:"+str(EPSG_out), inplace = True)
#% Export :
print('\nCompressing and saving...')
full_file_name = ' '.join([file_name,
'res='+str(reso_m)+'m',
'CRS='+str(EPSG_out),
'lim='+str(soil_depth_limit),
])
output_file = os.path.join(input_folder,
full_file_name + '.nc')
print(' --> result exported to:\n --> ' + output_file)
var = main_var(dataset_)
# dataset_[var].encoding['_FillValue'] = None # ou np.nan ?
# ERREUR : dataset_.to_netcdf(output_file, encoding = {var: {'zlib': True, 'dtype': 'float32', 'scale_factor': 0.00000001, '_FillValue': np.nan},})
dataset_.to_netcdf(output_file)
        #% Files for CWatM
topsoil_ds = dataset_.where(dataset_['soildepth']<0.30, 0.30)
topsoil_ds = topsoil_ds.where(~np.isnan(dataset_), np.nan)
# topsoil_ds = topsoil_ds.where(~np.isnan(dataset_), 0)
subsoil_ds = dataset_ - topsoil_ds
output_top = os.path.join(input_folder,
'soildepth1_' + full_file_name + '.nc')
output_sub = os.path.join(input_folder,
'soildepth2_' + full_file_name + '.nc')
# topsoil_ds.to_netcdf(output_top, encoding = {var: {'zlib': True, 'dtype': 'float32', 'scale_factor': 0.00000001, '_FillValue': np.nan},})
topsoil_ds.to_netcdf(output_top)
# subsoil_ds.to_netcdf(output_sub, encoding = {var: {'zlib': True, 'dtype': 'float32', 'scale_factor': 0.00000001, '_FillValue': np.nan},})
subsoil_ds.to_netcdf(output_sub)
"""
La compression (27x plus petit) fait planter CWatM lorsqu'il utilise
l'option use_soildepth_as_GWtop
"""
return dataset_
#%%% DRIAS-Climat 2022 (netcdf)
    # SAFRAN data (tas, prtot, radiation, wind...) "EXPLORE2-Climat 2022"
# -------------------------------------------------------------------------
elif data_type.replace(" ", "").casefold() in ['drias2022', 'climat2022', 'drias-climat2022']:
# Load
data_ds = load_any(data)
# Get main variable
var = main_var(data_ds)
print(f" _ Main variable is: {var}")
# Backup of attributes and encodings
attrs = data_ds[var].attrs.copy()
encod = data_ds[var].encoding.copy()
# If it contains time_bnds (periods time)
if ('time_bnds' in data_ds.coords) | ('time_bnds' in data_ds.data_vars):
            # The closing bound is used as the time coordinate
attributes = data_ds['time'].attrs.copy()
data_ds['time'] = data_ds.time_bnds.loc[{'bnds': 1}]
data_ds['time'].attrs = attributes
# time_bnds and bnds are removed
data_ds = data_ds.drop_dims('bnds')
print(" _ 'time_bnds' and 'bnds' removed")
# Convert units
if var in ['evspsblpotAdjust', 'prsnAdjust', 'prtotAdjust']:
data_ds[var] = data_ds[var]/1000*(60*60*24) # conversion from mm.s-1 to m.d-1
data_ds[var].attrs = attrs
data_ds[var].attrs['units'] = 'm.d-1'
data_ds[var].encoding = encod
print(' _ Units converted from mm/s to m/d')
# =============================================================================
# # Remove compression (too long to load in QGIS otherwise)
# data_ds[var].encoding['zlib'] = False
# =============================================================================
return data_ds
#%%% DRIAS-Eau 2024 (netcdf)
    # SURFEX data (evapc, drainc, runoffc, swe, swi...) "EXPLORE2-SIM2 2024"
# -------------------------------------------------------------------------
elif data_type.replace(" ", "").casefold() in ['sim2024', 'drias-eau2024',
'driaseau2024']:
# Load
xr_kwargs = {}
# =============================================================================
# # Particular case for SSWI (grid_mapping is bugged)
# sswi_pattern = re.compile('.*(SSWI).*')
# if len(sswi_pattern.findall(data)) > 0:
# xr_kwargs['drop_variables'] = 'LambertParisII'
# =============================================================================
xr_kwargs['drop_variables'] = 'LambertParisII'
data_ds = load_any(data, **xr_kwargs)
# Get main variable
var = main_var(data_ds)
print(f" _ Main variable is: {var}")
# Backup of attributes and encodings
attrs = data_ds[var].attrs.copy()
encod = data_ds[var].encoding.copy()
# If it contains time_bnds (periods time)
if ('time_bnds' in data_ds.coords) | ('time_bnds' in data_ds.data_vars):
            # The closing bound is used as the time coordinate
attributes = data_ds['time'].attrs.copy()
data_ds['time'] = data_ds.time_bnds.loc[{'bnds': 1}]
data_ds['time'].attrs = attributes
# time_bnds and bnds are removed
data_ds = data_ds.drop_dims('bnds')
print(" _ 'time_bnds' and 'bnds' removed")
# Convert units
if var in ['DRAINC', 'EVAPC', 'RUNOFFC']:
data_ds[var] = data_ds[var]/1000 # conversion from mm.d-1 to m.d-1
data_ds[var].attrs = attrs
data_ds[var].attrs['units'] = 'm.d-1'
data_ds[var].encoding = encod
print(' _ Units converted from mm/d to m/d')
# =============================================================================
# # Particular case for SSWI (_FillValue is wrong)
# if ('missing_values' in data_ds[var].encoding) \
# & ('_FillValue' in data_ds[var].encoding):
# print(data_ds[var].encoding['missing_value'])
# print(data_ds[var].encoding['_FillValue'])
# =============================================================================
# =============================================================================
# data_ds[var].encoding['_FillValue'] = 999999
# data_ds[var].encoding.pop('missing_value')
# =============================================================================
# ======= NOT ACCURATE ENOUGH =================================================
# # Also, if SSWI and seasonal, dates should be shifted 45 days back
# sswi_season_pattern = re.compile('.*(SSWI_seas).*')
# if len(sswi_season_pattern.findall(data)) > 0:
# encodings = data_ds['time'].encoding.copy()
# data_ds['time'] = data_ds['time'] - pd.Timedelta(days = 45)
# data_ds['time'].encoding = encodings
# =============================================================================
# =============================================================================
# # Remove compression (too long to load in QGIS otherwise)
# data_ds[var].encoding['zlib'] = False
# =============================================================================
return data_ds
#%%% C3S seasonal forecast
# https://cds.climate.copernicus.eu/datasets/seasonal-original-single-levels?tab=overview
elif data_type.casefold() in ["c3s"]:
data_ds = load_any(data,
# decode_times = True,
# decode_coords = 'all'
)
# Get main variable
var = main_var(data_ds)
print(f" _ Main variables are: {', '.join(var)}")
# Backup of attributes and encodings
attrs = dict()
encod = dict()
for v in var:
attrs[v] = data_ds[v].attrs.copy()
encod[v] = data_ds[v].encoding.copy()
# Format time coordinate
if ('forecast_reference_time' in data_ds.coords) | ('forecast_reference_time' in data_ds.data_vars):
data_ds = data_ds.squeeze('forecast_reference_time').drop('forecast_reference_time')
if ('forecast_period' in data_ds.coords) | ('forecast_period' in data_ds.data_vars):
data_ds = data_ds.drop('forecast_period')
# =========== implemented in load_any =========================================
# data_ds = use_valid_time(data_ds)
# =============================================================================
return data_ds
#%%% TEMP
elif data_type.casefold() in []:
0
#%%% * DRIAS
elif data_type.casefold() in ["drias"]:
print(f'\nProcessed file = {data}')
with xr.open_dataset(data, decode_coords = 'all') as data_ds:
data_ds.load() # to unlock the resource
#% Formating
# -----------
print('\nPre-formating...')
# Drop useless variables
data_ds = data_ds.drop(['i', 'j', 'lat', 'lon'])
print(' _ Additional coords (i, j, lat, lon) dropped')
# Add CRS
if not EPSG_in: # EPSG = None, by default
if 'spatial_ref' not in list(data_ds.coords) + list(data_ds.data_vars):
EPSG_in = 27572
data_ds.rio.write_crs(f"epsg:{EPSG_in}", inplace = True)
print(f" _ The Coordinates Reference System is not indicated in the original file. By default, epsg:{EPSG_in} is used")
else:
EPSG_in = data_ds.rio.crs.to_epsg()
# =============================================================================
# data_ds = data_ds.rename(prtotAdjust = 'pr')
# print(f" _ Precipitation variable name modified: prtotAdjust -> pr")
# =============================================================================
# Get main variable
var = main_var(data_ds)
print(f" _ Main variable is: {var}")
        data_ds[var] = data_ds[var]/1000*(60*60*24) # conversion from mm.s-1 to m.d-1
data_ds[var].attrs['units'] = 'm.d-1'
print(' _ Units converted from mm/s to m/d')
#% Reproject in defined CRS
# -------------------------
print('\nReprojecting...')
print(' _ From epsg:{} to epsg:{}'.format(str(EPSG_in), str(EPSG_out)))
# Resolution
coord1 = list(data_ds.coords)[1]
coord2 = list(data_ds.coords)[2]
if reso_m is None:
print(' _ Input resolution is taken as output resolution based on {}'.format(coord1))
reso = float(data_ds[coord1][1] - data_ds[coord1][0])
if data_ds[coord1].units[0:6].casefold() == 'degree':
reso_m = round(reso*111200)
elif data_ds[coord1].units[0:1].casefold() == 'm':
reso_m = reso
print(' _ Resolution: {} km'.format(reso_m/1000))
# =============================================================================
# data_coords_conv = rasterio.warp.transform(rasterio.crs.CRS.from_epsg(EPSG_in),
# rasterio.crs.CRS.from_epsg(EPSG_out),
# [data_ds.x.values.min()], [data_ds.y.values.max()])
# if reso_m == 75:
# x_min = ac.nearest(x = data_coords_conv[0][0] - reso_input)
# y_max = ac.nearest(x = data_coords_conv[1][0] + reso_input)
# else:
# x_min = (data_coords_conv[0][0] - reso_input) // reso_m * reso_m - reso_m/2
# y_max = (data_coords_conv[1][0] + reso_input) // reso_m * reso_m + reso_m/2
# =============================================================================
if coords_extent == 'from_input':
reprj_ds = data_ds.rio.reproject(
dst_crs = 'epsg:'+str(EPSG_out),
resampling = rasterio.enums.Resampling(5),
)
print(' _ Data reprojected based on the extent and resolution of input data')
print(' (user-defined resolution is not taken into account)')
else: # In all other cases, x_min, x_max, y_max and y_min will be determined before .rio.reproject()
            if isinstance(coords_extent, str) and coords_extent.casefold() in ["bretagne", "brittany"]:
                # Extent over Brittany
if EPSG_out == 3035: #LAEA
x_min = 3164000
x_max = 3572000
y_max = 2980000
y_min = 2720000
elif EPSG_out == 2154: #Lambert-93
x_min = 70000
x_max = 460000
y_max = 6909000
y_min = 6651000
elif EPSG_out == 4326: #WGS84
x_min = -5.625
x_max = -0.150
y_max = 49.240
y_min = 46.660
print(' _ Extent based on Brittany')
            elif isinstance(coords_extent, list):
[x_min, x_max, y_min, y_max] = coords_extent
print(' _ User-defined extent')
            # Align the coordinates on the resolution (round values)
x_min = nearest(x = x_min, res = reso_m, x0 = 0)
x_max = nearest(x = x_max, res = reso_m, x0 = 0)
y_max = nearest(y = y_max, res = reso_m, y0 = 0)
y_min = nearest(y = y_min, res = reso_m, y0 = 0)
print(" _ Bounds:")
print(" {}".format(y_max))
print(" {} {}".format(x_min, x_max))
print(" {}".format(y_min))
transform_ = Affine(reso_m, 0.0, x_min,
0.0, -reso_m, y_max)
# =============================================================================
# width_ = int(data_ds.x.shape[0] * reso_input / reso_m //1 +1)
# height_ = int(data_ds.y.shape[0] * reso_input / reso_m //1 +1)
# =============================================================================
dst_height = (y_max - y_min)//reso_m
dst_width = (x_max - x_min)//reso_m
reprj_ds = data_ds.rio.reproject(
dst_crs = 'epsg:'+str(EPSG_out),
shape = (dst_height, dst_width),
transform = transform_,
resampling = rasterio.enums.Resampling(5),
# nodata = np.nan,
)
# =============================================================================
# # Adding attributes:
# ds_reprj.rio.write_crs("epsg:"+str(EPSG_out), inplace = True)
# ds_reprj.x.attrs = {'standard_name': 'projection_x_coordinate',
# 'long_name': 'x coordinate of projection',
# 'units': 'Meter'}
# ds_reprj.y.attrs = {'standard_name': 'projection_y_coordinate',
# 'long_name': 'y coordinate of projection',
# 'units': 'Meter'}
# ds_reprj.soildepth.attrs = {'grid_mapping': "spatial_ref"}
# =============================================================================
# =============================================================================
# #% Convert values units :
# # from [kg m-2s-1] to [mm d-1]
# ds_reprj[var] = ds_reprj[var] *60*60*24
# ds_reprj.pr.attrs['units'] = 'mm.d-1'
# # -> plutôt appliquer un facteur de conversion dans les settings de CWatM
# =============================================================================
#% Export :
print("\nPreparing export, formating encodings and attributes...")
print(' _ Output file type: {}'.format(file_extension))
reprj_ds[coord1].encoding = data_ds[coord1].encoding
reprj_ds[coord2].encoding = data_ds[coord2].encoding
print(" _ Coords attributes transferred")
if 'original_shape' in reprj_ds[var].encoding.keys():
reprj_ds[var].encoding.pop('original_shape')
reprj_ds[coord1].encoding.pop('original_shape')
reprj_ds[coord2].encoding.pop('original_shape')
print(" _ 'original_shape' has been removed")
# If it is the PrecipitationMaps, it is necessary to erase the x and y
# '_FillValue' encoding, because precipitation inputs are taken as
# format model by CWatM
if var == 'tp':
reprj_ds.x.encoding['_FillValue'] = None
reprj_ds.y.encoding['_FillValue'] = None
print(" _ Fill Value removed from x and y")
# reprj_ds.x.encoding['_FillValue'] = None
# reprj_ds.y.encoding['_FillValue'] = None
# reprj_ds[var].encoding['_FillValue'] = np.nan
# Compression (not lossy)
reprj_ds[var].encoding['zlib'] = True
reprj_ds[var].encoding['complevel'] = 4
reprj_ds[var].encoding['contiguous'] = False
reprj_ds[var].encoding['shuffle'] = True
print(' _ Compression x4 without loss (zlib)')
output_file = os.path.join(input_folder,
' '.join([file_name,
'res='+str(reso_m)+'m',
'CRS='+str(EPSG_out),
]) + file_extension)
reprj_ds.to_netcdf(output_file)
"""
La compression (2x plus petit) fait planter QGIS lorsqu'il essaye
de lire le fichier comme un maillage.
Il est nécessaire d'effacer la _FillValue des coordonnées x et y.
"""
print('\n -> Successfully exported to:\n' + output_file)
return reprj_ds
#%%% ° CORINE Land Cover
elif data_type.casefold() in ["corine", "cld", "corine land cover"]:
(folder_name, file_name) = os.path.split(data)
(file_name, extension) = os.path.splitext(file_name)
data_years = [1990, 2000, 2006, 2012, 2018]
y = data_years[4]
land_convert = [
[23, 24, 25, 26, 27, 28, 29], # 1. forests
[12, 15, 16, 17, 18, 19, 20, 21, 22], # 2. grasslands and non-irrigated crops
[14], # 3. rice fields
[13, ], # 4. irrigated crops
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], # 5. sealed lands
[40, 41, 42, 43], # 6. water surfaces
# [7, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39], # x. sol nu
]
# =============================================================================
# with xr.open_dataset(data) as data_ds:
# data_ds.load() # to unlock the resource
# =============================================================================
        #%%%% Crop the (global) file to Brittany
        # ---------------------------------------
with rasterio.open(data, 'r') as data:
# data_profile = data.profile
# data_crs = data.crs # epsg:3035
# data_val = data.read()[0] # np.ndarray
# # 46000 lignes
# # 65000 colonnes
# data_dtype = data.dtypes[0] # int8
# Définition du périmètre de découpe
# =============================================================================
# shapes = Polygon([
# (3188000, 2960000),
# (3188000 + 364000, 2960000),
# (3188000 + 364000, 2960000 - 231000),
# (3188000, 2960000 - 231000),
# (3188000, 2960000)])
# =============================================================================
# =============================================================================
# shapes = rasterio.warp.transform_geom(
# {'init': 'epsg:3035'},
# data.crs,
# {'type': 'Polygon',
# 'coordinates': [[
# (3188000, 2960000),
# (3188000 + 364000, 2960000),
# (3188000 + 364000, 2960000 - 231000),
# (3188000, 2960000 - 231000),
# (3188000, 2960000)]]}
# )
# =============================================================================
with fiona.open(r"D:\2- Postdoc\2- Travaux\2- Suivi\2- CWatM\2022-01-12) Synthèse inputs\Mask_Bretagne_3035_1km.shp", "r") as shapefile:
shapes = [feature["geometry"] for feature in shapefile]
# Découpe
out_raster, out_transform = rasterio.mask.mask(data,
shapes,
crop = True)
crp_data = out_raster[0]
# Mise à jour des attributs
# data_profile = data.meta
data_profile = data.profile
data_profile.update({
# "driver": "Gtiff",
"height": crp_data.shape[0],
"width": crp_data.shape[1],
# "nodata": -128,
"transform": out_transform})
# Export (fichier facultatif)
output_file = os.path.join(folder_name, file_name + '_Brittany.tif')
with rasterio.open(output_file, 'w', **data_profile) as output_f:
output_f.write_band(1, crp_data)
print('\nCropped file has been successfully exported')
        #%%%% Extract land-cover classes
        # -------------------------------
data_profile['dtype'] = 'float32' # pour limiter la taille sur disque
for i in range(0,6):
            # Create a map with the same dimensions, with the value 1 on the
            # pixels of the target land cover, and 0 elsewhere
print('\nClass ' + str(i+1) + ":")
count = np.zeros(crp_data.shape, dtype = data_profile['dtype'])
count = count.astype('float32') # pour limiter la taille sur disque
for code in land_convert[i]:
# count[crp_data == code] += 1 # Normalement ça ne dépasse jamais 1
count[crp_data == code] = 1
            # Export at the initial resolution (optional files)
output_file = os.path.join(folder_name, file_name + '_class' + str(i+1) + '.tif')
with rasterio.open(output_file, 'w', **data_profile) as output_f:
output_f.write_band(1, count)
print(' - Successfully exported into initial-resolution *.tif')
# Reprojection and conversion of counts into percentages
# dst_trsfm = Affine(1000, 0, 900000,
# 0, -1000, 5500000)
# dst_trsfm = Affine(1000, 0, 3188000,
# 0, -1000, 2960000)
dst_trsfm = Affine(1000, 0, 3165000,
0, -1000, 2954000)
# data_reprj = np.zeros((4600, 6500), dtype = np.float32)
# data_reprj = np.zeros((231, 364), dtype = np.float32)
data_reprj = np.zeros((258, 407), dtype = np.float32)
dst_data_profile = data_profile.copy()
dst_data_profile.update({
"height": data_reprj.shape[0],
"width": data_reprj.shape[1],
"transform": dst_trsfm,
"nodata": np.nan})
rasterio.warp.reproject(count,
destination = data_reprj,
src_transform = data_profile['transform'],
src_crs = data_profile['crs'],
src_nodata = data_profile['nodata'],
dst_transform = dst_data_profile['transform'],
dst_crs = dst_data_profile['crs'],
dst_nodata = dst_data_profile['nodata'],
resampling = rasterio.enums.Resampling(5),
)
            # Export at the final resolution (these intermediate files are required)
output_file = os.path.join(folder_name,
'_'.join([file_name, '_class' + str(i+1), '1000m.tif'])
)
with rasterio.open(output_file, 'w', **dst_data_profile) as output_f:
output_f.write_band(1, data_reprj)
print(' - Successfully exported into coarse-resolution *.tif')
        #%%%% Convert the last created files (*.tif) into *.nc
        # -----------------------------------------------------
var_names = [
'fracforest',
'fracgrassland',
'fracirrNonPaddy',
'fracirrPaddy',
'fracsealed',
'fracwater',
]
# Initialization:
i = 0
with xr.open_dataset(os.path.join(folder_name, '_'.join([file_name, '_class' + str(i+1), '1000m.tif']))) as data_ds:
data_ds.load()
# data_ds = data_ds.squeeze('band').drop('band')
data_ds = data_ds.rename(band = 'time')
# data_ds = data_ds.reindex(time = [datetime.datetime(2018, 1, 31)])
data_ds = data_ds.assign_coords({'time': ('time', [datetime.datetime(y, 1, 31)])})
data_ds = data_ds.rename(band_data = var_names[i])
for i in range(1,6):
with xr.open_dataset(os.path.join(folder_name, '_'.join([file_name, '_class' + str(i+1), '1000m.tif']))) as data_tmp:
data_tmp.load()
# data_tmp = data_tmp.squeeze('band').drop('band')
data_tmp = data_tmp.rename(band = 'time')
# data_tmp = data_tmp.reindex(time = [datetime.datetime(2018, 1, 31)])
data_tmp = data_tmp.assign_coords({'time': ('time', [datetime.datetime(y, 1, 31)])})
data_tmp = data_tmp.rename(band_data = var_names[i])
data_ds[var_names[i]] = data_tmp[var_names[i]]
        # Fix the normalization of the percentages
# =============================================================================
# # data_ds.fillna(0)
# sum_norm = data_ds[var_names[0]]+data_ds[var_names[1]]+data_ds[var_names[2]]+data_ds[var_names[3]]+data_ds[var_names[4]]+data_ds[var_names[5]]
# for i in range(0,6):
# data_ds[var_names[i]] = data_ds[var_names[i]]/sum_norm
# =============================================================================
data_ds.rio.write_crs('epsg:3035', inplace = True)
data_ds.x.attrs = {'standard_name': 'projection_x_coordinate',
'long_name': 'x coordinate of projection',
'units': 'Meter'}
data_ds.y.attrs = {'standard_name': 'projection_y_coordinate',
'long_name': 'y coordinate of projection',
'units': 'Meter'}
output_file = os.path.join(folder_name, 'fractionLandcover_Bretagne_CORINE_{}.nc'.format(str(y)))
        # NB: the NaN values must be removed
# =============================================================================
# data_ds['fracforest'].encoding['_FillValue'] = 0 # ou None ??
# data_ds['fracgrassland'].encoding['_FillValue'] = 0 # ou None ??
# data_ds['fracirrNonPaddy'].encoding['_FillValue'] = 0 # ou None ??
# data_ds['fracirrPaddy'].encoding['_FillValue'] = 0 # ou None ??
# data_ds['fracsealed'].encoding['_FillValue'] = 0 # ou None ??
# data_ds['fracwater'].encoding['_FillValue'] = 0 # ou None ??
# =============================================================================
data_ds.to_netcdf(output_file)
#%%% ° Observatoire botanique de Brest (2018-2019)
elif data_type.casefold() in ["observatoire", "obb"]:
land_convert = [
[0, 0, 0], # 1. forests
[0, 0, 0], # 2. grasslands and non-irrigated crops
[0, 0, 0], # 3. rice fields
[0, 0, 0], # 4. irrigated crops
[0, 0, 0], # 5. sealed lands
[0, 0, 0], # 6. water surfaces
# [7, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39], # x. sol nu
]
        return 'Not implemented yet'
#%%% ° CES OSO
elif data_type.casefold() in ['ces oso', 'oso', 'ces']:
        return 'Not implemented yet'
#%%% ° Crop coefficients, version "better"
elif data_type.casefold() in ['crop', 'crop coeff', 'crop coefficient']:
#%%%% Initialization:
landcover_motif = re.compile('cropCoefficient[a-zA-Z]*')
pos = landcover_motif.search(file_name).span()
landcover = file_name[15:pos[1]]
name = os.path.splitext(file_name)[0][0:pos[1]]
print(f'\nProcessed landcover = {landcover}')
print("___________________")
with xr.open_dataset(
data,
decode_coords = 'all',
) as data:
data.load()
#%%%% Reprojection:
print('\nReprojecting...')
if not EPSG_in: # EPSG = None, by default
if 'spatial_ref' not in list(data.coords) + list(data.data_vars):
print(" Le SCR n'est pas renseigné dans les données initiales. Par défaut, le WGS84 est utilisé")
EPSG_in = 4326
else:
EPSG_in = data.rio.crs.to_epsg()
print(" From epsg:{} to espg:{}".format(EPSG_in, EPSG_out))
data.rio.write_crs("epsg:{}".format(EPSG_in), inplace = True)
        # Extent over Brittany
if EPSG_out == 3035: #LAEA
x_min = 3164000
x_max = 3572000
y_max = 2980000
y_min = 2720000
elif EPSG_out == 2154: #Lambert-93
x_min = 70000
x_max = 460000
y_max = 6909000
y_min = 6651000
# =============================================================================
# # To properly implement management of lat/lon, reso_m should be upgraded
# # with a system including reso_deg (future developments)
# elif EPSG_out == 4326: #WGS84 (in this case, reso_m should be in ° instead of m)
# x_min = -5.625
# x_max = -0.150
# y_max = 49.240
# y_min = 46.660
# =============================================================================
        # Align the coordinates on the resolution (round values)
x_min = nearest(x = x_min, res = reso_m, x0 = 0)
x_max = nearest(x = x_max, res = reso_m, x0 = 0)
y_max = nearest(y = y_max, res = reso_m, y0 = 0)
y_min = nearest(y = y_min, res = reso_m, y0 = 0)
print(" Final bounds:")
print(" {}".format(y_max))
print(" {} {}".format(x_min, x_max))
print(" {}".format(y_min))
# Reproject
if reso_m is None:
coord1 = list(data.coords)[1]
            print('NB: The resolution is derived from the {} coordinate\n'.format(coord1))
reso = float(data[coord1][1] - data[coord1][0])
if data[coord1].units[0:6].casefold() == 'degree':
reso_m = round(reso*111200)
elif data[coord1].units[0:1].casefold() == 'm':
reso_m = reso
print(' Resolution: {} km'.format(str(reso_m/1000)))
dst_height = (y_max - y_min)//reso_m
dst_width = (x_max - x_min)//reso_m
data_reprj = data.rio.reproject(
dst_crs = 'epsg:'+str(EPSG_out),
# resolution = (1000, 1000),
shape = (dst_height, dst_width),
transform = Affine(reso_m, 0.0, x_min,
0.0, -reso_m, y_max),
resampling = rasterio.enums.Resampling(5),
nodata = np.nan)
#%%%% Preparing export:
print("\nPreparing export...")
# =============================================================================
# # Change the order of coordinates to match QGIS standards:
# # Normally, not necessary
# data_reprj = data_reprj.transpose('time', 'y', 'x')
# print(" order of coordinates = 'time', 'y', 'x'")
# =============================================================================
# Formatting to match standards (encodings and attributes)
print(" Formating encodings and attributes")
# =============================================================================
# for c in ['time', 'latitude', 'longitude']:
# data_reprj[c].attrs = data[c].attrs
# data_reprj[c].encoding = data[c].encoding
# for f in _fields_intersect:
# data_reprj[f].attrs = data[f].attrs
# data_reprj[f].encoding = data[f].encoding
# =============================================================================
# Insert georeferencing metadata to match QGIS standards:
# data_reprj.rio.write_crs("epsg:4326", inplace = True)
# print(" CRS set to epsg:4326")
data_reprj.x.encoding = data.lon.encoding
data_reprj.y.encoding = data.lat.encoding
data_reprj.x.encoding.pop('original_shape')
data_reprj.y.encoding.pop('original_shape')
var = main_var(data)
# If it is the PrecipitationMaps, it is necessary to erase the x and y
# '_FillValue' encoding, because precipitation inputs are taken as
# format model by CWatM
if var == 'tp':
# if name == 'Total precipitation daily_mean':
data_reprj.x.encoding['_FillValue'] = None
data_reprj.y.encoding['_FillValue'] = None
# Compression (not lossy)
data_reprj[var].encoding = data[var].encoding
data_reprj[var].encoding.pop('chunksizes')
data_reprj[var].encoding.pop('original_shape')
# =============================================================================
# data_reprj[var].encoding['zlib'] = True
# data_reprj[var].encoding['complevel'] = 4
# data_reprj[var].encoding['contiguous'] = False
# data_reprj[var].encoding['shuffle'] = True
# =============================================================================
# NB: The packing induces a loss of precision of apprx. 1/1000 of unit,
# for a quantity with an interval of 150 units. The packing is
# initially used in the original ERA5-Land data.
print(" All attributes and encodings transfered")
#%%%% Export :
print('\nSaving...')
output_file = os.path.join(input_folder,
'_'.join([file_name,
'res='+str(reso_m)+'m',
'epsg'+str(EPSG_out),
]) + '.nc')
data_reprj.to_netcdf(output_file)
print("\nLe resultat a bien été exporté dans le fichier : {}".format(output_file))
return data_reprj
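# A minimal usage sketch (kept commented): convert a DRIAS-Eau 2024 (SIM2) file
# and export the result; paths are illustrative.
# =============================================================================
# ds = convert_to_cwatm(r"D:\DRAINC_SIM2.nc", data_type = 'DRIAS-Eau 2024')
# export(ds, r"D:\DRAINC_SIM2_formatted.nc")
# =============================================================================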
###############################################################################
#%%% * Format RHT data
[docs]def process_rht(shp_file, attrs_file, fields = 'all'):
"""
Pour rajouter les attributs dans le shapefile, de anière à pouvoir
l'afficher facilement dans QGIS.
Parameters
----------
shp_file : str
Chemin vers le fichier shapefile
attrs_file : str
Chemin vers la table externe d'attributs
fields : list
Liste des noms de colonnes que l'on veut insérer dans le shapefile
Returns
-------
Create a new file.
"""
# Inputs
# ------
gdf_in = gpd.read_file(shp_file)
df_attrs = pd.read_csv(attrs_file, sep = r";", header = 0, skiprows = 0)
if fields == 'all': fields = list(df_attrs.columns)
if not isinstance(fields, list): fields = [fields]
if 'id_drain' not in fields: fields.append('id_drain')
print('Fields :\n{}'.format(fields))
# Fusion
# ------
gdf_out = gdf_in.merge(df_attrs[fields], left_on = "ID_DRAIN", right_on = "id_drain")
# Export
# ------
out_name = os.path.splitext(shp_file)[0] + '_merged.shp'
gdf_out.to_file(out_name)
return gdf_out
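# A minimal usage sketch (kept commented): merge an external attribute table
# into the RHT shapefile; paths and field names are illustrative.
# =============================================================================
# gdf = process_rht(r"D:\rht.shp", r"D:\rht_attributes.csv",
#                   fields = ['id_drain', 'surf_bv'])
# =============================================================================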
###############################################################################
#%%% * convert_from_h5_newsurfex (with recessions)
[docs]def convert_from_h5_newsurfex(*, input_file, mesh_file, scenario = 'historic',
output_format = 'NetCDF', **kwargs):
r"""
% DESCRIPTION:
This function converts Ronan *.h5 files from SURFEX into NetCDF files, or
GeoTIFF images, in order to make output files readable by QGIS.
% EXAMPLE:
>> import geoconvert as gc
>> gc.convert_from_h5_newsurfex(input_file = r"D:\2- Postdoc\2- Travaux\1- Veille\4- Donnees\8- Meteo\Surfex\BZH\REA.h5",
mesh_file = r"D:\2- Postdoc\2- Travaux\1- Veille\4- Donnees\8- Meteo\Surfex\BZH\shapefile\maille_meteo_fr_pr93.shp",
output_format = "NetCDF",
fields = ["REC", "TAS"])
% ARGUMENTS:
>
% OPTIONAL ARGUMENTS:
    > output_format = 'NetCDF' (default) | 'GeoTIFF'
> scenario = 'historic' | 'RCP2.6' | 'RCP4.5' | 'RCP8.5'
> kwargs:
> fields = variable(s) to conserve (among all attributes in input file)
= ['ETP', 'PPT', 'REC', 'RUN', 'TAS']
(One file per variable will be created.)
> dates = for instance : ['2019-01-01', '2019-01-02', ...]
(If no date is specified, all dates from input file are considered)
"""
#% Get fields argument:
# ---------------------
if 'fields' in kwargs:
fields = kwargs['fields']
if isinstance(fields, str): fields = [fields]
else: fields = list(fields) # in case fields are string or tuple
else:
print('\n/!\ Fields need to be specified\n')
for _field in fields:
print('Processing ' + _field + '...')
print(' Loading data...')
#% Load data sets:
# ----------------
# _data contains the values, loaded as a pandas dataframe
_data = pd.read_hdf(input_file, _field + '/' + scenario)
_data.index.freq = 'D' # Correct the frequency of timestamp, because it
# is altered during importation
_data = _data.iloc[:, 0:-1] # Get rid of the last 'MEAN' column
_data = _data.transpose()
# _dataset contains the spatial metadata, and will be used as the
# final output structure
_dataset = gpd.read_file(mesh_file)
_dataset.num_id = _dataset.num_id.astype(int) # Convert 'num_id' (str)
# into integer
# Only cells present in _data are considered:
_datasubset = _dataset[_dataset.num_id.isin(_data.index)]
_datasubset = _datasubset.merge(_data, left_on = "num_id",
right_index = True)
_datasubset.index = _datasubset.num_id # replace indexes by id_values
#% Get dates argument: (and build output_name)
# --------------------
_basename = os.path.splitext(input_file)[0]
if 'dates' in kwargs:
dates = kwargs['dates']
if isinstance(dates, str):
output_file = '_'.join([_basename, dates, _field])
dates = [dates]
else:
dates = list(dates) # in case dates are tuple
output_file = '_'.join([_basename, dates[0], 'to', dates[-1], _field])
else:
dates = _data.columns # if not input_arg, dates = all
output_file = '_'.join([_basename, scenario, _field])
#% Exports:
# ---------
profile_base = {'driver': 'NetCDF',
'dtype': 'float32',
'nodata': None,
'width': 37,
'height': 23,
'count': 1,
# 'count': len(dates),
'crs': rio.crs.CRS.from_epsg(27572),
'transform': Affine(8000, 0.0, 56000, 0.0, 8000, 2261000),
# values deducted from Xlamb and Ylamb
# can also be found from QGIS: "X (ou Y) du sommet le plus proche"
'tiled': False,
'interleave': 'band'}
# =============================================================================
# # Getting bounds from polygons is less precise:
# _datasubset.geometry.total_bounds[0:1]
# # >> [ 107438.43514434074, 6697835.528522245 ] in epsg:2154
# Affine(8000, 0.0, 56092.06862427108, 0.0, 8000, 2260917.5598924947)
# # more precise values: 57092.06862427143, 2259917.559892499
# # --> does not improve much...
# =============================================================================
# =============================================================================
# # Using polygon limits instead of polygon centers is less precise:
# rasterized_data = rasterio.features.rasterize(
# [(_dataval.loc['geometry'], _dataval.loc[pd.to_datetime(_date)]) for i, _dataval in _datasubset.to_crs(27572).iterrows()],
# out_shape = (profile_base['height'], profile_base['width']),
# transform = profile_base['transform'],
# fill = 0,
# all_touched = True)
# # dtype = rasterio.uint8
# =============================================================================
#% Add coordinates of the center of each polygon, as a POINT object:
# ------------------------------------------------------------------
_datasubset['centers'] = [Point(x_y)
for x_y in zip(_datasubset.loc[:, 'Xlamb'],
_datasubset.loc[:, 'Ylamb'])]
# NB: Coordinates are here in Lambert Zone II (epsg:27572)
# =============================================================================
# #{a} # Previous version, creating a raster per date (very slow):
# rasterized_data = rasterio.features.rasterize(
# [(_dataval.loc['centers'], _dataval.loc[pd.to_datetime(_date)])
# for i, _dataval in _datasubset.iterrows()],
# out_shape = (profile_base['height'], profile_base['width']),
# transform = profile_base['transform'],
# fill = np.NaN,
# all_touched = True)
# # dtype = rasterio.uint8
# =============================================================================
#% Create a raster mask:
# ---------------------
rasterized_mask = rasterio.features.rasterize(
[(_dataval.loc['centers'], _dataval.loc['num_id'])
for i, _dataval in _datasubset.iterrows()],
out_shape = (profile_base['height'], profile_base['width']),
transform = profile_base['transform'],
fill = np.NaN,
all_touched = True)
# dtype = rasterio.uint16
# =============================================================================
# #% Apercu visuel rapide :
# plt.imshow(rasterized_mask, origin = 'lower')
# =============================================================================
#% Build a xarray based on the previous mask:
# -------------------------------------------
print(' Building an xarray...')
# Initialization:
array3D = np.full((len(dates),
profile_base['height'],
profile_base['width']), np.nan)
# Filling:
for (_y, _x), _id_val in np.ndenumerate(rasterized_mask):
if not np.isnan(_id_val):
array3D[:, _y, _x] = _datasubset.loc[_id_val].iloc[10:-1]
# Converting to xarray dataarray:
dataarray = xr.DataArray(array3D,
coords = [dates,
np.arange(2265000,
2265000 + 8000*profile_base['height'],
8000).astype(np.float32),
np.arange(60000,
60000 + 8000*profile_base['width'],
8000).astype(np.float32)],
# NB : La fonction 'range()' est évitée car elle renvoie des entiers
# QGIS n'arrive pas à détecter le CRS lorsque les axes sont des entiers
dims = ['time', 'y', 'x'])
# Converting to xarray dataset:
dataset = dataarray.to_dataset(name = _field)
# Adding attributes:
dataset.rio.write_crs("epsg:27572", inplace = True)
dataset.x.attrs = {'standard_name': 'projection_x_coordinate',
'long_name': 'x coordinate of projection',
'units': 'Meter'}
dataset.y.attrs = {'standard_name': 'projection_y_coordinate',
'long_name': 'y coordinate of projection',
'units': 'Meter'}
# Export:
# -------
if output_format.casefold() in ['nc', 'netcdf']:
dataset.to_netcdf(output_file + '.nc')
(folder_name, file_name) = os.path.split(output_file + '.nc')
print(" The dataset has been successfully exported to the file '{}' in the folder '{}'\n".format(file_name, folder_name))
elif output_format.casefold() in ['tif', 'tiff', 'geotiff']:
            # NB: for a multi-band output, profile_base['count'] should equal len(dates)
            with rasterio.open(output_file + '.tiff', 'w', **profile_base) as out_f:
                for i, d in enumerate(dates):
                    # rasterio band indices start at 1
                    out_f.write_band(i + 1, dataset[_field].sel(time = d).values)
            (folder_name, file_name) = os.path.split(output_file + '.tiff')
            print("   The dataset has been successfully exported to the file '{}' in the folder '{}'\n".format(file_name, folder_name))
return fields
# =============================================================================
# #- - - - - - SANDBOX - - - - -
# #% Single day:
# # date = _data.loc[:,_data.columns == '2019-10-15'].columns.values
# date = _data.loc[:,_data.columns == _date].columns[0]
# _datasingleday = _datasubset.loc[:, date]
#
# #% Matplot:
# _datasubset.plot(column = date, cmap = 'RdYlBu_r', vmin = 10.5, vmax = 15)
# =============================================================================
#%%% * convert_from_h5_oldsurfex (data from Quentin)
###############################################################################
[docs]def convert_from_h5_oldsurfex(*, output_format = 'csv', **kwargs):
r"""
    % DESCRIPTION:
    This function formats Quentin's Surfex files into *.csv files organized
    by dates (rows) and mesh cell identifiers (columns).
    % EXAMPLES:
    >> import surfexconvert as sc #(NB: the folder must first be added to the PYTHONPATH)
    >> sc.convert_from_h5_oldsurfex(output_format = "csv",
                                    start_years = list(range(2005, 2011, 1)),
                                    variables = ['DRAIN', 'ETR'])
    >> sc.convert_from_oldsurfex(output_format = "nc",
                                 mesh_file = r"D:\2- Postdoc\2- Travaux\1- Veille\4- Donnees\8- Meteo\Surfex\BZH\shapefile\maille_meteo_fr_pr93.shp")
    % (OPTIONAL) ARGUMENTS:
    > output_format = 'csv' (default) | 'NetCDF' | 'GeoTIFF'
    > kwargs:
        > input_folder = folder containing the files to process.
          If not specified, the folder of this script is used
        > variables = variable(s) to process (among DRAIN, ETR, PRCP and RUNOFF)
          If not specified, all variables are considered
        > start_years = years to process
          (e.g. 2012 corresponds to the file blabla_2012_2013)
          If not specified, all years are considered
        > mesh_file = path to the correspondence file between tile ids and
          their coordinates (only needed for NetCDF and GeoTIFF)
    """
    # ---- RETRIEVE INPUTS
    #---------------------
if 'input_folder' in kwargs:
input_folder = kwargs['input_folder']
else:
input_folder = os.path.dirname(os.path.realpath(__file__))
    # If 'variables' is specified, it is converted into a list of strings.
    # Otherwise, all variables are considered (DRAIN, ETR, PRCP...).
if 'variables' in kwargs:
variables = kwargs['variables']
if isinstance(variables, str): variables = [variables]
else: variables = list(variables)
else:
variables = ['DRAIN', 'ETR', 'PRCP', 'RUNOFF']
print('> variables = ' + ', '.join(variables))
    # If 'start_years' is specified, it is converted into a list of strings.
    # Otherwise, all years are considered (1970-1971 -> 2011-2012).
if 'start_years' in kwargs:
start_years = kwargs['start_years']
if isinstance(start_years, str): start_years = [start_years]
else:
start_years = list(start_years)
start_years = [str(y) for y in start_years]
else:
start_years = [str(y) for y in range(1970, 2012, 1)]
print("> années de départ = " + ', '.join(start_years))
if 'mesh_file' in kwargs:
mesh_file = kwargs['mesh_file']
    #% Locate the base folders and files:
    #------------------------------------
    idlist_file = os.path.join(input_folder,
                               r"numero_mailles_bretagne_etendue.txt")
    # (= file listing the mesh cell identifiers)
mesh_indexes = np.loadtxt(idlist_file, dtype = int)
    # ---- DATA PROCESSING
    for _variable in variables:
        print("Processing " + _variable + "...")
        dataset_allyears = pd.DataFrame(columns = mesh_indexes) #(initialization)
for _year in start_years:
print(_year)
input_file = os.path.join(input_folder,
"_".join([_variable, _year,
str(int(_year)+1)]))
            # (= data file)
            #% Import:
            #----------
raw_data = pd.read_table(input_file, delimiter = " ",
engine = 'python', header = None)
            # Reshaping:
            #-----------
            if pd.to_datetime(str(int(_year) + 1)).is_leap_year: n_days = 366
            else: n_days = 365
            reshp_array = np.array(raw_data).reshape((n_days, 610),
                                                     order = 'C')
            reshp_array = reshp_array[:, 0:-6] # Array of 365 rows and 604 columns
            # Add labels:
            #------------
_start_date = _year + '-07-31'
date_indexes = pd.date_range(start = _start_date,
periods = n_days, freq = '1D')
dataframe_1y = pd.DataFrame(data = reshp_array,
index = date_indexes,
columns = mesh_indexes, copy = True)
            # pandas >= 2.0: DataFrame.append was removed, use pd.concat instead
            dataset_allyears = pd.concat([dataset_allyears, dataframe_1y])
# =================================================================
# #% Add yearly means or sums, as in the previous
# Surfex data:
# T['mean'] = T.mean(axis = 1)
# T['sum'] = T.sum(axis = 1)
# =================================================================
# ---- EXPORT
output_file = os.path.join(input_folder,
"_".join([_variable, start_years[0],
str(int(start_years[-1])+1)]
)
)
#% CSV export:
#-------------
if output_format in ['csv', 'CSV']:
dataset_allyears.to_csv(output_file + ".csv", sep = '\t',
header = True)
(folder_name, file_name) = os.path.split(output_file + ".csv")
print("> Le fichier \'{}\' a été créé dans le dossier \'{}\'.".format(
file_name, folder_name))
#% NetCDF or TIFF export:
#------------------------
elif output_format in ['NetCDF', 'nc', 'TIFF', 'tiff', 'tif', 'GeoTIFF']:
#% Georeferencing / formatting the data
print("Formatting...")
# (NB: dataset_allyears will be overwritten later)
all_date_indexes = dataset_allyears.index
n_dates = len(all_date_indexes)
_data = dataset_allyears.transpose(copy = True)
_geodataset = gpd.read_file(mesh_file)
_geodataset.num_id = _geodataset.num_id.astype(int)
_geodataset = _geodataset[_geodataset.num_id.isin(_data.index)]
_geodataset = _geodataset.merge(_data, left_on = "num_id",
right_index = True)
res_x = 8000 # Surfex resolution [m]
res_y = 8000
n_x = len(pd.unique(_geodataset.loc[:, 'Xlamb']))
n_y = len(pd.unique(_geodataset.loc[:, 'Ylamb']))
x_min = min(_geodataset.loc[:, 'Xlamb']) - res_x/2 #NB : not 56000
y_min = min(_geodataset.loc[:, 'Ylamb']) - res_y/2
profile_base = {'driver': 'NetCDF',
'dtype': 'float64',
'nodata': None,
# 'time': n_dates,
'height': n_y, #23
'width': n_x, #37
'count': n_dates,
'crs': rio.crs.CRS.from_epsg(27572),
'transform': Affine(res_x, 0.0, x_min,
0.0, res_y, y_min),
# values deduced from Xlamb, Ylamb and QGIS
'tiled': False,
'interleave': 'band'}
_geodataset['centers'] = [Point(x_y) for x_y in
zip(_geodataset.loc[:, 'Xlamb'],
_geodataset.loc[:, 'Ylamb'])]
# NB: Coordinates are here in Lambert Zone II (epsg:27572)
raster_data_allyears = np.empty(shape = (n_dates,
profile_base['height'],
profile_base['width']))
print(" Rasterisation... ({} éléments. Peut prendre plusieurs minutes)".format(len(all_date_indexes)))
for d, _date in enumerate(all_date_indexes):
_raster_data = rasterio.features.rasterize(
[(_dataval.loc['centers'], _dataval.loc[pd.to_datetime(_date)])
for i, _dataval in _geodataset.iterrows()],
out_shape = (profile_base['height'], profile_base['width']),
transform = profile_base['transform'],
fill = np.NaN,
all_touched = True)
raster_data_allyears[d, :, :] = _raster_data
#*** Could this step be done as-is on data
# including all time steps? ***
print(" xarray...")
dataarray_allyears = xr.DataArray(
raster_data_allyears,
#raster_data_allyears.astype('float32'),
coords = [all_date_indexes,
np.sort(pd.unique(_geodataset.loc[:, 'Ylamb'])),
np.sort(pd.unique(_geodataset.loc[:, 'Xlamb']))],
# <!> NB <!> It is extremely important to sort the
# indices in increasing order!!! (they are unordered in
# _geodataset). Otherwise QGIS cannot read the files!
dims = ['time', 'y', 'x'])
# =============================================================================
# # [BEFORE: dims = ['time', 'latitude', 'longitude'])]
# =============================================================================
# The recommended order of dimensions is t, lat, lon, but it
# makes no difference here.
# Conversion into a Dataset:
dataset_allyears = dataarray_allyears.to_dataset(name = _variable)
print("Exportation...")
if output_format in ['NetCDF', 'nc']:
#% NetCDF:
# - - - - -
# Formatting attributes according to QGIS conventions:
# Insert georeferencing metadata :
dataset_allyears.rio.write_crs("epsg:27572", inplace = True)
dataset_allyears.x.attrs = {'standard_name': 'projection_x_coordinate',
'long_name': 'x coordinate of projection',
'units': 'Meter'}
dataset_allyears.y.attrs = {'standard_name': 'projection_y_coordinate',
'long_name': 'y coordinate of projection',
'units': 'Meter'}
# =============================================================================
# # BEFORE:
# dataset_allyears.longitude.attrs = {'units': 'degrees_east',
# 'long_name': 'longitude'}
# dataset_allyears.latitude.attrs = {'units': 'degrees_north',
# 'long_name': 'latitude'}
# =============================================================================
# dataset_allyears.time.attrs = {'units': 'days since 1970-01-01',
# 'calendar': 'gregorian',
# 'long_name': 'time'}
dataset_allyears.attrs = {'Conventions': 'CF-1.6'}
dataset_allyears.to_netcdf(output_file + ".nc")
(folder_name, file_name) = os.path.split(output_file + ".nc")
print("> Le fichier \'{}\' a été créé dans le dossier \'{}\'.".format(
file_name, folder_name))
else:
#% TIFF:
# - - - -
dataset_allyears[_variable].rio.to_raster(output_file + '.tiff')
# =============================================================================
# # Method without rioxarray:
# # (Problem: the coordinate values seem to be out of order)
# with rasterio.open(output_file + ".tiff", 'w',
# **profile_base) as out_f:
# for d, _date in enumerate(all_date_indexes):
# # out_f.write_band(d + 1, raster_data_allyears[d, :, :])
# out_f.write_band(d + 1,
# dataset_allyears[_variable].sel(time = _date))
# =============================================================================
(folder_name, file_name) = os.path.split(output_file + ".tiff")
print("> Le fichier \'{}\' a été créé dans le dossier \'{}\'.".format(
file_name, folder_name))
return dataset_allyears
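# The rasterization step above turns one value per mesh centre into a regular
# grid with rasterio.features.rasterize. A minimal, self-contained sketch of
# the same call, commented out and using toy points instead of the Surfex mesh:
# =============================================================================
# import numpy as np
# import rasterio.features
# from affine import Affine
# from shapely.geometry import Point
#
# points = [Point(0.5, 0.5), Point(2.5, 1.5)]    # cell centres (x, y)
# values = [3.0, 7.0]                            # one value per point
# transform = Affine(1.0, 0.0, 0.0,              # 1 x 1 cells, origin at (0, 0)
#                    0.0, 1.0, 0.0)
# grid = rasterio.features.rasterize(
#     zip(points, values),
#     out_shape = (2, 3),                        # (rows, cols)
#     transform = transform,
#     fill = np.nan,                             # cells without a point stay NaN
#     dtype = 'float64',
#     all_touched = True)
# # grid[0, 0] == 3.0 and grid[1, 2] == 7.0
# =============================================================================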
###############################################################################
#%%% correct_bias: corrects ERA5-Land precipitation, radiation, PET...
[docs]def correct_bias(input_file, correct_factor = 1, to_dailysum = True,
progressive = False):
"""
Function intended to be used on the final, formatted data
correct_factor:
- for precipitations (hourly) : 1.8
- for precipitations (daily sum): 0.087
- for radiations (daily sum): 0.0715 (and progressive = True)
- for potential evapotranspiration pos (daily sum): 0.04
"""
print("\nLoading...")
with xr.open_dataset(input_file, decode_coords = 'all') as ds:
ds.load() # to unlock the resource
ds2 = ds.copy(deep = True)
print("\nCorrecting values: multiply by {}...".format(correct_factor))
if not progressive: # constant correction for any day
var = main_var(ds)
suffix = f"{correct_factor}"
ds2[var] = ds2[var]*correct_factor
correct_factor_max = correct_factor
elif progressive: # varying correction along the year (stronger in summer, weaker in winter)
var = main_var(ds)
suffix = f"{correct_factor}P1"
monthly_correction = np.array([0.74130111, 0.6586179, 1.04861236,
0.98615636, 0.96493336, 1.15048825,
1.06386533, 1.16570181, 1.001253,
0.81417812, 0.71620761, 0.76901861]) * correct_factor
# Redefinition of correct_factor as a monthly correction:
# correct_factor = ds2.mean(dim = ['latitude', 'longitude']).groupby("time.month").mean()
# correct_factor[var] = monthly_correction
correct_factor_monthly = xr.DataArray(monthly_correction, dims = 'month', coords = {'month': list(range(1,13))})
ds2[var] = ds2[var].groupby('time.month', squeeze = False)*correct_factor_monthly
ds2 = ds2.drop("month")
correct_factor_max = float(correct_factor_monthly.max())
# =============================================================================
# # Pipeline to compute the coefficients
# month_length = ds.time.dt.days_in_month
# weights = (
# month_length.groupby("time.month") / month_length.groupby("time.month").sum())
# # To check that the weights sum to 1 for each month
# np.testing.assert_allclose(weights.groupby("time.month").sum().values, np.ones(12))
# ds_weighted = (ds * weights).groupby("time.month").sum(dim="time")
# ds_weighted.mean(dim = ['longitude', 'latitude'])
# # Reference (visual, from https://loa.univ-lille.fr/documents/LOA/formation/stages/B_Anh.pdf)
# [20, 30, 90, 125, 145, 170, 165, 155, 100, 50, 25, 20] # kWh/m²/month
# [2322581, 3857143, 10451613, 15000000, 16838710, 20400000,
# 19161290, 18000000, 12000000, 5806452, 3000000, 2322581] # J/m²/d
# =============================================================================
unit_factor = 1
if to_dailysum: # If the input data are means in m/h:
print("\nCorrecting units:")
print(" input values are in .../h")
print(" multiply by 24 --> .../d")
ds2[var] = ds2[var]*24
unit_factor = 24
print("\nCorrecting encodings...")
ds2[var].encoding = ds[var].encoding
ds2[var].encoding['scale_factor'] = ds[var].encoding['scale_factor']*correct_factor_max*unit_factor
ds2[var].encoding['add_offset'] = ds[var].encoding['add_offset']*correct_factor_max*unit_factor
ds2.x.encoding['_FillValue'] = None
ds2.y.encoding['_FillValue'] = None
print("\nExporting...")
output_file = os.path.splitext(input_file)[0] + f" x{suffix}.nc"
ds2.to_netcdf(output_file)
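# The 'progressive' correction above relies on xarray's groupby broadcasting:
# a 12-value DataArray indexed by 'month' is multiplied with the data grouped
# by 'time.month'. A minimal sketch of that pattern, commented out and using a
# synthetic daily series (variable and factor values are illustrative only):
# =============================================================================
# import numpy as np
# import pandas as pd
# import xarray as xr
#
# time = pd.date_range('2020-01-01', '2020-12-31', freq = '1D')
# da = xr.DataArray(np.ones(time.size), coords = {'time': time}, dims = 'time')
# monthly_factor = xr.DataArray(np.arange(1, 13, dtype = float),
#                               dims = 'month',
#                               coords = {'month': list(range(1, 13))})
# corrected = (da.groupby('time.month') * monthly_factor).drop_vars('month')
# # January values are multiplied by 1, February by 2, ..., December by 12
# =============================================================================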
###############################################################################
#%%% * hourly_to_daily (OLD)
[docs]def hourly_to_daily_old(*, data, mode = 'mean', **kwargs):
# This earlier version (since updated) handles folders
"""
Example
-------
import geoconvert as gc
# full example:
gc.hourly_to_daily(input_file = r"D:/2011-2021_hourly Temperature.nc",
mode = 'max',
output_path = r"D:/2011-2021_daily Temperature Max.nc",
fields = ['t2m', 'tp'])
# input_file can also be a folder:
gc.hourly_to_daily(input_file = r"D:\2- Postdoc\2- Travaux\1- Veille\4- Donnees\8- Meteo\ERA5\Brittany\test",
mode = 'mean')
Parameters
----------
input_file : str, or list of str
Can be a path to a file (or a list of paths), or a path to a folder,
in which case all the files in this folder will be processed.
mode : str, or list of str, optional
= 'mean' (default) | 'max' | 'min' | 'sum'
**kwargs
--------
fields : str or list of str, optional
e.g: ['t2m', 'tp', 'u10', 'v10', ...]
(if not specified, all fields are considered)
output_path : str, optional
e.g: [r"D:/2011-2021_daily Temperature Max.nc"]
(if not specified, output_name is made up according to arguments)
Returns
-------
None. Processed files are created in the output destination folder.
"""
# ---- Get input file path(s)
data_folder, filelist = get_filelist(data, filetype = '.nc')
#% Safeguard for output_names:
if len(filelist) > 1 and 'output_path' in kwargs:
print('Warning: Due to multiple outputs, the names of the output files are imposed.')
# ---- Format modes
if isinstance(mode, str): mode = [mode]
else: mode = list(mode)
if len(mode) != len(filelist):
if (len(mode) == 1) & (len(filelist)>1):
mode = mode*len(filelist)
else:
print("Error: lengths of input file and mode lists do not match")
return
# ---- Process data
for i, f in enumerate(filelist):
print(f"\n\nProcessing file {i+1}/{len(filelist)}: {f}...")
print("-------------------")
#% Load data:
data_ds = load_any(os.path.join(data_folder, f),
decode_coords = 'all', decode_times = True)
#% Get fields:
if 'fields' in kwargs:
fields = kwargs['fields']
if isinstance(fields, str): fields = [fields]
else: fields = list(fields) # in case fields are string or tuple
else:
fields = list(data_ds.data_vars) # if not input_arg, fields = all
#% Extract subset according to fields
fields_intersect = list(set(fields) & set(data_ds.data_vars))
data_subset = data_ds[fields_intersect]
print(" _ Extracted fields are {}".format(fields_intersect))
if fields_intersect != fields:
print('Warning: ' + ', '.join(set(fields) ^ set(fields_intersect))
+ ' absent from ' + data)
#% Resample:
print(" _ Resampling time...")
if mode[i] == 'mean':
datarsmpl = data_subset.resample(time = '1D').mean(dim = 'time',
keep_attrs = True)
elif mode[i] == 'max':
datarsmpl = data_subset.resample(time = '1D').max(dim = 'time',
keep_attrs = True)
elif mode[i] == 'min':
datarsmpl = data_subset.resample(time = '1D').min(dim = 'time',
keep_attrs = True)
elif mode[i] == 'sum':
datarsmpl = data_subset.resample(time = '1D').sum(dim = 'time',
skipna = False,
keep_attrs = True)
#% Build output name(s):
if len(filelist) > 1 or not 'output_path' in kwargs:
basename = os.path.splitext(f)[0]
output_name = os.path.join(
data_folder, 'daily',
basename + ' daily_' + mode[i] + '.nc')
## Regex solution, instead of splitext:
# _motif = re.compile('.+[^\w]')
# _basename = _motif.search(data).group()[0:-1]
if not os.path.isdir(os.path.join(data_folder, 'daily')):
os.mkdir(os.path.join(data_folder, 'daily'))
else:
output_name = kwargs['output_path']
# ---- Preparing export
# Transfer encodings
for c in list(datarsmpl.coords):
datarsmpl[c].encoding = data_ds[c].encoding
for f in fields_intersect:
datarsmpl[f].encoding = data_ds[f].encoding
# Case of packing
if ('scale_factor' in datarsmpl[f].encoding) | ('add_offset' in datarsmpl[f].encoding):
# Packing (lossy compression) induces a loss of precision of
# approx. 1/1000 of a unit, for a quantity with an interval of 150
# units. The packing is initially used in some original ERA5-Land data.
if mode[i] == 'sum':
print(" Correcting packing encodings...")
datarsmpl[f].encoding['scale_factor'] = datarsmpl[f].encoding['scale_factor']*24
datarsmpl[f].encoding['add_offset'] = datarsmpl[f].encoding['add_offset']*24
#% Export
export(datarsmpl, output_name)
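# The aggregation above is plain xarray resampling: hourly fields are grouped
# into days with .resample(time = '1D') and reduced with mean/max/min/sum. A
# stand-alone sketch, commented out, on a synthetic two-day hourly series
# (the variable name 't2m' is only an example):
# =============================================================================
# import numpy as np
# import pandas as pd
# import xarray as xr
#
# time = pd.date_range('2021-01-01', periods = 48, freq = '1h')
# ds = xr.Dataset({'t2m': ('time', np.random.rand(48))}, coords = {'time': time})
# daily_mean = ds.resample(time = '1D').mean(dim = 'time', keep_attrs = True)
# daily_sum = ds.resample(time = '1D').sum(dim = 'time', skipna = False,
#                                          keep_attrs = True)
# # daily_mean.sizes['time'] == 2
# =============================================================================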
#%% EXTRACTIONS
###############################################################################
#%%% ° time_series
[docs]def time_series(*, input_file, epsg_coords = None, epsg_data = None,
coords = None, mode = 'mean', dates = None,
fields = None, cumul = False):
"""
% DESCRIPTION:
This function extracts the time series at one location given by its
coordinates.
% EXAMPLE:
import geoconvert as gc
era5 = gc.time_series(input_file = r"D:\2- Postdoc\2- Travaux\1- Veille\4- Donnees\8- Meteo\ERA5\Brittany\daily/2011-2021_Temperature_daily_mean.nc",
coords = (-2.199337, 48.17824), epsg_coords = 4326,
fields = 't2m')
cwatm_ds = gc.time_series(input_file = r"D:\2- Postdoc\2- Travaux\3_CWatM_EBR\results\raw_results\005_calib_groundwater\01_base_Ronan\sum_gwRecharge_daily.nc",
coords = (3417964, 2858067), epsg_coords = 3035)
% OPTIONAL ARGUMENTS:
> coords = coordinates of one point
/!\ Coordinates should be indicated in (X,Y) or (lon,lat) order
(and not (lat,lon) !!!)
> coords can also indicate a mask:
coords = 'all' | filepath to a mask.tiff | filepath to a mask.shp
> epsg_coords = 4326 | 3035 | 2154 | 27572 | etc.
EPSG of the coords ! Useless if coords is a mask that includes a CRS
> epsg_data = same principle, for data without any included information about CRS
> mode = 'mean' (default) | 'sum' | 'max' | 'min'
> dates = ['2021-09', '2021-12-01' ...]
> fields = ['T2M', 'PRECIP', ...]
"""
_dataset = load_any(input_file, decode_times = True, decode_coords = 'all')
#% Get and process arguments:
# ---------------------------
if epsg_coords is not None:
if isinstance(epsg_coords, str):
if epsg_coords.casefold()[0:5] == 'epsg:':
epsg_coords = int(epsg_coords[5:])
if fields is not None:
if isinstance(fields, str): fields = [fields]
else: fields = list(fields) # in case fields are string or tuple
else:
fields = list(_dataset.data_vars) # if not input_arg, fields = all
if dates is not None:
if not isinstance(dates, tuple): dates = tuple(dates)
# in case dates are a list instead of a tuple
if not isinstance(coords, str):
# if coords is not None:
if not isinstance(coords, tuple): coords = tuple(coords)
# in case coords are a list instead of a tuple
#% Standardize terms:
# -------------------
# =============================================================================
# if 'lon' in list(_dataset.dims) or 'lat' in list(_dataset.dims):
# print('Renaming lat/lon coordinates')
# _dataset = _dataset.rename(lat = 'latitude', lon = 'longitude')
# =============================================================================
if 'lon' in list(_dataset.dims) or 'lat' in list(_dataset.dims):
print('Renaming lat/lon coordinates')
_dataset = _dataset.rename(lat = 'y', lon = 'x')
if 'longitude' in list(_dataset.dims) or 'latitude' in list(_dataset.dims):
print('Renaming lat/lon coordinates')
_dataset = _dataset.rename(latitude = 'y', longitude = 'x')
if 'X' in list(_dataset.dims) or 'Y' in list(_dataset.dims):
print('Renaming X/Y coordinates')
_dataset = _dataset.rename(X = 'x', Y = 'y')
_dataset.x.attrs = {'standard_name': 'projection_x_coordinate',
'long_name': 'x coordinate of projection',
'units': 'Meter'}
_dataset.y.attrs = {'standard_name': 'projection_y_coordinate',
'long_name': 'y coordinate of projection',
'units': 'Meter'}
# =============================================================================
# #% Convert temperature:
# for _field in fields:
# if 'units' in _dataset[_field].attrs:
# if _dataset[_field].units == 'K':
# _dataset[_field] = _dataset[_field]-273.15
# # _datasubset[_field].units = '°C'
# =============================================================================
print('Fields = {}'.format(str(fields)))
if not isinstance(coords, str):
if epsg_coords is not None:
pass
# print('Coordinates = {} in epsg:{}'.format(str(coords), str(epsg_coords)))
else:
print('For numerical coords, it is necessary to indicate their CRS, by using the *epsg_coords* argument.')
return
#% Convert into appropriate CRS = CRS from the data:
if epsg_data is not None:
# 1st conversion method: (x,y) or (lon,lat):
coords_conv = rasterio.warp.transform(rasterio.crs.CRS.from_epsg(epsg_coords),
rasterio.crs.CRS.from_epsg(epsg_data),
[coords[0]], [coords[1]])
coords_conv = (coords_conv[0][0], coords_conv[1][0])
# (to convert a tuple of arrays into a plain tuple)
# =============================================================================
# # 2nd conversion method: (x,y) or (lat,lon):
# coords_conv = convert_coord(coords[0], coords[1],
# inputEPSG = epsg_coords,
# outputEPSG = epsg_data)
# =============================================================================
print('Coordinates = {} in epsg:{}'.format(str(coords_conv),
str(epsg_data)))
elif 'spatial_ref' in _dataset.coords or 'spatial_ref' in _dataset.data_vars:
# _motif = re.compile('"EPSG","\d+"')
# _substr = _motif.findall(_dataset.spatial_ref.crs_wkt)[-1]
# _epsg_motif = re.compile('\d+')
# epsg_data = int(_epsg_motif.search(_substr).group())
epsg_data = int(_dataset.rio.crs.to_epsg())
coords_conv = rasterio.warp.transform(rasterio.crs.CRS.from_epsg(epsg_coords),
rasterio.crs.CRS.from_epsg(epsg_data),
[coords[0]], [coords[1]])
coords_conv = (coords_conv[0][0], coords_conv[1][0])
print('Coordinates = {} in epsg:{}'.format(str(coords_conv),
str(epsg_data)))
else:
print("'spatial_ref' is not indicated in the data file, and the argument *epsg_data* is not used. Be sure to use the same CRS as in the data.")
coords_conv = coords
#% Extract data:
_dataset = _dataset[fields]
if dates is not None:
_dataset = _dataset.sel(time = slice(dates[0], dates[1]))
if 'longitude' in list(_dataset.dims) or 'latitude' in list(_dataset.dims):
results = _dataset.sel(longitude = coords_conv[0],
latitude = coords_conv[1],
method = 'nearest')
elif 'x' in list(_dataset.dims) or 'y' in list(_dataset.dims):
results = _dataset.sel(x = coords_conv[0],
y = coords_conv[1],
method = 'nearest')
del _dataset # memory cleaning
else: # elif isinstance(coords, str):
# In this case, 'coords' indicates a mask
if coords.casefold() == 'all':
print('All cells are considered')
else: # coords is a file_path
#% Get CRS from the data:
if 'spatial_ref' in _dataset.coords or 'spatial_ref' in _dataset.data_vars:
# _motif = re.compile('"EPSG","\d+"')
# _substr = _motif.findall(_dataset.spatial_ref.crs_wkt)[-1]
# _epsg_motif = re.compile('\d+')
# epsg_data = int(_epsg_motif.search(_substr).group())
epsg_data = int(_dataset.rio.crs.to_epsg())
elif epsg_data is None:
print('Data file has no associated CRS.')
return
_mask_extension = os.path.splitext(coords)[-1]
if _mask_extension in ['.tif', '.tiff', '.nc']: # If the mask is a geotiff or a netcdf
if _mask_extension in ['.tif', '.tiff']:
with xr.open_dataset(coords) as mask_ds:
mask_ds.load()
mask_ds = mask_ds.squeeze('band').drop('band')
# NB: mask_ds.y.values are decreasing
elif _mask_extension in ['.nc']:
with xr.open_dataset(coords, decode_coords = 'all') as mask_ds:
mask_ds.load()
mask_ds = mask_ds.rename({list(mask_ds.data_vars)[0]: 'band_data'})
if 'spatial_ref' in mask_ds.coords or 'spatial_ref' in mask_ds.data_vars:
# _motif = re.compile('"EPSG","\d+"')
# _substr = _motif.findall(mask_ds.spatial_ref.crs_wkt)[-1]
# _epsg_motif = re.compile('\d+')
# epsg_coords = int(_epsg_motif.search(_substr).group())
epsg_coords = int(mask_ds.rio.crs.to_epsg())
else:
if epsg_coords is not None:
mask_ds.rio.write_crs('epsg:' + str(epsg_coords), inplace = True)
print('For clipping/masking, epsg:' + str(epsg_coords) + ' will be used.')
else:
print('For clipping/masking, mask needs to have a CRS associated.')
return
# Reprojection of the mask in the CRS of the data:
res_x = float(_dataset.x[1] - _dataset.x[0])
res_y = float(_dataset.y[1] - _dataset.y[0])
# mask_ds['x'] = np.sort(mask_ds.x)
# mask_ds['y'] = np.sort(mask_ds.y)
mask_coords_conv = rasterio.warp.transform(rasterio.crs.CRS.from_epsg(epsg_coords),
rasterio.crs.CRS.from_epsg(epsg_data),
[mask_ds.x.values.min()], [mask_ds.y.values.max()])
idx_x = (np.abs(_dataset.x.values - mask_coords_conv[0][0])).argmin()
# if idx_x > 0: idx_x = idx_x - 1
x_min = float(_dataset.x[idx_x]) - res_x/2
idx_y = (np.abs(_dataset.y.values - mask_coords_conv[1][0])).argmin()
# if idx_y > 0: idx_y = idx_y - 1
y_max = float(_dataset.y[idx_y]) + res_y/2
mask_proj = mask_ds.rio.reproject('epsg:' + str(epsg_data),
transform = Affine(res_x, 0.0, x_min,
0.0, -res_y, y_max),
resampling = rasterio.enums.Resampling(5))
_dataset = _dataset.where(mask_proj.band_data == 1, drop = True)
_dataset.rio.write_crs("epsg:"+str(epsg_data), inplace = True)
elif _mask_extension in ['.shp']: # If the mask is a shapefile
mask_df = gpd.read_file(coords)
if epsg_coords is not None:
mask_df = mask_df.set_crs('epsg:' + str(epsg_coords))
else:
try:
epsg_coords = mask_df.crs.to_epsg()
except AttributeError:
print('For clipping/masking, mask needs to have a CRS associated.')
return
print('For clipping/masking, epsg:' + str(epsg_coords) + ' will be used.')
_dataset = _dataset.rio.clip(mask_df.geometry.apply(mapping),
mask_df.crs,
all_touched = True)
print('Data CRS is {}. Mask CRS is {}.'.format(epsg_data, epsg_coords))
#% Extract data:
if mode == 'mean':
# results = _dataset.mean(dim = list(_dataset.dims)[-2:],
# skipna = True, keep_attrs = True)
results = _dataset.mean(dim = ['x', 'y'],
skipna = True, keep_attrs = True)
elif mode == 'sum':
results = _dataset.sum(dim = ['x', 'y'],
skipna = True, keep_attrs = True)
elif mode == 'max':
results = _dataset.max(dim = ['x', 'y'],
skipna = True, keep_attrs = True)
elif mode == 'min':
results = _dataset.min(dim = ['x', 'y'],
skipna = True, keep_attrs = True)
# memory cleaning
del _dataset
gc.collect()
if cumul:
print('\ncumul == True')
timespan = results['time'
].diff(
dim = 'time', label = 'lower')/np.timedelta64(1, 'D')
_var = main_var(results)
results[_var][dict(time = slice(0, timespan.size))
] = (results[_var][dict(time = slice(0, timespan.size))
] * timespan.values).cumsum(axis = 0)
# Last value:
results[_var][-1] = np.nan
# Convert to pandas.dataframe
results = results.drop('spatial_ref').to_dataframe()
return results
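# For the single-point mode above, the two essential steps are: reproject the
# (x, y) coordinates into the CRS of the data, then select the nearest cell.
# A minimal sketch of those two calls, commented out (EPSG codes, coordinates
# and the 'data_ds' dataset are purely illustrative):
# =============================================================================
# import rasterio.warp
#
# # Point given in WGS84 (epsg:4326), data gridded in Lambert-93 (epsg:2154):
# xs, ys = rasterio.warp.transform(rasterio.crs.CRS.from_epsg(4326),
#                                  rasterio.crs.CRS.from_epsg(2154),
#                                  [-1.7], [48.1])
# # point_ts = data_ds.sel(x = xs[0], y = ys[0], method = 'nearest')
# =============================================================================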
###############################################################################
#%%% * xr.DataSet to DataFrame
[docs]def xr_to_pd(xr_data):
"""
Format xr objects (such as those from gc.time_series) into pandas.DataFrames
formatted as in gc.tss_to_dataframe.
Parameters
----------
xr_data : xarray.Dataset or xarray.DataArray
Initial data to convert into pandas.DataFrame
NB: xr_data needs to have only one dimension.
Returns
-------
Pandas.DataFrame
"""
print("\n_Infos...")
if type(xr_data) == xr.core.dataset.Dataset:
_main_var = main_var(xr_data)
print(f" Data is a xr.Dataset, with '{_main_var}' as the main variable")
xr_data = xr_data[_main_var]
elif type(xr_data) == xr.core.dataarray.DataArray:
print(" Data is a xr.DataArray")
res = xr_data.to_dataframe(name = 'val')
res = res[['val']]
res['time'] = pd.to_datetime(res.index)
if not res.time.dt.tz:
print(" The timezone is not defined")
# res['time'] = res.time.dt.tz_localize('UTC')
res.index = range(0, res.shape[0])
# =============================================================================
# res['id'] = res.index
# =============================================================================
print('') # newline
return res
###############################################################################
#%%% ° tss_to_dataframe
[docs]def tss_to_dataframe(*, input_file, skip_rows, start_date, cumul = False):
"""
Example
-------
base = gc.tss_to_dataframe(input_file = r"D:\2- Postdoc\2- Travaux\3_CWatM_EBR\results\raw_results\001_prelim_cotech\2022-03-19_base\discharge_daily.tss",
skip_rows = 4,
start_date = '1991-08-01')
precip = gc.tss_to_dataframe(input_file = r"D:\2- Postdoc\2- Travaux\3_CWatM_EBR\results\raw_results\003_artif\2022-03-25_base\Precipitation_daily.tss",
skip_rows = 4,
start_date = '2000-01-01')
precip.val = precip.val*534000000/86400
# (the topographic catchment of the Meu river is 471 851 238 m2)
precip['rolling_mean'] = precip['val'].rolling(10).mean()
Parameters
----------
input_file : str
Path to the input file
skip_rows : int
Number of lines to remove at the top of the file. /!\ this number is not ...
start_date : str or datetime
Date of the first value in the file
/!\ If str, it must follow the "%Y-%m-%d" format
Returns
-------
df : pandas.DataFrame
Future implementations
----------------------
Retrieve the start_date from the settings file referenced at the beginning
of the *.tss file, then check the SpinUp
"""
#%% Retrieving inputs:
# ---------------------
if start_date == 'std':
print('> Not implemented yet...')
# retrieve start_date from the settings file
else:
start_date = pd.to_datetime(start_date)
# print('> start_date = ' + str(start_date))
#%% Building the dataframes:
# ---------------------------
# df = pd.read_csv(input_file, sep = r"\s+", header = 0, names = ['id', 'val'], skiprows = skip_rows)
if skip_rows == 0: # Case of *.css discharge files, with no info lines,
# only a header
_fields = ['']
n_col = 1
else: # Case of *.tss outputs with several info lines, the 2nd line
# giving the number of columns. In that case, skip_rows must be
# equal to 2.
with open(input_file) as temp_file:
# temp_file.readline()
# n_col = int(temp_file.readline()[0])
content = temp_file.readlines()
n_col = int(content[skip_rows-1][0])
_fields = [str(n) for n in range(1, n_col)]
_fields[0] = ''
df = pd.read_csv(input_file,
sep = r"\s+",
header = 0,
skiprows = skip_rows -1 + n_col,
names = ['id'] + ['val' + ending for ending in _fields],
)
# If the id column already contains dates (as text or datetime):
if type(df.id[0]) in [str,
pd.core.indexes.datetimes.DatetimeIndex,
pd._libs.tslibs.timestamps.Timestamp]:
df['time'] = pd.to_datetime(df.id)
# Otherwise (= if the id column contains indices), the dates must be rebuilt:
else:
date_indexes = pd.date_range(start = start_date,
periods = df.shape[0], freq = '1D')
df['time'] = date_indexes
if cumul:
print('\ncumul == True')
# Values are expected to be expressed in [.../d]
# Cumsum is applied on all columns with values ('val', 'val2', 'val3', ...)
timespan = df.loc[
:, df.columns == 'time'
].diff().shift(-1, fill_value = 0)/np.timedelta64(1, 'D')
# timespan = df.loc[
# :, df.columns == 'time'
# ].diff()/np.timedelta64(1, 'D')
df.iloc[
:].loc[:, (df.columns != 'id') * (df.columns != 'time')
] = (df.iloc[
:].loc[:, (df.columns != 'id') * (df.columns != 'time')
] * timespan.values).cumsum(axis = 0)
# Last value
# df.iloc[-1].loc[:, (df.columns != 'id') * (df.columns != 'time')] = np.nan
# np.diff(df.time)/np.timedelta64(1, 'D')
return df
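# The 'cumul' option above converts daily rates into running totals: each value
# is multiplied by its time step (in days), then cumulatively summed. The same
# idea on a bare pandas Series, commented out (synthetic values):
# =============================================================================
# import numpy as np
# import pandas as pd
#
# time = pd.date_range('2021-01-01', periods = 5, freq = '1D')
# rate = pd.Series([2.0, 2.0, 1.0, 0.0, 3.0], index = time)   # [.../d]
# step_days = time.to_series().diff().shift(-1, fill_value = pd.Timedelta(0)) \
#             / np.timedelta64(1, 'D')
# cumulative = (rate * step_days.values).cumsum()
# # cumulative.values -> [2., 4., 5., 5., 5.] (the last step has no next date)
# =============================================================================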
#%% WATERSHED OPERATIONS
###############################################################################
#%%% ° Extract watershed
###############################################################################
#%%% ° Compute LDD
[docs]def compute_ldd(dem_path, method):
"""
dem_path = r"D:\2- Postdoc\2- Travaux\1- Veille\4- Donnees\0- MNT\IGN\MNT_fusion\RGEALTI_FXX_1m_0318_6774_0340_6781_MNT_LAMB93_IGN69.tif"
"""
#%%% With pysheds
if method.casefold() in ["pyshed", "pysheds"]:
print('Chosen method: pysheds')
# Compute the river network
grid = Grid.from_raster(dem_path, data_name = 'dem')
dem = grid.read_raster(dem_path)
# Fill depressions in DEM
print('dem no data is ', grid.nodata)
flooded_dem = grid.fill_depressions(dem)
# Resolve flats in DEM
inflated_dem = grid.resolve_flats(flooded_dem)
# Specify directional mapping
dirmap = (64, 128, 1, 2, 4, 8, 16, 32) #D8 system
# dirmap = (8, 9, 6, 3, 2, 1, 4, 7) #LDD system
# Compute flow directions
direc = grid.flowdir(inflated_dem, dirmap=dirmap)
# Leftover sketch of a WhiteboxTools-based correction ('read', 'dem_file'
# and 'wbt' are not defined in this branch):
# dem = read(dem_file)
# dem_corrected = wbt.BreachDepressionsLeastCost(dem)
# dem_corrected = wbt.FillDepressions(dem)
#%%% With WhiteboxTools
elif method.casefold() in ["wtb", "whitetoolbox"]:
print('Chosen method: WhiteboxTools')
dist_ = 10
# Breach depressions
wbt.breach_depressions_least_cost(
dem_path,
os.path.splitext(dem_path)[0] + "_breached{}[wtb].tif".format(dist_),
dist_)
print(' Intermediate file created')
# =============================================================================
# # Fill depression (alternative)
# wbt.fill_depressions(
# dem_path,
# os.path.splitext(dem_path)[0] + "_filled[wtb].tif",
# 10)
# =============================================================================
# Creating the D8 pointer
suffix = "breached{}[wtb]".format(dist_)
wbt.d8_pointer(
os.path.splitext(dem_path)[0] + "_" + suffix + ".tif",
os.path.join(os.path.split(dem_path)[0], "D8_xxx_" + suffix + "_wtb.tif"),
esri_pntr = True)
print(' "D8 ESRI" LDD created')
###############################################################################
#%%% ° Convert LDD code
"""
To switch between different direction mappings
"""
[docs]def switch_direction_map(input_file, input_mapping, output_mapping):
#%%% Inputs
mapping = [input_mapping, output_mapping]
position = ['input', 'output']
print("")
for i in [0, 1]:
if mapping[i].casefold().replace("_", "").replace(" ", "") in ["ldd","localdraindirections"]:
mapping[i] = "LDD"
elif mapping[i].casefold().replace("_", "").replace(" ", "") in ["d8", "esri", "d8esri", "esrid8", "d8standard", "standardd8"]:
mapping[i] = "D8 ESRI"
elif mapping[i].casefold().replace("_", "").replace(" ", "") in ["wtb", "whitetoolbox", "d8whitetoolbox", "d8wtb", "wtbd8"]:
mapping[i] = "WTB"
else:
mapping[i] = "/!\ non reconnu /!\ "
print("Direction mapping en {} : {}".format(position[i], mapping[i]))
if "/!\ non reconnu /!\ " in mapping:
return "error"
#%%% Conversion
# Loading data
data_in = rasterio.open(input_file, 'r')
data_profile = data_in.profile
val = data_in.read()
data_in.close()
# Conversion
# rows: 0:'LDD', 1:'D8', 2:'WTB'
col = ['LDD', 'D8 ESRI', 'WTB']
keys_ = np.array(
[[8, 64, 128,],#N
[9, 128, 1,], #NE
[6, 1, 2,], #E
[3, 2, 4,], #SE
[2, 4, 8,], #S
[1, 8, 16,], #SW
[4, 16, 32,], #W
[7, 32, 64,], #NW
[5, 0, 0,]]) #-
for d in range(0, 9):
val[val == keys_[d,
col.index(mapping[0])
]
] = -keys_[d,
col.index(mapping[1])]
val = -val # Negative values are used as an intermediate step to avoid collisions
# such as: 3 --> 2, then 2 --> 4
#%%% Export
output_file = os.path.splitext(input_file)[0] + "_{}.tif".format(mapping[1])
with rasterio.open(output_file, 'w', **data_profile) as output_f:
output_f.write(val)
print("\nFichier créé.")
###############################################################################
#%%% ° Alter modflow_river_percentage
[docs]def river_pct(input_file, value):
"""
Creates artificial modflow_river_percentage inputs (in *.nc) to use for
drainage.
Parameters
----------
input_file : str
Original modflow_river_percentage.tif file to duplicate/modify
value : float
Value to impose on cells (from [0 to 1], not in percentage!)
This value is added to original values as a fraction of the remaining
"non-river" fraction:
For example, value = 0.3 (30%):
- cells with 0 are filled with 0.3
- cells with 1 remain the same
- cells with 0.8 take the value 0.86, because 30% of what should
have been capillary rise become baseflow (0.8 + 0.3*(1-0.8))
- cells with 0.5 take the value 0.65 (0.5 + 0.3*(1-0.5))
Returns
-------
None.
"""
#%% Loading
# ---------
if os.path.splitext(input_file)[-1] == '.tif':
with xr.open_dataset(input_file, # .tif
decode_times = True,
) as ds:
ds.load()
elif os.path.splitext(input_file)[-1] == '.nc':
with xr.open_dataset(input_file,
decode_times = True,
decode_coords = 'all',
) as ds:
ds.load()
#%% Computing
# -----------
# ds['band_data'] = ds['band_data']*0 + value
ds_ones = ds.copy(deep = True)
ds_ones['band_data'] = ds_ones['band_data']*0 + 1
#% modflow_river_percentage_{value}.nc:
ds1 = ds.copy(deep = True)
ds1['band_data'] = ds1['band_data'] + (ds_ones['band_data'] - ds1['band_data'])*value
#% drainage_river_percentage_{value}.nc :
ds2 = ds1 - ds
#%% Formatting
# ------------
ds1.rio.write_crs(2154, inplace = True)
ds1.x.attrs = {'standard_name': 'projection_x_coordinate',
'long_name': 'x coordinate of projection',
'units': 'Meter'}
ds1.y.attrs = {'standard_name': 'projection_y_coordinate',
'long_name': 'y coordinate of projection',
'units': 'Meter'}
# To avoid conflict when exporting to netcdf:
ds1.x.encoding['_FillValue'] = None
ds1.y.encoding['_FillValue'] = None
ds2.rio.write_crs(2154, inplace = True)
ds2.x.attrs = {'standard_name': 'projection_x_coordinate',
'long_name': 'x coordinate of projection',
'units': 'Meter'}
ds2.y.attrs = {'standard_name': 'projection_y_coordinate',
'long_name': 'y coordinate of projection',
'units': 'Meter'}
# To avoid conflict when exporting to netcdf:
ds2.x.encoding['_FillValue'] = None
ds2.y.encoding['_FillValue'] = None
#%% Exporting
# -----------
(folder, file) = os.path.split(input_file)
(file, extension) = os.path.splitext(file)
output_file1 = os.path.join(folder, "_".join([file, str(value)]) + '.nc')
ds1.to_netcdf(output_file1)
output_file2 = os.path.join(folder, "_".join(['drainage_river_percentage', str(value)]) + '.nc')
ds2.to_netcdf(output_file2)
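# The update rule used above is new = old + value * (1 - old): the imposed
# fraction applies only to the remaining "non-river" share of each cell. A
# quick numeric check of the values quoted in the docstring:
# =============================================================================
# value = 0.3
# for old in [0.0, 0.5, 0.8, 1.0]:
#     print(old, '->', old + value * (1 - old))
# # 0.0 -> 0.3 ; 0.5 -> 0.65 ; 0.8 -> 0.86 ; 1.0 -> 1.0
# =============================================================================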
#%% QUANTITIES OPERATIONS
###############################################################################
# Computes ETref and EWref from the ERA5-Land pan evaporation
[docs]def compute_Erefs_from_Epan(input_file):
print("\nDeriving standard grass evapotranspiration and standard water evapotranspiration from pan evaporation...")
Epan = load_any(input_file, decode_coords = 'all', decode_times = True)
var = main_var(Epan)
print(" _ Computing ETref (ET0) from Epan...")
ETref = Epan.copy()
ETref[var] = ETref[var]*0.675
print(" _ Computing EWref from Epan...")
EWref = Epan.copy()
EWref[var] = EWref[var]*0.75
print(" _ Transferring encodings...")
ETref[var].encoding = Epan[var].encoding
EWref[var].encoding = Epan[var].encoding
# Case of packing
if ('scale_factor' in Epan[var].encoding) | ('add_offset' in Epan[var].encoding):
# Packing (lossy compression) induces a loss of precision of
# approx. 1/1000 of a unit, for a quantity with an interval of 150
# units. The packing is initially used in some original ERA5-Land data
ETref[var].encoding['scale_factor'] = ETref[var].encoding['scale_factor']*0.675
ETref[var].encoding['add_offset'] = ETref[var].encoding['add_offset']*0.675
EWref[var].encoding['scale_factor'] = EWref[var].encoding['scale_factor']*0.75
EWref[var].encoding['add_offset'] = EWref[var].encoding['add_offset']*0.75
return ETref, EWref
###############################################################################
[docs]def compute_wind_speed(u_wind_data, v_wind_data):
"""
U-component of wind is parallel to the x-axis
V-component of wind is parallel to the y-axis
"""
# =============================================================================
# print("\nIdentifying files...")
# U_motif = re.compile('U-component')
# U_match = U_motif.findall(input_file)
# V_motif = re.compile('V-component')
# V_match = V_motif.findall(input_file)
#
# if len(U_match) > 0:
# U_input_file = '%s' % input_file # to copy the string
# V_input_file = '%s' % input_file
# V_input_file = V_input_file[:U_motif.search(input_file).span()[0]] + 'V' + V_input_file[U_motif.search(input_file).span()[0]+1:]
# elif len(V_match) > 0:
# V_input_file = '%s' % input_file # to copy the string
# U_input_file = '%s' % input_file
# U_input_file = U_input_file[:V_motif.search(input_file).span()[0]] + 'U' + U_input_file[V_motif.search(input_file).span()[0]+1:]
# =============================================================================
print("\nComputing wind speed from U- and V-components...")
U_ds = load_any(u_wind_data, decode_coords = 'all', decode_times = True)
V_ds = load_any(v_wind_data, decode_coords = 'all', decode_times = True)
wind_speed_ds = U_ds.copy()
wind_speed_ds = wind_speed_ds.rename(u10 = 'wind_speed')
wind_speed_ds['wind_speed'] = np.sqrt(U_ds.u10*U_ds.u10 + V_ds.v10*V_ds.v10)
# nan remain nan
print(" _ Transferring encodings...")
wind_speed_ds['wind_speed'].encoding = V_ds.v10.encoding
wind_speed_ds['wind_speed'].attrs['long_name'] = '10 metre wind speed'
# Case of packing
if ('scale_factor' in V_ds.v10.encoding) | ('add_offset' in V_ds.v10.encoding):
# Packing (lossy compression) induces a loss of precision of
# approx. 1/1000 of a unit, for a quantity with an interval of 150
# units. The packing is initially used in some original ERA5-Land data
# Theoretical max wind speed:
max_speed = 56 # m/s = 201.6 km/h
(scale_factor, add_offset) = compute_scale_and_offset(-max_speed, max_speed, 16)
# Out: (0.0017090104524299992, 0.0008545052262149966)
wind_speed_ds['wind_speed'].encoding['scale_factor'] = scale_factor
wind_speed_ds['wind_speed'].encoding['add_offset'] = add_offset
# wind_speed_ds['wind_speed'].encoding['FillValue_'] = -32767
# To remain the same as originally
# Corresponds to -55.99829098954757 m/s
return wind_speed_ds
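# compute_scale_and_offset(), used just above and again in
# compute_relative_humidity() below, is assumed to be available elsewhere in
# this module. For reference, the packing arithmetic it is expected to
# implement (linear scaling of [vmin, vmax] onto n-bit signed integers) can be
# sketched as follows; it reproduces the values quoted in the comments above:
# =============================================================================
# def _scale_and_offset_sketch(vmin, vmax, n):
#     """Return (scale_factor, add_offset) for packing [vmin, vmax] on n bits."""
#     scale_factor = (vmax - vmin) / (2**n - 1)
#     add_offset = vmin + 2**(n - 1) * scale_factor
#     return scale_factor, add_offset
#
# # _scale_and_offset_sketch(-56, 56, 16) -> (0.0017090104524..., 0.0008545052262...)
# # _scale_and_offset_sketch(-1, 100, 16) -> (0.0015411612115..., 49.5007705806...)
# =============================================================================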
###############################################################################
[docs]def compute_relative_humidity(*, dewpoint_input_file,
temperature_input_file,
pressure_input_file,
method = "Penman-Monteith"):
"""
cf formula on https://en.wikipedia.org/wiki/Dew_point
gc.compute_relative_humidity(
dewpoint_input_file = r"D:\2- Postdoc\2- Travaux\1- Veille\4- Donnees\8- Meteo\ERA5\Brittany\2011-2021 Dewpoint temperature.nc",
temperature_input_file = r"D:\2- Postdoc\2- Travaux\1- Veille\4- Donnees\8- Meteo\ERA5\Brittany\2011-2021 Temperature.nc",
pressure_input_file = r"D:\2- Postdoc\2- Travaux\1- Veille\4- Donnees\8- Meteo\ERA5\Brittany\2011-2021 Surface pressure.nc",
method = "Sonntag")
"""
# ---- Loading data
# --------------
print("\nLoading data...")
# Checking that the time period matches:
years_motif = re.compile(r'\d{4,4}-\d{4,4}')
years_dewpoint = years_motif.search(dewpoint_input_file).group()
years_pressure = years_motif.search(pressure_input_file).group()
years_temperature = years_motif.search(temperature_input_file).group()
if (years_dewpoint == years_pressure) and (years_dewpoint == years_temperature):
print(" Years are matching: {}".format(years_dewpoint))
else:
print(" /!\ Years are not matching: {}\n{}\n{}".format(years_dewpoint, years_pressure, years_temperature))
# return 0
with xr.open_dataset(dewpoint_input_file, decode_coords = 'all') as Dp:
Dp.load() # to unlock the resource
with xr.open_dataset(temperature_input_file, decode_coords = 'all') as T:
T.load() # to unlock the resource
with xr.open_dataset(pressure_input_file, decode_coords = 'all') as Pa:
Pa.load() # to unlock the resource
# ---- Sonntag formula
# -----------------
if method.casefold() in ['sonntag', 'sonntag1990']:
print("\nComputing the relative humidity, using the Sonntag 1990 formula...")
# NB : air pressure Pa is not used in this formula
# Constants:
alpha_ = 6.112 # [hPa]
beta_ = 17.62 # [-]
lambda_ = 243.12 # [°C]
# Temperature in degrees Celsius:
Tc = T.copy()
Tc['t2m'] = T['t2m'] - 273.15
Dpc = Dp.copy()
Dpc['d2m'] = Dp['d2m'] - 273.15
# Saturation vapour pressure [hPa]:
Esat = Tc.copy()
Esat = Esat.rename(t2m = 'vpsat')
Esat['vpsat'] = alpha_ * np.exp((beta_ * Tc['t2m']) / (lambda_ + Tc['t2m']))
# Vapour pressure [hPa]:
E = Dp.copy()
E = E.rename(d2m = 'vp')
E['vp'] = alpha_ * np.exp((Dpc['d2m'] * beta_) / (lambda_ + Dpc['d2m']))
# Relative humidity [%]:
RHS = Dp.copy()
RHS = RHS.rename(d2m = 'rh')
RHS['rh'] = E['vp']/Esat['vpsat']*100
elif method.casefold() in ['penman', 'monteith', 'penman-monteith']:
print("\nComputing the relative humidity, using the Penman Monteith formula...")
# NB : air pressure Pa is not used in this formula
# Used in evaporationPot.py
# http://www.fao.org/docrep/X0490E/x0490e07.htm equation 11/12
# Constants:
alpha_ = 0.6108 # [kPa]
beta_ = 17.27 # [-]
lambda_ = 237.3 # [°C]
# Temperature in degrees Celsius:
Tc = T.copy()
Tc['t2m'] = T['t2m'] - 273.15
Dpc = Dp.copy()
Dpc['d2m'] = Dp['d2m'] - 273.15
# Saturation vapour pressure [kPa]:
Esat = Tc.copy()
Esat = Esat.rename(t2m = 'vpsat')
Esat['vpsat'] = alpha_ * np.exp((beta_ * Tc['t2m']) / (lambda_ + Tc['t2m']))
# Vapour pressure [kPa]:
E = Dp.copy()
E = E.rename(d2m = 'vp')
E['vp'] = alpha_ * np.exp((beta_ * Dpc['d2m']) / (lambda_ + Dpc['d2m']))
# Relative humidity [%]:
# https://www.fao.org/3/X0490E/x0490e07.htm Eq. (10)
RHS = Dp.copy()
RHS = RHS.rename(d2m = 'rh')
RHS['rh'] = E['vp']/Esat['vpsat']*100
#% Attributes
print("\nTransferring encodings...")
RHS['rh'].attrs['units'] = '%'
RHS['rh'].attrs['long_name'] = 'Relative humidity (from 2m dewpoint temperature)'
RHS['rh'].encoding = Dp['d2m'].encoding
# Case of packing
if ('scale_factor' in Dp['d2m'].encoding) | ('add_offset' in Dp['d2m'].encoding):
# Packing (lossy compression) induces a loss of precision of
# approx. 1/1000 of a unit, for a quantity with an interval of 150
# units. The packing is initially used in some original ERA5-Land data.
# RHS['rh'].encoding['scale_factor'] = 0.0016784924086366065
# RHS['rh'].encoding['add_offset'] = 55.00083924620432
# RHS['rh'].encoding['_FillValue'] = 32767
# RHS['rh'].encoding['missing_value'] = 32767
(scale_factor, add_offset) = compute_scale_and_offset(-1, 100, 16)
# Out: (0.0015411612115663385, 49.50077058060578)
RHS['rh'].encoding['scale_factor'] = scale_factor
RHS['rh'].encoding['add_offset'] = add_offset
# RHS['rh'].encoding['_FillValue'] = -32767
# To match with original value
# Corresponds to -0.9984588387884301 %
return RHS
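# Both branches above compute RH as the ratio of a Magnus-type saturation
# vapour pressure evaluated at the dewpoint and at the air temperature; the
# alpha_ prefactor cancels out. A scalar sanity check with the Sonntag
# constants (temperature values are arbitrary):
# =============================================================================
# import numpy as np
#
# beta_, lambda_ = 17.62, 243.12
# T, Td = 20.0, 10.0       # air and dewpoint temperatures [degC]
# rh = 100 * np.exp(beta_ * Td / (lambda_ + Td)) / np.exp(beta_ * T / (lambda_ + T))
# # rh is approximately 52.5 %; it reaches 100 % when Td == T
# =============================================================================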
###############################################################################
# Converts radiation data (J/m2/h) into W/m2
[docs]def convert_downwards_radiation(input_file, is_dailysum = False):
print("\nConverting radiation units...")
rad = load_any(input_file, decode_coords = 'all', decode_times = True)
var = main_var(rad)
print(" _ Field is: {}".format(var))
print(" _ Computing...")
rad_W = rad.copy()
if not is_dailysum:
conv_factor = 3600 # because 3600s in 1h
else:
conv_factor = 86400 # because 86400s in 1d
rad_W[var] = rad_W[var]/conv_factor
print(" _ Transferring encodings...")
rad_W[var].attrs['units'] = 'W m**-2'
rad_W[var].encoding = rad[var].encoding
# Case of packing
if ('scale_factor' in rad_W[var].encoding) | ('add_offset' in rad_W[var].encoding):
# Packing (lossy compression) induces a loss of precision of
# approx. 1/1000 of a unit, for a quantity with an interval of 150
# units. The packing is initially used in some original ERA5-Land data.
rad_W[var].encoding['scale_factor'] = rad_W[var].encoding['scale_factor']/conv_factor
rad_W[var].encoding['add_offset'] = rad_W[var].encoding['add_offset']/conv_factor
# NB:
# rad_W[var].encoding['FillValue_'] = -32767
# To remain the same as originally
# Corresponds to -472.11... W m**-2
# NB: For few specific times, data are unavailable. Such data are coded
# with the value -1, packed into -32766
return rad_W
###############################################################################
# Complete CWatM climatic variables
[docs]def secondary_era5_climvar(data):
"""
This function computes the secondary data from ERA5-Land data, such as:
- crop and water standard ETP from pan evaporation
- wind speed from U- and V-components
- relative humidity from T°, P and dewpoint
Parameters
----------
data : filepath or xarray.dataset (or xarray.dataarray)
Main dataset used to generate secondary quantities.
Returns
-------
None. Generates intended files.
"""
# ---- Load main dataset
data_ds = load_any(data, decode_coords = 'all', decode_times = True)
var = main_var(data_ds)
# ---- Corrections
if var == 'pev':
ETref, EWref = compute_Erefs_from_Epan(data_ds)
export(ETref, ' ET0crop'.join(os.path.splitext(data)))
export(EWref, ' EW0'.join(os.path.splitext(data)))
elif var in ['u10', 'v10']:
if var == 'u10':
v10 = input("Filepath to V-component of wind")
u10 = data_ds
elif var == 'v10':
u10 = input("Filepath to U-component of wind")
v10 = data_ds
wind_speed_ds = compute_wind_speed(u10, v10)
export(wind_speed_ds, ' wind_speed'.join(os.path.splitext(data)))
elif var == 'rh':
dewpoint_file = input("Filepath to dewpoint data: ")
temperature_file = input("Filepath to 2m mean temperature data: ")
pressure_file = input("Filepath to surface pressure data: ")
rhs_ds = compute_relative_humidity(
dewpoint_input_file = dewpoint_file,
temperature_input_file = temperature_file,
pressure_input_file = pressure_file,
method = "Sonntag"
)
export(rhs_ds, ' Relative humidity'.join(os.path.splitext(data)))
#%% * OBSOLETE ? Shift rasters (GeoTIFF or NetCDF)
###############################################################################
# Not fully finished. Code taken from 'datatransform.py'
#%% * tools for computing coordinates
###############################################################################
[docs]def convert_coord(pointXin, pointYin, inputEPSG = 2154, outputEPSG = 4326):
"""
There is an issue in this function: X and Y end up swapped.
It is better to use the rasterio functions (see above):
coords_conv = rasterio.warp.transform(rasterio.crs.CRS.from_epsg(inputEPSG),
rasterio.crs.CRS.from_epsg(outputEPSG),
[pointXin], [pointYin])
pointXout = coords_conv[0][0]
pointYout = coords_conv[1][0]
"""
#% Inputs (standards)
# =============================================================================
# # Projected coordinates in Lambert-93
# pointXin = 350556.92318 #Easting
# pointYin = 6791719.72296 #Northing
# (Rennes coordinates)
# =============================================================================
# =============================================================================
# # Geographical coordinates in WGS84 (2D)
# pointXin = 48.13222 #Latitude (Northing)
# pointYin = -1.7 #Longitude (Easting)
# (Rennes coordinates)
# =============================================================================
# =============================================================================
# # Spatial Reference Systems
# inputEPSG = 2154 #Lambert-93
# outputEPSG = 4326 #WGS84 (2D)
# =============================================================================
# # Conversion into EPSG system
# For easy use, inputEPSG and outputEPSG can be defined with identifier strings
switchEPSG = {
'L93': 2154, #Lambert-93
'L-93': 2154, #Lambert-93
'WGS84': 4326, #WGS84 (2D)
'GPS': 4326, #WGS84 (2D)
'LAEA': 3035, #LAEA Europe
}
if isinstance(inputEPSG, str):
inputEPSG = switchEPSG.get(inputEPSG, False)
# If the string is not a valid identifier:
if not inputEPSG:
print('Unknown input coordinates system')
return
if isinstance(outputEPSG, str):
outputEPSG = switchEPSG.get(outputEPSG, False)
# If the string is not a valid identifier:
if not outputEPSG:
print('Unknown output coordinates system')
return
#% Outputs
# =============================================================================
# # osr method
# # create a geometry from coordinates
# point = ogr.Geometry(ogr.wkbPoint)
# point.AddPoint(pointXin, pointYin)
#
# # create coordinate transformation
# inSpatialRef = osr.SpatialReference()
# inSpatialRef.ImportFromEPSG(inputEPSG)
#
# outSpatialRef = osr.SpatialReference()
# outSpatialRef.ImportFromEPSG(outputEPSG)
#
# coordTransform = osr.CoordinateTransformation(inSpatialRef, outSpatialRef)
#
# # transform point
# point.Transform(coordTransform)
# pointXout = point.GetX()
# pointYout = point.GetY()
# =============================================================================
# rasterio method
coords_conv = rasterio.warp.transform(rasterio.crs.CRS.from_epsg(inputEPSG),
rasterio.crs.CRS.from_epsg(outputEPSG),
[pointXin], [pointYin])
pointXout = coords_conv[0][0]
pointYout = coords_conv[1][0]
# Return point coordinates in output format
return(pointXout, pointYout)
#%% date tools for QGIS
"""
To easily convert between band number and date in QGIS when opening
NetCDF files as rasters.
/!\ In QGIS, the 'band' number differs from the 'time' index
(sometimes 'band' = 'time' + 1, sometimes there is a large difference).
It is the 'time' index that matters.
"""
###############################################################################
[docs]def date_to_index(_start_date, _date, _freq):
time_index = len(pd.date_range(start = _start_date, end = _date, freq = _freq))-1
print('Date {} corresponds to time index {}'.format(_date, str(time_index)))
return time_index
###############################################################################
[docs]def index_to_date(_start_date, _time_index, _freq):
date_index = pd.date_range(start = _start_date, periods = _time_index+1, freq = _freq)[-1]
print('Time index {} corresponds to date {}'.format(_time_index, str(date_index)))
return date_index
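# Example usage of the two helpers above (daily data starting on 2020-01-01):
# =============================================================================
# date_to_index('2020-01-01', '2020-01-31', '1D')    # -> 30
# index_to_date('2020-01-01', 30, '1D')              # -> Timestamp('2020-01-31')
# =============================================================================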
#%% main
if __name__ == "__main__":
# Format the inputs (assumed to be strings) into floats
sys.argv[1] = float(sys.argv[1])
sys.argv[2] = float(sys.argv[2])
# Print some remarks
print('Arguments:')
print(sys.argv[1:])
# Execute the convert_coord function
(a,b) = convert_coord(*sys.argv[1:])
print(a,b)