Source code for PAGAIE_interface

# -*- coding: utf-8 -*-
"""
Created on Tue Sep 17 16:58:10 2024

@author: Alexandre Kenshilik Coche
@contact: alexandre.co@hotmail.fr

PAGAIE_interface est une interface regroupant les fonctions de geoconvert
pertinentes pour géotraiter les données géographiques dans le cadre de la 
méthodologie Eau et Territoire (https://eau-et-territoire.org/ ).
A la difference de geoconvert, trajectoire_toolbox propose une sélection des
fonctions disponibles dans geoconvert, ainsi que leur traduction en Français.
"""

#%% IMPORTATIONS
import os
import re
import pathlib
import datetime
import xarray as xr
xr.set_options(keep_attrs = True)
from pywtraj.src.dataprocessing import geohydroconvert as ghc

#%% METHODOLOGIE
"""
Approche classique illustrée avec l'exemple ... : 
    0. Téléchargement des données
    1. georeferencer
    2. convertir_cwatm
    3. reprojeter (découper inclus)
    4. exporter
"""

#%% OPERATIONS DE BASE
[docs]def ouvrir(data, name=None, infer_time=False, **xr_kwargs):
    return ghc.load_any(data, name=name, infer_time=infer_time, **xr_kwargs)

[docs]def georeferencer(*, data, data_type = 'other', include_crs = True, export_opt = False, 
                  crs = None):
    """
    Il est fréquent que les données de source externe présentent des défauts
    de formattage (SCR non inclus, coordonnées non standard, incompatibilité
    avec QGIS...).
    Cette fonction permet de générer un raster ou shapefile standardisé, 
    en particulier du point de vue de ses métadonnées, facilitant ainsi les
    opérations de géotraitement mais aussi la visualisation sous QGIS.
    
    Parameters
    ----------
    data : str or xarray.dataset
        Path (str) to .tif or .nc raster, or xarray.dataset.
    scr_source : TYPE, optional
        DESCRIPTION. The default is None.
    scr_dest : TYPE, optional
        DESCRIPTION. The default is None.

    Returns
    -------
    xarray.dataset
        DESCRIPTION.

    """
    return ghc.georef(data = data, 
                     data_type = data_type,
                     include_crs = include_crs,
                     export_opt = export_opt,
                     crs = crs)  

[docs]def var_principale(data_ds):
    return ghc.main_var(data_ds)

[docs]def liste_fichiers(data, filetype):
    return ghc.get_filelist(data, filetype)

#%% CONVERSIONS POUSSEES
[docs]def convertir_cwatm(data, data_type):
    return ghc.convert_to_cwatm(data, data_type)
    

#%% RAFFINAGE DE L'EMPRISE
[docs]def reprojeter(data, *, src_crs=None, base_template=None, bounds=None,  
              x0=None, y0=None, mask=None, **rio_kwargs):
    
    return ghc.reproject(data, src_crs=src_crs, base_template=base_template, 
                        bounds=bounds, x0=x0, y0=y0, mask=mask, **rio_kwargs)
    """
    Une fonction assez générale qui permette de corriger la réso et le SCR.
    Notamment pour les mnt, il faudrait pouvoir choisir l'option 'min' (pour
    le demmin), ou 'std', etc.
    """

# =============================================================================
# def decouper(data, mask, src_crs=None, dst_crs=None):
#     return ghc.clip(data, mask, src_crs=src_crs, dst_crs=dst_crs)
# =============================================================================

#%% EXPORTER LES RESULTATS
[docs]def exporter(data, output_filepath):
    return ghc.export(data, output_filepath)

#%% COMPRESSER/DECOMPRESSER
[docs]def dezipper(data):
    """
    Dans certains cas, notamment pour le chargement des netcdf dans QGIS sous
    forme de maillage (MESH), il est beaucoup plus rapide de charger un fichier 
    netcdf non compressé qu'un fichier netcdf compressé.
    Cette fonction ne s'applique que dans le cas d'une compression non destructive.
    """
    if isinstance(data, (str, pathlib.Path)):
        if os.path.isdir(data):
            data_list = [os.path.join(data, f)
                         for f in os.listdir(data)
                         if (os.path.isfile(os.path.join(data, f)) \
                             & (os.path.splitext(os.path.join(data, f))[-1] == '.nc'))]
            # In that case, for each file in the folder, a new file will be created
            for file in data_list:
                ds = ghc.unzip(file)
                outpath = os.path.splitext(file)[0] + '_unzip' + '.nc'
                exporter(ds, outpath)
            return

    else:
        return ghc.unzip(data)

[docs]def zipper(data, complevel = 3, shuffle = False):
    """
    Compression sans perte sur NetCDF

    """
    if isinstance(data, (str, pathlib.Path)):
        if os.path.isdir(data):
            data_list = [os.path.join(data, f)
                         for f in os.listdir(data)
                         if (os.path.isfile(os.path.join(data, f)) \
                             & (os.path.splitext(os.path.join(data, f))[-1] == '.nc'))]
            # In that case, for each file in the folder, a new file will be created
            for file in data_list:
                ds = ghc.gzip(file, complevel = complevel, shuffle = shuffle)
                outpath = os.path.splitext(file)[0] + '_gzip' + '.nc'
                exporter(ds, outpath)
            return
                
    else:
        return ghc.gzip(data, complevel = complevel, shuffle = shuffle)
    
[docs]def compresser(data, nbits = 16):
    if isinstance(data, (str, pathlib.Path)):
        if os.path.isdir(data):
            data_list = [os.path.join(data, f)
                         for f in os.listdir(data)
                         if (os.path.isfile(os.path.join(data, f)) \
                             & (os.path.splitext(os.path.join(data, f))[-1] in ['.nc', '.tif']))]
            # In that case, for each file in the folder, a new file will be created
            for file in data_list:
                ds = ghc.pack(file, nbits = nbits)
                outpath = os.path.splitext(file)[0] + '_pack' + '.nc'
                exporter(ds, outpath)
            return

    else:
        return ghc.pack(data, nbits = nbits)
    

#%% ZONES BASSES
[docs]def zones_basses(mnt_raster):
    # Génère un raster indiquant les pixels sous le niveau de la mer, à partir
    # d'un raster de modèle numérique de terrain
    
    with xr.open_dataset(mnt_raster, 
                         decode_times = False, 
                         decode_coords = 'all') as mnt_ds:
        mnt_ds.load()

    
    
#%% FORMATAGE : PIPELINE COMPLET
# A récupérer de geoconvert.py
[docs]def formater(data, data_type, *, 
             mask=None, bounds=None, 
             resolution=None, x0=None, y0=None,
             base_template=None, **rio_kwargs,
             ):
    """
    # DEM
    ttbox.formater(os.path.join(r"D:\2- Postdoc\2- Travaux\1- Veille\4- Donnees",
                                "0- MNT\IGN\BDALTIV2_2-0_25M_ASC_LAMB93-IGN69_PAYS-BASQUE"), 
                   'BD ALTI', 
                   mask = os.path.join(r"D:\2- Postdoc\2- Travaux\1- Veille\4- Donnees",
                                       "15- Territoire Pays Basque\masque_zone_etude.shp"), 
                   resolution = [200, 1000], 
                   resampling = 9) # min
    
    ttbox.formater(os.path.join(r"D:\2- Postdoc\2- Travaux\1- Veille\4- Donnees",
                                "0- MNT\IGN\BDALTIV2_2-0_25M_ASC_LAMB93-IGN69_PAYS-BASQUE"), 
                   'BD ALTI', 
                   mask = os.path.join(r"D:\2- Postdoc\2- Travaux\1- Veille\4- Donnees",
                                       "15- Territoire Pays Basque\masque_zone_etude.shp"), 
                   resolution = [200, 1000], 
                   dst_crs = 27572,
                   resampling = 9) # min
    
    # Climatic
    ttbox.formater(os.path.join(r"D:\2- Postdoc\2- Travaux\1- Veille\4- Donnees",
                                "8- Meteo\DRIAS\DRIAS-Climat\EXPLORE2-Climat2022",
                                "Model1",
                                "evspsblpotAdjust_France_MPI-M-MPI-ESM-LR_historical_r1i1p1_CLMcom-CCLM4-8-17_v1_MF-ADAMONT-SAFRAN-1980-2011_day_19500101-20051231_Hg0175.nc"), 
                   'DRIAS 2022', 
                   mask = os.path.join(r"D:\2- Postdoc\2- Travaux\1- Veille\4- Donnees",
                                       "15- Territoire Pays Basque\masque_zone_etude.shp"), 
                   resolution = 1000, 
                   resampling = 5) # average
    
    ttbox.formater(data = r"D:\2- Postdoc\2- Travaux\1- Veille\4- Donnees\8- Meteo\DRIAS\DRIAS-Climat\EXPLORE2-Climat2022\Model1", 
                   data_type = 'DRIAS 2022', resolution = 1000, resampling = 5,
                   mask = r"D:\2- Postdoc\2- Travaux\1- Veille\4- Donnees\15- Territoire Pays Basque\masque_zone_etude.shp"
                   )
    
    ttbox.formater(data = r"D:\2- Postdoc\2- Travaux\1- Veille\4- Donnees\8- Meteo\DRIAS\DRIAS-Climat\EXPLORE2-Climat2022\Model1", 
                   data_type = 'DRIAS 2022',
                   mask = r"D:\2- Postdoc\2- Travaux\1- Veille\4- Donnees\15- Territoire Pays Basque\masque_zone_etude.shp"
                   )
    
    ttbox.formater(data = r"D:\2- Postdoc\2- Travaux\1- Veille\4- Donnees\8- Meteo\DRIAS\DRIAS-Climat\EXPLORE2-Climat2022\Model4", 
                   data_type = 'DRIAS 2022',
                   mask = [r"D:\2- Postdoc\2- Travaux\1- Veille\4- Donnees\15- Territoire Pays Basque\masque_zone_etude.shp", 
                           r"D:\2- Postdoc\2- Travaux\1- Veille\4- Donnees\16- Territoire Annecy\masque_zone_etude.shp"]
                   )
    

    Parameters
    ----------
    data : TYPE
        DESCRIPTION.
    data_type : TYPE
        DESCRIPTION.
    * : TYPE
        DESCRIPTION.
    mask : TYPE, optional
        DESCRIPTION. The default is None.
    resolution : TYPE, optional
        DESCRIPTION. The default is None.
    resampling : TYPE, optional
        DESCRIPTION. The default is None.

    Returns
    -------
    None.

    """
    #### Initialisation
    # -+-+-+-+-+-+-+-+-
    rundate = datetime.datetime.now().strftime("%Y-%m-%d_%Hh%M")
    
    if isinstance(resolution, int) or resolution is None:
        resolution = [resolution]
        
    if not isinstance(mask, list):
        mask = [mask]
    
    ### Pipeline
    # -+-+-+-+-+-
    
    # ---- BD ALTI 25m
    if data_type.casefold().replace(' ', '') in ['bdalti', 'alti', 'ignalti']:
        # Chemins des données d'entrée et de sortie
        if os.path.isfile(data):
            outpath = '_'.join([os.path.splitext(data)[0], 'CWATM', rundate])
        elif os.path.isdir(data):
            outpath = os.path.join(data, '_'.join([data_type, 'CWATM', rundate]))
        # Corriger les données
        ds = convertir_cwatm(data, data_type)
        # Géoréférencer les données        
        ds = georeferencer(data = ds, data_type = data_type, crs = 2154)
        # Reprojections et exports
        exporter(ds, outpath + '_original.tif')
        for res in resolution:
            if res is None:
                res_suffix = ''
                if 'resolution' in rio_kwargs:
                    rio_kwargs.pop('resolution')
            else:
                res_suffix = f'_{res}m'
                rio_kwargs['resolution'] = res
            ds_rprj = reprojeter(ds, mask = mask, bounds = bounds,
                                 x0 = x0, y0 = y0, base_template = base_template,
                                 **rio_kwargs) #  x0 = 12.5, y0 = 12.5
            exporter(ds_rprj, outpath + res_suffix + '.tif')

    # ---- DRIAS-Climat 2022 (netcdf)
    # Données SAFRAN (tas, prtot, rayonnement, vent... ) "EXPLORE2-Climat 2022" 
    elif data_type.casefold().replace(' ', '').replace('-', '') in [
            'drias2022', 'climat2022', 'driasclimat2022']:
# ============ a supprimer ====================================================
#         if os.path.isdir(data):
#             data_list = [os.path.join(data, f)
#                          for f in os.listdir(data)
#                          if (os.path.isfile(os.path.join(data, f)) \
#                              & (os.path.splitext(os.path.join(data, f))[-1] == '.nc'))]
#         else:
#             data_list = [data]
# =============================================================================
        
        data_folder, filelist = liste_fichiers(data, filetype = '.nc')
        
        for file_name in filelist:
            data = os.path.join(data_folder, file_name)
            # Raccourcir le nom
            motif1 = re.compile('(.*Adjust_France)')
            motif2 = re.compile('(historical|rcp25|rcp45|rcp85)')
            motif3 = re.compile('(\d{4,4}-\d{4,4}.*)')
            str1 = motif1.split(file_name)[1][:-7]
            str2 = motif2.split(file_name)[1]
            str3 = motif3.split(file_name)[1]
            outpath = '_'.join([str1, str2, str3])
            # outpath = outpath[:-3] # to remove '.nc'
            outpath = os.path.splitext(outpath)[0]
            outpath = os.path.join(data_folder, outpath)
            # Corriger les données
            ds = convertir_cwatm(data, data_type = 'drias2022')
            # Géoréférencer les données
            ds = georeferencer(data = ds, data_type = 'drias2022')
            # Compression avec pertes (packing)
            ds_comp = compresser(ds)
            exporter(ds_comp, outpath + '_pack' + '.nc')
            # Reprojections et exports
            # rio_kwargs['dst_crs'] = 2154
            for res in resolution:
                if res is None:
                    res_suffix = ''
                    if 'resolution' in rio_kwargs:
                        rio_kwargs.pop('resolution')
                else:
                    res_suffix = f'_{res}m'
                    rio_kwargs['resolution'] = res
                n_mask = 0
                for m in mask:
                    n_mask += 1
                    ds_rprj = reprojeter(ds, mask = m, bounds = bounds,
                                     x0 = x0, y0 = y0, base_template = base_template,
                                     **rio_kwargs)
                    exporter(ds_rprj, outpath + res_suffix + f'_mask{n_mask}' + '.nc')
                    # exporter(ds_rprj, outpath + f'_{rundate}' + res_suffix + '.nc') # Pas de rundate car noms des fichiers trop longs...
                    
            del ds_rprj
            del ds
            del ds_comp
            
                
    # ---- DRIAS-Eau 2024 (netcdf)
    # Indicateurs SAFRAN (SWIAV, SSWI-3...) "EXPLORE2-SIM2 2024" 
    elif data_type.casefold().replace(' ', '').replace('-', '') in [
            'sim2024', 'indicateursim2024', 'indicateurssim2024',
            'driaseau2024', 'indicateurdriaseau2024',
            'indicateursdriaseau2024']:
# ========== a supprimer ======================================================
#         if os.path.isdir(data):
#             data_list = [os.path.join(data, f)
#                          for f in os.listdir(data)
#                          if (os.path.isfile(os.path.join(data, f)) \
#                              & (os.path.splitext(os.path.join(data, f))[-1] == '.nc'))]
#         else:
#             data_list = [data]
# =============================================================================
            
        data_folder, filelist = liste_fichiers(data, filetype = '.nc')

        for file_name in filelist:
            data = os.path.join(data_folder, file_name)
            # Raccourcir le nom
            motif1 = re.compile('(.*_France)')
            motif2 = re.compile('(historical|rcp25|rcp45|rcp85)')
            motif3 = re.compile('(\d{4,4}-\d{4,4}.*)')
            str1 = motif1.split(file_name)[1][:-7]
            str2 = motif2.split(file_name)[1]
            str3 = motif3.split(file_name)[1]
            outpath = '_'.join([str1, str2, str3])
            # outpath = outpath[:-3] # to remove '.nc'
            outpath = os.path.splitext(outpath)[0]
            outpath = os.path.join(data_folder, outpath)
            # Corriger les données
            ds = convertir_cwatm(data, data_type = "DRIAS-Eau 2024")
            # Géoréférencer les données
            ds = georeferencer(data = ds, data_type = "DRIAS-Eau 2024")
            # Compression avec pertes (packing)
            ds_comp = compresser(ds)
            exporter(ds_comp, outpath + '_pack' + '.nc')
            # Reprojections et exports
            # rio_kwargs['dst_crs'] = 2154
            for res in resolution:
                if res is None:
                    res_suffix = ''
                    if 'resolution' in rio_kwargs:
                        rio_kwargs.pop('resolution')
                else:
                    res_suffix = f'_{res}m'
                    rio_kwargs['resolution'] = res
                n_mask = 0
                for m in mask:
                    n_mask += 1
                    ds_rprj = reprojeter(data = ds, mask = m, bounds = bounds,
                                         x0 = x0, y0 = y0, base_template = base_template,
                                         **rio_kwargs)
                    exporter(ds_rprj, outpath + res_suffix + f'_mask{n_mask}' + '.nc')
                    # exporter(ds_rprj, outpath + f'_{rundate}' + res_suffix + '.nc') # Pas de rundate car noms des fichiers trop longs...
            
            del ds_rprj
            del ds
            del ds_comp
            
    
    # ---- ERA5
    elif data_type.casefold().replace(' ', '').replace('-', '') in [
            "era5", "era5land", "era"]:
        data_folder, filelist = liste_fichiers(data, filetype = '.nc')

        # outpath = outpath[:-3] # to remove '.nc'
        outpath = os.path.splitext(filelist[0])[0]
        outpath = os.path.join(data_folder, outpath)
        # Corriger les données
        ds = convertir_cwatm([os.path.join(data_folder, f) for f in filelist], data_type = "ERA5")
        # Géoréférencer les données
        ds = georeferencer(data = ds, data_type = "other", crs = 4326)
        # Compression avec pertes (packing)
        ds_comp = compresser(ds)
        exporter(ds_comp, outpath + '_pack' + '.nc')
        # Reprojections et exports
        # rio_kwargs['dst_crs'] = 2154
        for res in resolution:
            if res is None:
                res_suffix = ''
                if 'resolution' in rio_kwargs:
                    rio_kwargs.pop('resolution')
            else:
                res_suffix = f'_{res}m'
                rio_kwargs['resolution'] = res
            n_mask = 0
            for m in mask:
                n_mask += 1
                ds_rprj = reprojeter(data = ds, mask = m, bounds = bounds,
                                     x0 = x0, y0 = y0, base_template = base_template,
                                     **rio_kwargs)
                exporter(ds_rprj, outpath + res_suffix + f'_mask{n_mask}' + '.nc')
                # exporter(ds_rprj, outpath + f'_{rundate}' + res_suffix + '.nc') # Pas de rundate car noms des fichiers trop longs...
        
        del ds_rprj
        del ds
        del ds_comp