From 7ed80f876fc425ba2ea6dacab0a0e760df2858f2 Mon Sep 17 00:00:00 2001 From: wrongkindofdoctor <20195932+wrongkindofdoctor@users.noreply.github.com> Date: Fri, 19 Apr 2024 18:15:53 -0400 Subject: [PATCH 1/6] update GFDL fieldlist table --- data/fieldlist_GFDL.jsonc | 304 +++++++++++++++++++++++++++++++------- 1 file changed, 254 insertions(+), 50 deletions(-) diff --git a/data/fieldlist_GFDL.jsonc b/data/fieldlist_GFDL.jsonc index 7c230f539..da6f651b0 100644 --- a/data/fieldlist_GFDL.jsonc +++ b/data/fieldlist_GFDL.jsonc @@ -46,30 +46,28 @@ // } }, "variables" : { - "ucomp": { - "standard_name": "eastward_wind", - "long_name":"", - "realm": "atmos", - "units": "m s-1", - "scalar_coord_templates": {"plev": "u{value}"}, - "ndim": 4 - }, - "vcomp": { - "standard_name": "northward_wind", - "long_name":"", + "alb_sfc": { + "standard_name": "", + "long_name":"surface albedo", "realm": "atmos", - "units": "m s-1", - "scalar_coord_templates": {"plev": "v{value}"}, - "ndim": 4 + "units": "%", + "ndim": 3 }, "hght": { "standard_name": "geopotential_height", - "long_name":"", + "long_name": "", "realm": "atmos", "units": "m", "scalar_coord_templates": {"plev": "hght{value}"}, "ndim": 4 }, + "IWP": { + "standard_name": "atmosphere_mass_content_of_cloud_ice", + "long_name": "Ice water path", + "realm": "atmos", + "units": "kg m-2", + "ndim": 3 + }, "sphum": { "standard_name": "specific_humidity", "long_name":"", @@ -85,14 +83,6 @@ "scalar_coord_templates": {"plev": "omega{value}"}, "ndim": 4 }, - "t_surf": { - // "skin temperature", analogue of ts - "standard_name": "surface_temperature", - "long_name":"", - "realm": "atmos", - "units": "K", - "ndim": 3 - }, "precip": { "standard_name": "precipitation_flux", "long_name":"", @@ -102,31 +92,44 @@ }, "prec_conv": { "standard_name": "convective_precipitation_flux", + "long_name" : "", "realm": "atmos", - "units": "kg m-2 s-1", // need to verify + "units": "kg m-2 s-1", "ndim": 3 }, - "t_ref" : { - // CMIP6 equivalent = tas, temp at 
2m ref height - "standard_name": "air_temperature", + "prec_ls": { + "standard_name": "", + "long_name" : "Precipitation rate from strat cloud", "realm": "atmos", - "long_name": "temperature at 2 m", - "units": "K", - "ndim": 3, - "modifier": "atmos_height" + "units": "kg m-2 s-1", + "ndim": 3 + }, + "prec_uwd": { + "standard_name": "", + "long_name" : "Precipitation rate from deep plume", + "realm": "atmos", + "units": "kg m-2 s-1", + "ndim": 3 + }, + "prw": { + "standard_name": "atmosphere_mass_content_of_water_vapor", + "long_name": "Water Vapor Path", + "realm": "atmos", + "units": "kg m-2", + "ndim": 3 }, "ps": { "standard_name": "surface_air_pressure", "long_name": "surface pressure", "realm": "atmos", - "units": "Pa", // need to verify + "units": "Pa", "ndim": 3 }, "tau_x": { "standard_name": "surface_downward_eastward_stress", "long_name": "zonal wind stress", "realm": "atmos", - "units": "Pa", // need to verify + "units": "Pa", "ndim": 3 }, "tau_y": { @@ -148,28 +151,28 @@ "standard_name": "surface_upwelling_shortwave_flux_in_air", "long_name": "", "realm": "atmos", - "units": "W m-2", // need to verify + "units": "W m-2", "ndim": 3 }, "swdn_sfc": { "standard_name": "surface_downwelling_shortwave_flux_in_air", "long_name": "", "realm": "atmos", - "units": "W m-2", // need to verify + "units": "W m-2", "ndim": 3 }, "swdn_toa": { "standard_name": "toa_incoming_shortwave_flux", - "long_name": "", + "long_name": "SW flux down at TOA", "realm": "atmos", - "units": "W m-2", // need to verify + "units": "W m-2", "ndim": 3 }, "swup_toa": { "standard_name": "toa_outgoing_shortwave_flux", - "long_name": "", + "long_name": "SW flux up at TOA", "realm": "atmos", - "units": "W m-2", // need to verify + "units": "W m-2", "ndim": 3 }, "lwup_sfc": { @@ -200,6 +203,13 @@ "units": "W m-2", "ndim": 3 }, + "ice_wat": { + "standard_name": "", + "long_name": "", + "realm": "atmos", + "units": "kg m-2", + "ndim": 3 + }, "hfls": { "standard_name": 
"surface_upward_latent_heat_flux", "long_name": "", @@ -295,6 +305,58 @@ "units": "W m-2", "ndim": 3 }, + "LWP": { + "standard_name": "atmosphere_mass_content_of_cloud_condensed_water", + "long_name": "Liquid Water Path", + "realm": "atmos", + "units": "kg m-2", + "ndim": 3 + }, + "q_ref": { + "standard_name": "specific_humidity", + "long_name": "specific humidity at 2 m", + "realm": "atmos", + "units": "kg kg-1", + "modifier": "atmos_height", + "ndim": 3 + }, + "rh": { + "standard_name": "relative_humidity", + "long_name": "relative humidity", + "realm": "atmos", + "units": "%", + "scalar_coord_templates": {"plev": "rh{value}"}, + "ndim": 4 + }, + "rh_ref": { + "standard_name": "relative_humidity", + "long_name": "relative humidity at 2 m", + "realm": "atmos", + "units": "%", + "modifier": "atmos_height", + "ndim": 3 + }, + "snow_conv": { + "standard_name": "", + "long_name": "Frozen precip rate from convection", + "realm": "atmos", + "units": "kg m-2 s-1", + "ndim": 3 + }, + "snow_ls": { + "standard_name": "", + "long_name": "Frozen precip rate from strat cloud", + "realm": "atmos", + "units": "kg m-2 s-1", + "ndim": 3 + }, + "snow_tot": { + "standard_name": "snowfall_flux", + "long_name": "Frozen precip rate from all sources", + "realm": "atmos", + "units": "kg m-2 s-1", + "ndim": 3 + }, // Variables for AMOC_3D_Structure module: // "uo": { // NB: need to perform rotation to get from u,v? 
@@ -326,22 +388,156 @@ // "standard_name": "sea_water_potential_temperature", // "units": "K" // }, - // Variables for Convective Transition Diagnostics module: + "tdt_dyn": { + "standard_name": "", + "long_name": "tdt_dyn", + "realm": "atmos", + "units": "K s-1", + "ndim": 3 + }, "temp": { "standard_name": "air_temperature", - "long_name": "", + "long_name": "temperature", "realm": "atmos", "units": "K", + "scalar_coord_templates": {"plev": "temp{value}"}, "ndim": 4 }, + "tot_cld_amt" : { + "standard_name": "cloud_area_fraction", + "realm": "atmos", + "long_name": "total cloud amount", + "units": "%", + "ndim": 3, + "modifier": "atmos_height" + }, + "t_ref" : { + // CMIP6 equivalent = tas, temp at 2m ref height + "standard_name": "air_temperature", + "realm": "atmos", + "long_name": "temperature at 2 m", + "units": "K", + "ndim": 3, + "modifier": "atmos_height" + }, + "t_surf": { + // "skin temperature", analogue of ts + "standard_name": "surface_temperature", + "long_name": "surface temperature", + "realm": "atmos", + "units": "K", + "ndim": 3 + }, + "ucomp": { + "standard_name": "eastward_wind", + "long_name": "zonal wind", + "realm": "atmos", + "units": "m s-1", + "scalar_coord_templates": {"plev": "u{value}"}, + "ndim": 4 + }, + "u_ref": { + "standard_name": "eastward_wind", + "long_name": "zonal wind component at 10 m", + "realm": "atmos", + "units": "m s-1", + "modifier": "atmos_height", + "ndim": 3 + }, + "uw_precip": { + "standard_name": "", + "long_name": "Precipitation rate from uw shallow", + "realm": "atmos", + "units": "kg m-2 s-1", + "ndim": 3 + }, + "vcomp": { + "standard_name": "northward_wind", + "long_name": "meridional wind", + "realm": "atmos", + "units": "m s-1", + "scalar_coord_templates": {"plev": "v{value}"}, + "ndim": 4 + }, + "v_ref": { + "standard_name": "northward_wind", + "long_name": "meridional wind component at 10 m", + "realm": "atmos", + "units": "m s-1", + "modifier": "atmos_height", + "ndim": 3 + }, + "wat_conv_col": { + 
"standard_name": "", + "long_name": "Column total water tendency from convection", + "realm": "atmos", + "units": "kg m-2 s-1", + "ndim": 3 + }, + "wat_ls_col": { + "standard_name": "", + "long_name": "Column total water tendency from strat cloud", + "realm": "atmos", + "units": "kg m-2 s-1", + "ndim": 3 + }, + "wat_uw_col": { + "standard_name": "", + "long_name": "Column total water tendency from UW convection", + "realm": "atmos", + "units": "kg m-2 s-1", + "ndim": 3 + }, + "wind_ref": { + "standard_name": "", + "long_name": "absolute value of wind at 10 m", + "realm": "atmos", + "units": "m s-1", + "ndim": 3 + }, + "WP_all_clouds": { + "standard_name": "atmosphere_mass_content_of_water_vapor", + "long_name": "Total water path -- all clouds + ls precip", + "realm": "atmos", + "units": "kg m-2", + "ndim": 3 + }, "WVP": { - // column integral; over the whole column? "standard_name": "atmosphere_mass_content_of_water_vapor", - "long_name": "", + "long_name": "Column integrated water vapor", "realm": "atmos", "units": "kg m-2", "ndim": 3 - } + }, + "zg500": { + "standard_name": "", + "long_name": "Geopotential Height at 500 hPa", + "realm": "atmos", + "units": "m", + "ndim": 3 + }, + "zg": { + "standard_name": "", + "long_name": "Geopotential Height", + "realm": "atmos", + "units": "m", + "scalar_coord_templates": {"plev": "zg{value}"}, + "ndim": 4 + }, + "z_pbl": { + "standard_name": "", + "long_name": "depth of planetary boundary layer", + "realm": "atmos", + "units": "m", + "ndim": 3 + }, + "z_Ri_025":{ + "standard_name": "", + "long_name": "Critical bulk Richardson height", + "realm": "atmos", + "units": "m", + "ndim": 3 + }, // Variables for SM_ET_coupling module // "mrsos": { // "standard_name": "mass_content_of_water_in_soil_layer", @@ -349,12 +545,20 @@ // "units": "kg m-2", // "ndim": 3 // }, - // "evspsbl": { - // "standard_name": "water_evapotranspiration_flux", - // "long_name": "", - // "units": "kg m-2 s-1", - // "ndim": 3 - // } + "evap": { + 
"standard_name": "", + "long_name": "evaporation rate", + "realm": "atmos", + "units": "kg m-2 s-1", + "ndim": 3 + }, + "evspsbl": { + "standard_name": "water_evapotranspiration_flux", + "long_name": "Evaporation", + "realm": "atmos", + "units": "kg m-2 s-1", + "ndim": 3 + } }, "env_vars" : { // 0 for CMIP/GFDL date index start, 1 for CESM native date index start From fe8335f9aec4fe1b8b88007588d9f5169be7a061 Mon Sep 17 00:00:00 2001 From: wrongkindofdoctor <20195932+wrongkindofdoctor@users.noreply.github.com> Date: Mon, 22 Apr 2024 15:23:22 -0400 Subject: [PATCH 2/6] add more varaibles to fieldlist_GFDL.jsonc --- data/fieldlist_GFDL.jsonc | 202 ++++++++++++++++++++++++++++++-------- 1 file changed, 161 insertions(+), 41 deletions(-) diff --git a/data/fieldlist_GFDL.jsonc b/data/fieldlist_GFDL.jsonc index da6f651b0..80346c5d9 100644 --- a/data/fieldlist_GFDL.jsonc +++ b/data/fieldlist_GFDL.jsonc @@ -34,6 +34,13 @@ "time": {"axis": "T", "standard_name": "time", "units": "days"} }, "aux_coords": { + "band":{ + "standard_name": "", + "long_name": "spectral band", + "units": "1", + "ndim": 1, + } + // "deptho": { // "standard_name": "sea_floor_depth_below_geoid", // "units": "m", @@ -46,13 +53,44 @@ // } }, "variables" : { - "alb_sfc": { + "areacello": { + "standard_name": "cell_area", + "realm": "ocean", + "units": "m2", + "ndim": 2 + }, + "zos": { + "standard_name": "sea_surface_height_above_geoid", + "realm": "ocean", + "units": "m", + "ndim": 3 + }, + "tauuo": { + "standard_name": "downward_x_stress_at_sea_water_surface", + "realm": "ocean", + "units": "N m-2", + "ndim": 3 + }, + "tauvo": { + "standard_name": "downward_y_stress_at_sea_water_surface", + "realm": "ocean", + "units": "N m-2", + "ndim": 3 + }, + "alb_sfc": { "standard_name": "", "long_name":"surface albedo", "realm": "atmos", "units": "%", "ndim": 3 }, + "aliq": { + "standard_name": "", + "long_name": "Cloud fraction for large-scale liquid clouds", + "realm": "atmos", + "units": "1", + "ndim": 4 + }, 
"hght": { "standard_name": "geopotential_height", "long_name": "", @@ -61,6 +99,13 @@ "scalar_coord_templates": {"plev": "hght{value}"}, "ndim": 4 }, + "ice_mask": { + "standard_name": "sea_ice_area_fraction", + "long_name": "fractional amount of sea ice", + "realm": "atmos", + "units": "1", + "ndim": 3 + }, "IWP": { "standard_name": "atmosphere_mass_content_of_cloud_ice", "long_name": "Ice water path", @@ -77,9 +122,9 @@ }, "omega": { "standard_name": "lagrangian_tendency_of_air_pressure", - "long_name":"", + "long_name": "", "realm": "atmos", - "units": "Pa s-1", // need to verify + "units": "Pa s-1", "scalar_coord_templates": {"plev": "omega{value}"}, "ndim": 4 }, @@ -136,14 +181,36 @@ "standard_name": "surface_downward_northward_stress", "long_name": "meridional wind stress", "realm": "atmos", - "units": "Pa", // need to verify + "units": "Pa", "ndim": 3 }, "slp": { "standard_name": "air_pressure_at_mean_sea_level", "long_name": "sea level pressure", "realm": "atmos", - "units": "Pa", // need to verify + "units": "Pa", + "ndim": 3 + }, + "aer_c": { + "standard_name": "", + "long_name": "aerosol_col", + "realm": "aerosol", + "units": "kg m-2", + "ndim": 3 + }, + "o3": { + "standard_name": "mole_fraction_of_ozone_in_air", + "long_name": "Ozone Volume Mixing Ratio", + "realm": "aerosol", + "units": "mol mol-1", + "scalar_coord_templates": {"plev": "o3{value}"}, + "ndim": 4 + }, + "toz": { + "standard_name": "equivalent_thickness_at_stp_of_atmosphere_ozone_content", + "long_name": "Total Column Ozone", + "realm": "aerosol", + "units": "m", "ndim": 3 }, // radiative fluxes: @@ -179,14 +246,14 @@ "standard_name": "surface_upwelling_longwave_flux_in_air", "long_name": "", "realm": "atmos", - "units": "W m-2", // need to verify + "units": "W m-2", "ndim": 3 }, "lwdn_sfc": { "standard_name": "surface_downwelling_longwave_flux_in_air", "long_name": "", "realm": "atmos", - "units": "W m-2", // need to verify + "units": "W m-2", "ndim": 3 }, "olr": { @@ -205,9 +272,9 @@ }, 
"ice_wat": { "standard_name": "", - "long_name": "", + "long_name": "cloud ice water specific humidity", "realm": "atmos", - "units": "kg m-2", + "units": "kg kg-1", "ndim": 3 }, "hfls": { @@ -267,6 +334,7 @@ }, "swup_toa_ad_clr": { "standard_name": "clear_sky_SW_flux_up_at_TOA_without_aerosol", + "long_name": "", "realm": "atmos", "units": "W m-2", "ndim": 3 @@ -357,17 +425,6 @@ "units": "kg m-2 s-1", "ndim": 3 }, - // Variables for AMOC_3D_Structure module: - // "uo": { - // NB: need to perform rotation to get from u,v? - // "standard_name": "sea_water_x_velocity", - // "units": "m s-1" - // }, - // "vo": { - // NB: need to perform rotation to get from u,v? - // "standard_name": "sea_water_y_velocity", - // "units": "m s-1" - // }, "salt": { "standard_name": "sea_water_salinity", "long_name": "", @@ -375,19 +432,6 @@ "units": "psu", "ndim": 4 }, - // "umo": { - // "standard_name": "ocean_mass_x_transport", - // "units": "kg s-1" - // }, - // "vmo": { - // "standard_name": "ocean_mass_y_transport", - // "units": "kg s-1" - // }, - // "temp": { - // NB: conflicts with "temp" for air temperature! 
- // "standard_name": "sea_water_potential_temperature", - // "units": "K" - // }, "tdt_dyn": { "standard_name": "", "long_name": "tdt_dyn", @@ -421,7 +465,6 @@ "modifier": "atmos_height" }, "t_surf": { - // "skin temperature", analogue of ts "standard_name": "surface_temperature", "long_name": "surface temperature", "realm": "atmos", @@ -538,13 +581,90 @@ "units": "m", "ndim": 3 }, - // Variables for SM_ET_coupling module - // "mrsos": { - // "standard_name": "mass_content_of_water_in_soil_layer", - // "long_name": "", - // "units": "kg m-2", - // "ndim": 3 - // }, + "mrsos": { + "standard_name": "mass_content_of_water_in_soil_layer", + "long_name": "", + "realm": "land", + "units": "kg m-2", + "ndim": 3 + }, + "soil_liq": { + "standard_name": "", + "long_name": "bulk density of liquid water", + "realm": "land", + "units": "kg m-3", + "ndim": 4 + }, + "water_soil": { + "standard_name": "mass_content_of_water_in_soil", + "long_name": "column-integrated soil water", + "realm": "land", + "units": "kg m-2", + "ndim": 3 + }, + "soil_fgw": { + "standard_name": "", + "long_name": "groundwater storage frac above base elev", + "realm": "land", + "units": "1", + "ndim": 3 + }, + "soil_wtdep": { + "standard_name": "", + "long_name": "depth below sfc to saturated soil", + "realm": "land", + "units": "m", + "ndim": 3 + }, + "albedo_dif": { + "standard_name": "", + "long_name": "land surface albedo for diffuse light", + "realm": "land", + "units": "1", + "ndim": 4 + }, + "albedo_dir": { + "standard_name": "", + "long_name": "land surface albedo for direct light", + "realm": "land", + "units": "1", + "ndim": 4 + }, + "nep": { + "standard_name": "surface_net_downward_mass_flux_of_carbon_dioxide_expressed_as_carbon_due_to_all_land_processes_excluding_anthropogenic_land_use_change", + "long_name": "net ecosystem productivity", + "realm": "land", + "units": "kg m-2 s-1", + "ndim": 3 + }, + "npp": { + "standard_name": "net_primary_productivity_of_biomass_expressed_as_carbon", +
"long_name": "net primary productivity", + "realm": "land", + "units": "kg m-2 s-1", + "ndim": 3 + }, + "gpp": { + "standard_name": "gross_primary_productivity_of_biomass_expressed_as_carbon", + "long_name": "gross primary productivity", + "realm": "land", + "units": "kg m-2 s-1", + "ndim": 3 + }, + "theta": { + "standard_name": "", + "long_name": "average soil wetness for carbon decomposition", + "realm": "land", + "units": "m3 m-3", + "ndim": 3 + }, + "transp": { + "standard_name": "transpiration_flux", + "long_name": "Transpiration", + "realm": "land", + "units": "kg m-2 s-1", + "ndim": 3 + }, "evap": { "standard_name": "", "long_name": "evaporation rate", From 19b8626db54dea0a91bde84f92e2e0f2664855e9 Mon Sep 17 00:00:00 2001 From: wrongkindofdoctor <20195932+wrongkindofdoctor@users.noreply.github.com> Date: Mon, 22 Apr 2024 15:24:41 -0400 Subject: [PATCH 3/6] change varialble realm to aerosol in stc_ozone settings file --- diagnostics/stc_ozone/settings.jsonc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/diagnostics/stc_ozone/settings.jsonc b/diagnostics/stc_ozone/settings.jsonc index 17f583296..a3b05fe7b 100644 --- a/diagnostics/stc_ozone/settings.jsonc +++ b/diagnostics/stc_ozone/settings.jsonc @@ -65,7 +65,7 @@ }, "o3": { "standard_name" : "mole_fraction_of_ozone_in_air", - "realm": "atmos", + "realm": "aerosol", "use_exact_name": true, "units" : "mol mol-1", "frequency": "mon", From 21a2639457000881917b7e1b6b3a1ed417c34702 Mon Sep 17 00:00:00 2001 From: wrongkindofdoctor <20195932+wrongkindofdoctor@users.noreply.github.com> Date: Tue, 23 Apr 2024 17:46:52 -0400 Subject: [PATCH 4/6] add entries to CMIP fieldlist fix formatting in GFDL fieldlist --- data/fieldlist_CMIP.jsonc | 84 +++++++++++++++++++++++++++++++++++++++ data/fieldlist_GFDL.jsonc | 2 +- 2 files changed, 85 insertions(+), 1 deletion(-) diff --git a/data/fieldlist_CMIP.jsonc b/data/fieldlist_CMIP.jsonc index a38ac77f2..a36958ce6 100644 --- a/data/fieldlist_CMIP.jsonc +++ 
b/data/fieldlist_CMIP.jsonc @@ -74,6 +74,90 @@ "scalar_coord_templates": {"plev": "wap{value}"}, "ndim": 4 }, + "hurs": { + "standard_name": "relative_humidity", + "realm": "atmos", + "units": "%", + "ndim": 3 + }, + "ua850": { + "standard_name": "eastward_wind", + "long_name": "eastward wind at 850 hPa", + "realm": "atmos", + "units": "m s-1", + "ndim": 3 + }, + "va850": { + "standard_name": "northward_wind", + "long_name": "northward wind at 850 hPa", + "realm": "atmos", + "units": "m s-1", + "ndim": 3 + }, + "ua200": { + "standard_name": "eastward_wind", + "long_name": "eastward wind at 200 hPa", + "realm": "atmos", + "units": "m s-1", + "ndim": 3 + }, + "va200": { + "standard_name": "northward_wind", + "long_name": "northward wind at 200 hPa", + "realm": "atmos", + "units": "m s-1", + "ndim": 3 + }, + "ua10": { + "standard_name": "eastward_wind", + "long_name": "eastward wind at 10 hPa", + "realm": "atmos", + "units": "m s-1", + "ndim": 3 + }, + "va10": { + "standard_name": "northward_wind", + "long_name": "northward wind at 10 hPa", + "realm": "atmos", + "units": "m s-1", + "ndim": 3 + }, + "uas": { + "standard_name": "eastward_wind", + "long_name": "eastward near-surface wind", + "realm": "atmos", + "units": "m s-1", + "modifier": "atmos_height", + "ndim": 3 + }, + "vas": { + "standard_name": "northward_wind", + "long_name": "northward near-surface wind", + "realm": "atmos", + "units": "m s-1", + "modifier": "atmos_height", + "ndim": 3 + }, + "zg500": { + "standard_name": "geopotential_height", + "long_name": "geopotential_height at 500 hPa", + "realm": "atmos", + "units": "m", + "ndim": 3 + }, + "wap500": { + "standard_name": "lagrangian_tendency_of_air_pressure", + "long_name": "lagrangian tendency of air pressure at 500 hPa", + "realm": "atmos", + "units": "Pa s-1", + "ndim": 3 + }, + "hur_unmsk": { + "standard_name": "relative_humidity", + "realm": "atmos", + "units": "%", + "ndim": 3 + }, "o3": { "standard_name": "mole_fraction_of_ozone_in_air", "realm": 
"atmos", diff --git a/data/fieldlist_GFDL.jsonc b/data/fieldlist_GFDL.jsonc index 80346c5d9..a5cda4288 100644 --- a/data/fieldlist_GFDL.jsonc +++ b/data/fieldlist_GFDL.jsonc @@ -38,7 +38,7 @@ "standard_name": "", "long_name": "spectral band", "units": "1", - "ndim": 1, + "ndim": 1 } // "deptho": { From e9ad02d52f5910fcb6994348b53239c820c106dd Mon Sep 17 00:00:00 2001 From: wrongkindofdoctor <20195932+wrongkindofdoctor@users.noreply.github.com> Date: Tue, 23 Apr 2024 17:49:30 -0400 Subject: [PATCH 5/6] add optional dataset_id parameter to example_builder_config.yml --- .../templates/example_builder_config.yml | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tools/catalog_builder/examples/templates/example_builder_config.yml b/tools/catalog_builder/examples/templates/example_builder_config.yml index 6d3e24db9..23c17b168 100644 --- a/tools/catalog_builder/examples/templates/example_builder_config.yml +++ b/tools/catalog_builder/examples/templates/example_builder_config.yml @@ -1,29 +1,29 @@ -# Configuration file template for catalog_builder -# DRS convention to use cmip | gfdl | cesm +## Configuration file template for catalog_builder +## DRS convention to use cmip | gfdl | cesm convention: cmip -# IMPORTANT: Attempting to build a catalog of the entire contents of a pp directory will likely max out available PPAN resources (i.e., it takes longer than 12 hours to build a catalog for atmos/ts/monthly/5yr one node w/16 threads). It is strongly recommended to use include_patterns and/or exclude_patterns to target a specific subset of variables and dates to improve the performance of the catalog builder. -# Path(s) to the root directory with the target dataset +## IMPORTANT: Attempting to build a catalog of the entire contents of a pp directory will likely max out available PPAN resources (i.e., it takes longer than 12 hours to build a catalog for atmos/ts/monthly/5yr one node w/16 threads). 
It is strongly recommended to use include_patterns and/or exclude_patterns to target a specific subset of variables and dates to improve the performance of the catalog builder. +## Path(s) to the root directory with the target dataset data_root_dirs: - /uda/cmip6/CMIP/IPSL/IPSL-CM6A-LR-INCA/historical/r1i1p1f1 # - /archive/oar.gfdl.cmip6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos/ts/monthly/5yr - -# depth to traverse for files from data_root_dir(s) -# (e.g., files that are in the root directory have dir_depth=1) +## (optional) dataset id used to determine parser for selected convention. Accepted values: am5 +# dataset_id: am5 +## depth to traverse for files from data_root_dir(s) +## (e.g., files that are in the root directory have dir_depth=1) dir_depth: 5 -# where to write catalog csv and json header files +## where to write catalog csv and json header files output_dir: /nbhome/[INSERT USERNAME HERE] -# name of catalog (.csv and .json will be appended to catalog and -# header files) +## name of catalog (.csv and .json will be appended to catalog and header files) output_filename: esm_catalog_IPSL-CM6A-LR-INCA_historica_r1i1p1f1 -# number of threads: 16 (for job running on one analysis node) -# The example catalog for the UDA directory takes a little over 5 min to build +## number of threads: 16 (for job running on one analysis node) +## The example catalog for the UDA directory takes a little over 5 min to build num_threads: 16 -# optional list of patterns to include in file and directory search +## optional list of patterns to include in file and directory search #include_patterns: # - "*hght.nc" # - "*slp.nc" # - "*t_surf.nc" # - "*t_ref.nc" -# optional list of patterns to exclude from file and directory search +## optional list of patterns to exclude from file and directory search exclude_patterns: - "DO_NOT_USE" From 8da9a685d41b46949cb69d56a4b0323e8b7e6041 Mon Sep 17 00:00:00 2001 From: wrongkindofdoctor 
<20195932+wrongkindofdoctor@users.noreply.github.com> Date: Tue, 23 Apr 2024 17:51:31 -0400 Subject: [PATCH 6/6] add runtime config option dataset_id to switch to new custom parser for am5 data to catalog_builder add new parser for am5 data that uses the DRS and existing fieldlists to populate required catalog entries for GFDL AM5 data to the catalog builder --- tools/catalog_builder/catalog_builder.py | 138 ++++++++++++++++++++++- 1 file changed, 134 insertions(+), 4 deletions(-) diff --git a/tools/catalog_builder/catalog_builder.py b/tools/catalog_builder/catalog_builder.py index de5c8c6a1..14a250749 100644 --- a/tools/catalog_builder/catalog_builder.py +++ b/tools/catalog_builder/catalog_builder.py @@ -21,14 +21,18 @@ import traceback import typing import xarray as xr +import json +from src.util import json_utils, basic import yaml from datetime import datetime, timedelta from ecgtools import Builder from ecgtools.builder import INVALID_ASSET, TRACEBACK from ecgtools.parsers import parse_cmip6 from ecgtools.parsers.cesm import parse_cesm_timeseries +import logging - +# Define a log object for debugging +_log = logging.getLogger(__name__) # The ClassMaker is cribbed from SO # https://stackoverflow.com/questions/1176136/convert-string-to-python-class-object # Classmaker and the @catalog_class.maker decorator allow class instantiation from @@ -36,6 +40,7 @@ # argument instead of messy if/then/else blocks. 
Yes, both work, but I wanted # to try something that, if it is not more "Pythonic", is more extensible + class ClassMaker: def __init__(self): self.classes = {} @@ -56,8 +61,123 @@ def __getitem__(self, n): catalog_class = ClassMaker() +# custom parser for GFDL am5 data that uses fieldlist metadata and the DRS to populate +# required catalog fields +def parse_gfdl_am5_data(file_name: str): + root_dir = os.path.dirname(os.path.realpath(__file__)).split('/tools/catalog_builder')[0] + file = pathlib.Path(file_name) # uncomment when ready to run + + try: + num_dir_parts = len(file.parts) # file name index = num_dir_parts - 1 + # isolate file from rest of path + stem = file.stem + # split the file name into components based on the '.' delimiter + # assume am5 file name format is {realm}.{time_range}.{variable_id}.nc + split = stem.split('.') + num_file_parts = len(split) + realm = split[0] + cell_methods = "" + cell_measures = "" + time_range = split[1] + start_time = time_range.split('-')[0] + end_time = time_range.split('-')[1] + variable_id = split[2] + source_type = "" + member_id = "" + experiment_id = "" + source_id = "" + chunk_freq = file.parts[num_dir_parts-2] # e.g, 1yr, 5yr + variant_label = "" + grid_label = "" + table_id = "" + assoc_files = "" + activity_id = "GFDL" + institution_id = "" + long_name = "" + standard_name = "" + units = "" + + freq_opts = ['mon', + 'day', + 'daily', + '6hr', + '3hr', + '1hr', + 'subhr', + 'annual', + 'year'] + output_frequency = "" + file_freq = file.parts[num_dir_parts-3] + for f in freq_opts: + if f in file_freq: + output_frequency = f + break + if 'daily' in output_frequency: + output_frequency = 'day' + elif 'monthly' in output_frequency: + output_frequency = 'mon' + + # read metadata from the appropriate fieldlist + if 'cmip' in realm.lower(): + gfdl_fieldlist = os.path.join(root_dir, 'data/fieldlist_CMIP.jsonc') + else: + gfdl_fieldlist = os.path.join(root_dir, 'data/fieldlist_GFDL.jsonc') + try: + json_config =
json_utils.read_json(gfdl_fieldlist, log=_log) + except IOError: + print("Unable to open file", gfdl_fieldlist) + sys.exit(1) + gfdl_info = basic.NameSpace.fromDict(json_config) + + if hasattr(gfdl_info.variables, variable_id): + var_metadata = gfdl_info.variables.get(variable_id) + else: + raise KeyError(f'{variable_id} not found in {gfdl_fieldlist}') + + if hasattr(var_metadata, 'standard_name'): + standard_name = var_metadata.standard_name + if hasattr(var_metadata, 'long_name'): + long_name = var_metadata.long_name + if hasattr(var_metadata, 'units'): + units = var_metadata.units + info = { + 'activity_id': activity_id, + 'assoc_files': assoc_files, + 'institution_id': institution_id, + 'member_id': member_id, + 'realm': realm, + 'variable_id': variable_id, + 'table_id': table_id, + 'source_id': source_id, + 'source_type': source_type, + 'cell_methods': cell_methods, + 'cell_measures': cell_measures, + 'experiment_id': experiment_id, + 'variant_label': variant_label, + 'grid_label': grid_label, + 'units': units, + 'time_range': time_range, + 'start_time': start_time, + 'end_time': end_time, + 'chunk_freq': chunk_freq, + 'standard_name': standard_name, + 'long_name': long_name, + 'frequency': output_frequency, + 'variable': variable_id, + 'file_name': stem, + 'path': str(file) + } + + return info + + except Exception as exc: + print(exc) + return {INVALID_ASSET: file, TRACEBACK: traceback.format_exc()} + # custom parser for pp data stored on GFDL archive filesystem # assumed DRS of [root_dir]/pp/[realm]/[analysis type (e.g, 'ts')]/[frequency]/[chunk size (e.g., 1yr, 5yr)] + + def parse_gfdl_pp_ts(file_name: str): # files = sorted(glob.glob(os.path.join(file_name,'*.nc'))) # debug comment when ready to run # file = pathlib.Path(files[0]) # debug comment when ready to run @@ -103,6 +223,10 @@ def parse_gfdl_pp_ts(file_name: str): if f in file_freq: output_frequency = f break + if 'daily' in output_frequency: + output_frequency = 'day' + elif 'monthly' in 
output_frequency: + output_frequency = 'mon' # call to xr.open_dataset required by ecgtoos.builder.Builder with xr.open_dataset(file, chunks={}, decode_times=False) as ds: variable_list = [var for var in ds if 'standard_name' in ds[var].attrs or 'long_name' in ds[var].attrs] @@ -208,7 +332,7 @@ def cat_builder(self, data_paths: list, include_patterns=include_patterns, joblib_parallel_kwargs={'n_jobs': nthreads}, # Number of jobs to execute - # should be equal to # threads you are using - extension='.nc' # extension of target files + extension='.nc' # extension of target file ) def call_save(self, output_dir: str, @@ -333,7 +457,7 @@ def main(config: str): # initialize the catalog object cat_obj = cat_cls() # instantiate the esm catalog builder - opt_keys = ['include_patterns', 'exclude_patterns'] + opt_keys = ['include_patterns', 'exclude_patterns', 'dataset_id'] for k in opt_keys: if k not in conf: conf[k] = None @@ -344,11 +468,17 @@ def main(config: str): dir_depth=conf['dir_depth'], nthreads=conf['num_threads'] ) + + file_parse_method = None + if conf['dataset_id'] is not None: + if 'am5' in conf['dataset_id'].lower(): + file_parse_method = parse_gfdl_am5_data + # build the catalog print('Building the catalog') start_time = time.monotonic() - cat_obj.call_build() + cat_obj.call_build(file_parse_method=file_parse_method) end_time = time.monotonic()