From e9e1f061e872b069ce29e7de8d8663406c7d9067 Mon Sep 17 00:00:00 2001 From: akrherz Date: Tue, 24 Jan 2023 15:42:15 -0600 Subject: [PATCH] introduce ERA5 ingest #321 --- deployment/symlink_manager.py | 1 + scripts/RUN_12Z.sh | 7 +- scripts/era5/fetch_era5.py | 146 +++++++++++++++++++++++++++ scripts/era5/init_hourly.py | 184 ++++++++++++++++++++++++++++++++++ 4 files changed, 336 insertions(+), 2 deletions(-) create mode 100644 scripts/era5/fetch_era5.py create mode 100644 scripts/era5/init_hourly.py diff --git a/deployment/symlink_manager.py b/deployment/symlink_manager.py index e48e5604af..93c8500230 100644 --- a/deployment/symlink_manager.py +++ b/deployment/symlink_manager.py @@ -16,6 +16,7 @@ ["/mesonet/ARCHIVE/raw", f"{M2}/longterm/raw"], ["/mesonet/ARCHIVE/rer", f"{M7}/ARCHIVE/rer"], ["/mesonet/data/dotcams", f"{M2}/data/dotcams"], + ["/mesonet/data/era5", f"{M2}/data/era5"], ["/mesonet/data/gempak", f"{M2}/data/gempak"], ["/mesonet/data/iemre", f"{M2}/data/iemre"], ["/mesonet/data/prism", f"{M2}/data/prism"], diff --git a/scripts/RUN_12Z.sh b/scripts/RUN_12Z.sh index 4d0d61256b..85ed93a46f 100644 --- a/scripts/RUN_12Z.sh +++ b/scripts/RUN_12Z.sh @@ -1,13 +1,16 @@ # Ensure this is actually being run at 12z, since crontab is in CST/CDT HH=$(date -u +%H) if [ "$HH" -ne "12" ] - then - exit + then + exit fi cd asos python cf6_to_iemaccess.py +cd ../era5 +python fetch_era5.py $(date -u --date '5 days ago' +'%Y %m %d') + # DVN wants this to run at 12:10 UTC, so we start the cron script a bit late cd ../12z python awos_rtp.py diff --git a/scripts/era5/fetch_era5.py b/scripts/era5/fetch_era5.py new file mode 100644 index 0000000000..f57f9cc0eb --- /dev/null +++ b/scripts/era5/fetch_era5.py @@ -0,0 +1,146 @@ +"""Download an hour worth of ERA5. + +Run from RUN_12Z.sh for 5 days ago. +""" +from datetime import timedelta +import os +import sys + +import numpy as np +from pyiem import iemre +from pyiem.util import utc, ncopen, logger +import cdsapi +import pygrib + +LOG = logger() +CDSVARS = ( + "10m_u_component_of_wind 10m_v_component_of_wind 2m_dewpoint_temperature " + "2m_temperature soil_temperature_level_1 soil_temperature_level_2 " + "soil_temperature_level_3 soil_temperature_level_4 " + "surface_solar_radiation_downwards total_evaporation total_precipitation " + "volumetric_soil_water_layer_1 volumetric_soil_water_layer_2 " + "volumetric_soil_water_layer_3 volumetric_soil_water_layer_4" +).split() + + +def ingest(grbfn, valid): + """Consume this grib file.""" + ncfn = f"/mesonet/data/era5/{valid.year}_era5land_hourly.nc" + grbs = pygrib.open(grbfn) + ames_i = int((-93.61 - iemre.WEST) * 10) + ames_j = int((41.99 - iemre.SOUTH) * 10) + # Eh + tidx = iemre.hourly_offset(valid) + with ncopen(ncfn, "a") as nc: + nc.variables["uwnd"][tidx, :, :] = grbs[1].values + LOG.info("uwnd %s", nc.variables["uwnd"][tidx, ames_j, ames_i]) + nc.variables["vwnd"][tidx, :, :] = grbs[2].values + LOG.info("vwnd %s", nc.variables["vwnd"][tidx, ames_j, ames_i]) + nc.variables["dwpk"][tidx, :, :] = grbs[3].values + LOG.info("dwpf %s", nc.variables["dwpk"][tidx, ames_j, ames_i]) + nc.variables["tmpk"][tidx, :, :] = grbs[4].values + LOG.info("tmpk %s", nc.variables["tmpk"][tidx, ames_j, ames_i]) + nc.variables["soilt"][tidx, 0, :, :] = grbs[5].values + nc.variables["soilt"][tidx, 1, :, :] = grbs[6].values + nc.variables["soilt"][tidx, 2, :, :] = grbs[7].values + nc.variables["soilt"][tidx, 3, :, :] = grbs[8].values + LOG.info("soilt %s", nc.variables["soilt"][tidx, :, ames_j, ames_i]) + # -- Solar Radiation is accumulated since 0z + rsds = nc.variables["rsds"] + p01m = nc.variables["p01m"] + evap = nc.variables["evap"] + val = grbs[9].values + if valid.hour == 0: + tidx0 = iemre.hourly_offset((valid - timedelta(hours=24))) + tsolar = np.sum(rsds[(tidx0 + 1) : tidx], 0) * 3600.0 + tp01m = np.sum(p01m[(tidx0 + 1) : tidx], 0) + tevap = np.sum(evap[(tidx0 + 1) : tidx], 0) + elif valid.hour > 1: + tidx0 = iemre.hourly_offset(valid.replace(hour=1)) + tsolar = np.sum(rsds[tidx0:tidx], 0) * 3600.0 + tp01m = np.sum(p01m[tidx0:tidx], 0) + tevap = np.sum(evap[tidx0:tidx], 0) + else: + tsolar = np.zeros(val.shape) + tp01m = np.zeros(val.shape) + tevap = np.zeros(val.shape) + # J m-2 to W/m2 + newval = (val - tsolar) / 3600.0 + nc.variables["rsds"][tidx, :, :] = np.where(newval < 0, 0, newval) + LOG.info( + "rsds nc:%s tsolar:%s grib:%s", + nc.variables["rsds"][tidx, ames_j, ames_i], + tsolar[ames_j, ames_i], + val[ames_j, ames_i], + ) + # m to mm + val = grbs[10].values + nc.variables["evap"][tidx, :, :] = (val * 1000.0) - tevap + LOG.info( + "evap %s grib:%s", + nc.variables["evap"][tidx, ames_j, ames_i], + val[ames_j, ames_i], + ) + # m to mm + val = grbs[11].values + accum = (val * 1000.0) - tp01m + nc.variables["p01m"][tidx, :, :] = np.where(accum < 0, 0, accum) + LOG.info( + "p01m %s grib:%s", + nc.variables["p01m"][tidx, ames_j, ames_i], + val[ames_j, ames_i], + ) + val = grbs[12].values + nc.variables["soilm"][tidx, 0, :, :] = val + nc.variables["soilm"][tidx, 1, :, :] = grbs[13].values + nc.variables["soilm"][tidx, 2, :, :] = grbs[14].values + nc.variables["soilm"][tidx, 3, :, :] = grbs[15].values + LOG.info( + "soilm %s grib1:%s", + nc.variables["soilm"][tidx, :, ames_j, ames_i], + val[ames_j, ames_i], + ) + + +def run(valid): + """Run for the given valid time.""" + LOG.info("Running for %s", valid) + grbfn = f"{valid:%Y%m%d%H}.grib" + + cds = cdsapi.Client(quiet=True) + + cds.retrieve( + "reanalysis-era5-land", + { + "variable": CDSVARS, + "year": f"{valid.year}", + "month": f"{valid.month}", + "day": f"{valid.day}", + "time": f"{valid:%H}:00", + "area": [ + iemre.NORTH, + iemre.WEST, + iemre.SOUTH, + iemre.EAST, + ], + "format": "grib", + }, + grbfn, + ) + ingest(grbfn, valid) + os.unlink(grbfn) + + +def main(argv): + """Go!""" + if len(argv) == 5: + valid = utc(*[int(a) for a in argv[1:]]) + run(valid) + elif len(argv) == 4: + valid = utc(*[int(a) for a in argv[1:]]) + for hr in range(24): + run(valid.replace(hour=hr)) + + +if __name__ == "__main__": + main(sys.argv) diff --git a/scripts/era5/init_hourly.py b/scripts/era5/init_hourly.py new file mode 100644 index 0000000000..40d8dee090 --- /dev/null +++ b/scripts/era5/init_hourly.py @@ -0,0 +1,184 @@ +"""Generate the ERA5 hourly analysis file for a year""" +import datetime +import sys +import os + +import numpy as np +from pyiem import iemre +from pyiem.util import ncopen, logger + +LOG = logger() + + +def init_year(ts): + """ + Create a new NetCDF file for a year of our specification! + """ + ncfn = f"/mesonet/data/era5/{ts.year}_era5land_hourly.nc" + if os.path.isfile(ncfn): + LOG.info("Cowardly refusing to overwrite: %s", ncfn) + return + nc = ncopen(ncfn, "w") + nc.title = f"ERA5 Hourly Reanalysis {ts.year}" + nc.platform = "Grided Observations" + nc.description = "ERA5 hourly analysis" + nc.institution = "Iowa State University, Ames, IA, USA" + nc.source = "Iowa Environmental Mesonet" + nc.project_id = "IEM" + nc.realization = 1 + nc.Conventions = "CF-1.0" + nc.contact = "Daryl Herzmann, akrherz@iastate.edu, 515-294-5978" + nc.history = f"{datetime.datetime.now():%d %B %Y} Generated" + nc.comment = "No Comment at this time" + + # Setup Dimensions + nc.createDimension("lat", (iemre.NORTH - iemre.SOUTH) * 10.0 + 1) + nc.createDimension("lon", (iemre.EAST - iemre.WEST) * 10.0 + 1) + ts2 = datetime.datetime(ts.year + 1, 1, 1) + days = (ts2 - ts).days + LOG.info("Year %s has %s days", ts.year, days) + nc.createDimension("time", int(days) * 24) + nc.createDimension("soil_level", 4) + + ncv = nc.createVariable("soil_level", float, ("soil_level",)) + ncv.units = "m" + # midpoints + ncv[:] = [0.03, 0.14, 0.64, 1.94] + + # Setup Coordinate Variables + lat = nc.createVariable("lat", float, ("lat",)) + lat.units = "degrees_north" + lat.long_name = "Latitude" + lat.standard_name = "latitude" + lat.axis = "Y" + lat[:] = np.arange(iemre.SOUTH, iemre.NORTH + 0.001, 0.1) + + lon = nc.createVariable("lon", float, ("lon",)) + lon.units = "degrees_east" + lon.long_name = "Longitude" + lon.standard_name = "longitude" + lon.axis = "X" + lon[:] = np.arange(iemre.WEST, iemre.EAST + 0.001, 0.1) + + tm = nc.createVariable("time", float, ("time",)) + tm.units = f"Hours since {ts.year}-01-01 00:00:0.0" + tm.long_name = "Time" + tm.standard_name = "time" + tm.axis = "T" + tm.calendar = "gregorian" + tm[:] = np.arange(0, int(days) * 24) + + # 0->65535 + tmpk = nc.createVariable( + "tmpk", np.uint16, ("time", "lat", "lon"), fill_value=65535 + ) + tmpk.units = "K" + tmpk.scale_factor = 0.01 + tmpk.long_name = "2m Air Temperature" + tmpk.standard_name = "2m Air Temperature" + tmpk.coordinates = "lon lat" + + # 0->65535 0 to 655.35 + dwpk = nc.createVariable( + "dwpk", np.uint16, ("time", "lat", "lon"), fill_value=65335 + ) + dwpk.units = "K" + dwpk.scale_factor = 0.01 + dwpk.long_name = "2m Air Dew Point Temperature" + dwpk.standard_name = "2m Air Dew Point Temperature" + dwpk.coordinates = "lon lat" + + # NOTE: we need to store negative numbers here, gasp + # -32768 to 32767 so -98 to 98 mps + uwnd = nc.createVariable( + "uwnd", np.int16, ("time", "lat", "lon"), fill_value=32767 + ) + uwnd.scale_factor = 0.003 + uwnd.units = "meters per second" + uwnd.long_name = "U component of the wind" + uwnd.standard_name = "U component of the wind" + uwnd.coordinates = "lon lat" + + # NOTE: we need to store negative numbers here, gasp + # -32768 to 32767 so -98 to 98 mps + vwnd = nc.createVariable( + "vwnd", np.int16, ("time", "lat", "lon"), fill_value=32767 + ) + vwnd.scale_factor = 0.003 + vwnd.units = "meters per second" + vwnd.long_name = "V component of the wind" + vwnd.standard_name = "V component of the wind" + vwnd.coordinates = "lon lat" + + # 0->65535 0 to 327.675 + p01m = nc.createVariable( + "p01m", np.uint16, ("time", "lat", "lon"), fill_value=65535 + ) + p01m.units = "mm" + p01m.scale_factor = 0.005 + p01m.long_name = "Precipitation" + p01m.standard_name = "Precipitation" + p01m.coordinates = "lon lat" + p01m.description = "Precipitation accumulation for the hour valid time" + + # NOTE: Condensation is + and Evapration is - + # -128 to 127 for -25 to 25 + ncv = nc.createVariable( + "evap", np.int8, ("time", "lat", "lon"), fill_value=127 + ) + ncv.units = "mm" + ncv.scale_factor = 0.4 + ncv.long_name = "Evaporation" + ncv.standard_name = "Evaporation" + ncv.coordinates = "lon lat" + ncv.description = "Evaporation for the hour valid time" + + # 0 -> 65535 so 0 to 1966 + ncv = nc.createVariable( + "rsds", np.uint16, ("time", "lat", "lon"), fill_value=65535 + ) + ncv.units = "W m-2" + ncv.scale_factor = 0.03 + ncv.long_name = "surface_downwelling_shortwave_flux_in_air" + ncv.standard_name = "surface_downwelling_shortwave_flux_in_air" + ncv.coordinates = "lon lat" + ncv.description = "Global Shortwave Irradiance" + + # 0->255 [213 333] + ncv = nc.createVariable( + "soilt", + np.uint8, + ("time", "soil_level", "lat", "lon"), + fill_value=255, + ) + ncv.units = "K" + ncv.add_offset = 213.0 + ncv.scale_factor = 0.5 + ncv.long_name = "Soil Temperature" + ncv.standard_name = "Soil Temperature" + ncv.coordinates = "lon lat" + + # 0->255 [0 0.8] Hope this works? + ncv = nc.createVariable( + "soilm", + np.uint8, + ("time", "soil_level", "lat", "lon"), + fill_value=255, + ) + ncv.units = "m^3 m^-3" + ncv.scale_factor = 0.0031 + ncv.long_name = "Volumetric Soil Moisture" + ncv.standard_name = "Volumetric Soil Moisture" + ncv.coordinates = "lon lat" + + nc.close() + + +def main(argv): + """Go Main Go""" + year = int(argv[1]) + init_year(datetime.datetime(year, 1, 1)) + + +if __name__ == "__main__": + main(sys.argv)