Skip to content

Commit

Permalink
feat(modelmetadata): Make running hours a property of the model
Browse files Browse the repository at this point in the history
  • Loading branch information
devsjc committed Jan 22, 2025
1 parent 52ca076 commit 01a0f47
Show file tree
Hide file tree
Showing 14 changed files with 75 additions and 39 deletions.
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
[![pypi badge](https://img.shields.io/pypi/v/nwp-consumer?&color=086788)](https://pypi.org/project/nwp-consumer)
[![documentation badge](https://img.shields.io/badge/docs-latest-333333)](https://openclimatefix.github.io/nwp-consumer/)
[![contributors badge](https://img.shields.io/github/contributors/openclimatefix/nwp-consumer?color=FFFFFF)](https://github.com/openclimatefix/nwp-consumer/graphs/contributors)
[![workflows badge](https://img.shields.io/github/actions/workflow/status/openclimatefix/nwp-consumer/branch_ci.yml?branch=main&color=FFD053)](https://github.com/openclimatefix/nwp-consumer/actions/workflows/ci.yml)
[![workflows badge](https://img.shields.io/github/actions/workflow/status/openclimatefix/nwp-consumer/branch_ci.yml?branch=main&color=FFD053)](https://github.com/openclimatefix/nwp-consumer/actions/workflows/branch_ci.yml)
[![ease of contribution: easy](https://img.shields.io/badge/ease%20of%20contribution:%20easy-32bd50)](https://github.com/openclimatefix/ocf-meta-repo?tab=readme-ov-file#overview-of-ocfs-nowcasting-repositories)

Some renewables, such as solar and wind, generate power according to the weather conditions.
Expand Down Expand Up @@ -102,6 +102,8 @@ parameter modifications to the model's expected coordinates in its metadata for
repository.

## Development

### Linting and static type checking

This project uses [MyPy](https://mypy.readthedocs.io/en/stable/) for static type checking
and [Ruff](https://docs.astral.sh/ruff/) for linting.
Expand Down Expand Up @@ -151,7 +153,7 @@ src and flat layouts.

## Contributing and community

[![issues badge](https://img.shields.io/github/issues/openclimatefix/ocf-template?color=FFAC5F)](https://github.com/openclimatefix/ocf-template/issues?q=is%3Aissue+is%3Aopen+sort%3Aupdated-desc)
[![issues badge](https://img.shields.io/github/issues/openclimatefix/nwp-consumer?color=FFAC5F)](https://github.com/openclimatefix/nwp-consumer/issues?q=is%3Aissue+is%3Aopen+sort%3Aupdated-desc)

- PRs are welcome! See the [Organisation Profile](https://github.com/openclimatefix) for details on contributing
- Find out about our other projects in the [OCF Meta Repo](https://github.com/openclimatefix/ocf-meta-repo)
Expand Down
21 changes: 20 additions & 1 deletion src/nwp_consumer/internal/entities/coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,9 @@ class NWPDimensionCoordinateMap:
"""The longitude coordinates of the forecast grid in degrees. """

x: list[float] | None = None
"""X coordinates of an OSGB (or other alternative projection) grid."""
y: list[float] | None = None
# These are the x and y osgb that the UKV model uses. They are not used in the other models
"""Y coordinates of an OSGB (or other alternative projection) grid."""

def __post_init__(self) -> None:
"""Rigidly set input value ordering and precision."""
Expand Down Expand Up @@ -208,6 +209,20 @@ def from_pandas(
"Longitude coordinates should run from -180 -> 180. "
"Modify the coordinate in the source data to be in ascending order.",
))
if "x" in pd_indexes \
and pd_indexes["x"].values[0] > pd_indexes["x"].values[-1]:
return Failure(ValueError(
"Cannot create NWPDimensionCoordinateMap instance from pandas indexes "
"as the x values are not in ascending order. "
"Modify the coordinate in the source data to be in ascending order.",
))
if "y" in pd_indexes \
and pd_indexes["y"].values[0] > pd_indexes["y"].values[-1]:
return Failure(ValueError(
"Cannot create NWPDimensionCoordinateMap instance from pandas indexes "
"as the y values are not in ascending order. "
"Modify the coordinate in the source data to be in ascending order.",
))

# Convert the pandas Index objects to lists of the appropriate types
return Success(
Expand Down Expand Up @@ -235,6 +250,10 @@ def from_pandas(
if "latitude" in pd_indexes else None,
longitude=pd_indexes["longitude"].to_list() \
if "longitude" in pd_indexes else None,
x=pd_indexes["x"].to_list() \
if "x" in pd_indexes else None,
y=pd_indexes["y"].to_list() \
if "y" in pd_indexes else None,
),
)

Expand Down
30 changes: 30 additions & 0 deletions src/nwp_consumer/internal/entities/modelmetadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@
"""

import dataclasses
import datetime as dt
import logging

import numpy as np
import pandas as pd

from .coordinates import NWPDimensionCoordinateMap
from .parameters import Parameter
Expand Down Expand Up @@ -55,6 +57,14 @@ class ModelMetadata:
Which prints grid data from the grib file.
"""

running_hours: list[int]
"""The hours of the day that the model runs.
Raw Repositories that provide data for the model may not have every running time.
In this instance, use `with_running_hours` to specify the running hours specific
to the repository.
"""

chunk_count_overrides: dict[str, int] = dataclasses.field(default_factory=dict)
"""Mapping of dimension names to the desired number of chunks in that dimension.
Expand Down Expand Up @@ -117,6 +127,19 @@ def with_chunk_count_overrides(self, overrides: dict[str, int]) -> "ModelMetadat
)
return dataclasses.replace(self, chunk_count_overrides=overrides)

def with_running_hours(self, hours: list[int]) -> "ModelMetadata":
    """Return a copy of this metadata with its running hours replaced.

    The object the method is called on is left untouched; a new metadata
    object is built with every other field carried over unchanged.

    Args:
        hours: The hours of the day to record as the model's running hours.

    Returns:
        A new metadata object whose `running_hours` is the given list.
    """
    updated = dataclasses.replace(self, running_hours=hours)
    return updated

def month_its(self, year: int, month: int) -> list[dt.datetime]:
"""Generate all init times for a given month."""
days = pd.Period(f"{year}-{month}").days_in_month
its: list[dt.datetime] = []
for day in range(1, days + 1):
for hour in self.running_hours:
its.append(dt.datetime(year, month, day, hour, tzinfo=dt.UTC))
return its

class Models:
"""Namespace containing known models."""

Expand Down Expand Up @@ -149,6 +172,7 @@ class Models:
latitude=[float(f"{lat / 10:.2f}") for lat in range(900, -900 - 1, -1)],
longitude=[float(f"{lon / 10:.2f}") for lon in range(-1800, 1800 + 1, 1)],
),
running_hours=[0, 6, 12, 18],
)
"""ECMWF's High Resolution Integrated Forecast System."""

Expand All @@ -168,6 +192,7 @@ class Models:
latitude=[v/10 for v in range(900, -900, -1)],
longitude=[v/10 for v in range(-1800, 1800, 1)],
),
running_hours=[0, 12],
)
"""Summary statistics from ECMWF's Ensemble Forecast System."""

Expand Down Expand Up @@ -195,6 +220,7 @@ class Models:
latitude=[v/10 for v in range(900, -900, -1)],
longitude=[v/10 for v in range(-1800, 1800, 1)],
),
running_hours=[0, 6, 12, 18],
)
"""Full ensemble data from ECMWF's Ensemble Forecast System."""

Expand Down Expand Up @@ -226,6 +252,7 @@ class Models:
latitude=[float(lat) for lat in range(90, -90 - 1, -1)],
longitude=[float(lon) for lon in range(-180, 180 + 1, 1)],
),
running_hours=[0, 6, 12, 18],
)
"""NCEP's Global Forecast System."""

Expand Down Expand Up @@ -261,6 +288,7 @@ class Models:
],
# TODO: Change to -180 -> 180
),
running_hours=[0, 6, 12, 18],
)
"""MetOffice's Unified Model, in the Global configuration, at a resolution of 17km."""

Expand Down Expand Up @@ -294,6 +322,7 @@ class Models:
for lon in np.arange(-179.929687, 179.929688 + 0.140625, 0.140625)
],
),
running_hours=[0, 6, 12, 18],
)
"""MetOffice's Unified Model, in the Global configuration, at a resolution of 10km."""

Expand Down Expand Up @@ -321,6 +350,7 @@ class Models:
x=list(range(0, 455)),
y=list(range(0, 639)),
),
running_hours=list(range(0, 24)),
)
"""MetOffice's Unified Model in the UKV configuration, at a resolution of 2km"""

24 changes: 4 additions & 20 deletions src/nwp_consumer/internal/entities/repometadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
import datetime as dt
import os

import pandas as pd

from .modelmetadata import ModelMetadata
from .postprocess import PostProcessOptions

Expand All @@ -42,11 +40,6 @@ class RawRepositoryMetadata:
but rather are defined by pre-selected agreements with the provider.
"""

running_hours: list[int]
"""The running hours of the model.
Most NWP models are run at fixed intervals throughout the day."""

delay_minutes: int
"""The approximate model delay in minutes.
Expand All @@ -72,31 +65,23 @@ class RawRepositoryMetadata:
available_models: dict[str, ModelMetadata]
"""A dictionary of available models and their metadata."""

def determine_latest_it_from(self, t: dt.datetime) -> dt.datetime:
def determine_latest_it_from(self, t: dt.datetime, running_hours: list[int]) -> dt.datetime:
"""Determine the latest available initialization time from a given time.

The given time has its minutes, seconds and microseconds zeroed and is then
shifted back by the repository's `delay_minutes`. The result is stepped back
one hour at a time until its hour is one of the running hours.

Args:
    t: The time from which to determine the latest initialization time.
    running_hours: A list of hours at which the model runs each day.

Returns:
    The latest available initialization time prior to the given time.
"""
# NOTE(review): if `delay_minutes` is not a multiple of 60, the value computed
# here (and therefore the returned init time) keeps a non-zero minute component.
it = t.replace(minute=0, second=0, microsecond=0) \
- dt.timedelta(minutes=self.delay_minutes)
# NOTE(review): this loop never terminates when no hour of the day is present
# in the running hours (e.g. an empty list) - confirm callers always pass
# at least one valid hour.
while it.hour not in self.running_hours:
while it.hour not in running_hours:
it -= dt.timedelta(hours=1)

return it

def month_its(self, year: int, month: int) -> list[dt.datetime]:
"""Generate all init times for a given month."""
days = pd.Period(f"{year}-{month}").days_in_month
its: list[dt.datetime] = []
for day in range(1, days + 1):
for hour in self.running_hours:
its.append(dt.datetime(year, month, day, hour, tzinfo=dt.UTC))
return its

def missing_required_envs(self) -> list[str]:
"""Get a list of unset required environment variables.
Expand All @@ -110,8 +95,7 @@ def __str__(self) -> str:
pretty: str = "".join((
"Model Repository: ",
f"\n\t{self.name} ({'archive' if self.is_archive else 'live/rolling'} dataset.)",
f"\n\truns at: {self.running_hours} hours ",
"(available after {self.delay_minutes} minute delay)",
f"\n\t\t(available after {self.delay_minutes} minute delay)",
"\nEnvironment variables:",
"\n\tRequired:",
"\n".join(f"\t\t{var}" for var in self.required_env),
Expand Down
1 change: 1 addition & 0 deletions src/nwp_consumer/internal/entities/test_modelmetadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def test_with_region(self) -> None:
latitude=[float(f"{lat / 10:.2f}") for lat in range(900, -900 - 1, -1)],
longitude=[float(f"{lon / 10:.2f}") for lon in range(-1800, 1800 + 1, 1)],
),
running_hours=[0, 6, 12, 18],
)

@dataclasses.dataclass
Expand Down
3 changes: 1 addition & 2 deletions src/nwp_consumer/internal/entities/test_repometadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ class TestRawRepositoryMetadata(unittest.TestCase):
name="test",
is_archive=False,
is_order_based=False,
running_hours=[0, 6, 12, 18],
delay_minutes=60,
required_env=["TEST"],
optional_env={"TEST": "test"},
Expand Down Expand Up @@ -46,7 +45,7 @@ class TestCase:

for test in tests:
with self.subTest(name=test.name):
result = self.metadata.determine_latest_it_from(test.t)
result = self.metadata.determine_latest_it_from(test.t, [0, 6, 12, 18])
self.assertEqual(result, test.expected)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,17 +114,17 @@ def repository() -> entities.RawRepositoryMetadata:
name="CEDA",
is_archive=True,
is_order_based=False,
running_hours=[0, 12], # 6 and 18 exist, but are lacking variables
delay_minutes=(60 * 24 * 7) + (60 * 12), # 7.5 days
max_connections=20,
required_env=["CEDA_FTP_USER", "CEDA_FTP_PASS"],
optional_env={},
postprocess_options=entities.PostProcessOptions(),
available_models={
"default": entities.Models.MO_UM_GLOBAL_17KM.with_chunk_count_overrides({
"default": entities.Models.MO_UM_GLOBAL_17KM\
.with_chunk_count_overrides({
"latitude": 8,
"longitude": 8,
}),
}).with_running_hours([0, 12]), # 6 and 18 exist, but are lacking variables
},
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,6 @@ def repository() -> entities.RawRepositoryMetadata:
name="ECMWF-MARS",
is_archive=True,
is_order_based=False,
running_hours=[0, 12],
delay_minutes=(60 * 26), # 1 day, plus leeway
max_connections=20,
required_env=[
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ def repository() -> entities.RawRepositoryMetadata:
name="ECMWF-Realtime-S3",
is_archive=False,
is_order_based=True,
running_hours=[0, 6, 12, 18],
delay_minutes=(60 * 6), # 6 hours
max_connections=100,
required_env=[
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,14 +74,10 @@ def __init__(self, order_id: str, api_key: str) -> None:
@override
def repository() -> entities.RawRepositoryMetadata:

requested_model: str = os.getenv("MODEL", default="default")
running_hours = list(range(0, 24)) if requested_model == "um-ukv-2km" else [0, 12]

return entities.RawRepositoryMetadata(
name="MetOffice-Weather-Datahub",
is_archive=False,
is_order_based=True,
running_hours=running_hours,
delay_minutes=60,
max_connections=10,
required_env=["METOFFICE_API_KEY", "METOFFICE_ORDER_ID"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ def repository() -> entities.RawRepositoryMetadata:
name="NOAA-GFS-S3",
is_archive=False,
is_order_based=False,
running_hours=[0, 6, 12, 18],
delay_minutes=(60 * 5), # 5 hours
max_connections=100,
required_env=[],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ def test__download(self) -> None:
self.assertIsInstance(auth_result, Success, msg=f"{auth_result!s}")
c = auth_result.unwrap()

test_it = c.repository().determine_latest_it_from(dt.datetime.now(tz=dt.UTC))
test_it = c.repository().determine_latest_it_from(
dt.datetime.now(tz=dt.UTC),
c.model().running_hours,
)

dl_result = c._download(
f"{c.request_url}/agl_u-component-of-wind-surface-adjusted_10.0_{test_it:%Y%m%d%H}_1/data",
Expand Down
2 changes: 1 addition & 1 deletion src/nwp_consumer/internal/services/_dummy_adaptors.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ def repository() -> entities.RawRepositoryMetadata:
name="ACME-Test-Models",
is_archive=False,
is_order_based=False,
running_hours=[0, 6, 12, 18],
delay_minutes=60,
max_connections=4,
required_env=[],
Expand All @@ -50,6 +49,7 @@ def model() -> entities.ModelMetadata:
latitude=np.linspace(90, -90, 721).tolist(),
longitude=np.linspace(-180, 179.8, 1440).tolist(),
),
running_hours=[0, 6, 12, 18],
)


Expand Down
9 changes: 7 additions & 2 deletions src/nwp_consumer/internal/services/consumer_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,11 +138,16 @@ def _create_suitable_store(
its: list[dt.datetime] = []
match period:
case _ if period is None:
its = [repository_metadata.determine_latest_it_from(dt.datetime.now(tz=dt.UTC))]
its = [
repository_metadata.determine_latest_it_from(
t=dt.datetime.now(tz=dt.UTC),
running_hours=model_metadata.running_hours,
),
]
case single_it if isinstance(period, dt.datetime):
its = [single_it] # type: ignore
case multiple_its if isinstance(period, dt.date):
its = repository_metadata.month_its(
its = model_metadata.month_its(
year=multiple_its.year,
month=multiple_its.month,
)
Expand Down

0 comments on commit 01a0f47

Please sign in to comment.