From 36e5472d13884251ec5c27fa082925930236583f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Longchamp?= <97044425+regislon@users.noreply.github.com> Date: Tue, 10 Oct 2023 07:58:02 +0200 Subject: [PATCH 1/3] fix(schema validation) : pydantic migratation --- disdrodb/l0/check_configs.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/disdrodb/l0/check_configs.py b/disdrodb/l0/check_configs.py index 48aa3479..24c322e3 100644 --- a/disdrodb/l0/check_configs.py +++ b/disdrodb/l0/check_configs.py @@ -4,7 +4,7 @@ from typing import List, Optional, Union import numpy as np -from pydantic import BaseModel, ValidationError, validator +from pydantic import BaseModel, ValidationError, field_validator from disdrodb.l0.standards import ( available_sensor_name, @@ -128,21 +128,21 @@ class NetcdfEncodingSchema(BaseModel): chunksizes: Optional[Union[int, List[int]]] # if contiguous=False, chunksizes specified, otherwise should be not ! - @validator("chunksizes") + @field_validator("chunksizes") def check_chunksizes(cls, v, values): if not values.get("contiguous") and not v: raise ValueError("'chunksizes' must be defined if 'contiguous' is False") return v # if contiguous = True, then zlib must be set to False - @validator("zlib") + @field_validator("zlib") def check_zlib(cls, v, values): if values.get("contiguous") and v: raise ValueError("'zlib' must be set to False if 'contiguous' is True") return v # if contiguous = True, then fletcher32 must be set to False - @validator("fletcher32") + @field_validator("fletcher32") def check_fletcher32(cls, v, values): if values.get("contiguous") and v: raise ValueError("'fletcher32' must be set to False if 'contiguous' is True") @@ -204,7 +204,7 @@ class RawDataFormatSchema(BaseModel): dimension_order: Optional[List[str]] n_values: Optional[int] - @validator("data_range", pre=True) + @field_validator("data_range", pre=True) def check_list_length(cls, value): if value: if len(value) != 2: @@ -224,6 +224,7 @@ def check_raw_data_format(sensor_name: str) -> None: # check that the second level of the dictionary match the schema for key, value in data.items(): + print(key, value) schema_error( object_to_validate=value, schema=RawDataFormatSchema, From 50a0fe1cf30ef9091ffb9ad85cda2a698f223f2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Longchamp?= <97044425+regislon@users.noreply.github.com> Date: Tue, 10 Oct 2023 09:23:58 +0200 Subject: [PATCH 2/3] fix(pydantic) : migrate field_validator to model_validator --- disdrodb/l0/check_configs.py | 43 ++++++++++++++++------------- disdrodb/l0/issue.py | 6 ++-- disdrodb/tests/test_config_files.py | 4 +-- 3 files changed, 28 insertions(+), 25 deletions(-) diff --git a/disdrodb/l0/check_configs.py b/disdrodb/l0/check_configs.py index 24c322e3..eee8889e 100644 --- a/disdrodb/l0/check_configs.py +++ b/disdrodb/l0/check_configs.py @@ -4,7 +4,7 @@ from typing import List, Optional, Union import numpy as np -from pydantic import BaseModel, ValidationError, field_validator +from pydantic import BaseModel, ValidationError, field_validator, model_validator from disdrodb.l0.standards import ( available_sensor_name, @@ -128,25 +128,31 @@ class NetcdfEncodingSchema(BaseModel): chunksizes: Optional[Union[int, List[int]]] # if contiguous=False, chunksizes specified, otherwise should be not ! - @field_validator("chunksizes") - def check_chunksizes(cls, v, values): - if not values.get("contiguous") and not v: + @model_validator(mode="before") + def check_chunksizes_and_zlib(cls, values): + contiguous = values.get("contiguous") + chunksizes = values.get("chunksizes") + if not contiguous and not chunksizes: raise ValueError("'chunksizes' must be defined if 'contiguous' is False") - return v + return values # if contiguous = True, then zlib must be set to False - @field_validator("zlib") - def check_zlib(cls, v, values): - if values.get("contiguous") and v: + @model_validator(mode="before") + def check_contiguous_and_zlib(cls, values): + contiguous = values.get("contiguous") + zlib = values.get("zlib") + if contiguous and zlib: raise ValueError("'zlib' must be set to False if 'contiguous' is True") - return v + return values # if contiguous = True, then fletcher32 must be set to False - @field_validator("fletcher32") - def check_fletcher32(cls, v, values): - if values.get("contiguous") and v: + @model_validator(mode="before") + def check_contiguous_and_fletcher32(cls, values): + contiguous = values.get("contiguous") + fletcher32 = values.get("fletcher32") + if contiguous and fletcher32: raise ValueError("'fletcher32' must be set to False if 'contiguous' is True") - return v + return values def check_l0b_encoding(sensor_name: str) -> None: @@ -199,12 +205,12 @@ class RawDataFormatSchema(BaseModel): n_decimals: Optional[int] n_naturals: Optional[int] data_range: Optional[List[float]] - nan_flags: Optional[str] - valid_values: Optional[List[float]] - dimension_order: Optional[List[str]] - n_values: Optional[int] + nan_flags: Optional[Union[int, str]] = None + valid_values: Optional[List[float]] = None + dimension_order: Optional[List[str]] = None + n_values: Optional[int] = None - @field_validator("data_range", pre=True) + @field_validator("data_range") def check_list_length(cls, value): if value: if len(value) != 2: @@ -224,7 +230,6 @@ def check_raw_data_format(sensor_name: str) -> None: # check that the second level of the dictionary match the schema for key, value in data.items(): - print(key, value) schema_error( object_to_validate=value, schema=RawDataFormatSchema, diff --git a/disdrodb/l0/issue.py b/disdrodb/l0/issue.py index eea0c071..6b65f4c6 100644 --- a/disdrodb/l0/issue.py +++ b/disdrodb/l0/issue.py @@ -250,8 +250,7 @@ def check_issue_file(fpath: str) -> None: def _write_issue_docs(f): """Provide template for issue.yml""" - f.write( - """# This file is used to store timesteps/time periods with wrong/corrupted observation. + f.write("""# This file is used to store timesteps/time periods with wrong/corrupted observation. # The specified timesteps are dropped during the L0 processing. # The time format used is the isoformat : YYYY-mm-dd HH:MM:SS. # The 'timesteps' key enable to specify the list of timesteps to be discarded. @@ -267,8 +266,7 @@ def _write_issue_docs(f): # - ['2018-08-01 12:00:00', '2018-08-01 14:00:00'] # - ['2018-08-01 15:44:30', '2018-08-01 15:59:31'] # - ['2018-08-02 12:44:30', '2018-08-02 12:59:31'] \n -""" - ) +""") return None diff --git a/disdrodb/tests/test_config_files.py b/disdrodb/tests/test_config_files.py index e9e03951..2de7a2a2 100644 --- a/disdrodb/tests/test_config_files.py +++ b/disdrodb/tests/test_config_files.py @@ -3,7 +3,7 @@ import pytest import yaml -from pydantic import BaseModel, validator +from pydantic import BaseModel, field_validator # Define the pydantic models for *.bins.yaml config files @@ -13,7 +13,7 @@ class raw_data_format_2n_level(BaseModel): n_decimals: Optional[int] data_range: Optional[list] - @validator("data_range", pre=True) + @field_validator("data_range") def check_list_length(cls, value): if value: if len(value) != 2: From 7970e43a88a95cbb4153e9e3d1a75821dee9b388 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 10 Oct 2023 07:24:24 +0000 Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- disdrodb/l0/issue.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/disdrodb/l0/issue.py b/disdrodb/l0/issue.py index 6b65f4c6..eea0c071 100644 --- a/disdrodb/l0/issue.py +++ b/disdrodb/l0/issue.py @@ -250,7 +250,8 @@ def check_issue_file(fpath: str) -> None: def _write_issue_docs(f): """Provide template for issue.yml""" - f.write("""# This file is used to store timesteps/time periods with wrong/corrupted observation. + f.write( + """# This file is used to store timesteps/time periods with wrong/corrupted observation. # The specified timesteps are dropped during the L0 processing. # The time format used is the isoformat : YYYY-mm-dd HH:MM:SS. # The 'timesteps' key enable to specify the list of timesteps to be discarded. @@ -266,7 +267,8 @@ def _write_issue_docs(f): # - ['2018-08-01 12:00:00', '2018-08-01 14:00:00'] # - ['2018-08-01 15:44:30', '2018-08-01 15:59:31'] # - ['2018-08-02 12:44:30', '2018-08-02 12:59:31'] \n -""") +""" + ) return None