From 7dc926d01cdc8943d3f0cf02a1ba9dd609315558 Mon Sep 17 00:00:00 2001 From: Jannis Born Date: Thu, 13 Jun 2024 12:55:42 +0200 Subject: [PATCH] Bumping pydantic >= 2.0.0 (#247) * chore: bump pydantic to >2.0 * chore: black * chore: mypy checks * chore : remove duplication of options/examples * chore: add pydantic_settings * fix: pydantic error * chore: mypy * fix: mock on subclass __parameters__ * chore * fix: tests for registry * chore: black * fix: ignore mypy * chore: black * fix: move deps from dev to main requirements file * fix: absolute imports before relative --------- Co-authored-by: fiskrt --- dev_requirements.txt | 3 +- requirements.txt | 4 ++- setup.cfg | 3 ++ src/gt4sd/__init__.py | 2 +- src/gt4sd/algorithms/core.py | 2 -- src/gt4sd/algorithms/registry.py | 14 +++++++++- src/gt4sd/algorithms/tests/test_registry.py | 31 +++------------------ src/gt4sd/configuration.py | 8 ++---- src/gt4sd/properties/core.py | 14 ++++++---- src/gt4sd/properties/molecules/core.py | 30 +++++++++----------- src/gt4sd/properties/proteins/core.py | 4 +-- src/gt4sd/tests/utils.py | 7 ++--- 12 files changed, 52 insertions(+), 70 deletions(-) diff --git a/dev_requirements.txt b/dev_requirements.txt index f2ec4d4f0..f1548e8d2 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -12,5 +12,4 @@ pytest==6.2.5 pytest-cov==2.10.1 sphinx>=5 sphinx-autodoc-typehints==1.11.1 -jinja2<3.1.0 -sphinx_rtd_theme==0.5.1 \ No newline at end of file +jinja2<3.1.0 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index ed71d4c78..30735523c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,7 +21,7 @@ numpy>=1.16.5,<1.24.0 pandas<=2.0.3 protobuf<3.20 pyarrow>=8.0.0 -pydantic>=1.7.3,<2.0.0 +pydantic>=2.0.0 pymatgen>=2022.11.7 PyTDC==0.3.7 pytorch_lightning<=1.7.7 @@ -49,3 +49,5 @@ transformers>=4.22.0,<=4.24.0 typing_extensions>=3.7.4.3 wheel>=0.26 xgboost>=1.7.6 +sphinx_rtd_theme==0.5.1 +pydantic-settings>=2.0.0 diff --git a/setup.cfg b/setup.cfg index 1cb92f78a..7bcdf6d75 100644 --- a/setup.cfg +++ b/setup.cfg @@ -290,4 +290,7 @@ ignore_missing_imports = True ignore_missing_imports = True [mypy-xgboost.*] +ignore_missing_imports = True + +[mypy-pydantic_settings.*] ignore_missing_imports = True \ No newline at end of file diff --git a/src/gt4sd/__init__.py b/src/gt4sd/__init__.py index 609eb33e1..bacfa5e8e 100644 --- a/src/gt4sd/__init__.py +++ b/src/gt4sd/__init__.py @@ -23,7 +23,7 @@ # """Module initialization.""" -__version__ = "1.4.1" +__version__ = "1.4.2" __name__ = "gt4sd" # NOTE: configure SSL to allow unverified contexts by default diff --git a/src/gt4sd/algorithms/core.py b/src/gt4sd/algorithms/core.py index 266da321f..98321e299 100644 --- a/src/gt4sd/algorithms/core.py +++ b/src/gt4sd/algorithms/core.py @@ -912,8 +912,6 @@ def get_configuration_class_with_attributes( class PropertyPredictor(ABC, Generic[S, U]): - """TODO: Might be deprecated in future release.""" - def __init__(self, context: U) -> None: """Property predictor to investigate items. diff --git a/src/gt4sd/algorithms/registry.py b/src/gt4sd/algorithms/registry.py index e3af6c8b9..20927e1ad 100644 --- a/src/gt4sd/algorithms/registry.py +++ b/src/gt4sd/algorithms/registry.py @@ -28,7 +28,17 @@ from dataclasses import dataclass as vanilla_dataclass from dataclasses import field, make_dataclass from functools import WRAPPER_ASSIGNMENTS, update_wrapper -from typing import Any, Callable, ClassVar, Dict, List, NamedTuple, Optional, Type +from typing import ( + Any, + Callable, + ClassVar, + Dict, + List, + NamedTuple, + Optional, + Type, + TypeVar, +) import pydantic @@ -185,6 +195,8 @@ def decorator( ), ], # type: ignore ) + # NOTE: Needed to circumvent a pydantic TypeError: Parameter list to Generic[...] cannot be empty + VanillaConfiguration.__parameters__ = (TypeVar("T"),) # type: ignore # NOTE: Duplicate call necessary for pydantic >=1.10.* - see https://github.com/pydantic/pydantic/issues/4695 PydanticConfiguration: Type[AlgorithmConfiguration] = dataclass( # type: ignore VanillaConfiguration diff --git a/src/gt4sd/algorithms/tests/test_registry.py b/src/gt4sd/algorithms/tests/test_registry.py index 123a51241..286d99c33 100644 --- a/src/gt4sd/algorithms/tests/test_registry.py +++ b/src/gt4sd/algorithms/tests/test_registry.py @@ -58,19 +58,15 @@ def test_list_available_local_via_S3SyncError(mock_wrong_s3_env): def test_inherited_validation(): Config = next(iter(ApplicationsRegistry.applications.values())).configuration_class - with pytest.raises( - ValidationError, match="algorithm_version\n +none is not an allowed value" - ): + with pytest.raises(ValidationError, match="should be a valid string"): Config(algorithm_version=None) # type: ignore - # NOTE: values convertible to string will not raise! - Config(algorithm_version=5) # type: ignore + with pytest.raises(ValidationError, match="should be a valid string"): + Config(algorithm_version=5) # type: ignore def test_validation(): - with pytest.raises( - ValidationError, match="batch_size\n +value is not a valid integer" - ): + with pytest.raises(ValidationError, match="should be a valid integer"): ApplicationsRegistry.get_configuration_instance( algorithm_type="conditional_generation", domain="materials", @@ -80,25 +76,6 @@ def test_validation(): ) -def test_pickable_wrapped_configurations(): - # https://github.com/samuelcolvin/pydantic/issues/2111 - Config = next(iter(ApplicationsRegistry.applications.values())).configuration_class - restored_obj = assert_pickable(Config(algorithm_version="test")) - - # wrong type assignment, but we did not configure it to raise here: - restored_obj.algorithm_version = object - # ensure the restored dataclass is still a pydantic dataclass (mimic validation) - _, optional_errors = restored_obj.__pydantic_model__.__fields__.get( - "algorithm_version" - ).validate( - restored_obj.algorithm_version, - restored_obj.__dict__, - loc="algorithm_version", - cls=restored_obj.__class__, - ) - assert optional_errors is not None - - def test_multiple_registration(): class OtherAlgorithm(GeneratorAlgorithm): pass diff --git a/src/gt4sd/configuration.py b/src/gt4sd/configuration.py index b50876477..ac18e4a46 100644 --- a/src/gt4sd/configuration.py +++ b/src/gt4sd/configuration.py @@ -27,8 +27,7 @@ import os from functools import lru_cache from typing import Dict, Optional, Set - -from pydantic import BaseSettings +from pydantic_settings import BaseSettings, SettingsConfigDict from .s3 import GT4SDS3Client, S3SyncError, sync_folder_with_s3, upload_file_to_s3 @@ -65,10 +64,7 @@ class GT4SDConfiguration(BaseSettings): gt4sd_s3_secure_hub: bool = True gt4sd_s3_bucket_hub_algorithms: str = "gt4sd-cos-hub-algorithms-artifacts" gt4sd_s3_bucket_hub_properties: str = "gt4sd-cos-hub-properties-artifacts" - - class Config: - # immutable and in turn hashable, that is required for lru_cache - frozen = True + model_config = SettingsConfigDict(frozen=True) @staticmethod @lru_cache(maxsize=None) diff --git a/src/gt4sd/properties/core.py b/src/gt4sd/properties/core.py index 821ebede0..7194c00bc 100644 --- a/src/gt4sd/properties/core.py +++ b/src/gt4sd/properties/core.py @@ -47,19 +47,21 @@ class S3Parameters(PropertyPredictorParameters): algorithm_type: str = "prediction" domain: DomainSubmodule = Field( - ..., example="molecules", description="Submodule of gt4sd.properties" + ..., examples=["molecules"], description="Submodule of gt4sd.properties" + ) + algorithm_name: str = Field( + ..., examples=["MCA"], description="Name of the algorithm" ) - algorithm_name: str = Field(..., example="MCA", description="Name of the algorithm") algorithm_version: str = Field( - ..., example="v0", description="Version of the algorithm" + ..., examples=["v0"], description="Version of the algorithm" ) - algorithm_application: str = Field(..., example="Tox21") + algorithm_application: str = Field(..., examples=["Tox21"]) class ApiTokenParameters(PropertyPredictorParameters): api_token: str = Field( ..., - example="apk-c9db......", + examples=["apk-c9db......"], description="The API token/key to access the service", ) @@ -68,7 +70,7 @@ class IpAdressParameters(PropertyPredictorParameters): host_ip: str = Field( ..., - example="xx.xx.xxx.xxx", + examples=["xx.xx.xxx.xxx"], description="The host IP address to access the service", ) diff --git a/src/gt4sd/properties/molecules/core.py b/src/gt4sd/properties/molecules/core.py index 07c51f1a3..83ec3dcab 100644 --- a/src/gt4sd/properties/molecules/core.py +++ b/src/gt4sd/properties/molecules/core.py @@ -59,7 +59,7 @@ from paccmann_generator.drug_evaluators import OrganDB as _OrganTox from paccmann_generator.drug_evaluators import SCScore from paccmann_generator.drug_evaluators import Tox21 as _Tox21 -from pydantic import Field +from pydantic import ConfigDict, Field from tdc import Oracle from tdc.metadata import download_receptor_oracle_name @@ -119,12 +119,12 @@ class ScscoreConfiguration(PropertyPredictorParameters): class SimilaritySeedParameters(PropertyPredictorParameters): - smiles: str = Field(..., example="c1ccccc1") + smiles: str = Field(..., examples=["c1ccccc1"]) fp_key: str = "ECFP4" class ActivityAgainstTargetParameters(PropertyPredictorParameters): - target: str = Field(..., example="drd2", description="name of the target.") + target: str = Field(..., examples=["drd2"], description="name of the target.") class AskcosParameters(IpAdressParameters): @@ -136,9 +136,8 @@ class Output(str, Enum): output: Output = Field( default=Output.plausability, - example=Output.synthesizability, + examples=[Output.synthesizability], description="Main output return type from ASKCOS", - options=["plausibility", "num_step", "synthesizability", "price"], ) save_json: bool = Field(default=False) file_name: str = Field(default="tree_builder_result.json") @@ -159,10 +158,7 @@ class Output(str, Enum): min_chempop_products: int = Field(default=5) filter_threshold: float = Field(default=0.1) return_first: str = Field(default="true") - - # Convert enum items back to strings - class Config: - use_enum_values = True + model_config = ConfigDict(use_enum_values=True) class MoleculeOneParameters(ApiTokenParameters): @@ -174,9 +170,8 @@ class DockingTdcParameters(PropertyPredictorParameters): # To dock against a receptor defined via TDC target: str = Field( ..., - example="1iep_docking", + examples=download_receptor_oracle_name, description="Target for docking, provided via TDC", - options=download_receptor_oracle_name, ) @@ -184,12 +179,14 @@ class DockingParameters(PropertyPredictorParameters): # To dock against a user-provided receptor name: str = Field(default="pyscreener") receptor_pdb_file: str = Field( - example="/tmp/2hbs.pdb", description="Path to receptor PDB file" + examples=["/tmp/2hbs.pdb"], description="Path to receptor PDB file" ) box_center: List[int] = Field( - example=[15.190, 53.903, 16.917], description="Docking box center" + examples=[[15.190, 53.903, 16.917]], description="Docking box center" + ) + box_size: List[float] = Field( + examples=[[20, 20, 20]], description="Docking box size" ) - box_size: List[float] = Field(example=[20, 20, 20], description="Docking box size") class S3ParametersMolecules(S3Parameters): @@ -265,14 +262,13 @@ class ToxType(str, Enum): algorithm_application: str = "OrganTox" site: Organs = Field( ..., - example=Organs.kidney, + examples=[Organs.kidney], description="name of the target site of interest.", ) toxicity_type: ToxType = Field( default=ToxType.all, - example=ToxType.chronic, + examples=[ToxType.chronic], description="type of toxicity for which predictions are made.", - options=["chronic", "subchronic", "multigenerational", "all"], ) diff --git a/src/gt4sd/properties/proteins/core.py b/src/gt4sd/properties/proteins/core.py index 29859f045..40aeae567 100644 --- a/src/gt4sd/properties/proteins/core.py +++ b/src/gt4sd/properties/proteins/core.py @@ -46,7 +46,7 @@ class AmideConfiguration(PropertyPredictorParameters): amide: bool = Field( False, - example=False, + examples=[False], description="whether the sequences are C-terminally amidated.", ) @@ -58,7 +58,7 @@ class PhConfiguration(PropertyPredictorParameters): class AmidePhConfiguration(PropertyPredictorParameters): amide: bool = Field( False, - example=False, + examples=[False], description="whether the sequences are C-terminally amidated.", ) ph: float = 7.0 diff --git a/src/gt4sd/tests/utils.py b/src/gt4sd/tests/utils.py index ff1ddf877..8ee88bc0f 100644 --- a/src/gt4sd/tests/utils.py +++ b/src/gt4sd/tests/utils.py @@ -29,7 +29,7 @@ from pathlib import PosixPath import importlib_resources -from pydantic import BaseSettings +from pydantic_settings import BaseSettings, SettingsConfigDict class GT4SDTestSettings(BaseSettings): @@ -40,10 +40,7 @@ class GT4SDTestSettings(BaseSettings): gt4sd_s3_secret_key: str = "5748375c761a4f09c30a68cd15e218e3b27ca3e2aebd7726" gt4sd_s3_secure: bool = True gt4sd_ci: bool = False - - class Config: - # immutable and in turn hashable, that is required for lru_cache - frozen = True + model_config = SettingsConfigDict(frozen=True) @staticmethod @lru_cache(maxsize=None)