Skip to content

Commit

Permalink
[MAINTENANCE] Basic expectation testing framework (#10554)
Browse files Browse the repository at this point in the history
  • Loading branch information
tyler-hoffman authored Oct 24, 2024
1 parent 16e41e6 commit 84cfd3d
Show file tree
Hide file tree
Showing 6 changed files with 209 additions and 0 deletions.
61 changes: 61 additions & 0 deletions tests/integration/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from typing import Callable, Generator, Sequence, TypeVar

import pandas as pd
import pytest

from great_expectations.datasource.fluent.interfaces import Batch
from tests.integration.test_utils.data_source_config import DataSourceTestConfig

# Type variable for the decorated test function, so the decorator returned by
# `parameterize_batch_for_data_sources` is typed as returning what it received.
_F = TypeVar("_F", bound=Callable)


def parameterize_batch_for_data_sources(
    data_source_configs: Sequence[DataSourceTestConfig],
    data: pd.DataFrame,
) -> Callable[[_F], _F]:
    """Build a test decorator that parametrizes a test over data source configs.

    The decorated test receives a `batch_for_datasource` argument. One pytest
    parameter is generated per entry in `data_source_configs`; each carries the
    `(data, config)` pair, uses the config's `test_id` as its id and is marked
    with the config's `pytest_mark`. Parametrization is indirect, so the pair is
    handed to the `batch_for_datasource` fixture rather than to the test itself.

    Args:
        data_source_configs: Configs describing the data sources to test against.
        data: The dataframe each batch is built from.

    Returns:
        A decorator to apply to the test function.

    Example:
        @parameterize_batch_for_data_sources(
            data_source_configs=[PandasDataFrameDatasourceTestConfig()],
            data=pd.DataFrame({"col_name": [1, 2]}),
        )
        def test_stuff(batch_for_datasource) -> None:
            ...
    """

    def decorator(func: _F) -> _F:
        # One pytest parameter per data source config.
        params = []
        for config in data_source_configs:
            params.append(
                pytest.param(
                    (data, config),
                    id=config.test_id,
                    marks=[config.pytest_mark],
                )
            )

        # The argument name is taken from the fixture itself so the two stay in sync.
        fixture_name = batch_for_datasource.__name__
        return pytest.mark.parametrize(fixture_name, params, indirect=True)(func)

    return decorator


@pytest.fixture
def batch_for_datasource(request: pytest.FixtureRequest) -> Generator[Batch, None, None]:
    """Fixture that yields a batch for a specific data source config.

    This must be parametrized with `indirect=True` so that `request.param`
    holds a `(data, data_source_config)` tuple; the batch is then only built
    when the test actually runs.

    Yields:
        The batch produced by the config's batch setup object.
    """
    data, data_source_config = request.param
    assert isinstance(data, pd.DataFrame)
    assert isinstance(data_source_config, DataSourceTestConfig)

    batch_setup = data_source_config.create_batch_setup(data)

    batch_setup.setup()
    # Once setup() has succeeded, teardown() must always run, including when
    # make_batch() raises before the test body is ever reached.
    try:
        yield batch_setup.make_batch()
    finally:
        batch_setup.teardown()
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import pandas as pd

import great_expectations.expectations as gxe
from tests.integration.conftest import parameterize_batch_for_data_sources
from tests.integration.test_utils.data_source_config import (
PandasDataFrameDatasourceTestConfig,
)


@parameterize_batch_for_data_sources(
    data_source_configs=[PandasDataFrameDatasourceTestConfig()],
    data=pd.DataFrame({"a": [1, 2]}),
)
def test_expect_column_min_to_be_between(batch_for_datasource) -> None:
    """Column "a" holds 1 and 2, so its minimum is expected to lie within [1, 1]."""
    validation_result = batch_for_datasource.validate(
        gxe.ExpectColumnMinToBeBetween(column="a", min_value=1, max_value=1)
    )
    assert validation_result.success


@parameterize_batch_for_data_sources(
    data_source_configs=[PandasDataFrameDatasourceTestConfig()],
    data=pd.DataFrame({"a": [1, 2]}),
)
def test_expect_column_max_to_be_between(batch_for_datasource) -> None:
    """Column "a" holds 1 and 2, so its maximum is expected to lie within [2, 2]."""
    validation_result = batch_for_datasource.validate(
        gxe.ExpectColumnMaxToBeBetween(column="a", min_value=2, max_value=2)
    )
    assert validation_result.success
Empty file.
2 changes: 2 additions & 0 deletions tests/integration/test_utils/data_source_config/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .base import BatchTestSetup, DataSourceTestConfig
from .pandas_data_frame import PandasDataFrameBatchTestSetup, PandasDataFrameDatasourceTestConfig
72 changes: 72 additions & 0 deletions tests/integration/test_utils/data_source_config/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
from __future__ import annotations

import random
import string
from abc import ABC, abstractmethod
from dataclasses import dataclass
from functools import cached_property
from typing import TYPE_CHECKING, Generic, Optional, TypeVar

import great_expectations as gx
from great_expectations.data_context.data_context.abstract_data_context import AbstractDataContext
from great_expectations.datasource.fluent.interfaces import Batch

if TYPE_CHECKING:
import pandas as pd
import pytest


@dataclass(frozen=True)
class DataSourceTestConfig(ABC):
    """Abstract, immutable description of a data source to run a test against.

    Subclasses supply the label, the pytest mark, and a factory for the object
    that sets up and tears down batches for that data source.
    """

    # Optional extra identifier; appended to the label when building `test_id`.
    name: Optional[str] = None

    @property
    @abstractmethod
    def label(self) -> str:
        """Label that will show up in the test name."""
        ...

    @property
    @abstractmethod
    def pytest_mark(self) -> pytest.MarkDecorator:
        """Pytest mark to attach to tests that use this config."""
        ...

    @abstractmethod
    def create_batch_setup(self, data: pd.DataFrame) -> BatchTestSetup:
        """Create a batch setup object for this data source from `data`."""

    @property
    def test_id(self) -> str:
        """Test id: the label, followed by `-<name>` when a name is set."""
        candidates = (self.label, self.name)
        return "-".join(piece for piece in candidates if piece is not None)


# Type variable for the concrete config class a `BatchTestSetup` is parameterized with.
_ConfigT = TypeVar("_ConfigT", bound=DataSourceTestConfig)


class BatchTestSetup(ABC, Generic[_ConfigT]):
    """Abstract base for objects that set up, produce, and tear down a test batch.

    Attributes are the data source config and the dataframe the batch is built from.
    """

    def __init__(self, config: _ConfigT, data: pd.DataFrame) -> None:
        """Store the config and the data for use by subclasses."""
        self.config = config
        self.data = data

    @abstractmethod
    def setup(self) -> None:
        """Prepare whatever is needed before a batch can be made."""
        ...

    @abstractmethod
    def make_batch(self) -> Batch:
        """Return the batch under test."""
        ...

    @abstractmethod
    def teardown(self) -> None:
        """Clean up after the test has used the batch."""
        ...

    @staticmethod
    def _random_resource_name() -> str:
        """Return a random name made of 10 lowercase ASCII letters."""
        letters = random.choices(string.ascii_lowercase, k=10)
        return "".join(letters)

    @cached_property
    def _context(self) -> AbstractDataContext:
        """Data context obtained with mode="ephemeral", created once per instance."""
        ephemeral_context = gx.get_context(mode="ephemeral")
        return ephemeral_context
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import pandas as pd
import pytest

from great_expectations.compatibility.typing_extensions import override
from great_expectations.datasource.fluent.interfaces import Batch
from tests.integration.test_utils.data_source_config.base import (
BatchTestSetup,
DataSourceTestConfig,
)


class PandasDataFrameDatasourceTestConfig(DataSourceTestConfig):
    """Test config for the pandas dataframe data source."""

    @property
    @override
    def label(self) -> str:
        """Label used in the test id for this data source."""
        return "pandas-data-frame-datasource"

    @property
    @override
    def pytest_mark(self) -> pytest.MarkDecorator:
        """Tests using this config are marked as `unit`."""
        return pytest.mark.unit

    @override
    def create_batch_setup(self, data: pd.DataFrame) -> BatchTestSetup:
        """Return a `PandasDataFrameBatchTestSetup` bound to this config and `data`."""
        return PandasDataFrameBatchTestSetup(config=self, data=data)


class PandasDataFrameBatchTestSetup(BatchTestSetup[PandasDataFrameDatasourceTestConfig]):
    """Batch setup that builds a whole-dataframe batch from the stored dataframe."""

    @override
    def make_batch(self) -> Batch:
        """Register a pandas data source, asset and batch definition, then fetch the batch.

        The same randomly generated name is used for all three resources.
        """
        resource_name = self._random_resource_name()
        data_source = self._context.data_sources.add_pandas(resource_name)
        asset = data_source.add_dataframe_asset(resource_name)
        batch_definition = asset.add_batch_definition_whole_dataframe(resource_name)
        return batch_definition.get_batch(batch_parameters={"dataframe": self.data})

    @override
    def setup(self) -> None:
        """No-op: this setup performs no work before the batch is made."""
        ...

    @override
    def teardown(self) -> None:
        """No-op: this setup performs no cleanup."""
        ...

0 comments on commit 84cfd3d

Please sign in to comment.