Add logger for efficient and convenient logging of objects #1343

Open. Wants to merge 3 commits into base: p--query-resolution-perf--12_9.
metricflow-semantics/metricflow_semantics/formatting/formatting_helpers.py (+27):
from __future__ import annotations

import textwrap


def mf_dedent(text: str) -> str:
    """Remove leading newlines, dedent, and remove trailing newlines.

    This function simplifies the commonly used:

        text = textwrap.dedent(
            [triple quote][backslash]
            Line 0
            Line 1
            [triple quote]
        ).rstrip()

    to:

        text = mf_dedent(
            [triple quote]
            Line 0
            Line 1
            [triple quote]
        )
    """
    return textwrap.dedent(text.lstrip("\n")).rstrip("\n")
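The before/after from the docstring can be exercised directly. A standalone copy of the helper so the snippet runs on its own:

```python
import textwrap


def mf_dedent(text: str) -> str:
    """Standalone copy of the helper above: strip leading newlines, dedent,
    then strip trailing newlines."""
    return textwrap.dedent(text.lstrip("\n")).rstrip("\n")


text = mf_dedent(
    """
    Line 0
    Line 1
    """
)
# No backslash after the opening triple quote and no .rstrip() call needed.
assert text == "Line 0\nLine 1"
```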
metricflow-semantics/metricflow_semantics/mf_logging/logger_configuration.py (+65):
from __future__ import annotations

import inspect
import threading

from metricflow_semantics.mf_logging.mf_logger import MetricFlowLogger, MetricFlowLoggerFactory
from metricflow_semantics.mf_logging.pretty_printing_logger import PrettyPrintingLoggerFactory


class MetricFlowLoggerConfiguration:
    """Global object used to configure logger object creation in MF.

    The factory can be configured to use other implementations to realize different behaviors like passing JSON to
    the `extra` argument. By default, it pretty-formats the objects and appends them to the message.
    """

    _logger_factory: MetricFlowLoggerFactory = PrettyPrintingLoggerFactory()
    _state_lock = threading.Lock()

    @classmethod
    def set_logger_factory(cls, logger_factory: MetricFlowLoggerFactory) -> None:
        """Set the factory to be used for creating logger objects in the `get_logger` method."""
        with cls._state_lock:
            cls._logger_factory = logger_factory

    @classmethod
    def get_logger(cls, module_name: str) -> MetricFlowLogger:
        """Get the logger using the module where the call was made.

        This is a replacement for the standard `logging.getLogger` pattern.

        e.g.

            import logging
            logger = logging.getLogger(__name__)

        ->

            import ...
            logger = MetricFlowLoggerConfiguration.get_logger(__name__)
        """
        with cls._state_lock:
            return cls._logger_factory.get_logger(module_name)


def mf_get_logger() -> MetricFlowLogger:
    """Get the logger using the module where the call was made.

    This is a replacement for the standard `logging.getLogger` pattern.

    e.g.

        import logging
        logger = logging.getLogger(__name__)

    ->

        import ...
        logger = mf_get_logger()
    """
    caller_frame = inspect.stack()[1]
    caller_module = inspect.getmodule(caller_frame[0])

    if caller_module is None:
        raise RuntimeError(f"Unable to get module for {caller_frame[0]=}")

Contributor commented on lines +10 to +64:
This is my concern. This whole file.

What we're doing is formatting a log message based on some input parameters. That's literally it. But to do it, we have a state lock, and a factory class, and a support class shim, and a custom accessor function, and a stack inspector.

State management, stack-based accessors, and lazy evaluation of messages and message arguments are all things the built-in logger class handles - a LogRecord Formatter can access the message input and the format args and operate on them as needed, etc.

Is there a problem with using a separate LogRecord Formatter to solve the issue you have with portioning things into extras? If you need different formatting from what MetricFlow is doing internally here you'll need a different formatter configuration regardless, and all of these wrappers don't make a lick of difference in terms of functionality there. Make our LazyPrinter or whatever have an API that a custom Formatter object can use to do what needs doing.

Also, the extra parameter is just a kwarg dict that gets merged into the LogRecord attributes before formatting or routing. You can accomplish the same thing in formatting by passing in a single dict-style arg and then using it in a custom Formatter.

Basically you have a config for the MetricFlow logger objects and you set up a custom Formatter configuration that does the necessary format operations. If we're passing in objects and args instead of a pre-formatted string we can do what we like under the hood.
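The reviewer's suggestion can be sketched with the standard library alone: keys passed via `extra` land as attributes on the `LogRecord`, where a custom `Formatter` can pick them up. The `objects` key below is an arbitrary name chosen for this sketch, not something from the PR:

```python
import logging


class KwargsFormatter(logging.Formatter):
    """Sketch of a Formatter that appends a dict passed via `extra` to the
    message, one key per line. The `objects` attribute name is arbitrary."""

    def format(self, record: logging.LogRecord) -> str:
        base = super().format(record)
        objects = getattr(record, "objects", None)
        if not objects:
            return base
        lines = [f"  {key}: {value!r}" for key, value in objects.items()]
        return base + "\n" + "\n".join(lines)


logger = logging.getLogger("formatter_demo")
handler = logging.StreamHandler()
handler.setFormatter(KwargsFormatter("%(levelname)s: %(message)s"))
logger.addHandler(handler)
logger.setLevel(logging.DEBUG)

# The dict is only rendered when a handler actually formats the record.
logger.debug("Found matching path.", extra={"objects": {"matching_path": [1, 2, 3]}})
```

Formatting here is deferred until a handler emits the record, which covers the lazy-evaluation concern without a wrapper class.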

Contributor (author) replied:

> Is there a problem with using a separate LogRecord Formatter to solve the issue you have with portioning things into extras?

If it was just about lazy evaluation, this is the standard approach. If you take a look at the server implementation, the log calls use the extra field and a separate formatter. In the PR description, I noted that one of the issues was:

> Logging calls were verbose, and string keys like "matching_path" were not amenable to auto-complete in the IDE.

I got tired of having to write these dictionaries / format strings, especially when auto-complete does not help. As per the PR title, one of the three goals of this PR was to make these calls more convenient.

> What we're doing is formatting a log message based on some input parameters. That's literally it. But to do it, we have a state lock, and a factory class, and a support class shim, and a custom accessor function, and a stack inspector.

Each of those can be removed to make things either less convenient or less configurable.

Contributor (author) commented (@plypaul, Jul 20, 2024):
e.g. Remove the stack accessor and change the call style from:

logger = mf_get_logger()

to

logger = mf_get_logger(__name__)

(continuation of `mf_get_logger` in logger_configuration.py:)

    return MetricFlowLoggerConfiguration.get_logger(caller_module.__name__)
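The swappable-factory-behind-a-lock pattern used by `MetricFlowLoggerConfiguration` can be sketched with the standard library alone. All names here are illustrative, not part of the PR:

```python
import logging
import threading


class LoggerConfig:
    """Illustrative stand-in for MetricFlowLoggerConfiguration: a class-level
    factory callable guarded by a lock so it can be swapped at runtime."""

    _factory = staticmethod(logging.getLogger)  # default factory
    _lock = threading.Lock()

    @classmethod
    def set_factory(cls, factory) -> None:
        # Swap the factory under the lock so concurrent readers see a
        # consistent value.
        with cls._lock:
            cls._factory = factory

    @classmethod
    def get_logger(cls, name: str):
        with cls._lock:
            return cls._factory(name)
```

Swapping the factory changes what every subsequent `get_logger` call returns, which is how an alternative (e.g. JSON-emitting) implementation could be installed globally.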
metricflow-semantics/metricflow_semantics/mf_logging/mf_logger.py (103 additions):
from __future__ import annotations

import logging
from abc import ABC, abstractmethod


class MetricFlowLogger(ABC):
    """Logger backed by the Python standard-library logger to conveniently and efficiently log objects.

    In MF, there is a need to log complex objects like DAGs and paths for debugging. Previously, the pattern for
    logging those objects used a pretty-formatting method like:

        logger = logging.getLogger(__name__)
        ...
        logger.debug(mf_pformat_many("Found matching path.", {"matching_path": matching_path, "source_nodes": source_nodes}))

    ->

        Found matching path.
        matching_path: [1, 2, 3]
        source_nodes: ["a", "b", "c"]

    However, there were a few issues:

    * The `mf_pformat_many()` call was evaluated regardless of whether the given logging level was enabled. The
      formatting of complex objects was found to be significant in performance profiling.
    * Since the message is the only object passed to the logger, it was not reasonable to convert the objects to a
      different form (e.g. JSON) for different backends.
    * Logging calls were verbose, and string keys like "matching_path" were not amenable to auto-complete in the IDE.

    This class supports a new approach: a custom logging class that takes keyword arguments which are only converted
    to strings when the corresponding logging level is enabled. The logger is created via a configurable factory that
    can be used to implement different ways of logging those objects (e.g. JSON).

    The new pattern addresses the above issues and looks like:

        logger = mf_get_logger()
        ...
        logger.debug("Found matching path.", matching_path=matching_path, source_nodes=source_nodes)

    A signature for the logging calls that allows non-named arguments and uses the variable names as keys in the
    output was considered for reduced verbosity:

        logger.debug("Found matching path.", matching_path, source_nodes)

    This approach was prototyped using the `varname` library to resolve the variable names, but it was found to be
    too slow (~0.7s for 1000 calls).
    """

    @abstractmethod
    def log_implementation(self, level_int: int, message: str, stack_level: int, **kwargs) -> None:  # type: ignore[no-untyped-def]
        """Appropriately log the message and the appropriate representation of `kwargs`.

        An example of the representation of `kwargs` might be the string form of the objects on separate lines.
        The `stack_level` argument should be passed to the Python-standard-library log call so that the correct file
        and line number are used for the source of the log line, not this library.
        """
        raise NotImplementedError

    def _safely_log(self, level_int: int, message: str, **kwargs) -> None:  # type: ignore[no-untyped-def]
        """Calls `.log_implementation` with a try-except so that bugs do not cause a fatal exit."""
        if not self.standard_library_logger.isEnabledFor(level_int):
            return
        try:
            self.log_implementation(level_int, message, stack_level=4, **kwargs)
        except Exception:
            logging.exception(
                f"Got an exception while using logger instance {self}. "
                f"Since there might be an issue with the logger instance, this message is logged using the root logger."
            )

    def debug(self, message: str, **kwargs) -> None:  # type: ignore[no-untyped-def] # noqa: D102
        self._safely_log(logging.DEBUG, message, **kwargs)

    def info(self, message: str, **kwargs) -> None:  # type: ignore[no-untyped-def] # noqa: D102
        self._safely_log(logging.INFO, message, **kwargs)

    def warning(self, message: str, **kwargs) -> None:  # type: ignore[no-untyped-def] # noqa: D102
        self._safely_log(logging.WARNING, message, **kwargs)

    def error(self, message: str, **kwargs) -> None:  # type: ignore[no-untyped-def] # noqa: D102
        self._safely_log(logging.ERROR, message, **kwargs)

    def critical(self, message: str, **kwargs) -> None:  # type: ignore[no-untyped-def] # noqa: D102
        self._safely_log(logging.CRITICAL, message, **kwargs)

    def fatal(self, message: str, **kwargs) -> None:  # type: ignore[no-untyped-def] # noqa: D102
        self._safely_log(logging.FATAL, message, **kwargs)

    @property
    @abstractmethod
    def standard_library_logger(self) -> logging.Logger:
        """Return the Python-standard-library logger that this class uses."""
        raise NotImplementedError


class MetricFlowLoggerFactory(ABC):
    """Interface for creating logger instances."""

    @abstractmethod
    def get_logger(self, module_name: str) -> MetricFlowLogger:
        """Create a logger for the given module. Similar to the standard `logging.getLogger(__name__)`."""
        raise NotImplementedError
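As a sketch of the "different backends" point in the docstring, a JSON-emitting implementation of `log_implementation` might look like the following. This is standalone for illustration (it does not subclass `MetricFlowLogger`, and only the `debug` level is shown), so only the method shape is mirrored:

```python
import json
import logging


class JsonLoggerSketch:
    """Hypothetical logger that serializes the keyword arguments to JSON and
    appends them to the message."""

    def __init__(self, python_logger: logging.Logger) -> None:
        self._logger = python_logger

    def log_implementation(self, level_int: int, message: str, stack_level: int, **kwargs) -> None:
        # `default=repr` keeps non-JSON-serializable objects loggable.
        payload = json.dumps(kwargs, default=repr, sort_keys=True)
        self._logger.log(level_int, f"{message} {payload}", stacklevel=stack_level)

    def debug(self, message: str, **kwargs) -> None:
        # Only serialize when the level is enabled - the lazy-evaluation point
        # from the docstring above.
        if self._logger.isEnabledFor(logging.DEBUG):
            self.log_implementation(logging.DEBUG, message, stack_level=3, **kwargs)
```

Installing such a class through the configurable factory would switch every call site to JSON output without touching the call sites themselves.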
metricflow-semantics/metricflow_semantics/mf_logging/pretty_printing_logger.py (+42):
from __future__ import annotations

import logging

from typing_extensions import override

from metricflow_semantics.mf_logging.mf_logger import MetricFlowLogger, MetricFlowLoggerFactory
from metricflow_semantics.mf_logging.pretty_print import mf_pformat_many


class PrettyPrintingLogger(MetricFlowLogger):
    """Logger that pretty-prints the objects passed as keyword arguments.

    e.g.

        logger.debug("Found matching path.", matching_path=matching_path, source_nodes=source_nodes)

    ->

        Found matching path.
        matching_path: [1, 2, 3]
        source_nodes: ["a", "b", "c"]
    """

    def __init__(self, python_logger: logging.Logger) -> None:  # noqa: D107
        self._standard_library_logger = python_logger

    @override
    def log_implementation(self, level_int: int, message: str, stack_level: int, **kwargs) -> None:  # type: ignore[no-untyped-def]
        self._standard_library_logger.log(level_int, mf_pformat_many(message, kwargs), stacklevel=stack_level)

    @property
    @override
    def standard_library_logger(self) -> logging.Logger:
        return self._standard_library_logger


class PrettyPrintingLoggerFactory(MetricFlowLoggerFactory):
    """Factory for creating `PrettyPrintingLogger`."""

    @override
    def get_logger(self, module_name: str) -> MetricFlowLogger:
        return PrettyPrintingLogger(logging.getLogger(module_name))
metricflow-semantics/metricflow_semantics/test_helpers/logging_helpers.py (+55):
from __future__ import annotations

import logging
from contextlib import contextmanager
from typing import Iterator, List, Optional, Sequence, Tuple

from typing_extensions import override

from metricflow_semantics.mf_logging.logger_configuration import mf_get_logger
from metricflow_semantics.mf_logging.mf_logger import MetricFlowLogger


class RecordingLogHandler(logging.Handler):
    """A log-record handler that stores the records it sees so that they can be checked in tests."""

    def __init__(self) -> None:  # noqa: D107
        super().__init__()
        self._log_records: List[logging.LogRecord] = []

    @override
    def emit(self, record: logging.LogRecord) -> None:
        self._log_records.append(record)

    @property
    def log_records(self) -> Sequence[logging.LogRecord]:
        """Return the log records seen by the handler."""
        return self._log_records

    def get_last_message(self) -> Optional[str]:
        """Return the message in the last log record, or None if this handler hasn't seen any."""
        if len(self._log_records) == 0:
            return None

        return self._log_records[-1].getMessage()


@contextmanager
def recorded_logging_context(logging_level_int: int) -> Iterator[Tuple[MetricFlowLogger, RecordingLogHandler]]:
    """Context with a logger (set to the given level) and an associated handler to check what was logged.

    The handler records all log records emitted at or above the given level during this context.
    Log propagation could be disabled in this context to clean up test log output, but some issues need to be
    resolved first.
    """
    mf_logger = mf_get_logger()
    standard_logger = mf_logger.standard_library_logger

    previous_logging_level = standard_logger.level
    handler = RecordingLogHandler()
    standard_logger.addHandler(handler)
    try:
        standard_logger.setLevel(logging_level_int)
        yield mf_logger, handler
    finally:
        standard_logger.removeHandler(handler)
        standard_logger.setLevel(previous_logging_level)
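The same record-and-restore pattern works with any standard-library logger. A self-contained variant, without the MetricFlow imports, for illustration:

```python
import logging
from contextlib import contextmanager
from typing import Iterator, List, Tuple


class _RecordingHandler(logging.Handler):
    """Stores emitted records, mirroring RecordingLogHandler above."""

    def __init__(self) -> None:
        super().__init__()
        self.log_records: List[logging.LogRecord] = []

    def emit(self, record: logging.LogRecord) -> None:
        self.log_records.append(record)


@contextmanager
def recorded_logging(logger_name: str, level: int) -> Iterator[Tuple[logging.Logger, _RecordingHandler]]:
    """Attach a recording handler and set the level, restoring both on exit."""
    logger = logging.getLogger(logger_name)
    previous_level = logger.level
    handler = _RecordingHandler()
    logger.addHandler(handler)
    try:
        logger.setLevel(level)
        yield logger, handler
    finally:
        logger.removeHandler(handler)
        logger.setLevel(previous_level)


with recorded_logging("context_demo", logging.INFO) as (logger, handler):
    logger.info("hello")

assert handler.log_records[0].getMessage() == "hello"
```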
New test file (+76):
from __future__ import annotations

import logging
from pathlib import Path

import pytest
from metricflow_semantics.formatting.formatting_helpers import mf_dedent
from metricflow_semantics.mf_logging.mf_logger import MetricFlowLogger
from metricflow_semantics.test_helpers.logging_helpers import RecordingLogHandler, recorded_logging_context


def test_log_file_name_and_line_number() -> None:
    """Tests that the file and the line number associated with the log call point to the call site.

    The call site is the file / line where the `.info(...)` or similar call was made, not a location within the MF
    logging library.
    """
    with recorded_logging_context(logging.INFO) as (logger, handler):
        log_line_number = 19
        logger.info(f"Example message from line {log_line_number} in {__file__}")
        assert len(handler.log_records) == 1, f"There should have been 1 record but got: {handler.log_records}"

        record = handler.log_records[0]
        assert record.filename == Path(__file__).name
        assert (
            record.lineno == log_line_number
        ), f"The log line should have been {log_line_number} but got {record.lineno}"


@pytest.mark.skip(reason="Running this prints error messages in the test results.")
def test_log_levels() -> None:
    """Test that the logger works for different logging levels."""
    for level_int in (logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR, logging.CRITICAL, logging.FATAL):
        with recorded_logging_context(level_int) as (logger, handler):
            # Log a message at the appropriate level.
            message = f"Example message at level {level_int} in {__file__}"
            if level_int == logging.DEBUG:
                logger.debug(message)
            elif level_int == logging.INFO:
                logger.info(message)
            elif level_int == logging.WARNING:
                logger.warning(message)
            elif level_int == logging.ERROR:
                logger.error(message)
            elif level_int == logging.CRITICAL:
                logger.critical(message)
            elif level_int == logging.FATAL:
                logger.fatal(message)
            else:
                raise ValueError(f"Logging level not handled {level_int=}")

            # Check that the log record is correct.
            assert (
                len(handler.log_records) == 1
            ), f"There should have been 1 record for {level_int=} but got: {handler.log_records}"

            record = handler.log_records[0]
            assert record.levelno == level_int, f"Expected a record with {level_int=} but got {record.levelno}"
            assert record.getMessage() == message


def test_log_kwargs() -> None:
    """Test that objects included via keyword args are formatted."""
    logger: MetricFlowLogger
    handler: RecordingLogHandler
    with recorded_logging_context(logging.DEBUG) as (logger, handler):
        logger.debug("Found candidates.", matches=[1, 2, 3], parameters={"field_0": "value_0", "field_1": "value_1"})
        assert handler.get_last_message() == mf_dedent(
            """
            Found candidates.
            matches:
              [1, 2, 3]
            parameters:
              {'field_0': 'value_0', 'field_1': 'value_1'}
            """
        )