dbt-labs · plypaul · Jul 19, 2024 · Jul 19, 2024 · Jul 19, 2024 · tlento
diff --git a/metricflow-semantics/metricflow_semantics/mf_logging/logger_configuration.py b/metricflow-semantics/metricflow_semantics/mf_logging/logger_configuration.py
@@ -0,0 +1,65 @@
+from __future__ import annotations
+
+import inspect
+import threading
+
+from metricflow_semantics.mf_logging.mf_logger import MetricFlowLogger, MetricFlowLoggerFactory
+from metricflow_semantics.mf_logging.pretty_printing_logger import PrettyPrintingLoggerFactory
+
+
+class MetricFlowLoggerConfiguration:
+    """Global object used to configure logger object creation in MF.
+
+    The factory can be configured to use other implementations to realize different behaviors like passing JSON to the
+    `extra` argument. By default, it pretty-formats the objects and appends them to the message.
+    """
+
+    _logger: MetricFlowLoggerFactory = PrettyPrintingLoggerFactory()
+    _state_lock = threading.Lock()
+
+    @classmethod
+    def set_logger_factory(cls, logger_factory: MetricFlowLoggerFactory) -> None:
+        """Set the factory to be used for creating logger objects in the `get_logger` method."""
+        with cls._state_lock:
+            cls._logger = logger_factory
+
+    @classmethod
+    def get_logger(cls, module_name: str) -> MetricFlowLogger:
+        """Get the logger using the module where the call was made.
+
+        This is a replacement for the standard `logging.getLogger` pattern.
+
+        e.g.
+            import logging
+            logger = logging.getLogger(__name__)
+
+            ->
+
+            import ...
+            logger = MetricFlowLoggerConfiguration.get_module_logger()
+        """
+        with cls._state_lock:
+            return cls._logger.get_logger(module_name)
+
+
+def mf_get_logger() -> MetricFlowLogger:
+    """Get the logger using the module where the call was made.
+
+    This is a replacement for the standard `logging.getLogger` pattern.
+
+    e.g.
+        import logging
+        logger = logging.getLogger(__name__)
+
+        ->
+
+        import ...
+        logger = mf_get_logger()
+    """
+    caller_frame = inspect.stack()[1]
+    caller_module = inspect.getmodule(caller_frame[0])
+
+    if caller_module is None:
+        raise RuntimeError(f"Unable to get module for {caller_frame[0]=}")
+
+    return MetricFlowLoggerConfiguration.get_logger(caller_module.__name__)
diff --git a/metricflow-semantics/metricflow_semantics/mf_logging/mf_logger.py b/metricflow-semantics/metricflow_semantics/mf_logging/mf_logger.py
@@ -0,0 +1,103 @@
+from __future__ import annotations
+
+import logging
+from abc import ABC, abstractmethod
+
+
+class MetricFlowLogger(ABC):
+    """Logger backed by the Python standard-library logger to conveniently and efficiently log objects.
+
+    In MF, there is a need to log complex objects like DAGs and paths for debugging. Previously, the pattern for logging
+    those objects used a pretty-formatting method like:
+
+        logger = logging.getLogger(__name__)
+        ...
+        logger.debug(mf_pformat_many("Found matching path.", {"matching_path": matching_path, "source_nodes": source_nodes}))
+
+        ->
+
+        Found matching path.
+          matching_path: [1, 2, 3]
+          source_nodes: ["a", "b", "c"]
+
+    However, there were a few issues:
+
+    * The `mf_pformat_many()` call was evaluated regardless of whether the given logging level was enabled. The
+      formatting of complex objects was found to be significant in performance profiling.
+    * Since the message is the only object passed to the logger, it was not reasonable to convert the objects to a
+      different form (e.g. JSON) for different backends.
+    * Logging calls were verbose, and string keys like "matching_path" were not amenable to auto-complete in the IDE.
+
+    The class is used in a new approach that uses a custom logging class that takes keyword arguments that are only
+    converted to strings when the corresponding logging level is enabled. The logger is created via a configurable
+    factory that can be used to implement different ways of logging those objects (e.g. JSON).
+
+    The new pattern addresses the above issues and looks like:
+
+        logger = mf_get_logger()
+        ...
+        logger.debug("Found matching path.", matching_path=matching_path, source_nodes=source_nodes)
+
+    A signature for the logging calls that allows non-named arguments and uses the variable names as keys in the output
+    was considered for reduced verbosity:
+
+        logger.debug("Found matching path.", matching_path, source_nodes)
+
+    This approach was prototyped using the `varname` library to resolve the variable name, but it was found to be too
+    slow (~0.7s for 1000 calls).
+    """
+
+    @abstractmethod
+    def log_implementation(self, level_int: int, message: str, stack_level: int, **kwargs) -> None:  # type: ignore[no-untyped-def]
+        """Appropriately log the message and the appropriate representation of `kwargs`.
+
+        An example of the representation of `kwargs` might be the string form of the objects on separate lines.
+        The `stack_level` argument should be passed to the Python-standard-library log-call so that the correct file
+        and line number are used for the source of the log line, not this library.
+        """
+        raise NotImplementedError
+
+    def _safely_log(self, level_int: int, message: str, **kwargs) -> None:  # type: ignore[no-untyped-def]
+        """Calls `.log_implementation` with a try-except so that bugs do not cause a fatal exit."""
+        if not self.standard_library_logger.isEnabledFor(level_int):
+            return
+        try:
+            self.log_implementation(level_int, message, stack_level=4, **kwargs)
+        except Exception:
+            logging.exception(
+                f"Got an exception while using logger instance {self}. "
+                f"Since there might be an issue with the logger instance, this message is logged using the root logger."
+            )
+
+    def debug(self, message: str, **kwargs) -> None:  # type: ignore[no-untyped-def]  # noqa: D102
+        self._safely_log(logging.DEBUG, message, **kwargs)
+
+    def info(self, message: str, **kwargs) -> None:  # type: ignore[no-untyped-def]  # noqa: D102
+        self._safely_log(logging.INFO, message, **kwargs)
+
+    def warning(self, message: str, **kwargs) -> None:  # type: ignore[no-untyped-def]  # noqa: D102
+        self._safely_log(logging.WARNING, message, **kwargs)
+
+    def error(self, message: str, **kwargs) -> None:  # type: ignore[no-untyped-def]  # noqa: D102
+        self._safely_log(logging.ERROR, message, **kwargs)
+
+    def critical(self, message: str, **kwargs) -> None:  # type: ignore[no-untyped-def]  # noqa: D102
+        self._safely_log(logging.CRITICAL, message, **kwargs)
+
+    def fatal(self, message: str, **kwargs) -> None:  # type: ignore[no-untyped-def]  # noqa: D102
+        self._safely_log(logging.FATAL, message, **kwargs)
+
+    @property
+    @abstractmethod
+    def standard_library_logger(self) -> logging.Logger:
+        """Return the Python-standard-library logger that this uses."""
+        raise NotImplementedError
+
+
+class MetricFlowLoggerFactory(ABC):
+    """Interface for creating logger instances."""
+
+    @abstractmethod
+    def get_logger(self, module_name: str) -> MetricFlowLogger:
+        """Create a logger for the given module. Similar to the standard `logging.getLogger(__name__)`."""
+        raise NotImplementedError
diff --git a/metricflow-semantics/metricflow_semantics/mf_logging/pretty_printing_logger.py b/metricflow-semantics/metricflow_semantics/mf_logging/pretty_printing_logger.py
@@ -0,0 +1,42 @@
+from __future__ import annotations
+
+import logging
+
+from typing_extensions import override
+
+from metricflow_semantics.mf_logging.mf_logger import MetricFlowLogger, MetricFlowLoggerFactory
+from metricflow_semantics.mf_logging.pretty_print import mf_pformat_many
+
+
+class PrettyPrintingLogger(MetricFlowLogger):
+    """Logger that pretty-prints the objects passed as keyword arguments.
+
+    e.g.
+        logger.debug("Found matching path.", matching_path=matching_path, source_nodes=source_nodes)
+
+        ->
+
+        Found matching path.
+          matching_path: [1, 2, 3]
+          source_nodes: ["a", "b", "c"]
+    """
+
+    def __init__(self, python_logger: logging.Logger) -> None:  # noqa: D107
+        self._standard_library_logger = python_logger
+
+    @override
+    def log_implementation(self, level_int: int, message: str, stack_level: int, **kwargs) -> None:  # type: ignore[no-untyped-def]
+        self._standard_library_logger.log(level_int, mf_pformat_many(message, kwargs), stacklevel=stack_level)
+
+    @property
+    @override
+    def standard_library_logger(self) -> logging.Logger:
+        return self._standard_library_logger
+
+
+class PrettyPrintingLoggerFactory(MetricFlowLoggerFactory):
+    """Factory for creating `PrettyPrintingLogger`."""
+
+    @override
+    def get_logger(self, module_name: str) -> MetricFlowLogger:
+        return PrettyPrintingLogger(logging.getLogger(module_name))