diff --git a/metricflow-semantics/metricflow_semantics/formatting/__init__.py b/metricflow-semantics/metricflow_semantics/formatting/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/metricflow-semantics/metricflow_semantics/formatting/formatting_helpers.py b/metricflow-semantics/metricflow_semantics/formatting/formatting_helpers.py
new file mode 100644
index 0000000000..a91e9d99e2
--- /dev/null
+++ b/metricflow-semantics/metricflow_semantics/formatting/formatting_helpers.py
@@ -0,0 +1,27 @@
+from __future__ import annotations
+
+import textwrap
+
+
+def mf_dedent(text: str) -> str:
+    """Remove leading newlines, dedent, and remove trailing newlines.
+
+    This function simplifies the commonly used pattern:
+
+        text = textwrap.dedent(
+            [triple quote][backslash]
+            Line 0
+            Line 1
+            [triple quote]
+        ).rstrip()
+
+    to:
+
+        text = mf_dedent(
+            [triple quote]
+            Line 0
+            Line 1
+            [triple quote]
+        )
+    """
+    return textwrap.dedent(text.lstrip("\n")).rstrip("\n")
diff --git a/metricflow-semantics/metricflow_semantics/mf_logging/logger_configuration.py b/metricflow-semantics/metricflow_semantics/mf_logging/logger_configuration.py
new file mode 100644
index 0000000000..0257ca7bd2
--- /dev/null
+++ b/metricflow-semantics/metricflow_semantics/mf_logging/logger_configuration.py
@@ -0,0 +1,65 @@
+from __future__ import annotations
+
+import inspect
+import threading
+
+from metricflow_semantics.mf_logging.mf_logger import MetricFlowLogger, MetricFlowLoggerFactory
+from metricflow_semantics.mf_logging.pretty_printing_logger import PrettyPrintingLoggerFactory
+
+
+class MetricFlowLoggerConfiguration:
+    """Global object used to configure logger object creation in MF.
+
+    The factory can be configured to use other implementations to realize different behaviors like passing JSON to the
+    `extra` argument. By default, it pretty-formats the objects and appends them to the message.
+    """
+
+    _logger: MetricFlowLoggerFactory = PrettyPrintingLoggerFactory()
+    _state_lock = threading.Lock()
+
+    @classmethod
+    def set_logger_factory(cls, logger_factory: MetricFlowLoggerFactory) -> None:
+        """Set the factory to be used for creating logger objects in the `get_logger` method."""
+        with cls._state_lock:
+            cls._logger = logger_factory
+
+    @classmethod
+    def get_logger(cls, module_name: str) -> MetricFlowLogger:
+        """Get the logger that should be used for the given module.
+
+        This is a replacement for the standard `logging.getLogger` pattern.
+
+        e.g.
+            import logging
+            logger = logging.getLogger(__name__)
+
+        ->
+
+            import ...
+            logger = MetricFlowLoggerConfiguration.get_logger(__name__)
+        """
+        with cls._state_lock:
+            return cls._logger.get_logger(module_name)
+
+
+def mf_get_logger() -> MetricFlowLogger:
+    """Get the logger using the module where the call was made.
+
+    This is a replacement for the standard `logging.getLogger` pattern.
+
+    e.g.
+        import logging
+        logger = logging.getLogger(__name__)
+
+    ->
+
+        import ...
+        logger = mf_get_logger()
+    """
+    caller_frame = inspect.stack()[1]
+    caller_module = inspect.getmodule(caller_frame[0])
+
+    if caller_module is None:
+        raise RuntimeError(f"Unable to get module for {caller_frame[0]=}")
+
+    return MetricFlowLoggerConfiguration.get_logger(caller_module.__name__)
diff --git a/metricflow-semantics/metricflow_semantics/mf_logging/mf_logger.py b/metricflow-semantics/metricflow_semantics/mf_logging/mf_logger.py
new file mode 100644
index 0000000000..e9d78135f8
--- /dev/null
+++ b/metricflow-semantics/metricflow_semantics/mf_logging/mf_logger.py
@@ -0,0 +1,103 @@
+from __future__ import annotations
+
+import logging
+from abc import ABC, abstractmethod
+
+
+class MetricFlowLogger(ABC):
+    """Logger backed by the Python standard-library logger to conveniently and efficiently log objects.
+
+    In MF, there is a need to log complex objects like DAGs and paths for debugging. Previously, the pattern for logging
+    those objects used a pretty-formatting method like:
+
+        logger = logging.getLogger(__name__)
+        ...
+        logger.debug(mf_pformat_many("Found matching path.", {"matching_path": matching_path, "source_nodes": source_nodes}))
+
+        ->
+
+        Found matching path.
+          matching_path: [1, 2, 3]
+          source_nodes: ["a", "b", "c"]
+
+    However, there were a few issues:
+
+    * The `mf_pformat_many()` call was evaluated regardless of whether the given logging level was enabled. The
+      formatting of complex objects was found to be significant in performance profiling.
+    * Since the message is the only object passed to the logger, it was not reasonable to convert the objects to a
+      different form (e.g. JSON) for different backends.
+    * Logging calls were verbose, and string keys like "matching_path" were not amenable to auto-complete in the IDE.
+
+    This class implements a new approach: it accepts keyword arguments that are only converted to strings when the
+    corresponding logging level is enabled. Loggers are created via a configurable factory that can be used to
+    implement different ways of logging those objects (e.g. JSON).
+
+    The new pattern addresses the above issues and looks like:
+
+        logger = mf_get_logger()
+        ...
+        logger.debug("Found matching path.", matching_path=matching_path, source_nodes=source_nodes)
+
+    A signature for the logging calls that allows non-named arguments and uses the variable names as keys in the output
+    was considered for reduced verbosity:
+
+        logger.debug("Found matching path.", matching_path, source_nodes)
+
+    This approach was prototyped using the `varname` library to resolve the variable name, but it was found to be too
+    slow (~0.7s for 1000 calls).
+    """
+
+    @abstractmethod
+    def log_implementation(self, level_int: int, message: str, stack_level: int, **kwargs) -> None:  # type: ignore[no-untyped-def]
+        """Log the message along with an appropriate representation of `kwargs`.
+
+        An example of the representation of `kwargs` might be the string form of the objects on separate lines.
+        The `stack_level` argument should be passed to the Python-standard-library log call so that the correct file
+        and line number are used for the source of the log line (the caller, not this library).
+ """ + raise NotImplementedError + + def _safely_log(self, level_int: int, message: str, **kwargs) -> None: # type: ignore[no-untyped-def] + """Calls `.log_implementation` with a try-except so that bugs do not cause a fatal exit.""" + if not self.standard_library_logger.isEnabledFor(level_int): + return + try: + self.log_implementation(level_int, message, stack_level=4, **kwargs) + except Exception: + logging.exception( + f"Got an exception while using logger instance {self}. " + f"Since there might be an issue with the logger instance, this message is logged using the root logger." + ) + + def debug(self, message: str, **kwargs) -> None: # type: ignore[no-untyped-def] # noqa: D102 + self._safely_log(logging.DEBUG, message, **kwargs) + + def info(self, message: str, **kwargs) -> None: # type: ignore[no-untyped-def] # noqa: D102 + self._safely_log(logging.INFO, message, **kwargs) + + def warning(self, message: str, **kwargs) -> None: # type: ignore[no-untyped-def] # noqa: D102 + self._safely_log(logging.WARNING, message, **kwargs) + + def error(self, message: str, **kwargs) -> None: # type: ignore[no-untyped-def] # noqa: D102 + self._safely_log(logging.ERROR, message, **kwargs) + + def critical(self, message: str, **kwargs) -> None: # type: ignore[no-untyped-def] # noqa: D102 + self._safely_log(logging.CRITICAL, message, **kwargs) + + def fatal(self, message: str, **kwargs) -> None: # type: ignore[no-untyped-def] # noqa: D102 + self._safely_log(logging.FATAL, message, **kwargs) + + @property + @abstractmethod + def standard_library_logger(self) -> logging.Logger: + """Return the Python-standard-library logger that this uses.""" + raise NotImplementedError + + +class MetricFlowLoggerFactory(ABC): + """Interface for creating logger instances.""" + + @abstractmethod + def get_logger(self, module_name: str) -> MetricFlowLogger: + """Create a logger for the given module. Similar to the standard `logging.getLogger(__name__)`.""" + raise NotImplementedError diff --git a/metricflow-semantics/metricflow_semantics/mf_logging/pretty_printing_logger.py b/metricflow-semantics/metricflow_semantics/mf_logging/pretty_printing_logger.py new file mode 100644 index 0000000000..e60ff3cb35 --- /dev/null +++ b/metricflow-semantics/metricflow_semantics/mf_logging/pretty_printing_logger.py @@ -0,0 +1,42 @@ +from __future__ import annotations + +import logging + +from typing_extensions import override + +from metricflow_semantics.mf_logging.mf_logger import MetricFlowLogger, MetricFlowLoggerFactory +from metricflow_semantics.mf_logging.pretty_print import mf_pformat_many + + +class PrettyPrintingLogger(MetricFlowLogger): + """Logger that pretty-prints the objects passed as keyword arguments. + + e.g. + logger.debug("Found matching path.", matching_path=matching_path, source_nodes=source_nodes) + + -> + + Found matching path. 
+          matching_path: [1, 2, 3]
+          source_nodes: ["a", "b", "c"]
+    """
+
+    def __init__(self, python_logger: logging.Logger) -> None:  # noqa: D107
+        self._standard_library_logger = python_logger
+
+    @override
+    def log_implementation(self, level_int: int, message: str, stack_level: int, **kwargs) -> None:  # type: ignore[no-untyped-def]
+        self._standard_library_logger.log(level_int, mf_pformat_many(message, kwargs), stacklevel=stack_level)
+
+    @property
+    @override
+    def standard_library_logger(self) -> logging.Logger:
+        return self._standard_library_logger
+
+
+class PrettyPrintingLoggerFactory(MetricFlowLoggerFactory):
+    """Factory for creating `PrettyPrintingLogger`."""
+
+    @override
+    def get_logger(self, module_name: str) -> MetricFlowLogger:
+        return PrettyPrintingLogger(logging.getLogger(module_name))
diff --git a/metricflow-semantics/metricflow_semantics/test_helpers/logging_helpers.py b/metricflow-semantics/metricflow_semantics/test_helpers/logging_helpers.py
new file mode 100644
index 0000000000..c08a7018e5
--- /dev/null
+++ b/metricflow-semantics/metricflow_semantics/test_helpers/logging_helpers.py
@@ -0,0 +1,55 @@
+from __future__ import annotations
+
+import logging
+from contextlib import contextmanager
+from typing import Iterator, List, Optional, Sequence, Tuple
+
+from typing_extensions import override
+
+from metricflow_semantics.mf_logging.logger_configuration import mf_get_logger
+from metricflow_semantics.mf_logging.mf_logger import MetricFlowLogger
+
+
+class RecordingLogHandler(logging.Handler):
+    """A log handler that stores the emitted log records so that they can be checked in tests."""
+
+    def __init__(self) -> None:  # noqa: D107
+        super().__init__()
+        self._log_records: List[logging.LogRecord] = []
+
+    @override
+    def emit(self, record: logging.LogRecord) -> None:
+        self._log_records.append(record)
+
+    @property
+    def log_records(self) -> Sequence[logging.LogRecord]:
+        """Return the log records seen by the handler."""
+        return self._log_records
+
+    def get_last_message(self) -> Optional[str]:
+        """Return the message in the last log record, or None if no records have been seen."""
+        if len(self._log_records) == 0:
+            return None
+
+        return self._log_records[-1].message
+
+
+@contextmanager
+def recorded_logging_context(logging_level_int: int) -> Iterator[Tuple[MetricFlowLogger, RecordingLogHandler]]:
+    """Context manager that yields a logger (at the given level) and an associated handler to check what was logged.
+
+    The handler records all log records emitted at or above the given level during this context.
+    Log propagation could be disabled in this context to clean up test log output, but some issues need to be resolved.
+    """
+    mf_logger = mf_get_logger()
+    standard_logger = mf_logger.standard_library_logger
+
+    previous_logging_level = standard_logger.level
+    handler = RecordingLogHandler()
+    standard_logger.addHandler(handler)
+    try:
+        standard_logger.setLevel(logging_level_int)
+        yield mf_logger, handler
+    finally:
+        standard_logger.removeHandler(handler)
+        standard_logger.setLevel(previous_logging_level)
diff --git a/metricflow-semantics/tests_metricflow_semantics/mf_logging/__init__.py b/metricflow-semantics/tests_metricflow_semantics/mf_logging/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/metricflow-semantics/tests_metricflow_semantics/mf_logging/test_mf_logger.py b/metricflow-semantics/tests_metricflow_semantics/mf_logging/test_mf_logger.py
new file mode 100644
index 0000000000..867a49dfc7
--- /dev/null
+++ b/metricflow-semantics/tests_metricflow_semantics/mf_logging/test_mf_logger.py
@@ -0,0 +1,76 @@
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+
+import pytest
+from metricflow_semantics.formatting.formatting_helpers import mf_dedent
+from metricflow_semantics.mf_logging.mf_logger import MetricFlowLogger
+from metricflow_semantics.test_helpers.logging_helpers import RecordingLogHandler, recorded_logging_context
+
+
+def test_log_file_name_and_line_number() -> None:
+    """Tests that the file and line number associated with the log call point to the source of the call.
+
+    The source is the file / line where the `.info(...)` or similar call was made, not within the MF logging library.
+    """
+    with recorded_logging_context(logging.INFO) as (logger, handler):
+        log_line_number = 19
+        logger.info(f"Example message from line {log_line_number} in {__file__}")
+        assert len(handler.log_records) == 1, f"There should have been 1 record but got: {handler.log_records}"
+
+        record = handler.log_records[0]
+        assert record.filename == Path(__file__).name
+        assert (
+            record.lineno == log_line_number
+        ), f"The log line should have been {log_line_number} but got {record.lineno}"
+
+
+@pytest.mark.skip(reason="Running this prints error messages in the test results.")
+def test_log_levels() -> None:
+    """Test that the logger works for different logging levels."""
+    for level_int in (logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR, logging.CRITICAL, logging.FATAL):
+        with recorded_logging_context(level_int) as (logger, handler):
+            # Log a message at the appropriate level.
+            message = f"Example message at level {level_int} in {__file__}"
+            if level_int == logging.DEBUG:
+                logger.debug(message)
+            elif level_int == logging.INFO:
+                logger.info(message)
+            elif level_int == logging.WARNING:
+                logger.warning(message)
+            elif level_int == logging.ERROR:
+                logger.error(message)
+            elif level_int == logging.CRITICAL:
+                logger.critical(message)
+            elif level_int == logging.FATAL:
+                logger.fatal(message)
+            else:
+                raise ValueError(f"Logging level not handled {level_int=}")
+
+            # Check that the log record is correct.
+ assert ( + len(handler.log_records) == 1 + ), f"There should have been 1 record for {level_int=} but got: {handler.log_records}" + + record = handler.log_records[0] + assert record.levelno == level_int, f"Expected a record with {level_int=} but got {record.levelno}" + assert record.message == message + assert record.levelno == level_int + + +def test_log_kwargs() -> None: + """Test that objects included via keyword args are formatted.""" + logger: MetricFlowLogger + handler: RecordingLogHandler + with recorded_logging_context(logging.DEBUG) as (logger, handler): + logger.debug("Found candidates.", matches=[1, 2, 3], parameters={"field_0": "value_0", "field_1": "value_1"}) + assert handler.get_last_message() == mf_dedent( + """ + Found candidates. + matches: + [1, 2, 3] + parameters: + {'field_0': 'value_0', 'field_1': 'value_1'} + """ + )
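Usage sketch (not part of the diff above): the configurable factory added here could be used to route the logged objects to a different backend, e.g. by passing them through the standard library's `extra` parameter for JSON-emitting handlers, as hinted at in the `MetricFlowLoggerConfiguration` docstring. This is a minimal illustration only; the `ExtraFieldsLogger` / `ExtraFieldsLoggerFactory` names and the `mf_fields` key are hypothetical and do not exist in this diff.

from __future__ import annotations

import logging

from typing_extensions import override

from metricflow_semantics.mf_logging.logger_configuration import MetricFlowLoggerConfiguration, mf_get_logger
from metricflow_semantics.mf_logging.mf_logger import MetricFlowLogger, MetricFlowLoggerFactory


class ExtraFieldsLogger(MetricFlowLogger):
    """Hypothetical logger that forwards keyword arguments via `extra` instead of formatting them into the message."""

    def __init__(self, python_logger: logging.Logger) -> None:
        self._standard_library_logger = python_logger

    @override
    def log_implementation(self, level_int: int, message: str, stack_level: int, **kwargs) -> None:  # type: ignore[no-untyped-def]
        # `_safely_log` only calls this when the level is enabled, so `kwargs` are not processed for disabled levels.
        self._standard_library_logger.log(level_int, message, stacklevel=stack_level, extra={"mf_fields": kwargs})

    @property
    @override
    def standard_library_logger(self) -> logging.Logger:
        return self._standard_library_logger


class ExtraFieldsLoggerFactory(MetricFlowLoggerFactory):
    """Hypothetical factory that creates `ExtraFieldsLogger` instances."""

    @override
    def get_logger(self, module_name: str) -> MetricFlowLogger:
        return ExtraFieldsLogger(logging.getLogger(module_name))


# Swap the globally configured factory, then log as usual through the new interface.
MetricFlowLoggerConfiguration.set_logger_factory(ExtraFieldsLoggerFactory())
logger = mf_get_logger()
logger.info("Found matching path.", matching_path=[1, 2, 3], source_nodes=["a", "b", "c"])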