diff --git a/chord_metadata_service/chord/views_data_types.py b/chord_metadata_service/chord/views_data_types.py index 57fc9a22c..2df306dfe 100644 --- a/chord_metadata_service/chord/views_data_types.py +++ b/chord_metadata_service/chord/views_data_types.py @@ -198,9 +198,9 @@ async def dataset_data_type(request: DrfRequest, dataset_id: str, data_type: str await qs.adelete() - logger.info(f"Running cleanup after clearing data type {data_type} in dataset {dataset_id} via API") + await logger.ainfo(f"Running cleanup after clearing data type {data_type} in dataset {dataset_id} via API") n_removed = await run_all_cleanup() - logger.info(f"Cleanup: removed {n_removed} objects in total") + await logger.ainfo(f"Cleanup: removed {n_removed} objects in total") return Response(status=status.HTTP_204_NO_CONTENT) diff --git a/chord_metadata_service/logger.py b/chord_metadata_service/logger.py index a1ee86719..8c0565a17 100644 --- a/chord_metadata_service/logger.py +++ b/chord_metadata_service/logger.py @@ -1,7 +1,7 @@ -import logging +import structlog __all__ = [ "logger", ] -logger = logging.getLogger("katsu") +logger = structlog.getLogger("katsu") diff --git a/chord_metadata_service/metadata/asgi.py b/chord_metadata_service/metadata/asgi.py index 7cf938cc4..68b090f2f 100644 --- a/chord_metadata_service/metadata/asgi.py +++ b/chord_metadata_service/metadata/asgi.py @@ -7,7 +7,7 @@ https://docs.djangoproject.com/en/4.1/howto/deployment/asgi/ """ -import logging +import structlog import os from django.conf import settings @@ -15,7 +15,7 @@ os.environ.setdefault("DJANGO_SETTINGS_MODULE", "chord_metadata_service.metadata.settings") -logger = logging.getLogger(__name__) +logger = structlog.get_logger(__name__) # debugger section if settings.DEBUG: diff --git a/chord_metadata_service/metadata/settings.py b/chord_metadata_service/metadata/settings.py index 2ea7a41ba..35a0633e0 100644 --- a/chord_metadata_service/metadata/settings.py +++ b/chord_metadata_service/metadata/settings.py @@ -14,6 +14,7 @@ import sys import logging import json +import structlog from bento_lib.service_info.types import GA4GHServiceType from urllib.parse import quote, urlparse @@ -24,8 +25,6 @@ load_dotenv() -logging.getLogger().setLevel(logging.INFO) - # Build paths inside the project like this: os.path.join(BASE_DIR, ...) BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) @@ -144,6 +143,7 @@ 'corsheaders', 'django_filters', + 'django_structlog', 'rest_framework', 'adrf', 'drf_spectacular', @@ -159,6 +159,7 @@ 'chord_metadata_service.authz.middleware.AuthzMiddleware', 'django.contrib.messages.middleware.MessageMiddleware', 'django.middleware.clickjacking.XFrameOptionsMiddleware', + # 'django_structlog.middlewares.RequestMiddleware', ] CORS_ALLOWED_ORIGINS = [orig.strip() for orig in os.environ.get("CORS_ORIGINS", "").split(";") if orig.strip()] @@ -186,12 +187,30 @@ ASGI_APPLICATION = 'chord_metadata_service.metadata.asgi.application' WSGI_APPLICATION = 'chord_metadata_service.metadata.wsgi.application' +# Logging -------------------------------------------------------------------------------------------------------------- + +STRUCTLOG_COMMON_PROCESSORS = [ + structlog.contextvars.merge_contextvars, + structlog.stdlib.add_logger_name, + structlog.stdlib.add_log_level, + # format events (messages) with % using positional arguments, like Python's standard logging library: + structlog.stdlib.PositionalArgumentsFormatter(), + structlog.stdlib.ExtraAdder(), + structlog.processors.TimeStamper(fmt="iso"), +] + LOGGING = { 'version': 1, 'disable_existing_loggers': False, 'formatters': { 'console': { - 'format': '%(asctime)s %(name)-12s %(levelname)-8s %(message)s', + '()': structlog.stdlib.ProcessorFormatter, + 'foreign_pre_chain': STRUCTLOG_COMMON_PROCESSORS, + 'processors': [ + structlog.stdlib.ProcessorFormatter.remove_processors_meta, + structlog.dev.ConsoleRenderer(), + ], + # 'format': '%(asctime)s %(name)-12s %(levelname)-8s %(message)s', }, }, 'handlers': { @@ -205,13 +224,43 @@ 'level': LOG_LEVEL, 'handlers': ['console'], }, + 'asyncio': { + 'handlers': [], + 'propogate': True, + }, + 'daphne': { + # suppress daphne's DEBUG log spam + 'level': 'INFO', + 'handlers': [], + 'propogate': True, + }, + 'daphne.server': {'handlers': [], 'propogate': True}, + 'django': {'handlers': [], 'propogate': True}, + # 'django.channels.server': { + # 'handlers': [], + # 'propogate': False, + # }, + 'django.request': {'handlers': [], 'propogate': True}, + 'katsu': { + # 'level': LOG_LEVEL, + 'handlers': [], + 'propogate': True, + } }, } +structlog.configure( + processors=STRUCTLOG_COMMON_PROCESSORS + [structlog.stdlib.ProcessorFormatter.wrap_for_formatter], + logger_factory=structlog.stdlib.LoggerFactory(), + cache_logger_on_first_use=True, +) + # if we are running the test suite, only log CRITICAL messages if len(sys.argv) > 1 and sys.argv[1] == 'test': logging.disable(logging.CRITICAL) +# ---------------------------------------------------------------------------------------------------------------------- + # function to read postgres password file def get_secret(path): diff --git a/chord_metadata_service/metadata/wsgi.py b/chord_metadata_service/metadata/wsgi.py index a2d077f69..73e6653d3 100644 --- a/chord_metadata_service/metadata/wsgi.py +++ b/chord_metadata_service/metadata/wsgi.py @@ -7,7 +7,7 @@ https://docs.djangoproject.com/en/4.1/howto/deployment/wsgi/ """ -import logging +import structlog import os from django.conf import settings @@ -15,7 +15,7 @@ os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'chord_metadata_service.metadata.settings') -logger = logging.getLogger(__name__) +logger = structlog.get_logger(__name__) # debugger section if settings.DEBUG: diff --git a/chord_metadata_service/patients/api_views.py b/chord_metadata_service/patients/api_views.py index f90e2ddc8..7dbff53d9 100644 --- a/chord_metadata_service/patients/api_views.py +++ b/chord_metadata_service/patients/api_views.py @@ -325,26 +325,30 @@ async def get(self, request, *_args, **_kwargs): authz_middleware.mark_authz_done(request) return Response(dres.NO_PUBLIC_DATA_AVAILABLE, status=status.HTTP_404_NOT_FOUND) except ValidationError as e: - logger.info(f"Public individuals endpoint recieved validation error: {e} ({repr(discovery_scope)})") + await logger.ainfo(f"Public individuals endpoint recieved validation error: {e} ({repr(discovery_scope)})") authz_middleware.mark_authz_done(request) return Response(errors.bad_request_error( *(e.error_list if hasattr(e, "error_list") else e.error_dict.items()), ), status=status.HTTP_400_BAD_REQUEST) ind_qct = thresholded_count(await filtered_qs.acount(), discovery, dt_perms_pheno) + threshold = get_threshold(discovery, dt_perms_pheno) + + # structured event logging for public search: embed search details + await logger.ainfo( + "public individuals search", + queried_fields=queried_fields, + individual_count=ind_qct, + threshold=threshold, + sub_threshold=ind_qct <= threshold, + ) if ind_qct == 0 and not perm_pheno_query_data: # 0 count means insufficient data if we only have counts permissions, but means a true 0 if we have full # data permissions. - logger.info( - f"Public individuals endpoint queried fields {queried_fields} which resulted in " - f"sub-threshold count: {ind_qct} <= {get_threshold(discovery, dt_perms_pheno)} " - f"({repr(discovery_scope)})") authz_middleware.mark_authz_done(request) return Response(dres.INSUFFICIENT_DATA_AVAILABLE) - logger.info(f"Public individuals search queried fields {queried_fields}, resulting in {ind_qct} individuals") - (tissues_count, sampled_tissues), (experiments_count, experiment_types) = await asyncio.gather( individual_biosample_tissue_stats(filtered_qs, discovery, dt_perms_pheno), individual_experiment_type_stats(filtered_qs, discovery, dt_perms_exp), diff --git a/poetry.lock b/poetry.lock index 4a5cbc28c..de1ef137e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -817,6 +817,41 @@ files = [ [package.dependencies] Django = ">=4.2" +[[package]] +name = "django-ipware" +version = "7.0.1" +description = "A Django application to retrieve user's IP address" +optional = false +python-versions = ">=3.8" +files = [ + {file = "django-ipware-7.0.1.tar.gz", hash = "sha256:d9ec43d2bf7cdf216fed8d494a084deb5761a54860a53b2e74346a4f384cff47"}, + {file = "django_ipware-7.0.1-py2.py3-none-any.whl", hash = "sha256:db16bbee920f661ae7f678e4270460c85850f03c6761a4eaeb489bdc91f64709"}, +] + +[package.dependencies] +python-ipware = ">=2.0.3" + +[[package]] +name = "django-structlog" +version = "9.0.0" +description = "Structured Logging for Django" +optional = false +python-versions = ">=3.9" +files = [ + {file = "django_structlog-9.0.0-py3-none-any.whl", hash = "sha256:8c32097b3f09b2727b746d6cdf4f50565c62fd040b3a0c98cb731de0b380b854"}, + {file = "django_structlog-9.0.0.tar.gz", hash = "sha256:0ada1ac0fa4e90a499fc9ce22fc164830304e7219cab12d3b5ef9c2f2967ff5b"}, +] + +[package.dependencies] +asgiref = ">=3.6.0" +django = ">=4.2" +django-ipware = ">=6.0.2" +structlog = ">=21.4.0" + +[package.extras] +celery = ["celery (>=5.1)"] +commands = ["django-extensions (>=1.4.9)"] + [[package]] name = "djangorestframework" version = "3.15.2" @@ -1863,6 +1898,20 @@ files = [ [package.extras] cli = ["click (>=5.0)"] +[[package]] +name = "python-ipware" +version = "3.0.0" +description = "A Python package to retrieve user's IP address" +optional = false +python-versions = ">=3.7" +files = [ + {file = "python_ipware-3.0.0-py3-none-any.whl", hash = "sha256:fc936e6e7ec9fcc107f9315df40658f468ac72f739482a707181742882e36b60"}, + {file = "python_ipware-3.0.0.tar.gz", hash = "sha256:9117b1c4dddcb5d5ca49e6a9617de2fc66aec2ef35394563ac4eecabdf58c062"}, +] + +[package.extras] +dev = ["coverage[toml]", "coveralls (>=3.3,<4.0)", "ruff", "twine"] + [[package]] name = "pyyaml" version = "6.0.2" @@ -2207,6 +2256,23 @@ files = [ dev = ["build", "hatch"] doc = ["sphinx"] +[[package]] +name = "structlog" +version = "24.4.0" +description = "Structured Logging for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "structlog-24.4.0-py3-none-any.whl", hash = "sha256:597f61e80a91cc0749a9fd2a098ed76715a1c8a01f73e336b746504d1aad7610"}, + {file = "structlog-24.4.0.tar.gz", hash = "sha256:b27bfecede327a6d2da5fbc96bd859f114ecc398a6389d664f62085ee7ae6fc4"}, +] + +[package.extras] +dev = ["freezegun (>=0.2.8)", "mypy (>=1.4)", "pretend", "pytest (>=6.0)", "pytest-asyncio (>=0.17)", "rich", "simplejson", "twisted"] +docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-mermaid", "sphinxext-opengraph", "twisted"] +tests = ["freezegun (>=0.2.8)", "pretend", "pytest (>=6.0)", "pytest-asyncio (>=0.17)", "simplejson"] +typing = ["mypy (>=1.4)", "rich", "twisted"] + [[package]] name = "tabulate" version = "0.9.0" @@ -2420,13 +2486,13 @@ zstd = ["zstandard (>=0.18.0)"] [[package]] name = "virtualenv" -version = "20.28.1" +version = "20.29.0" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.8" files = [ - {file = "virtualenv-20.28.1-py3-none-any.whl", hash = "sha256:412773c85d4dab0409b83ec36f7a6499e72eaf08c80e81e9576bca61831c71cb"}, - {file = "virtualenv-20.28.1.tar.gz", hash = "sha256:5d34ab240fdb5d21549b76f9e8ff3af28252f5499fb6d6f031adac4e5a8c5329"}, + {file = "virtualenv-20.29.0-py3-none-any.whl", hash = "sha256:c12311863497992dc4b8644f8ea82d3b35bb7ef8ee82e6630d76d0197c39baf9"}, + {file = "virtualenv-20.29.0.tar.gz", hash = "sha256:6345e1ff19d4b1296954cee076baaf58ff2a12a84a338c62b02eda39f20aa982"}, ] [package.dependencies] @@ -2619,4 +2685,4 @@ testing = ["coverage[toml]", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = "^3.10.0" -content-hash = "2ada094c47dccd63ac88249e8b291ca7cfa3e0d2e02c6a724694ba8730bcfff8" +content-hash = "c728db63cfb5870a43f9bc4a1a31ec84fe939134152240f4859f818f5ad5be94" diff --git a/pyproject.toml b/pyproject.toml index ed0dff7c3..97fb18406 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,7 @@ uritemplate = "^4.1.1" pyhumps = "^3.8.0" adrf = "^0.1.7" referencing = "^0.32.1" +django-structlog = "^9.0.0" [tool.poetry.group.dev.dependencies] coverage = "^7.4.0"