diff --git a/.coveragerc b/.coveragerc index 2404b60..a7e7bad 100644 --- a/.coveragerc +++ b/.coveragerc @@ -8,3 +8,4 @@ omit = *admin.py */static/* */templates/* + **/tests/* diff --git a/Makefile b/Makefile index 08ada5f..5273bcc 100644 --- a/Makefile +++ b/Makefile @@ -72,6 +72,10 @@ test: clean ## run tests in the current virtualenv diff_cover: test ## find diff lines that need test coverage diff-cover coverage.xml +format: + isort platform_plugin_aspects + black . + test-all: quality pii_check ## run tests on every supported Python/Django combination tox tox -e docs diff --git a/docs/conf.py b/docs/conf.py index caf4d87..e2fb66a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -33,15 +33,15 @@ def get_version(*file_paths): version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", version_file, re.M) if version_match: return version_match.group(1) - raise RuntimeError('Unable to find version string.') + raise RuntimeError("Unable to find version string.") REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.append(REPO_ROOT) -VERSION = get_version('../platform_plugin_aspects', '__init__.py') +VERSION = get_version("../platform_plugin_aspects", "__init__.py") # Configure Django for autodoc usage -os.environ['DJANGO_SETTINGS_MODULE'] = 'test_settings' +os.environ["DJANGO_SETTINGS_MODULE"] = "test_settings" django_setup() # If extensions (or modules to document with autodoc) are in another directory, @@ -62,46 +62,46 @@ def get_version(*file_paths): # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.doctest', - 'sphinx.ext.intersphinx', - 'sphinx.ext.ifconfig', - 'sphinx.ext.napoleon' + "sphinx.ext.autodoc", + "sphinx.ext.doctest", + "sphinx.ext.intersphinx", + "sphinx.ext.ifconfig", + "sphinx.ext.napoleon", ] # A list of warning types to suppress arbitrary warning messages. suppress_warnings = [ - 'image.nonlocal_uri', + "image.nonlocal_uri", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. # # source_encoding = 'utf-8-sig' # The top level toctree document. -top_level_doc = 'index' +top_level_doc = "index" # General information about the project. -project = 'platform-plugin-aspects' -copyright = f'{datetime.now().year}, Axim Collaborative, Inc.' # pylint: disable=redefined-builtin -author = 'Axim Collaborative, Inc.' -project_title = 'platform-plugin-aspects' +project = "platform-plugin-aspects" +copyright = f"{datetime.now().year}, Axim Collaborative, Inc." # pylint: disable=redefined-builtin +author = "Axim Collaborative, Inc." +project_title = "platform-plugin-aspects" documentation_title = f"{project_title}" # Set display_github to False if you don't want "edit on Github" button html_context = { "display_github": True, # Integrate GitHub "github_user": "edx", # Username - "github_repo": 'platform-plugin-aspects', # Repo name + "github_repo": "platform-plugin-aspects", # Repo name "github_version": "main", # Version "conf_py_path": "/docs/", # Path in the checkout to the docs root } @@ -120,7 +120,7 @@ def get_version(*file_paths): # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. 
-language = 'en' +language = "en" # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: @@ -135,12 +135,12 @@ def get_version(*file_paths): # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path exclude_patterns = [ - '_build', - 'Thumbs.db', - '.DS_Store', + "_build", + "Thumbs.db", + ".DS_Store", # This file is intended as a guide for developers browsing the source tree, # not to be rendered into the output docs. - 'decisions/README.rst', + "decisions/README.rst", ] # The reST default role (used for this markup: `text`) to use for all @@ -163,7 +163,7 @@ def get_version(*file_paths): # show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. # modindex_common_prefix = [] @@ -179,7 +179,7 @@ def get_version(*file_paths): # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'sphinx_book_theme' +html_theme = "sphinx_book_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the @@ -187,7 +187,7 @@ def get_version(*file_paths): # html_theme_options = { "repository_url": "https://github.com/openedx/platform-plugin-aspects", - "repository_branch": 'main', + "repository_branch": "main", "path_to_docs": "docs/", "home_page_in_toc": True, "use_repository_button": True, @@ -214,7 +214,7 @@ def get_version(*file_paths): rel="license" href="https://creativecommons.org/licenses/by-sa/4.0/" >Creative Commons Attribution-ShareAlike 4.0 International License. - """ + """, } # Add any paths that contain custom themes here, relative to this directory. @@ -244,7 +244,7 @@ def get_version(*file_paths): # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied @@ -324,7 +324,7 @@ def get_version(*file_paths): # html_search_scorer = 'scorer.js' # Output file base name for HTML help builder. -htmlhelp_basename = f'{project}doc' +htmlhelp_basename = f"{project}doc" # -- Options for LaTeX output --------------------------------------------- @@ -332,15 +332,12 @@ def get_version(*file_paths): # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # # 'preamble': '', - # Latex figure (float) alignment # # 'figure_align': 'htbp', @@ -349,10 +346,9 @@ def get_version(*file_paths): # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). 
-latex_target = f'{project}.tex' +latex_target = f"{project}.tex" latex_documents = [ - (top_level_doc, latex_target, documentation_title, - author, 'manual'), + (top_level_doc, latex_target, documentation_title, author, "manual"), ] # The name of an image file (relative to this directory) to place at the top of @@ -392,10 +388,7 @@ def get_version(*file_paths): # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (top_level_doc, project_title, documentation_title, - [author], 1) -] +man_pages = [(top_level_doc, project_title, documentation_title, [author], 1)] # If true, show URL addresses after external links. # @@ -408,9 +401,15 @@ def get_version(*file_paths): # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (top_level_doc, project_title, documentation_title, - author, project_title, 'Aspects plugins for edx-platform', - 'Miscellaneous'), + ( + top_level_doc, + project_title, + documentation_title, + author, + project_title, + "Aspects plugins for edx-platform", + "Miscellaneous", + ), ] # Documents to append as an appendix to all manuals. @@ -484,7 +483,7 @@ def get_version(*file_paths): # epub_post_files = [] # A list of files that should not be packed into the epub file. -epub_exclude_files = ['search.html'] +epub_exclude_files = ["search.html"] # The depth of the table of contents in toc.ncx. # @@ -517,9 +516,12 @@ def get_version(*file_paths): # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = { - 'python': ('https://docs.python.org/3.8', None), - 'django': ('https://docs.djangoproject.com/en/3.2/', 'https://docs.djangoproject.com/en/3.2/_objects/'), - 'model_utils': ('https://django-model-utils.readthedocs.io/en/latest/', None), + "python": ("https://docs.python.org/3.8", None), + "django": ( + "https://docs.djangoproject.com/en/3.2/", + "https://docs.djangoproject.com/en/3.2/_objects/", + ), + "model_utils": ("https://django-model-utils.readthedocs.io/en/latest/", None), } @@ -531,17 +533,24 @@ def on_init(app): # pylint: disable=unused-argument avoid checking in the generated reStructuredText files. 
""" docs_path = os.path.abspath(os.path.dirname(__file__)) - root_path = os.path.abspath(os.path.join(docs_path, '..')) - apidoc_path = 'sphinx-apidoc' - if hasattr(sys, 'real_prefix'): # Check to see if we are in a virtualenv + root_path = os.path.abspath(os.path.join(docs_path, "..")) + apidoc_path = "sphinx-apidoc" + if hasattr(sys, "real_prefix"): # Check to see if we are in a virtualenv # If we are, assemble the path manually - bin_path = os.path.abspath(os.path.join(sys.prefix, 'bin')) + bin_path = os.path.abspath(os.path.join(sys.prefix, "bin")) apidoc_path = os.path.join(bin_path, apidoc_path) - check_call([apidoc_path, '-o', docs_path, os.path.join(root_path, 'platform_plugin_aspects'), - os.path.join(root_path, 'platform_plugin_aspects/migrations')]) + check_call( + [ + apidoc_path, + "-o", + docs_path, + os.path.join(root_path, "platform_plugin_aspects"), + os.path.join(root_path, "platform_plugin_aspects/migrations"), + ] + ) def setup(app): """Sphinx extension: run sphinx-apidoc.""" - event = 'builder-inited' + event = "builder-inited" app.connect(event, on_init) diff --git a/manage.py b/manage.py index c4c1f40..f45575c 100644 --- a/manage.py +++ b/manage.py @@ -8,8 +8,8 @@ PWD = os.path.abspath(os.path.dirname(__file__)) -if __name__ == '__main__': - os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'test_settings') +if __name__ == "__main__": + os.environ.setdefault("DJANGO_SETTINGS_MODULE", "test_settings") sys.path.append(PWD) try: from django.core.management import execute_from_command_line diff --git a/platform_plugin_aspects/apps.py b/platform_plugin_aspects/apps.py index 08c092b..fa2b8a4 100644 --- a/platform_plugin_aspects/apps.py +++ b/platform_plugin_aspects/apps.py @@ -3,6 +3,7 @@ """ from django.apps import AppConfig +from edx_django_utils.plugins import PluginSettings, PluginSignals class PlatformPluginAspectsConfig(AppConfig): @@ -13,18 +14,41 @@ class PlatformPluginAspectsConfig(AppConfig): name = "platform_plugin_aspects" plugin_app = { - "settings_config": { + PluginSettings.CONFIG: { "lms.djangoapp": { - "common": {"relative_path": "settings.common"}, - "production": {"relative_path": "settings.production"}, + "production": {PluginSettings.RELATIVE_PATH: "settings.production"}, + "common": {PluginSettings.RELATIVE_PATH: "settings.common"}, }, "cms.djangoapp": { - "common": {"relative_path": "settings.common"}, - "production": {"relative_path": "settings.production"}, + "production": {PluginSettings.RELATIVE_PATH: "settings.production"}, + "common": {PluginSettings.RELATIVE_PATH: "settings.common"}, }, }, + # Configuration setting for Plugin Signals for this app. + PluginSignals.CONFIG: { + # Configure the Plugin Signals for each Project Type, as needed. + "cms.djangoapp": { + # List of all plugin Signal receivers for this app and project type. + PluginSignals.RECEIVERS: [ + { + # The name of the app's signal receiver function. + PluginSignals.RECEIVER_FUNC_NAME: "receive_course_publish", + # The full path to the module where the signal is defined. 
+                    PluginSignals.SIGNAL_PATH: "xmodule.modulestore.django.COURSE_PUBLISHED",
+                }
+            ],
+        }
+    },
     }
 
     def ready(self):
         """Load modules of Aspects."""
-        from platform_plugin_aspects.extensions import filters  # pylint: disable=unused-import, import-outside-toplevel
+        super().ready()
+        from platform_plugin_aspects import (  # pylint: disable=import-outside-toplevel, unused-import
+            signals,
+            sinks,
+            tasks,
+        )
+        from platform_plugin_aspects.extensions import (  # pylint: disable=unused-import, import-outside-toplevel
+            filters,
+        )
diff --git a/platform_plugin_aspects/conf/locale/config.yaml b/platform_plugin_aspects/conf/locale/config.yaml
index bf2539e..d1c618e 100644
--- a/platform_plugin_aspects/conf/locale/config.yaml
+++ b/platform_plugin_aspects/conf/locale/config.yaml
@@ -72,7 +72,7 @@ locales:
     - te # Telugu
     - th # Thai
     - tr_TR # Turkish (Turkey)
-    - uk # Ukranian
+    - uk # Ukrainian
     - ur # Urdu
     - uz # Uzbek
     - vi # Vietnamese
diff --git a/platform_plugin_aspects/extensions/filters.py b/platform_plugin_aspects/extensions/filters.py
index 1ba6d23..60b7959 100644
--- a/platform_plugin_aspects/extensions/filters.py
+++ b/platform_plugin_aspects/extensions/filters.py
@@ -37,9 +37,7 @@ def run_filter(
 
     filters = ASPECTS_SECURITY_FILTERS_FORMAT + extra_filters_format
 
-    context = generate_superset_context(
-        context, dashboard_uuid, filters
-    )
+    context = generate_superset_context(context, dashboard_uuid, filters)
 
     template = Template(self.resource_string("static/html/superset.html"))
     html = template.render(Context(context))
diff --git a/platform_plugin_aspects/extensions/tests/__init__.py b/platform_plugin_aspects/extensions/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/platform_plugin_aspects/tests/test_filters.py b/platform_plugin_aspects/extensions/tests/test_filters.py
similarity index 100%
rename from platform_plugin_aspects/tests/test_filters.py
rename to platform_plugin_aspects/extensions/tests/test_filters.py
diff --git a/platform_plugin_aspects/management/__init__.py b/platform_plugin_aspects/management/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/platform_plugin_aspects/management/commands/__init__.py b/platform_plugin_aspects/management/commands/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/platform_plugin_aspects/management/commands/dump_data_to_clickhouse.py b/platform_plugin_aspects/management/commands/dump_data_to_clickhouse.py
new file mode 100644
index 0000000..ae6594b
--- /dev/null
+++ b/platform_plugin_aspects/management/commands/dump_data_to_clickhouse.py
@@ -0,0 +1,207 @@
+"""
+Management command for exporting the modulestore to ClickHouse.
+
+Example usages (see --help for more options):
+
+    # Dump all objects published since last dump.
+    # Use connection parameters from `settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG`:
+    python manage.py cms dump_data_to_clickhouse --object user_profile
+
+    # Specify certain objects instead of dumping all of them.
+    # Use connection parameters from `settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG`.
+    python manage.py cms dump_data_to_clickhouse --object user_profile --ids 123 124 125
+
+    # Dump a limited number of objects to prevent stress on production systems
+    python manage.py cms dump_data_to_clickhouse --limit 1000
+"""
+
+import logging
+import time
+from textwrap import dedent
+
+from django.core.management.base import BaseCommand, CommandError
+
+from platform_plugin_aspects.sinks.base_sink import ModelBaseSink
+
+log = logging.getLogger(__name__)
+
+
+def dump_target_objects_to_clickhouse(
+    sink=None,
+    start_pk=None,
+    object_ids=None,
+    objects_to_skip=None,
+    force=False,
+    limit=None,
+    batch_size=1000,
+    sleep_time=10,
+):
+    """
+    Iterates through a list of objects in the ORM, serializes them to CSV,
+    then sends them to ClickHouse in batches.
+
+    Arguments:
+        force: serialize the objects even if they've been recently
+            serialized
+
+    Returns:
+        None. Skipped objects and progress are reported via the log.
+    """
+
+    count = 0
+    skipped_objects = []
+    objects_to_submit = []
+
+    for obj, should_be_dumped, reason in sink.fetch_target_items(
+        start_pk, object_ids, objects_to_skip, force, batch_size
+    ):
+        if not should_be_dumped:
+            skipped_objects.append(obj.pk)
+            log.info(f"{sink.model}: Skipping object {obj.pk}, reason: '{reason}'")
+        else:
+            objects_to_submit.append(obj)
+            if len(objects_to_submit) % batch_size == 0:
+                count += len(objects_to_submit)
+                sink.dump(objects_to_submit, many=True)
+                objects_to_submit = []
+                log.info(f"Last ID: {obj.pk}")
+                time.sleep(sleep_time)
+
+        if limit and count >= limit:
+            log.info(
+                f"Limit of {limit} eligible objects has been reached, quitting!"
+            )
+            break
+
+    if objects_to_submit:
+        sink.dump(objects_to_submit, many=True)
+        count += len(objects_to_submit)
+        log.info(f"Last ID: {objects_to_submit[-1].pk}")
+
+    log.info(f"Dumped {count} objects to ClickHouse")
+
+
+class Command(BaseCommand):
+    """
+    Dump objects to a ClickHouse instance.
+ """ + + help = dedent(__doc__).strip() + + def add_arguments(self, parser): + parser.add_argument( + "--url", + type=str, + help="the URL of the ClickHouse server", + ) + parser.add_argument( + "--username", + type=str, + help="the username of the ClickHouse user", + ) + parser.add_argument( + "--password", + type=str, + help="the password of the ClickHouse user", + ) + parser.add_argument( + "--database", + type=str, + help="the database in ClickHouse to connect to", + ) + parser.add_argument( + "--timeout_secs", + type=int, + help="timeout for ClickHouse requests, in seconds", + ) + parser.add_argument( + "--object", + type=str, + help="the type of object to dump", + ) + parser.add_argument( + "--start_pk", + type=int, + help="the primary key to start at", + default=None, + ) + parser.add_argument( + "--ids", + metavar="KEY", + type=str, + nargs="*", + help="keys of objects to serialize; if omitted all objects in system are serialized", + ) + parser.add_argument( + "--ids_to_skip", + metavar="KEY", + type=str, + nargs="*", + help="keys of objects to NOT to serialize", + ) + parser.add_argument( + "--force", + action="store_true", + help="dump all objects regardless of when they were last published", + ) + parser.add_argument( + "--limit", + type=int, + help="maximum number of objects to dump, cannot be used with '--ids' or '--force'", + ) + parser.add_argument( + "--batch_size", + type=int, + default=10000, + help="number of objects to dump in a single batch", + ) + parser.add_argument( + "--sleep_time", + type=int, + default=1, + help="number of seconds to sleep between batches", + ) + + def handle(self, *args, **options): + """ + Iterates through each objects, serializes and saves them to clickhouse. + """ + connection_overrides = { + key: options[key] + for key in ["url", "username", "password", "database", "timeout_secs"] + if options[key] + } + + ids = options["ids"] if options["ids"] else [] + ids_to_skip = options["ids_to_skip"] if options["ids_to_skip"] else [] + + if options["limit"] is not None and int(options["limit"]) < 1: + message = "'limit' must be greater than 0!" + log.error(message) + raise CommandError(message) + + if options["limit"] and options["force"]: + message = ( + "The 'limit' option cannot be used with 'force' as running the " + "command repeatedly will result in the same objects being dumped every time." + ) + log.error(message) + raise CommandError(message) + + if options["object"] is None: + message = "You must specify an object type to dump!" 
+ log.error(message) + raise CommandError(message) + + Sink = ModelBaseSink.get_sink_by_model_name(options["object"]) + sink = Sink(connection_overrides, log) + dump_target_objects_to_clickhouse( + sink, + options["start_pk"], + [object_id.strip() for object_id in ids], + [object_id.strip() for object_id in ids_to_skip], + options["force"], + options["limit"], + options["batch_size"], + options["sleep_time"], + ) diff --git a/platform_plugin_aspects/settings/common.py b/platform_plugin_aspects/settings/common.py index fe95bb2..b8b84e2 100644 --- a/platform_plugin_aspects/settings/common.py +++ b/platform_plugin_aspects/settings/common.py @@ -5,6 +5,7 @@ For the full list of settings and their values, see https://docs.djangoproject.com/en/2.22/ref/settings/ """ + from platform_plugin_aspects import ROOT_DIRECTORY @@ -21,3 +22,41 @@ def plugin_settings(settings): } settings.ASPECTS_INSTRUCTOR_DASHBOARD_UUID = "1d6bf904-f53f-47fd-b1c9-6cd7e284d286" settings.SUPERSET_EXTRA_FILTERS_FORMAT = [] + settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG = { + # URL to a running ClickHouse server's HTTP interface. ex: https://foo.openedx.org:8443/ or + # http://foo.openedx.org:8123/ . Note that we only support the ClickHouse HTTP interface + # to avoid pulling in more dependencies to the platform than necessary. + "url": "http://clickhouse:8123", + "username": "ch_cms", + "password": "password", + "database": "event_sink", + "timeout_secs": 5, + } + + settings.EVENT_SINK_CLICKHOUSE_PII_MODELS = [ + "user_profile", + "external_id", + ] + + settings.EVENT_SINK_CLICKHOUSE_MODEL_CONFIG = { + "auth_user": { + "module": "django.contrib.auth.models", + "model": "User", + }, + "user_profile": { + "module": "common.djangoapps.student.models", + "model": "UserProfile", + }, + "course_overviews": { + "module": "openedx.core.djangoapps.content.course_overviews.models", + "model": "CourseOverview", + }, + "external_id": { + "module": "openedx.core.djangoapps.external_user_ids.models", + "model": "ExternalId", + }, + "custom_course_edx": { + "module": "lms.djangoapps.ccx.models", + "model": "CustomCourseForEdX", + }, + } diff --git a/platform_plugin_aspects/settings/production.py b/platform_plugin_aspects/settings/production.py index 25a72a9..7e344ef 100644 --- a/platform_plugin_aspects/settings/production.py +++ b/platform_plugin_aspects/settings/production.py @@ -11,9 +11,19 @@ def plugin_settings(settings): settings.SUPERSET_CONFIG = getattr(settings, "ENV_TOKENS", {}).get( "SUPERSET_CONFIG", settings.SUPERSET_CONFIG ) - settings.ASPECTS_INSTRUCTOR_DASHBOARD_UUID = getattr(settings, "ENV_TOKENS", {}).get( + settings.ASPECTS_INSTRUCTOR_DASHBOARD_UUID = getattr( + settings, "ENV_TOKENS", {} + ).get( "ASPECTS_INSTRUCTOR_DASHBOARD_UUID", settings.ASPECTS_INSTRUCTOR_DASHBOARD_UUID ) settings.SUPERSET_EXTRA_FILTERS_FORMAT = getattr(settings, "ENV_TOKENS", {}).get( "SUPERSET_EXTRA_FILTERS_FORMAT", settings.SUPERSET_EXTRA_FILTERS_FORMAT ) + settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG = settings.ENV_TOKENS.get( + "EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG", + settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG, + ) + settings.EVENT_SINK_CLICKHOUSE_PII_MODELS = settings.ENV_TOKENS.get( + "EVENT_SINK_CLICKHOUSE_PII_MODELS", + settings.EVENT_SINK_CLICKHOUSE_PII_MODELS, + ) diff --git a/platform_plugin_aspects/settings/tests/__init__.py b/platform_plugin_aspects/settings/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/platform_plugin_aspects/tests/test_settings.py 
b/platform_plugin_aspects/settings/tests/test_settings.py similarity index 67% rename from platform_plugin_aspects/tests/test_settings.py rename to platform_plugin_aspects/settings/tests/test_settings.py index cbbbc44..5b937bd 100644 --- a/platform_plugin_aspects/tests/test_settings.py +++ b/platform_plugin_aspects/settings/tests/test_settings.py @@ -26,11 +26,18 @@ def test_common_settings(self): self.assertIn("password", settings.SUPERSET_CONFIG) self.assertIsNotNone(settings.ASPECTS_INSTRUCTOR_DASHBOARD_UUID) self.assertIsNotNone(settings.SUPERSET_EXTRA_FILTERS_FORMAT) + for key in ("url", "username", "password", "database", "timeout_secs"): + assert key in settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG def test_production_settings(self): """ Test production settings """ + test_url = "https://foo.bar" + test_username = "bob" + test_password = "secret" + test_database = "cool_data" + test_timeout = 1 settings.ENV_TOKENS = { "SUPERSET_CONFIG": { "url": "http://superset.local.overhang.io:8088", @@ -42,6 +49,13 @@ def test_production_settings(self): "dashboard_uuid": "1d6bf904-f53f-47fd-b1c9-6cd7e284d286", }, "SUPERSET_EXTRA_FILTERS_FORMAT": [], + "EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG": { + "url": test_url, + "username": test_username, + "password": test_password, + "database": test_database, + "timeout_secs": test_timeout, + }, } production_setttings.plugin_settings(settings) self.assertEqual( @@ -55,3 +69,13 @@ def test_production_settings(self): settings.SUPERSET_EXTRA_FILTERS_FORMAT, settings.ENV_TOKENS["SUPERSET_EXTRA_FILTERS_FORMAT"], ) + + for key, val in ( + ("url", test_url), + ("username", test_username), + ("password", test_password), + ("database", test_database), + ("timeout_secs", test_timeout), + ): + assert key in settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG + assert settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG[key] == val diff --git a/platform_plugin_aspects/signals.py b/platform_plugin_aspects/signals.py new file mode 100644 index 0000000..314d04e --- /dev/null +++ b/platform_plugin_aspects/signals.py @@ -0,0 +1,93 @@ +""" +Signal handler functions, mapped to specific signals in apps.py. +""" + +from django.db.models.signals import post_save +from django.dispatch import Signal, receiver + +from platform_plugin_aspects.sinks import ( + ExternalIdSink, + UserProfileSink, + UserRetirementSink, +) +from platform_plugin_aspects.utils import get_model + +try: + from openedx.core.djangoapps.user_api.accounts.signals import USER_RETIRE_LMS_MISC +except ImportError: + # Tests don't have the platform installed + USER_RETIRE_LMS_MISC = Signal() + + +def receive_course_publish( # pylint: disable=unused-argument # pragma: no cover + sender, course_key, **kwargs +): + """ + Receives COURSE_PUBLISHED signal and queues the dump job. + """ + # import here, because signal is registered at startup, but items in tasks are not yet able to be loaded + from platform_plugin_aspects.tasks import ( # pylint: disable=import-outside-toplevel + dump_course_to_clickhouse, + ) + + dump_course_to_clickhouse.delay(str(course_key)) + + +@receiver(post_save, sender=get_model("user_profile")) +def on_user_profile_updated( # pylint: disable=unused-argument # pragma: no cover + sender, instance, **kwargs +): + """ + Receives post save signal and queues the dump job. 
+ """ + # import here, because signal is registered at startup, but items in tasks are not yet able to be loaded + from platform_plugin_aspects.tasks import ( # pylint: disable=import-outside-toplevel + dump_data_to_clickhouse, + ) + + sink = UserProfileSink(None, None) + dump_data_to_clickhouse.delay( + sink_module=sink.__module__, + sink_name=sink.__class__.__name__, + object_id=str(instance.id), + ) + + +@receiver(post_save, sender=get_model("external_id")) +def on_externalid_saved( # pylint: disable=unused-argument # pragma: no cover + sender, instance, **kwargs +): + """ + Receives post save signal and queues the dump job. + """ + # import here, because signal is registered at startup, but items in tasks are not yet able to be loaded + from platform_plugin_aspects.tasks import ( # pylint: disable=import-outside-toplevel + dump_data_to_clickhouse, + ) + + sink = ExternalIdSink(None, None) + dump_data_to_clickhouse.delay( + sink_module=sink.__module__, + sink_name=sink.__class__.__name__, + object_id=str(instance.id), + ) + + +@receiver(USER_RETIRE_LMS_MISC) +def on_user_retirement( # pylint: disable=unused-argument # pragma: no cover + sender, user, **kwargs +): + """ + Receives a user retirement signal and queues the retire_user job. + """ + # import here, because signal is registered at startup, but items in tasks are not yet able to be loaded + from platform_plugin_aspects.tasks import ( # pylint: disable=import-outside-toplevel + dump_data_to_clickhouse, + ) + + sink = UserRetirementSink(None, None) + dump_data_to_clickhouse.delay( + sink_module=sink.__module__, + sink_name=sink.__class__.__name__, + object_id=str(user.id), + ) diff --git a/platform_plugin_aspects/sinks/__init__.py b/platform_plugin_aspects/sinks/__init__.py new file mode 100644 index 0000000..62fef16 --- /dev/null +++ b/platform_plugin_aspects/sinks/__init__.py @@ -0,0 +1,9 @@ +""" +This module contains the sinks for the platform plugin aspects. 
+""" + +from .base_sink import BaseSink, ModelBaseSink +from .course_overview_sink import CourseOverviewSink, XBlockSink +from .external_id_sink import ExternalIdSink +from .user_profile_sink import UserProfileSink +from .user_retire_sink import UserRetirementSink diff --git a/platform_plugin_aspects/sinks/base_sink.py b/platform_plugin_aspects/sinks/base_sink.py new file mode 100644 index 0000000..7e80ea2 --- /dev/null +++ b/platform_plugin_aspects/sinks/base_sink.py @@ -0,0 +1,372 @@ +""" +Base classes for event sinks +""" + +import csv +import datetime +import io +from collections import namedtuple + +import requests +from django.conf import settings +from django.core.paginator import Paginator +from edx_toggles.toggles import WaffleFlag + +from platform_plugin_aspects.utils import get_model +from platform_plugin_aspects.waffle import WAFFLE_FLAG_NAMESPACE + +ClickHouseAuth = namedtuple("ClickHouseAuth", ["username", "password"]) + + +class BaseSink: + """ + Base class for ClickHouse event sink, allows overwriting of default settings + """ + + CLICKHOUSE_BULK_INSERT_PARAMS = { + "input_format_allow_errors_num": 1, + "input_format_allow_errors_ratio": 0.1, + } + + def __init__(self, connection_overrides, log): + self.connection_overrides = connection_overrides + self.log = log + self.ch_url = settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG["url"] + self.ch_auth = ClickHouseAuth( + settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG["username"], + settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG["password"], + ) + self.ch_database = settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG["database"] + self.ch_timeout_secs = settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG[ + "timeout_secs" + ] + + # If any overrides to the ClickHouse connection + if connection_overrides: + self.ch_url = connection_overrides.get("url", self.ch_url) + self.ch_auth = ClickHouseAuth( + connection_overrides.get("username", self.ch_auth.username), + connection_overrides.get("password", self.ch_auth.password), + ) + self.ch_database = connection_overrides.get("database", self.ch_database) + self.ch_timeout_secs = connection_overrides.get( + "timeout_secs", self.ch_timeout_secs + ) + + def _send_clickhouse_request(self, request): + """ + Perform the actual HTTP requests to ClickHouse. + """ + session = requests.Session() + prepared_request = request.prepare() + + try: + response = session.send(prepared_request, timeout=self.ch_timeout_secs) + response.raise_for_status() + return response + except requests.exceptions.HTTPError as e: + self.log.error(str(e)) + self.log.error(e.response.headers) + self.log.error(e.response) + self.log.error(e.response.text) + raise + + +class ModelBaseSink(BaseSink): + """ + Base class for ClickHouse event sink, allows overwriting of default settings + + This class is used for the model based event sink, which uses the Django ORM to write + events to ClickHouse. + """ + + unique_key = None + """ + str: A unique identifier key used to distinguish between different instances of the sink. + It can be used to specify the uniqueness constraint when writing events to ClickHouse. + """ + + clickhouse_table_name = None + """ + str: The name of the ClickHouse table where the events will be written. + This should be set to the desired table name for the specific event type. + """ + + queryset = None + """ + QuerySet: A Django QuerySet that represents the initial set of data to be processed by the sink. + It can be used to filter and select specific data for writing to ClickHouse. 
+ """ + + name = None + """ + str: A human-readable name for the sink instance. This can be used for logging and identification purposes. + """ + + timestamp_field = None + """ + str: The name of the field in the model representing the timestamp of the event. + It is used to extract the timestamp from the event data for writing to ClickHouse. + """ + + serializer_class = None + """ + Serializer: The serializer class responsible for converting event data into a format suitable for storage. + This serializer should be compatible with Django's serialization framework. + """ + + model = None + """ + Model: The Django model class representing the structure of the event data. + This is used to validate and organize the data before writing it to ClickHouse. + """ + + nested_sinks = [] + """ + list: A list of nested sink instances that can be used to further process or route the event data. + Nested sinks allow chaining multiple sinks together for more complex event processing pipelines. + """ + pk_format = int + """ + function: A function to format the primary key of the model + """ + + def __init__(self, connection_overrides, log): + super().__init__(connection_overrides, log) + + required_fields = [ + self.clickhouse_table_name, + self.timestamp_field, + self.unique_key, + self.name, + ] + + if not all(required_fields): + raise NotImplementedError( + "ModelBaseSink needs to be subclassed with clickhouse_table_name," + "timestamp_field, unique_key, and name" + ) + + self._nested_sinks = [ + sink(connection_overrides, log) for sink in self.nested_sinks + ] + + def get_model(self): + """ + Return the model to be used for the insert + """ + return get_model(self.model) + + def get_queryset(self, start_pk=None): + """ + Return the queryset to be used for the insert + """ + if start_pk: + start_pk = self.pk_format(start_pk) + return self.get_model().objects.filter(pk__gt=start_pk).order_by("pk") + else: + return self.get_model().objects.all().order_by("pk") + + def dump(self, item_id, many=False, initial=None): + """ + Do the serialization and send to ClickHouse + """ + if many: + # If we're dumping many items, we expect to get a list of items + serialized_item = self.serialize_item(item_id, many=many, initial=initial) + self.log.info( + f"Now dumping {len(serialized_item)} {self.name} to ClickHouse", + ) + self.send_item_and_log(item_id, serialized_item, many) + self.log.info( + f"Completed dumping {len(serialized_item)} {self.name} to ClickHouse" + ) + + for item in serialized_item: + for nested_sink in self._nested_sinks: + nested_sink.dump_related( + item, item["dump_id"], item["time_last_dumped"] + ) + else: + item = self.get_object(item_id) + serialized_item = self.serialize_item(item, many=many, initial=initial) + self.log.info( + f"Now dumping {self.name} {item_id} to ClickHouse", + ) + self.send_item_and_log(item_id, serialized_item, many) + self.log.info(f"Completed dumping {self.name} {item_id} to ClickHouse") + + for nested_sink in self._nested_sinks: + nested_sink.dump_related( + serialized_item, + serialized_item["dump_id"], + serialized_item["time_last_dumped"], + ) + + def send_item_and_log( + self, + item_id, + serialized_item, + many, + ): + """Send the item to clickhouse and log any errors""" + try: + self.send_item(serialized_item, many=many) + except Exception: + self.log.exception( + f"Error trying to dump {self.name} {str(item_id)} to ClickHouse!", + ) + raise + + def get_object(self, item_id): + """ + Return the object to be dumped to ClickHouse + """ + return 
+
+    def dump_related(self, serialized_item, dump_id, time_last_dumped):
+        """
+        Dump related items to ClickHouse
+        """
+        raise NotImplementedError(
+            "dump_related needs to be implemented in the subclass "
+            f"{self.__class__.__name__}!"
+        )
+
+    def serialize_item(self, item, many=False, initial=None):
+        """
+        Serialize the data to be sent to ClickHouse
+        """
+        Serializer = self.get_serializer()
+        serializer = Serializer(  # pylint: disable=not-callable
+            item, many=many, initial=initial
+        )
+        return serializer.data
+
+    def get_serializer(self):
+        """
+        Return the serializer to be used for the insert
+        """
+        return self.serializer_class
+
+    def send_item(self, serialized_item, many=False):
+        """
+        Create the insert query and CSV to send the serialized item to ClickHouse.
+
+        We still use a CSV here even though there's only 1 row because it handles
+        type serialization for us and keeps the pattern consistent.
+        """
+        params = self.CLICKHOUSE_BULK_INSERT_PARAMS.copy()
+
+        # "query" is a special param for the query, it's the best way to get the FORMAT CSV in there.
+        params["query"] = (
+            f"INSERT INTO {self.ch_database}.{self.clickhouse_table_name} FORMAT CSV"
+        )
+
+        output = io.StringIO()
+        writer = csv.writer(output, quoting=csv.QUOTE_NONNUMERIC)
+
+        if many:
+            for node in serialized_item:
+                writer.writerow(node.values())
+        else:
+            writer.writerow(serialized_item.values())
+
+        request = requests.Request(
+            "POST",
+            self.ch_url,
+            data=output.getvalue().encode("utf-8"),
+            params=params,
+            auth=self.ch_auth,
+        )
+
+        self._send_clickhouse_request(request)
+
+    def fetch_target_items(
+        self, start_pk=None, ids=None, skip_ids=None, force_dump=False, batch_size=None
+    ):
+        """
+        Fetch the items that should be dumped to ClickHouse
+        """
+        queryset = self.get_queryset(start_pk)
+        if ids:
+            ids = [self.pk_format(id) for id in ids]
+            queryset = queryset.filter(pk__in=ids)
+
+        if skip_ids:
+            skip_ids = [self.pk_format(id) for id in skip_ids]
+            queryset = queryset.exclude(pk__in=skip_ids)
+
+        paginator = Paginator(queryset, batch_size)
+        for i in range(1, paginator.num_pages + 1):
+            page = paginator.page(i)
+            items = page.object_list
+            for item in items:
+                if force_dump:
+                    yield item, True, "Force is set"
+                else:
+                    should_be_dumped, reason = self.should_dump_item(item)
+                    yield item, should_be_dumped, reason
+
+    def should_dump_item(self, item):  # pylint: disable=unused-argument
+        """
+        Return True if the item should be dumped to ClickHouse, False otherwise
+        """
+        return True, "No reason"
+
+    def get_last_dumped_timestamp(self, item_id):
+        """
+        Return the last timestamp that was dumped to ClickHouse
+        """
+        params = {
+            "query": f"SELECT max({self.timestamp_field}) as time_last_dumped "
+            f"FROM {self.ch_database}.{self.clickhouse_table_name} "
+            f"WHERE {self.unique_key} = '{item_id}'"
+        }
+
+        request = requests.Request("GET", self.ch_url, params=params, auth=self.ch_auth)
+
+        response = self._send_clickhouse_request(request)
+        response.raise_for_status()
+        if response.text.strip():
+            # ClickHouse returns timestamps in the format: "2023-05-03 15:47:39.331024+00:00"
+            # Our internal comparisons use the str() of a datetime object, this handles that
+            # transformation so that downstream comparisons will work.
+            return str(datetime.datetime.fromisoformat(response.text.strip()))
+
+        # Item has never been dumped, return None
+        return None
+
+    @classmethod
+    def is_enabled(cls):
+        """
+        Return True if the sink is enabled, False otherwise
+        """
+        enabled = getattr(
+            settings,
+            f"{WAFFLE_FLAG_NAMESPACE.upper()}_{cls.model.upper()}_ENABLED",
+            False,
+        )
+        # .. toggle_name: event_sink_clickhouse.model.enabled
+        # .. toggle_implementation: WaffleFlag
+        # .. toggle_default: False
+        # .. toggle_description: Waffle flag to enable sink
+        # .. toggle_use_cases: open_edx
+        # .. toggle_creation_date: 2022-08-17
+        waffle_flag = WaffleFlag(
+            f"{WAFFLE_FLAG_NAMESPACE}.{cls.model}.enabled",
+            __name__,
+        )
+
+        return enabled or waffle_flag.is_enabled()
+
+    @classmethod
+    def get_sink_by_model_name(cls, model):
+        """
+        Return the sink instance for the given model
+        """
+        for sink in cls.__subclasses__():
+            if sink.model == model:
+                return sink
+
+        return None
diff --git a/platform_plugin_aspects/sinks/course_overview_sink.py b/platform_plugin_aspects/sinks/course_overview_sink.py
new file mode 100644
index 0000000..d7b0fb9
--- /dev/null
+++ b/platform_plugin_aspects/sinks/course_overview_sink.py
@@ -0,0 +1,219 @@
+"""
+Handler for the CMS COURSE_PUBLISHED event
+
+Does the following:
+- Pulls the course structure from modulestore
+- Serializes the XBlocks
+- Sends them to ClickHouse in CSV format
+
+Note that the serialization format does not include all fields as there may be things like
+LTI passwords and other secrets. We just take the fields necessary for reporting at this time.
+"""
+
+import datetime
+import json
+
+from opaque_keys.edx.keys import CourseKey
+
+from platform_plugin_aspects.sinks.base_sink import ModelBaseSink
+from platform_plugin_aspects.sinks.serializers import CourseOverviewSerializer
+from platform_plugin_aspects.utils import get_detached_xblock_types, get_modulestore
+
+# Defaults we want to ensure we fail early on bulk inserts
+CLICKHOUSE_BULK_INSERT_PARAMS = {
+    "input_format_allow_errors_num": 1,
+    "input_format_allow_errors_ratio": 0.1,
+}
+
+
+class XBlockSink(ModelBaseSink):
+    """
+    Sink for XBlock model
+    """
+
+    unique_key = "location"
+    clickhouse_table_name = "course_blocks"
+    timestamp_field = "time_last_dumped"
+    name = "XBlock"
+    nested_sinks = []
+
+    def dump_related(self, serialized_item, dump_id, time_last_dumped):
+        """Dump all XBlocks for a course"""
+        self.dump(
+            serialized_item,
+            many=True,
+            initial={"dump_id": dump_id, "time_last_dumped": time_last_dumped},
+        )
+
+    def serialize_item(self, item, many=False, initial=None):
+        """
+        Serialize an XBlock into a dict
+        """
+        course_key = CourseKey.from_string(item["course_key"])
+        modulestore = get_modulestore()
+        detached_xblock_types = get_detached_xblock_types()
+
+        location_to_node = {}
+        items = modulestore.get_items(course_key)
+
+        # Serialize the XBlocks to dicts and map them with their location as keys;
+        # the whole map needs to be completed before we can define relationships
+        index = 0
+        section_idx = 0
+        subsection_idx = 0
+        unit_idx = 0
+
+        for block in items:
+            index += 1
+            fields = self.serialize_xblock(
+                block,
+                index,
+                detached_xblock_types,
+                initial["dump_id"],
+                initial["time_last_dumped"],
+            )
+
+            if fields["xblock_data_json"]["block_type"] == "chapter":
+                section_idx += 1
+                subsection_idx = 0
+                unit_idx = 0
+            elif fields["xblock_data_json"]["block_type"] == "sequential":
+                subsection_idx += 1
+                unit_idx = 0
+            elif fields["xblock_data_json"]["block_type"] == "vertical":
+                unit_idx += 1
+
fields["xblock_data_json"]["section"] = section_idx + fields["xblock_data_json"]["subsection"] = subsection_idx + fields["xblock_data_json"]["unit"] = unit_idx + + fields["xblock_data_json"] = json.dumps(fields["xblock_data_json"]) + location_to_node[XBlockSink.strip_branch_and_version(block.location)] = ( + fields + ) + + return list(location_to_node.values()) + + def serialize_xblock( + self, item, index, detached_xblock_types, dump_id, time_last_dumped + ): + """Serialize an XBlock instance into a dict""" + course_key = item.scope_ids.usage_id.course_key + block_type = item.scope_ids.block_type + + # Extra data not needed for the table to function, things can be + # added here without needing to rebuild the whole table. + json_data = { + "course": course_key.course, + "run": course_key.run, + "block_type": block_type, + "detached": 1 if block_type in detached_xblock_types else 0, + "graded": 1 if getattr(item, "graded", False) else 0, + "completion_mode": getattr(item, "completion_mode", ""), + } + + # Core table data, if things change here it's a big deal. + serialized_block = { + "org": course_key.org, + "course_key": str(course_key), + "location": str(item.location), + "display_name": item.display_name_with_default.replace("'", "'"), + "xblock_data_json": json_data, + "order": index, + "edited_on": str(getattr(item, "edited_on", "")), + "dump_id": dump_id, + "time_last_dumped": time_last_dumped, + } + + return serialized_block + + @staticmethod + def strip_branch_and_version(location): + """ + Removes the branch and version information from a location. + Args: + location: an xblock's location. + Returns: that xblock's location without branch and version information. + """ + return location.for_branch(None) + + +class CourseOverviewSink(ModelBaseSink): # pylint: disable=abstract-method + """ + Sink for CourseOverview model + """ + + model = "course_overviews" + unique_key = "course_key" + clickhouse_table_name = "course_overviews" + timestamp_field = "time_last_dumped" + name = "Course Overview" + serializer_class = CourseOverviewSerializer + nested_sinks = [XBlockSink] + pk_format = str + + def should_dump_item(self, item): + """ + Only dump the course if it's been changed since the last time it's been + dumped. + Args: + course_key: a CourseKey object. 
+        Returns:
+            - whether this course should be dumped (bool)
+            - reason why course needs, or does not need, to be dumped (string)
+        """
+
+        course_last_dump_time = self.get_last_dumped_timestamp(item)
+
+        # If we don't have a record of the last time this command was run,
+        # we should serialize the course and dump it
+        if course_last_dump_time is None:
+            return True, "Course is not present in ClickHouse"
+
+        course_last_published_date = self.get_course_last_published(item)
+
+        # If we've somehow dumped this course but there is no publish date,
+        # skip it
+        if course_last_dump_time and course_last_published_date is None:
+            return False, "No last modified date in CourseOverview"
+
+        # Otherwise, dump it if it is newer
+        course_last_dump_time = datetime.datetime.strptime(
+            course_last_dump_time, "%Y-%m-%d %H:%M:%S.%f+00:00"
+        )
+        course_last_published_date = datetime.datetime.strptime(
+            course_last_published_date, "%Y-%m-%d %H:%M:%S.%f+00:00"
+        )
+        needs_dump = course_last_dump_time < course_last_published_date
+
+        if needs_dump:
+            reason = (
+                "Course has been published since last dump time - "
+                f"last dumped {course_last_dump_time} < last published {str(course_last_published_date)}"
+            )
+        else:
+            reason = (
+                f"Course has NOT been published since last dump time - "
+                f"last dumped {course_last_dump_time} >= last published {str(course_last_published_date)}"
+            )
+        return needs_dump, reason
+
+    def get_course_last_published(self, course_overview):
+        """
+        Get approximate last publish date for the given course.
+        We use the 'modified' column in the CourseOverview table as a quick and easy
+        (although perhaps inexact) way of determining when a course was last
+        published. This works because CourseOverview rows are re-written upon
+        course publish.
+        Args:
+            course_overview: a CourseOverview object.
+        Returns: The datetime the course was last published at, stringified.
+            Uses Python's default str(...)
+            implementation for datetimes, which
+            is sortable and similar to ISO 8601:
+            https://docs.python.org/3/library/datetime.html#datetime.date.__str__
+        """
+        approx_last_published = course_overview.modified
+        if approx_last_published:
+            return str(approx_last_published)
+
+        return None
diff --git a/platform_plugin_aspects/sinks/external_id_sink.py b/platform_plugin_aspects/sinks/external_id_sink.py
new file mode 100644
index 0000000..52ccd1c
--- /dev/null
+++ b/platform_plugin_aspects/sinks/external_id_sink.py
@@ -0,0 +1,20 @@
+"""User external ID sink"""
+
+from platform_plugin_aspects.sinks.base_sink import ModelBaseSink
+from platform_plugin_aspects.sinks.serializers import UserExternalIDSerializer
+
+
+class ExternalIdSink(ModelBaseSink):  # pylint: disable=abstract-method
+    """
+    Sink for user external ID serializer
+    """
+
+    model = "external_id"
+    unique_key = "id"
+    clickhouse_table_name = "external_id"
+    timestamp_field = "time_last_dumped"
+    name = "External ID"
+    serializer_class = UserExternalIDSerializer
+
+    def get_queryset(self, start_pk=None):
+        return super().get_queryset(start_pk).select_related("user", "external_id_type")
diff --git a/platform_plugin_aspects/sinks/serializers.py b/platform_plugin_aspects/sinks/serializers.py
new file mode 100644
index 0000000..1057d80
--- /dev/null
+++ b/platform_plugin_aspects/sinks/serializers.py
@@ -0,0 +1,154 @@
+"""Django serializers for the event_sink_clickhouse app."""
+
+import json
+import uuid
+
+from django.utils import timezone
+from rest_framework import serializers
+
+from platform_plugin_aspects.utils import get_model
+
+
+class BaseSinkSerializer(serializers.Serializer):  # pylint: disable=abstract-method
+    """Base sink serializer for ClickHouse."""
+
+    dump_id = serializers.SerializerMethodField()
+    time_last_dumped = serializers.SerializerMethodField()
+
+    class Meta:
+        """Meta class for base sink serializer."""
+
+        fields = [
+            "dump_id",
+            "time_last_dumped",
+        ]
+
+    def get_dump_id(self, instance):  # pylint: disable=unused-argument
+        """Return a unique ID for the dump."""
+        return uuid.uuid4()
+
+    def get_time_last_dumped(self, instance):  # pylint: disable=unused-argument
+        """Return the timestamp for the dump."""
+        return timezone.now()
+
+
+class UserProfileSerializer(BaseSinkSerializer, serializers.ModelSerializer):
+    """Serializer for user profile events."""
+
+    email = serializers.CharField(source="user.email")
+
+    class Meta:
+        """Meta class for user profile serializer."""
+
+        model = get_model("user_profile")
+
+        fields = [
+            "id",
+            "user_id",
+            "name",
+            "email",
+            "meta",
+            "courseware",
+            "language",
+            "location",
+            "year_of_birth",
+            "gender",
+            "level_of_education",
+            "mailing_address",
+            "city",
+            "country",
+            "state",
+            "goals",
+            "bio",
+            "profile_image_uploaded_at",
+            "phone_number",
+            "dump_id",
+            "time_last_dumped",
+        ]
+
+
+class UserExternalIDSerializer(BaseSinkSerializer, serializers.ModelSerializer):
+    """Serializer for user external ID events."""
+
+    external_id_type = serializers.CharField(source="external_id_type.name")
+    username = serializers.CharField(source="user.username")
+
+    class Meta:
+        """Meta class for user external ID serializer."""
+
+        model = get_model("external_id")
+        fields = [
+            "external_user_id",
+            "external_id_type",
+            "username",
+            "user_id",
+            "dump_id",
+            "time_last_dumped",
+        ]
+
+
+class UserRetirementSerializer(BaseSinkSerializer, serializers.ModelSerializer):
+    """Serializer for user retirement events."""
+
+    user_id = serializers.CharField(source="id")
+
+    class Meta:
"""Meta class for user retirement serializer.""" + + model = get_model("auth_user") + fields = [ + "user_id", + ] + + +class CourseOverviewSerializer(BaseSinkSerializer, serializers.ModelSerializer): + """Serializer for course overview events.""" + + course_data_json = serializers.SerializerMethodField() + course_key = serializers.SerializerMethodField() + course_start = serializers.CharField(source="start") + course_end = serializers.CharField(source="end") + + class Meta: + """Meta classes for course overview serializer.""" + + model = get_model("course_overviews") + fields = [ + "org", + "course_key", + "display_name", + "course_start", + "course_end", + "enrollment_start", + "enrollment_end", + "self_paced", + "course_data_json", + "created", + "modified", + "dump_id", + "time_last_dumped", + ] + + def get_course_data_json(self, overview): + """Return the course data as a JSON string.""" + json_fields = { + "advertised_start": getattr(overview, "advertised_start", ""), + "announcement": getattr(overview, "announcement", ""), + "lowest_passing_grade": float( + getattr(overview, "lowest_passing_grade", 0.0) + ), + "invitation_only": getattr(overview, "invitation_only", ""), + "max_student_enrollments_allowed": getattr( + overview, "max_student_enrollments_allowed", None + ), + "effort": getattr(overview, "effort", ""), + "enable_proctored_exams": getattr(overview, "enable_proctored_exams", ""), + "entrance_exam_enabled": getattr(overview, "entrance_exam_enabled", ""), + "external_id": getattr(overview, "external_id", ""), + "language": getattr(overview, "language", ""), + } + return json.dumps(json_fields) + + def get_course_key(self, overview): + """Return the course key as a string.""" + return str(overview.id) diff --git a/platform_plugin_aspects/sinks/tests/test_base_sink.py b/platform_plugin_aspects/sinks/tests/test_base_sink.py new file mode 100644 index 0000000..054b1b8 --- /dev/null +++ b/platform_plugin_aspects/sinks/tests/test_base_sink.py @@ -0,0 +1,296 @@ +""" +Tests for the base sinks. +""" + +import logging +from unittest.mock import MagicMock, Mock, patch + +import ddt +from django.test import TestCase +from django.test.utils import override_settings + +from platform_plugin_aspects.sinks.base_sink import ModelBaseSink + + +class ChildSink(ModelBaseSink): # pylint: disable=abstract-method + """ + Demo child sink. + """ + + nested_sinks = [MagicMock()] + model = "child_model" + unique_key = "id" + clickhouse_table_name = "child_model_table" + timestamp_field = "time_last_dumped" + name = "Child Model" + serializer_class = Mock() + + +@override_settings( + EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG={ + "url": "http://clickhouse:8123", + "username": "ch_cms", + "password": "password", + "database": "event_sink", + "timeout_secs": 5, + }, + EVENT_SINK_CLICKHOUSE_MODEL_CONFIG={}, +) +class TestBaseSink(TestCase): + """ + Tests for the BaseSink. + """ + + def test_connection_overrides(self): + """ + Test that connection_overrides() returns the correct data. 
+ """ + child_sink = ChildSink( + connection_overrides={ + "url": "http://dummy:8123", + "username": "dummy_username", + "password": "dummy_password", + "database": "dummy_database", + "timeout_secs": 0, + }, + log=logging.getLogger(), + ) + + self.assertEqual(child_sink.ch_url, "http://dummy:8123") + self.assertEqual(child_sink.ch_auth, ("dummy_username", "dummy_password")) + self.assertEqual(child_sink.ch_database, "dummy_database") + self.assertEqual(child_sink.ch_timeout_secs, 0) + + +@override_settings( + EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG={ + # URL to a running ClickHouse server's HTTP interface. ex: https://foo.openedx.org:8443/ or + # http://foo.openedx.org:8123/ . Note that we only support the ClickHouse HTTP interface + # to avoid pulling in more dependencies to the platform than necessary. + "url": "http://clickhouse:8123", + "username": "ch_cms", + "password": "password", + "database": "event_sink", + "timeout_secs": 5, + }, + EVENT_SINK_CLICKHOUSE_MODEL_CONFIG={}, +) +@ddt.ddt +class TestModelBaseSink(TestCase): + """ + Tests for the ModelBaseSink. + """ + + def setUp(self): + """ + Set up the test suite. + """ + self.child_sink = ChildSink(connection_overrides={}, log=logging.getLogger()) + + @ddt.data( + (1, {"dump_id": 1, "time_last_dumped": "2020-01-01 00:00:00"}, False), + ( + [1, 2], + [ + {"dump_id": 1, "time_last_dumped": "2020-01-01 00:00:00"}, + {"dump_id": 2, "time_last_dumped": "2020-01-01 00:00:00"}, + ], + True, + ), + ) + @ddt.unpack + def test_dump(self, items_id, serialized_items, many): + """ + Test that the serialization/send logic is called correctly with many=True and many=False. + """ + self.child_sink.send_item_and_log = Mock() + self.child_sink.serialize_item = Mock(return_value=serialized_items) + self.child_sink.get_object = Mock(return_value=items_id) + + self.child_sink.dump(items_id, many=many) + + self.child_sink.serialize_item.assert_called_once_with( + items_id, many=many, initial=None + ) + self.child_sink.send_item_and_log.assert_called_once_with( + items_id, self.child_sink.serialize_item.return_value, many + ) + + def test_send_item_and_log(self): + """ + Test that send_item is called correctly. + """ + item = Mock(id=1) + self.child_sink.send_item = Mock() + serialized_item = {"dump_id": 1, "time_last_dumped": "2020-01-01 00:00:00"} + + self.child_sink.send_item_and_log(item.id, serialized_item, many=False) + + self.child_sink.send_item.assert_called_once_with(serialized_item, many=False) + + def test_serialize_item(self): + """ + Test that serialize_item() returns the correct serialized data. 
+ """ + item = Mock(id=1) + serialized_item = {"dump_id": 1, "time_last_dumped": "2020-01-01 00:00:00"} + self.child_sink.get_serializer = Mock(data=serialized_item) + self.child_sink.send_item_and_log = Mock() + + serialized_item = self.child_sink.serialize_item(item, many=False, initial=None) + + self.child_sink.get_serializer.return_value.assert_called_once_with( + item, + many=False, + initial=None, + ) + self.assertEqual( + serialized_item, + self.child_sink.get_serializer.return_value.return_value.data, + ) + + @patch("platform_plugin_aspects.sinks.base_sink.io") + @patch("platform_plugin_aspects.sinks.base_sink.requests") + @ddt.data( + ({"dump_id": 1, "time_last_dumped": "2020-01-01 00:00:00"}, False), + ( + [ + {"dump_id": 1, "time_last_dumped": "2020-01-01 00:00:00"}, + {"dump_id": 2, "time_last_dumped": "2020-01-01 00:00:00"}, + ], + True, + ), + ) + @ddt.unpack + def test_send_items(self, serialized_items, many, mock_requests, mock_io): + """ + Test that send_item() calls the correct requests. + """ + params = self.child_sink.CLICKHOUSE_BULK_INSERT_PARAMS.copy() + params["query"] = "INSERT INTO event_sink.child_model_table FORMAT CSV" + self.child_sink._send_clickhouse_request = ( # pylint: disable=protected-access + Mock() + ) + data = "1,2020-01-01 00:00:00\n2,2020-01-01 00:00:00\n" + mock_io.StringIO.return_value.getvalue.return_value.encode.return_value = data + + self.child_sink.send_item(serialized_items, many=many) + + mock_requests.Request.assert_called_once_with( + "POST", + self.child_sink.ch_url, + data=data, + params=params, + auth=self.child_sink.ch_auth, + ) + self.child_sink._send_clickhouse_request( # pylint: disable=protected-access + mock_requests.Request.return_value + ) + + def test_init(self): + # Mock the required fields + connection_overrides = {} + log = MagicMock() + + # Test without all required fields + with self.assertRaises(NotImplementedError): + sink = ModelBaseSink(connection_overrides, log) + self.assertIsInstance(sink, ModelBaseSink) + + def fetch_target_items(self): + """ + Test that fetch_target_items() returns the correct data. + """ + + def test_get_last_dumped_timestamp(self): + """ + Test that get_last_dumped_timestamp() returns the correct data. + """ + + @override_settings( + EVENT_SINK_CLICKHOUSE_MODEL_CONFIG={ + "child_model": { + "module": "dummy.module", + "model": "dummy", + }, + } + ) + @patch("platform_plugin_aspects.sinks.base_sink.get_model") + def test_get_model(self, mock_get_model): + """ + Test that get_model() returns a query set. + """ + self.child_sink.get_model() + mock_get_model.assert_called_once_with("child_model") + + def test_get_queryset(self): + """ + Test that get_queryset() returns a query set. + """ + self.child_sink.get_model = Mock() + self.child_sink.get_queryset() + self.child_sink.get_model.return_value.objects.all.assert_called_once() + + def test_get_queryset_by_start_pk(self): + """ + Test that get_queryset() returns a query set. + """ + self.child_sink.get_model = Mock() + self.child_sink.get_queryset(start_pk=1) + self.child_sink.get_model.return_value.objects.filter.assert_called_once_with( + pk__gt=1 + ) + + def test_nested_sink_dump_related(self): + """ + Test that dump_related() calls the correct methods. + """ + self.child_sink.dump = Mock() + with self.assertRaises(NotImplementedError): + self.child_sink.dump_related("foo", "bar", "baz") + + def test_get_serializer(self): + """ + Test that get_serializer() returns the correct serializer. 
+        """
+        serializer = self.child_sink.get_serializer()
+        self.assertEqual(serializer, self.child_sink.serializer_class)
+
+    def test_should_dump_item(self):
+        """
+        Test that should_dump_item() returns the correct data.
+        """
+        self.assertEqual(self.child_sink.should_dump_item(1), (True, "No reason"))
+
+    @patch("platform_plugin_aspects.sinks.base_sink.WaffleFlag.is_enabled")
+    def test_is_not_enabled_waffle(self, mock_waffle_flag_is_enabled):
+        """
+        Test that is_enabled() returns False when the waffle flag is disabled.
+        """
+        mock_waffle_flag_is_enabled.return_value = False
+        self.assertEqual(self.child_sink.__class__.is_enabled(), False)
+
+    @patch("platform_plugin_aspects.sinks.base_sink.WaffleFlag.is_enabled")
+    def test_is_enabled_waffle(self, mock_waffle_flag_is_enabled):
+        """
+        Test that is_enabled() returns True when the waffle flag is enabled.
+        """
+        mock_waffle_flag_is_enabled.return_value = True
+        self.assertEqual(self.child_sink.__class__.is_enabled(), True)
+
+    @override_settings(EVENT_SINK_CLICKHOUSE_CHILD_MODEL_ENABLED=True)
+    def test_is_enabled(self):
+        """
+        Test that is_enabled() returns True when enabled via settings.
+        """
+        self.assertEqual(self.child_sink.is_enabled(), True)
+
+    def test_get_sink_by_model_name(self):
+        """
+        Test that get_sink_by_model_name() returns the correct data.
+        """
+        no_sink = ModelBaseSink.get_sink_by_model_name("non_existent_model")
+        child_sink = ModelBaseSink.get_sink_by_model_name("child_model")
+
+        self.assertIsNone(no_sink)
+        self.assertEqual(child_sink, ChildSink)
diff --git a/platform_plugin_aspects/sinks/tests/test_course_overview_sink.py b/platform_plugin_aspects/sinks/tests/test_course_overview_sink.py
new file mode 100644
index 0000000..47c04a8
--- /dev/null
+++ b/platform_plugin_aspects/sinks/tests/test_course_overview_sink.py
@@ -0,0 +1,379 @@
+"""
+Tests for the course overview and XBlock sinks.
+"""
+
+import json
+import logging
+from datetime import datetime
+from unittest.mock import MagicMock, patch
+
+import pytest
+import requests
+import responses
+from django.test.utils import override_settings
+from responses import matchers
+from responses.registries import OrderedRegistry
+
+from platform_plugin_aspects.sinks import CourseOverviewSink, XBlockSink
+from platform_plugin_aspects.tasks import dump_course_to_clickhouse
+from test_utils.helpers import (
+    check_block_csv_matcher,
+    check_overview_csv_matcher,
+    course_factory,
+    course_str_factory,
+    fake_course_overview_factory,
+    fake_serialize_fake_course_overview,
+    get_clickhouse_http_params,
+    mock_detached_xblock_types,
+)
+
+
+@responses.activate(  # pylint: disable=unexpected-keyword-arg,no-value-for-parameter
+    registry=OrderedRegistry
+)
+@override_settings(EVENT_SINK_CLICKHOUSE_COURSE_OVERVIEW_ENABLED=True)
+@patch("platform_plugin_aspects.sinks.CourseOverviewSink.serialize_item")
+@patch("platform_plugin_aspects.sinks.CourseOverviewSink.get_model")
+@patch("platform_plugin_aspects.sinks.course_overview_sink.get_detached_xblock_types")
+@patch("platform_plugin_aspects.sinks.course_overview_sink.get_modulestore")
+@patch("platform_plugin_aspects.tasks.get_ccx_courses")
+def test_course_publish_success(
+    mock_get_ccx_courses,
+    mock_modulestore,
+    mock_detached,
+    mock_overview,
+    mock_serialize_item,
+):
+    """
+    Test of a successful end-to-end run.
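+
+    Publishing a course should trigger the dump_course_to_clickhouse task,
+    which POSTs two CSV payloads to ClickHouse: one for the course overview
+    row and one for the course blocks. Both requests are captured and matched
+    with the responses library below.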
+    """
+    # Create a fake course structure with a few fake XBlocks
+    course = course_factory()
+    course_overview = fake_course_overview_factory(modified=datetime.now())
+    mock_modulestore.return_value.get_items.return_value = course
+
+    mock_serialize_item.return_value = fake_serialize_fake_course_overview(
+        course_overview
+    )
+
+    # Fake the "detached types" list since we can't import it here
+    mock_detached.return_value = mock_detached_xblock_types()
+
+    mock_overview.return_value.get_from_id.return_value = course_overview
+    mock_get_ccx_courses.return_value = []
+
+    # Use the responses library to catch the POSTs to ClickHouse
+    # and match them against the expected values, including CSV
+    # content
+    course_overview_params, blocks_params = get_clickhouse_http_params()
+
+    responses.post(
+        "https://foo.bar/",
+        match=[
+            matchers.query_param_matcher(course_overview_params),
+            check_overview_csv_matcher(course_overview),
+        ],
+    )
+    responses.post(
+        "https://foo.bar/",
+        match=[
+            matchers.query_param_matcher(blocks_params),
+            check_block_csv_matcher(course),
+        ],
+    )
+
+    course = course_str_factory()
+    dump_course_to_clickhouse(course)
+
+    # Just to make sure we're not calling things more than we need to
+    assert mock_modulestore.call_count == 1
+    assert mock_detached.call_count == 1
+    mock_get_ccx_courses.assert_called_once_with(course_overview.id)
+
+
+@responses.activate(  # pylint: disable=unexpected-keyword-arg,no-value-for-parameter
+    registry=OrderedRegistry
+)
+@patch("platform_plugin_aspects.sinks.CourseOverviewSink.serialize_item")
+@patch("platform_plugin_aspects.sinks.CourseOverviewSink.get_model")
+@patch("platform_plugin_aspects.sinks.course_overview_sink.get_detached_xblock_types")
+@patch("platform_plugin_aspects.sinks.course_overview_sink.get_modulestore")
+# pylint: disable=unused-argument
+def test_course_publish_clickhouse_error(
+    mock_modulestore, mock_detached, mock_overview, mock_serialize_item, caplog
+):
+    """
+    Test the case where a ClickHouse POST fails.
+    """
+    course = course_factory()
+    mock_modulestore.return_value.get_items.return_value = course
+    mock_detached.return_value = mock_detached_xblock_types()
+
+    course_overview = fake_course_overview_factory(modified=datetime.now())
+    mock_overview.return_value.get_from_id.return_value = course_overview
+
+    mock_serialize_item.return_value = fake_serialize_fake_course_overview(
+        course_overview
+    )
+
+    # This will raise an exception when we try to post to ClickHouse
+    responses.post("https://foo.bar/", body="Test Bad Request error", status=400)
+
+    course = course_str_factory()
+
+    with pytest.raises(requests.exceptions.RequestException):
+        dump_course_to_clickhouse(course)
+
+    # Make sure our log messages went through.
+    assert "Test Bad Request error" in caplog.text
+    assert (
+        f"Error trying to dump Course Overview {course} to ClickHouse!" in caplog.text
+    )
+
+
+def test_get_course_last_published():
+    """
+    Make sure we get a valid date back from this in the expected format.
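+
+    The sink should return the course's modified datetime as a string that
+    parses with the "%Y-%m-%d %H:%M:%S.%f" format used below.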
+    """
+    # Create a fake course overview, which will return a datetime object
+    course_overview = fake_course_overview_factory(modified=datetime.now())
+
+    # Confirm that the string date we get back is a valid date
+    last_published_date = CourseOverviewSink(None, None).get_course_last_published(
+        course_overview
+    )
+    dt = datetime.strptime(last_published_date, "%Y-%m-%d %H:%M:%S.%f")
+    assert dt
+
+
+@responses.activate(  # pylint: disable=unexpected-keyword-arg,no-value-for-parameter
+    registry=OrderedRegistry
+)
+def test_no_last_published_date():
+    """
+    Test that a course with no modified date is not dumped.
+
+    In some cases there is no modified date on a course. In CourseGraph we
+    skipped these if they were already in the database, so we continue that
+    behavior here.
+    """
+    course_overview = fake_course_overview_factory(modified=None)
+
+    # should_dump_course will reach out to ClickHouse for the last dump date;
+    # we fake the response here with an arbitrary date so that we exercise
+    # the "no modified date" code path.
+    responses.get("https://foo.bar/", body="2023-05-03 15:47:39.331024+00:00")
+
+    # Confirm that the course is skipped and the reason is reported
+    sink = CourseOverviewSink(connection_overrides={}, log=logging.getLogger())
+    should_dump_course, reason = sink.should_dump_item(course_overview)
+
+    assert should_dump_course is False
+    assert reason == "No last modified date in CourseOverview"
+
+
+@responses.activate(  # pylint: disable=unexpected-keyword-arg,no-value-for-parameter
+    registry=OrderedRegistry
+)
+def test_should_dump_item():
+    """
+    Test that we get the expected results from should_dump_item.
+    """
+    course_overview = fake_course_overview_factory(
+        modified=datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f+00:00")
+    )
+
+    # should_dump_course will reach out to ClickHouse for the last dump date;
+    # we fake an older date here so the course looks like it has been
+    # published since the last dump.
+    responses.get("https://foo.bar/", body="2023-05-03 15:47:39.331024+00:00")
+
+    # Confirm that the course is marked as needing a dump
+    sink = CourseOverviewSink(connection_overrides={}, log=logging.getLogger())
+    should_dump_course, reason = sink.should_dump_item(course_overview)
+
+    assert should_dump_course is True
+    assert "Course has been published since last dump time - " in reason
+
+
+@responses.activate(  # pylint: disable=unexpected-keyword-arg,no-value-for-parameter
+    registry=OrderedRegistry
+)
+def test_should_dump_item_not_in_clickhouse():
+    """
+    Test that a course gets dumped if it's never been dumped before.
+    """
+    course_overview = fake_course_overview_factory(
+        modified=datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f+00:00")
+    )
+    responses.get("https://foo.bar/", body="")
+
+    sink = CourseOverviewSink(connection_overrides={}, log=logging.getLogger())
+    should_dump_course, reason = sink.should_dump_item(course_overview)
+
+    assert should_dump_course is True
+    assert "Course is not present in ClickHouse" == reason
+
+
+@responses.activate(  # pylint: disable=unexpected-keyword-arg,no-value-for-parameter
+    registry=OrderedRegistry
+)
+def test_should_dump_item_no_needs_dump():
+    """
+    Test that a course is skipped if it has not been published since the last dump.
+    """
+    modified = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f+00:00")
+    course_overview = fake_course_overview_factory(modified=modified)
+    responses.get("https://foo.bar/", body=modified)
+
+    sink = CourseOverviewSink(connection_overrides={}, log=logging.getLogger())
+    should_dump_course, reason = sink.should_dump_item(course_overview)
+
+    assert should_dump_course is False
+    assert "Course has NOT been published since last dump time - " in reason
+
+
+@responses.activate(  # pylint: disable=unexpected-keyword-arg,no-value-for-parameter
+    registry=OrderedRegistry
+)
+def test_course_not_present_in_clickhouse():
+    """
+    Test that get_last_dumped_timestamp() returns None for a course that has never been dumped.
+    """
+    # Create a course key to request the last dump time for
+    course_key = course_str_factory()
+
+    responses.get("https://foo.bar/", body="")
+
+    # Confirm that no last dump timestamp is found
+    sink = CourseOverviewSink(connection_overrides={}, log=logging.getLogger())
+    last_published_date = sink.get_last_dumped_timestamp(course_key)
+    assert last_published_date is None
+
+
+@responses.activate(  # pylint: disable=unexpected-keyword-arg,no-value-for-parameter
+    registry=OrderedRegistry
+)
+def test_get_last_dump_time():
+    """
+    Test that get_last_dumped_timestamp() returns the date ClickHouse reports.
+    """
+    # Create a course key to request the last dump time for
+    course_key = course_str_factory()
+
+    # Mock out the response we expect to get from ClickHouse, just a random
+    # datetime in the correct format.
+    responses.get("https://foo.bar/", body="2023-05-03 15:47:39.331024+00:00")
+
+    # Confirm that the string date we get back is a valid date
+    sink = CourseOverviewSink(connection_overrides={}, log=logging.getLogger())
+    last_published_date = sink.get_last_dumped_timestamp(course_key)
+    dt = datetime.strptime(last_published_date, "%Y-%m-%d %H:%M:%S.%f+00:00")
+    assert dt
+
+
+@patch("platform_plugin_aspects.sinks.course_overview_sink.get_detached_xblock_types")
+@patch("platform_plugin_aspects.sinks.course_overview_sink.get_modulestore")
+# pylint: disable=unused-argument
+def test_xblock_tree_structure(mock_modulestore, mock_detached):
+    """
+    Test that our calculations of section/subsection/unit are correct.
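+
+    serialize_item() should emit one row per XBlock, with the computed
+    section/subsection/unit indexes stored in each row's xblock_data_json
+    field, which the helper below decodes and checks.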
+ """ + # Create a fake course structure with a few fake XBlocks + course = course_factory() + course_overview = fake_course_overview_factory(modified=datetime.now()) + mock_modulestore.return_value.get_items.return_value = course + + # Fake the "detached types" list since we can't import it here + mock_detached.return_value = mock_detached_xblock_types() + + fake_serialized_course_overview = fake_serialize_fake_course_overview( + course_overview + ) + sink = XBlockSink(connection_overrides={}, log=MagicMock()) + + initial_data = {"dump_id": "xyz", "time_last_dumped": "2023-09-05"} + results = sink.serialize_item(fake_serialized_course_overview, initial=initial_data) + + def _check_tree_location( + block, expected_section=0, expected_subsection=0, expected_unit=0 + ): + """ + Assert the expected values in certain returned blocks or print useful debug information. + """ + try: + j = json.loads(block["xblock_data_json"]) + assert j["section"] == expected_section + assert j["subsection"] == expected_subsection + assert j["unit"] == expected_unit + except AssertionError as e: + print(e) + print(block) + raise + + # The tree has new sections at these indexes + _check_tree_location(results[1], 1) + _check_tree_location(results[2], 2) + _check_tree_location(results[15], 3) + + # The tree has new subsections at these indexes + _check_tree_location(results[3], 2, 1) + _check_tree_location(results[7], 2, 2) + _check_tree_location(results[11], 2, 3) + _check_tree_location(results[24], 3, 3) + + # The tree has new units at these indexes + _check_tree_location(results[4], 2, 1, 1) + _check_tree_location(results[5], 2, 1, 2) + _check_tree_location(results[6], 2, 1, 3) + _check_tree_location(results[10], 2, 2, 3) + _check_tree_location(results[25], 3, 3, 1) + _check_tree_location(results[26], 3, 3, 2) + _check_tree_location(results[27], 3, 3, 3) + + +@patch("platform_plugin_aspects.sinks.course_overview_sink.get_detached_xblock_types") +@patch("platform_plugin_aspects.sinks.course_overview_sink.get_modulestore") +def test_xblock_graded_completable_mode(mock_modulestore, mock_detached): + """ + Test that our grading and completion fields serialize. + """ + # Create a fake course structure with a few fake XBlocks + course = course_factory() + course_overview = fake_course_overview_factory(modified=datetime.now()) + mock_modulestore.return_value.get_items.return_value = course + + # Fake the "detached types" list since we can't import it here + mock_detached.return_value = mock_detached_xblock_types() + + fake_serialized_course_overview = fake_serialize_fake_course_overview( + course_overview + ) + sink = XBlockSink(connection_overrides={}, log=MagicMock()) + + initial_data = {"dump_id": "xyz", "time_last_dumped": "2023-09-05"} + results = sink.serialize_item(fake_serialized_course_overview, initial=initial_data) + + def _check_item_serialized_location( + block, expected_graded=0, expected_completion_mode="unknown" + ): + """ + Assert the expected values in certain returned blocks or print useful debug information. 
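+
+        The graded flag and completion_mode are read from the block's
+        xblock_data_json payload.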
+ """ + try: + j = json.loads(block["xblock_data_json"]) + assert j["graded"] == expected_graded + assert j["completion_mode"] == expected_completion_mode + except AssertionError as e: + print(e) + print(block) + raise + + # These tree indexes are the only ones which should have gradable set + _check_item_serialized_location(results[31], 1) + _check_item_serialized_location(results[32], 1) + _check_item_serialized_location(results[33], 1) + + # These tree indexes are the only ones which should have non-"unknown" completion_modes. + _check_item_serialized_location(results[34], 0, "completable") + _check_item_serialized_location(results[35], 0, "aggregator") + _check_item_serialized_location(results[36], 0, "excluded") diff --git a/platform_plugin_aspects/sinks/tests/test_external_id_sink.py b/platform_plugin_aspects/sinks/tests/test_external_id_sink.py new file mode 100644 index 0000000..6ee1b83 --- /dev/null +++ b/platform_plugin_aspects/sinks/tests/test_external_id_sink.py @@ -0,0 +1,22 @@ +""" +Test the external_id_sink module. +""" + +from unittest.mock import patch + +from platform_plugin_aspects.sinks import ExternalIdSink + + +@patch("platform_plugin_aspects.sinks.ModelBaseSink.get_queryset") +def test_get_queryset(mock_get_queryset): + """ + Test the get_queryset method. + """ + sink = ExternalIdSink(None, None) + + sink.get_queryset() + + mock_get_queryset.assert_called_once_with(None) + mock_get_queryset.return_value.select_related.assert_called_once_with( + "user", "external_id_type" + ) diff --git a/platform_plugin_aspects/sinks/tests/test_serializers.py b/platform_plugin_aspects/sinks/tests/test_serializers.py new file mode 100644 index 0000000..4d41211 --- /dev/null +++ b/platform_plugin_aspects/sinks/tests/test_serializers.py @@ -0,0 +1,82 @@ +import json +from unittest.mock import Mock + +from django.test import TestCase + +from platform_plugin_aspects.sinks.serializers import ( + BaseSinkSerializer, + CourseOverviewSerializer, +) + + +class TestBaseSinkSerializer(TestCase): + """ + Test BaseSinkSerializer + """ + + def setUp(self): + self.serializer = BaseSinkSerializer() + + def test_to_representation(self): + """ + Test to_representation + """ + self.assertEqual( + list(self.serializer.to_representation({}).keys()), + ["dump_id", "time_last_dumped"], + ) + + +class TestCourseOverviewSerializer(TestCase): + """ + Test CourseOverviewSerializer + """ + + def setUp(self): + self.serializer = CourseOverviewSerializer() + + def test_get_course_data_json(self): + """ + Test to_representation + + json_fields = { + "advertised_start": getattr(overview, "advertised_start", ""), + "announcement": getattr(overview, "announcement", ""), + "lowest_passing_grade": float( + getattr(overview, "lowest_passing_grade", 0.0) + ), + "invitation_only": getattr(overview, "invitation_only", ""), + "max_student_enrollments_allowed": getattr( + overview, "max_student_enrollments_allowed", None + ), + "effort": getattr(overview, "effort", ""), + "enable_proctored_exams": getattr(overview, "enable_proctored_exams", ""), + "entrance_exam_enabled": getattr(overview, "entrance_exam_enabled", ""), + "external_id": getattr(overview, "external_id", ""), + "language": getattr(overview, "language", ""), + } + """ + json_fields = { + "advertised_start": "2018-01-01T00:00:00Z", + "announcement": "announcement", + "lowest_passing_grade": 0.0, + "invitation_only": "invitation_only", + "max_student_enrollments_allowed": None, + "effort": "effort", + "enable_proctored_exams": "enable_proctored_exams", + 
"entrance_exam_enabled": "entrance_exam_enabled", + "external_id": "external_id", + "language": "language", + } + mock_overview = Mock(**json_fields) + self.assertEqual( + self.serializer.get_course_data_json(mock_overview), json.dumps(json_fields) + ) + + def test_get_course_key(self): + """ + Test get_course_key + """ + mock_id = Mock() + mock_overview = Mock(id=mock_id) + self.assertEqual(self.serializer.get_course_key(mock_overview), str(mock_id)) diff --git a/platform_plugin_aspects/sinks/tests/test_user_profile_sink.py b/platform_plugin_aspects/sinks/tests/test_user_profile_sink.py new file mode 100644 index 0000000..dfc0251 --- /dev/null +++ b/platform_plugin_aspects/sinks/tests/test_user_profile_sink.py @@ -0,0 +1,20 @@ +""" +Test the external_id_sink module. +""" + +from unittest.mock import patch + +from platform_plugin_aspects.sinks.user_profile_sink import UserProfileSink + + +@patch("platform_plugin_aspects.sinks.ModelBaseSink.get_queryset") +def test_get_queryset(mock_get_queryset): + """ + Test the get_queryset method. + """ + sink = UserProfileSink(None, None) + + sink.get_queryset() + + mock_get_queryset.assert_called_once_with(None) + mock_get_queryset.return_value.select_related.assert_called_once_with("user") diff --git a/platform_plugin_aspects/sinks/tests/test_user_retire_sink.py b/platform_plugin_aspects/sinks/tests/test_user_retire_sink.py new file mode 100644 index 0000000..64ce82a --- /dev/null +++ b/platform_plugin_aspects/sinks/tests/test_user_retire_sink.py @@ -0,0 +1,106 @@ +""" +Tests for the user_retire sinks. +""" + +import logging +from unittest.mock import patch + +import responses +from django.test.utils import override_settings +from responses.registries import OrderedRegistry + +from platform_plugin_aspects.sinks import UserRetirementSink +from platform_plugin_aspects.tasks import dump_data_to_clickhouse +from test_utils.helpers import FakeUser + +log = logging.getLogger(__name__) + + +@responses.activate( # pylint: disable=unexpected-keyword-arg,no-value-for-parameter + registry=OrderedRegistry +) +@override_settings(EVENT_SINK_CLICKHOUSE_PII_MODELS=["user_profile", "external_id"]) +@patch("platform_plugin_aspects.sinks.UserRetirementSink.serialize_item") +@patch("platform_plugin_aspects.sinks.UserRetirementSink.is_enabled") +@patch("platform_plugin_aspects.sinks.UserRetirementSink.get_model") +def test_retire_user(mock_user_model, mock_is_enabled, mock_serialize_item): + """ + Test of a successful user retirement. 
+ """ + # Create a fake user + user = FakeUser(246) + mock_user_model.return_value.get_from_id.return_value = user + mock_is_enabled.return_value = True + mock_serialize_item.return_value = {"user_id": user.id} + + # Use the responses library to catch the POSTs to ClickHouse + # and match them against the expected values + user_profile_delete = responses.post( + "https://foo.bar/", + match=[ + responses.matchers.query_param_matcher( + { + "query": f"ALTER TABLE cool_data.user_profile DELETE WHERE user_id in ({user.id})", + } + ) + ], + ) + external_id_delete = responses.post( + "https://foo.bar/", + match=[ + responses.matchers.query_param_matcher( + { + "query": f"ALTER TABLE cool_data.external_id DELETE WHERE user_id in ({user.id})", + } + ) + ], + ) + + sink = UserRetirementSink(None, None) + dump_data_to_clickhouse( + sink_module=sink.__module__, + sink_name=sink.__class__.__name__, + object_id=user.id, + ) + + assert mock_user_model.call_count == 1 + assert mock_is_enabled.call_count == 1 + assert mock_serialize_item.call_count == 1 + assert user_profile_delete.call_count == 1 + assert external_id_delete.call_count == 1 + + +@responses.activate( # pylint: disable=unexpected-keyword-arg,no-value-for-parameter + registry=OrderedRegistry +) +@override_settings(EVENT_SINK_CLICKHOUSE_PII_MODELS=["user_profile"]) +@patch("platform_plugin_aspects.sinks.UserRetirementSink.serialize_item") +def test_retire_many_users(mock_serialize_item): + """ + Test of a successful "many users" retirement. + """ + # Create and serialize a few fake users + users = (FakeUser(246), FakeUser(22), FakeUser(91)) + mock_serialize_item.return_value = [{"user_id": user.id} for user in users] + + # Use the responses library to catch the POSTs to ClickHouse + # and match them against the expected values + user_profile_delete = responses.post( + "https://foo.bar/", + match=[ + responses.matchers.query_param_matcher( + { + "query": "ALTER TABLE cool_data.user_profile DELETE WHERE user_id in (22,246,91)", + } + ) + ], + ) + + sink = UserRetirementSink(None, log) + sink.dump( + item_id=users[0].id, + many=True, + ) + + assert mock_serialize_item.call_count == 1 + assert user_profile_delete.call_count == 1 diff --git a/platform_plugin_aspects/sinks/user_profile_sink.py b/platform_plugin_aspects/sinks/user_profile_sink.py new file mode 100644 index 0000000..529036e --- /dev/null +++ b/platform_plugin_aspects/sinks/user_profile_sink.py @@ -0,0 +1,20 @@ +"""User profile sink""" + +from platform_plugin_aspects.sinks.base_sink import ModelBaseSink +from platform_plugin_aspects.sinks.serializers import UserProfileSerializer + + +class UserProfileSink(ModelBaseSink): # pylint: disable=abstract-method + """ + Sink for user profile events + """ + + model = "user_profile" + unique_key = "id" + clickhouse_table_name = "user_profile" + timestamp_field = "time_last_dumped" + name = "User Profile" + serializer_class = UserProfileSerializer + + def get_queryset(self, start_pk=None): + return super().get_queryset(start_pk).select_related("user") diff --git a/platform_plugin_aspects/sinks/user_retire_sink.py b/platform_plugin_aspects/sinks/user_retire_sink.py new file mode 100644 index 0000000..e793d88 --- /dev/null +++ b/platform_plugin_aspects/sinks/user_retire_sink.py @@ -0,0 +1,50 @@ +"""User retirement sink""" + +import requests +from django.conf import settings + +from platform_plugin_aspects.sinks.base_sink import ModelBaseSink +from platform_plugin_aspects.sinks.serializers import UserRetirementSerializer + + +class 
UserRetirementSink(ModelBaseSink):  # pylint: disable=abstract-method
+    """
+    Sink for user retirement events
+    """
+
+    model = "auth_user"
+    unique_key = "id"
+    clickhouse_table_name = (
+        "dummy"  # uses settings.EVENT_SINK_CLICKHOUSE_PII_MODELS instead
+    )
+    timestamp_field = "modified"
+    name = "User Retirement"
+    serializer_class = UserRetirementSerializer
+
+    def send_item(self, serialized_item, many=False):
+        """
+        Unlike the other data sinks, the User Retirement sink deletes records from the user PII tables in ClickHouse.
+
+        Send delete queries to remove the serialized user from ClickHouse.
+        """
+        if many:
+            users = serialized_item
+        else:
+            users = [serialized_item]
+        user_ids = {str(user["user_id"]) for user in users}
+        user_ids_str = ",".join(sorted(user_ids))
+        clickhouse_pii_tables = getattr(
+            settings, "EVENT_SINK_CLICKHOUSE_PII_MODELS", []
+        )
+
+        for table in clickhouse_pii_tables:
+            params = {
+                "query": f"ALTER TABLE {self.ch_database}.{table} DELETE WHERE user_id in ({user_ids_str})",
+            }
+            request = requests.Request(
+                "POST",
+                self.ch_url,
+                params=params,
+                auth=self.ch_auth,
+            )
+            self._send_clickhouse_request(request)
diff --git a/platform_plugin_aspects/tasks.py b/platform_plugin_aspects/tasks.py
new file mode 100644
index 0000000..51c7379
--- /dev/null
+++ b/platform_plugin_aspects/tasks.py
@@ -0,0 +1,61 @@
+"""
+This file contains Celery tasks for exporting course and model data to ClickHouse.
+"""
+
+import logging
+from importlib import import_module
+
+from celery import shared_task
+from edx_django_utils.monitoring import set_code_owner_attribute
+from opaque_keys.edx.keys import CourseKey
+
+from platform_plugin_aspects.sinks import CourseOverviewSink
+from platform_plugin_aspects.utils import get_ccx_courses
+
+log = logging.getLogger(__name__)
+celery_log = logging.getLogger("edx.celery.task")
+
+
+@shared_task
+@set_code_owner_attribute
+def dump_course_to_clickhouse(course_key_string, connection_overrides=None):
+    """
+    Serialize a course and write it to ClickHouse.
+
+    Arguments:
+        course_key_string: course key for the course to be exported
+        connection_overrides (dict): overrides to ClickHouse connection
+            parameters specified in `settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG`.
+    """
+    if CourseOverviewSink.is_enabled():  # pragma: no cover
+        course_key = CourseKey.from_string(course_key_string)
+        sink = CourseOverviewSink(
+            connection_overrides=connection_overrides, log=celery_log
+        )
+        sink.dump(course_key)
+
+        ccx_courses = get_ccx_courses(course_key)
+        for ccx_course in ccx_courses:
+            ccx_course_key = str(ccx_course.locator)
+            sink.dump(ccx_course_key)
+
+
+@shared_task
+@set_code_owner_attribute
+def dump_data_to_clickhouse(
+    sink_module, sink_name, object_id, connection_overrides=None
+):
+    """
+    Serialize an object and write it to ClickHouse.
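+
+    The sink class is looked up dynamically from sink_module and sink_name, so
+    any enabled ModelBaseSink subclass can be dumped through this single task.
+
+    Example (illustrative, mirroring how the signal handlers enqueue it):
+
+        dump_data_to_clickhouse.delay(
+            sink_module=sink.__module__,
+            sink_name=sink.__class__.__name__,
+            object_id=str(instance.id),
+        )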
+ + Arguments: + sink_module: module path of sink + sink_name: name of sink class + object_id: id of object + connection_overrides (dict): overrides to ClickHouse connection + """ + Sink = getattr(import_module(sink_module), sink_name) + + if Sink.is_enabled(): + sink = Sink(connection_overrides=connection_overrides, log=celery_log) + sink.dump(object_id) diff --git a/platform_plugin_aspects/tests/commands/test_dump_data_to_clickhouse.py b/platform_plugin_aspects/tests/commands/test_dump_data_to_clickhouse.py new file mode 100644 index 0000000..98e7959 --- /dev/null +++ b/platform_plugin_aspects/tests/commands/test_dump_data_to_clickhouse.py @@ -0,0 +1,235 @@ +""" +Tests for the dump_data_to_clickhouse management command. +""" + +from collections import namedtuple +from datetime import datetime + +import django.core.management.base +import pytest +from django.core.management import call_command +from django_mock_queries.query import MockModel, MockSet + +from platform_plugin_aspects.sinks.base_sink import ModelBaseSink + +CommandOptions = namedtuple( + "TestCommandOptions", ["options", "expected_num_submitted", "expected_logs"] +) + + +def dummy_model_factory(): + """ + Create a dummy model for testing. + """ + + class DummyModel: + """ + Dummy model for testing. + """ + + def __init__(self, id): + self.id = id + self.created = datetime.now() + + @property + def pk(self): + return self.id + + return DummyModel + + +def dummy_serializer_factory(): + """ + Create a dummy serializer for testing. + """ + + class DummySerializer: + """ + Dummy serializer for testing. + """ + + def __init__(self, model, many=False, initial=None): + self.model = model + self.many = many + self.initial = initial + + @property + def data(self): + if self.many: + return [{"id": item, "created": datetime.now()} for item in self.model] + return {"id": self.model.id, "created": self.model.created} + + return DummySerializer + + +class DummySink(ModelBaseSink): + """ + Dummy sink for testing. + """ + + name = "Dummy" + model = "dummy" + unique_key = "id" + serializer_class = dummy_serializer_factory() + timestamp_field = "created" + clickhouse_table_name = "dummy_table" + factory = dummy_model_factory() + + def get_queryset(self, start_pk=None): + qs = MockSet( + MockModel(mock_name="john", email="john@test.invalid", pk=1), + MockModel(mock_name="jeff", email="jeff@test.invalid", pk=2), + MockModel(mock_name="bill", email="bill@test.invalid", pk=3), + MockModel(mock_name="joe", email="joe@test.invalid", pk=4), + MockModel(mock_name="jim", email="jim@test.invalid", pk=5), + ) + if start_pk: + qs = qs.filter(pk__gt=start_pk) + return qs + + def should_dump_item(self, unique_key): + return unique_key.pk != 1, "No reason" + + def send_item_and_log(self, item_id, serialized_item, many): + pass + + def get_object(self, item_id): + return self.factory(item_id) + + +def dump_command_basic_options(): + """ + Pytest params for all the different non-ClickHouse command options. 
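+
+    Each CommandOptions entry carries the kwargs handed to call_command, the
+    number of objects expected to be submitted, and the log fragments the test
+    expects to find in caplog, e.g.:
+
+        call_command("dump_data_to_clickhouse", object="dummy", batch_size=2, sleep_time=0)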
+    """
+    options = [
+        CommandOptions(
+            options={"object": "dummy", "batch_size": 1, "sleep_time": 0},
+            expected_num_submitted=4,
+            expected_logs=[
+                "Dumped 4 objects to ClickHouse",
+            ],
+        ),
+        CommandOptions(
+            options={"object": "dummy", "limit": 1, "batch_size": 1, "sleep_time": 0},
+            expected_num_submitted=1,
+            expected_logs=["Limit of 1 eligible objects has been reached, quitting!"],
+        ),
+        CommandOptions(
+            options={"object": "dummy", "batch_size": 2, "sleep_time": 0},
+            expected_num_submitted=2,
+            expected_logs=[
+                "Now dumping 2 Dummy to ClickHouse",
+            ],
+        ),
+        CommandOptions(
+            options={
+                "object": "dummy",
+                "batch_size": 1,
+                "sleep_time": 0,
+                "ids": ["1", "2", "3"],
+            },
+            expected_num_submitted=3,
+            expected_logs=[
+                "Now dumping 1 Dummy to ClickHouse",
+                "Dumped 2 objects to ClickHouse",
+                "Last ID: 3",
+            ],
+        ),
+        CommandOptions(
+            options={
+                "object": "dummy",
+                "batch_size": 1,
+                "sleep_time": 0,
+                "start_pk": 1,
+            },
+            expected_num_submitted=4,
+            expected_logs=[
+                "Now dumping 1 Dummy to ClickHouse",
+                "Dumped 4 objects to ClickHouse",
+            ],
+        ),
+        CommandOptions(
+            options={
+                "object": "dummy",
+                "batch_size": 1,
+                "sleep_time": 0,
+                "force": True,
+            },
+            expected_num_submitted=4,
+            expected_logs=[
+                "Now dumping 1 Dummy to ClickHouse",
+                "Dumped 5 objects to ClickHouse",
+            ],
+        ),
+        CommandOptions(
+            options={
+                "object": "dummy",
+                "batch_size": 2,
+                "sleep_time": 0,
+                "ids_to_skip": ["3", "4", "5"],
+            },
+            expected_num_submitted=4,
+            expected_logs=[
+                "Now dumping 1 Dummy to ClickHouse",
+                "Dumped 1 objects to ClickHouse",
+            ],
+        ),
+    ]
+
+    for option in options:
+        yield option
+
+
+@pytest.mark.parametrize("test_command_option", dump_command_basic_options())
+def test_dump_courses_options(test_command_option, caplog):
+    option_combination, expected_num_submitted, expected_outputs = test_command_option
+
+    assert DummySink.model in [cls.model for cls in ModelBaseSink.__subclasses__()]
+
+    call_command("dump_data_to_clickhouse", **option_combination)
+
+    for expected_output in expected_outputs:
+        assert expected_output in caplog.text
+
+
+def dump_basic_invalid_options():
+    """
+    Pytest params for command option combinations that should fail and raise CommandError.
+ """ + options = [ + CommandOptions( + options={"object": "dummy", "limit": 1, "force": True}, + expected_num_submitted=1, + expected_logs=[], + ), + CommandOptions( + options={"object": "dummy", "limit": 1, "force": True}, + expected_num_submitted=1, + expected_logs=[], + ), + CommandOptions( + options={"object": "dummy", "limit": 0, "force": True}, + expected_num_submitted=1, + expected_logs=[], + ), + CommandOptions( + options={}, + expected_num_submitted=1, + expected_logs=[], + ), + ] + + for option in options: + yield option + + +@pytest.mark.parametrize("test_command_option", dump_basic_invalid_options()) +def test_dump_courses_options_invalid(test_command_option, caplog): + option_combination, expected_num_submitted, expected_outputs = test_command_option + assert DummySink.model in [cls.model for cls in ModelBaseSink.__subclasses__()] + + with pytest.raises(django.core.management.base.CommandError): + call_command("dump_data_to_clickhouse", **option_combination) + # assert mock_dump_data.apply_async.call_count == expected_num_submitted + for expected_output in expected_outputs: + assert expected_output in caplog.text diff --git a/platform_plugin_aspects/tests/test_signals.py b/platform_plugin_aspects/tests/test_signals.py new file mode 100644 index 0000000..a720ae1 --- /dev/null +++ b/platform_plugin_aspects/tests/test_signals.py @@ -0,0 +1,66 @@ +""" +Tests for signal handlers. +""" + +from unittest.mock import Mock, patch + +from django.test import TestCase + +from platform_plugin_aspects.signals import ( + on_externalid_saved, + on_user_retirement, + receive_course_publish, +) +from platform_plugin_aspects.sinks.external_id_sink import ExternalIdSink +from platform_plugin_aspects.sinks.user_retire_sink import UserRetirementSink + + +class SignalHandlersTestCase(TestCase): + """ + Test cases for signal handlers. + """ + + @patch("platform_plugin_aspects.tasks.dump_course_to_clickhouse") + def test_receive_course_publish(self, mock_dump_task): + """ + Test that receive_course_publish calls dump_course_to_clickhouse. + """ + sender = Mock() + course_key = "sample_key" + receive_course_publish(sender, course_key) + + mock_dump_task.delay.assert_called_once_with(course_key) + + @patch("platform_plugin_aspects.tasks.dump_data_to_clickhouse") + def test_on_externalid_saved(self, mock_dump_task): + """ + Test that on_externalid_saved calls dump_data_to_clickhouse. + """ + instance = Mock() + sender = Mock() + on_externalid_saved(sender, instance) + + sink = ExternalIdSink(None, None) + + mock_dump_task.delay.assert_called_once_with( + sink_module=sink.__module__, + sink_name=sink.__class__.__name__, + object_id=str(instance.id), + ) + + @patch("platform_plugin_aspects.tasks.dump_data_to_clickhouse") + def test_on_user_retirement(self, mock_dump_task): + """ + Test that on_user_retirement calls dump_data_to_clickhouse + """ + instance = Mock() + sender = Mock() + on_user_retirement(sender, instance) + + sink = UserRetirementSink(None, None) + + mock_dump_task.delay.assert_called_once_with( + sink_module=sink.__module__, + sink_name=sink.__class__.__name__, + object_id=str(instance.id), + ) diff --git a/platform_plugin_aspects/tests/test_tasks.py b/platform_plugin_aspects/tests/test_tasks.py new file mode 100644 index 0000000..79c2825 --- /dev/null +++ b/platform_plugin_aspects/tests/test_tasks.py @@ -0,0 +1,59 @@ +""" +Tests for the tasks module. 
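+
+These tests exercise dump_data_to_clickhouse with the sink module import and
+the sink's is_enabled() gate mocked out.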
+""" + +import unittest +from unittest.mock import MagicMock, patch + +from platform_plugin_aspects.tasks import dump_data_to_clickhouse + + +class TestTasks(unittest.TestCase): + """ + Test cases for tasks. + """ + + @patch("platform_plugin_aspects.tasks.import_module") + @patch("platform_plugin_aspects.tasks.celery_log") + def test_dump_data_to_clickhouse(self, mock_celery_log, mock_import_module): + # Mock the required objects and methods + mock_Sink_class = MagicMock() + mock_Sink_instance = mock_Sink_class.return_value + mock_Sink_instance.dump.return_value = None + mock_import_module.return_value = MagicMock(**{"sink_name": mock_Sink_class}) + + # Call the function + dump_data_to_clickhouse( + "sink_module", + "sink_name", + "object_id", + connection_overrides={"param": "value"}, + ) + + # Assertions + mock_import_module.assert_called_once_with("sink_module") + mock_Sink_class.assert_called_once_with( + connection_overrides={"param": "value"}, log=mock_celery_log + ) + mock_Sink_instance.dump.assert_called_once_with("object_id") + + @patch("platform_plugin_aspects.tasks.import_module") + def test_dump_data_to_clickhouse_disabled_sink(self, mock_import_module): + # Mock the required objects and methods + mock_Sink_class = MagicMock() + mock_Sink_class.is_enabled.return_value = False + mock_Sink_instance = mock_Sink_class.return_value + mock_Sink_instance.dump.return_value = None + mock_import_module.return_value = MagicMock(**{"sink_name": mock_Sink_class}) + + dump_data_to_clickhouse( + "sink_module", + "sink_name", + "object_id", + connection_overrides={"param": "value"}, + ) + + # Assertions + mock_import_module.assert_called_once_with("sink_module") + mock_Sink_class.assert_not_called() + mock_Sink_instance.dump.assert_not_called() diff --git a/platform_plugin_aspects/tests/test_utils.py b/platform_plugin_aspects/tests/test_utils.py index d11c9c7..01cd44d 100644 --- a/platform_plugin_aspects/tests/test_utils.py +++ b/platform_plugin_aspects/tests/test_utils.py @@ -1,5 +1,5 @@ """ -Tests for the utils module. +Test utils. 
""" from collections import namedtuple @@ -8,7 +8,11 @@ from django.conf import settings -from platform_plugin_aspects.utils import generate_superset_context +from platform_plugin_aspects.utils import ( + generate_superset_context, + get_ccx_courses, + get_model, +) User = namedtuple("User", ["username"]) @@ -18,6 +22,78 @@ class TestUtils(TestCase): Test utils module """ + @patch("platform_plugin_aspects.utils.import_module") + @patch.object( + settings, + "EVENT_SINK_CLICKHOUSE_MODEL_CONFIG", + {"my_model": {"module": "myapp.models", "model": "MyModel"}}, + ) + @patch("platform_plugin_aspects.utils.logger") + def test_get_model_success(self, mock_log, mock_import_module): + mock_model = Mock(__name__="MyModel") + mock_import_module.return_value = Mock(MyModel=mock_model) + + model = get_model("my_model") + + mock_import_module.assert_called_once_with("myapp.models") + self.assertIsNotNone(model) + self.assertEqual(model.__name__, "MyModel") + mock_log.assert_not_called() + + @patch.object( + settings, + "EVENT_SINK_CLICKHOUSE_MODEL_CONFIG", + {"my_model": {"module": "myapp.models", "model": "NonExistentModel"}}, + ) + def test_get_model_non_existent_model(self): + model = get_model("my_model") + self.assertIsNone(model) + + @patch.object( + settings, + "EVENT_SINK_CLICKHOUSE_MODEL_CONFIG", + {"my_model": {"module": "non_existent_module", "model": "MyModel"}}, + ) + def test_get_model_non_existent_module(self): + model = get_model("my_model") + + self.assertIsNone(model) + + @patch.object( + settings, "EVENT_SINK_CLICKHOUSE_MODEL_CONFIG", {"my_model": {"module": ""}} + ) + def test_get_model_missing_module_and_model(self): + model = get_model("my_model") + self.assertIsNone(model) + + @patch.object(settings, "EVENT_SINK_CLICKHOUSE_MODEL_CONFIG", {}) + def test_get_model_missing_module_and_model_2(self): + model = get_model("my_model") + self.assertIsNone(model) + + @patch.object( + settings, + "EVENT_SINK_CLICKHOUSE_MODEL_CONFIG", + {"my_model": {"module": "myapp.models"}}, + ) + def test_get_model_missing_model_config(self): + model = get_model("my_model") + self.assertIsNone(model) + + @patch("platform_plugin_aspects.utils.get_model") + def test_get_ccx_courses(self, mock_get_model): + mock_get_model.return_value = mock_model = Mock() + + get_ccx_courses("dummy_key") + + mock_model.objects.filter.assert_called_once_with(course_id="dummy_key") + + @patch.object(settings, "FEATURES", {"CUSTOM_COURSES_EDX": False}) + def test_get_ccx_courses_feature_disabled(self): + courses = get_ccx_courses("dummy_key") + + self.assertEqual(list(courses), []) + @patch("platform_plugin_aspects.utils.generate_guest_token") def test_generate_superset_context(self, mock_generate_guest_token): """ @@ -40,7 +116,9 @@ def test_generate_superset_context(self, mock_generate_guest_token): self.assertNotIn("exception", context) @patch("platform_plugin_aspects.utils.SupersetClient") - def test_generate_superset_context_with_superset_client_exception(self, mock_superset_client): + def test_generate_superset_context_with_superset_client_exception( + self, mock_superset_client + ): """ Test generate_superset_context """ @@ -59,7 +137,9 @@ def test_generate_superset_context_with_superset_client_exception(self, mock_sup @patch("platform_plugin_aspects.utils.SupersetClient") @patch("platform_plugin_aspects.utils.get_current_user") - def test_generate_superset_context_succesful(self, mock_get_current_user, mock_superset_client): + def test_generate_superset_context_succesful( + self, mock_get_current_user, 
mock_superset_client + ): """ Test generate_superset_context """ diff --git a/platform_plugin_aspects/utils.py b/platform_plugin_aspects/utils.py index ce09db1..d17c767 100644 --- a/platform_plugin_aspects/utils.py +++ b/platform_plugin_aspects/utils.py @@ -4,6 +4,7 @@ import logging import os +from importlib import import_module from crum import get_current_user from django.conf import settings @@ -16,9 +17,7 @@ def generate_superset_context( # pylint: disable=dangerous-default-value - context, - dashboard_uuid="", - filters=[] + context, dashboard_uuid="", filters=[] ): """ Update context with superset token and dashboard id. @@ -115,3 +114,64 @@ def generate_guest_token(user, course, dashboard_uuid, filters): except Exception as exc: # pylint: disable=broad-except logger.error(exc) return None, exc + + +def get_model(model_setting): + """Load a model from a setting.""" + MODEL_CONFIG = getattr(settings, "EVENT_SINK_CLICKHOUSE_MODEL_CONFIG", {}) + + model_config = MODEL_CONFIG.get(model_setting) + if not model_config: + logger.error("Unable to find model config for %s", model_setting) + return None + + module = model_config.get("module") + if not module: + logger.error("Module was not specified in %s", model_setting) + return None + + model_name = model_config.get("model") + if not model_name: + logger.error("Model was not specified in %s", model_setting) + return None + + try: + model = getattr(import_module(module), model_name) + return model + except (ImportError, AttributeError, ModuleNotFoundError): + logger.error("Unable to load model %s.%s", module, model_name) + + return None + + +def get_modulestore(): # pragma: no cover + """ + Import and return modulestore. + + Placed here to avoid model import at startup and to facilitate mocking them in testing. + """ + # pylint: disable=import-outside-toplevel,import-error + from xmodule.modulestore.django import modulestore + + return modulestore() + + +def get_detached_xblock_types(): # pragma: no cover + """ + Import and return DETACHED_XBLOCK_TYPES. + + Placed here to avoid model import at startup and to facilitate mocking them in testing. + """ + # pylint: disable=import-outside-toplevel,import-error + from xmodule.modulestore.store_utilities import DETACHED_XBLOCK_TYPES + + return DETACHED_XBLOCK_TYPES + + +def get_ccx_courses(course_id): + """ + Get the CCX courses for a given course. + """ + if settings.FEATURES.get("CUSTOM_COURSES_EDX"): + return get_model("custom_course_edx").objects.filter(course_id=course_id) + return [] diff --git a/platform_plugin_aspects/waffle.py b/platform_plugin_aspects/waffle.py new file mode 100644 index 0000000..cf55bb1 --- /dev/null +++ b/platform_plugin_aspects/waffle.py @@ -0,0 +1,5 @@ +""" +Configuration for event sink clickhouse. 
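+
+Defines WAFFLE_FLAG_NAMESPACE, the namespace used for this plugin's waffle flags.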
+""" + +WAFFLE_FLAG_NAMESPACE = "event_sink_clickhouse" diff --git a/requirements/base.in b/requirements/base.in index a23d8e4..0e0bac7 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -1,10 +1,16 @@ # Core requirements for using this application -c constraints.txt -Django # Web application framework openedx-atlas openedx-filters web_fragments superset-api-client web_fragments django_crum +celery # Asynchronous task execution library +Django # Web application framework +requests # HTTP request library +edx-django-utils # Django utilities, we use caching and monitoring +edx-opaque-keys # Parsing library for course and usage keys +django-rest-framework # REST API framework +edx-toggles diff --git a/requirements/base.txt b/requirements/base.txt index 54a3abb..335de9d 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -4,45 +4,147 @@ # # make upgrade # +amqp==5.2.0 + # via kombu asgiref==3.7.2 # via django -backports-zoneinfo==0.2.1 - # via django +backports-zoneinfo[tzdata]==0.2.1 + # via + # celery + # django + # kombu +billiard==4.2.0 + # via celery +celery==5.3.6 + # via -r requirements/base.in certifi==2024.2.2 # via requests +cffi==1.16.0 + # via pynacl charset-normalizer==3.3.2 # via requests -django==4.2.10 +click==8.1.7 + # via + # celery + # click-didyoumean + # click-plugins + # click-repl + # code-annotations + # edx-django-utils +click-didyoumean==0.3.0 + # via celery +click-plugins==1.1.1 + # via celery +click-repl==0.3.0 + # via celery +code-annotations==1.6.0 + # via edx-toggles +django==4.2.11 # via # -c https://raw.githubusercontent.com/edx/edx-lint/master/edx_lint/files/common_constraints.txt # -r requirements/base.in # django-crum + # django-waffle + # djangorestframework + # edx-django-utils + # edx-toggles # openedx-filters django-crum==0.7.9 + # via + # -r requirements/base.in + # edx-django-utils + # edx-toggles +django-rest-framework==0.1.0 + # via -r requirements/base.in +django-waffle==4.1.0 + # via + # edx-django-utils + # edx-toggles +djangorestframework==3.14.0 + # via django-rest-framework +edx-django-utils==5.10.1 + # via + # -r requirements/base.in + # edx-toggles +edx-opaque-keys==2.5.1 + # via -r requirements/base.in +edx-toggles==5.1.1 # via -r requirements/base.in idna==3.6 # via requests +jinja2==3.1.3 + # via code-annotations +kombu==5.3.5 + # via celery +markupsafe==2.1.5 + # via jinja2 +newrelic==9.7.0 + # via edx-django-utils oauthlib==3.2.2 # via requests-oauthlib openedx-atlas==0.6.0 # via -r requirements/base.in openedx-filters==1.6.0 # via -r requirements/base.in +pbr==6.0.0 + # via stevedore +prompt-toolkit==3.0.43 + # via click-repl +psutil==5.9.8 + # via edx-django-utils +pycparser==2.21 + # via cffi +pymongo==3.13.0 + # via edx-opaque-keys +pynacl==1.5.0 + # via edx-django-utils +python-dateutil==2.9.0.post0 + # via celery +python-slugify==8.0.4 + # via code-annotations +pytz==2024.1 + # via djangorestframework pyyaml==6.0.1 - # via superset-api-client + # via + # code-annotations + # superset-api-client requests==2.31.0 # via + # -r requirements/base.in # requests-oauthlib # superset-api-client requests-oauthlib==1.3.1 # via superset-api-client +six==1.16.0 + # via python-dateutil sqlparse==0.4.4 # via django +stevedore==5.2.0 + # via + # code-annotations + # edx-django-utils + # edx-opaque-keys superset-api-client==0.6.0 # via -r requirements/base.in +text-unidecode==1.3 + # via python-slugify typing-extensions==4.10.0 - # via asgiref + # via + # asgiref + # edx-opaque-keys + # kombu +tzdata==2024.1 + # via + # 
backports-zoneinfo + # celery urllib3==2.2.1 # via requests +vine==5.1.0 + # via + # amqp + # celery + # kombu +wcwidth==0.2.13 + # via prompt-toolkit web-fragments==2.1.0 # via -r requirements/base.in diff --git a/requirements/dev.in b/requirements/dev.in index eb8c92c..52f525e 100644 --- a/requirements/dev.in +++ b/requirements/dev.in @@ -7,3 +7,4 @@ diff-cover # Changeset diff test coverage edx-i18n-tools # For i18n_tool dummy +black # For formatting diff --git a/requirements/dev.txt b/requirements/dev.txt index cb77544..78d779c 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -4,6 +4,10 @@ # # make upgrade # +amqp==5.2.0 + # via + # -r requirements/quality.txt + # kombu asgiref==3.7.2 # via # -r requirements/quality.txt @@ -13,10 +17,20 @@ astroid==3.1.0 # -r requirements/quality.txt # pylint # pylint-celery -backports-zoneinfo==0.2.1 +backports-zoneinfo[tzdata]==0.2.1 # via # -r requirements/quality.txt + # celery # django + # kombu +billiard==4.2.0 + # via + # -r requirements/quality.txt + # celery +black==24.2.0 + # via + # -r requirements/dev.in + # -r requirements/quality.txt build==1.1.1 # via # -r requirements/pip-tools.txt @@ -25,10 +39,16 @@ cachetools==5.3.3 # via # -r requirements/ci.txt # tox +celery==5.3.6 + # via -r requirements/quality.txt certifi==2024.2.2 # via # -r requirements/quality.txt # requests +cffi==1.16.0 + # via + # -r requirements/quality.txt + # pynacl chardet==5.2.0 # via # -r requirements/ci.txt @@ -42,18 +62,37 @@ click==8.1.7 # via # -r requirements/pip-tools.txt # -r requirements/quality.txt + # black + # celery + # click-didyoumean # click-log + # click-plugins + # click-repl # code-annotations + # edx-django-utils # edx-lint # pip-tools +click-didyoumean==0.3.0 + # via + # -r requirements/quality.txt + # celery click-log==0.4.0 # via # -r requirements/quality.txt # edx-lint +click-plugins==1.1.1 + # via + # -r requirements/quality.txt + # celery +click-repl==0.3.0 + # via + # -r requirements/quality.txt + # celery code-annotations==1.6.0 # via # -r requirements/quality.txt # edx-lint + # edx-toggles colorama==0.4.6 # via # -r requirements/ci.txt @@ -62,6 +101,8 @@ coverage[toml]==7.4.3 # via # -r requirements/quality.txt # pytest-cov +ddt==1.7.2 + # via -r requirements/quality.txt diff-cover==8.0.3 # via -r requirements/dev.in dill==0.3.8 @@ -72,19 +113,50 @@ distlib==0.3.8 # via # -r requirements/ci.txt # virtualenv -django==4.2.10 +django==4.2.11 # via # -c https://raw.githubusercontent.com/edx/edx-lint/master/edx_lint/files/common_constraints.txt # -r requirements/quality.txt # django-crum + # django-mock-queries + # django-waffle + # djangorestframework + # edx-django-utils # edx-i18n-tools + # edx-toggles + # model-bakery # openedx-filters django-crum==0.7.9 + # via + # -r requirements/quality.txt + # edx-django-utils + # edx-toggles +django-mock-queries==2.2.0 + # via -r requirements/quality.txt +django-rest-framework==0.1.0 # via -r requirements/quality.txt +django-waffle==4.1.0 + # via + # -r requirements/quality.txt + # edx-django-utils + # edx-toggles +djangorestframework==3.14.0 + # via + # -r requirements/quality.txt + # django-mock-queries + # django-rest-framework +edx-django-utils==5.10.1 + # via + # -r requirements/quality.txt + # edx-toggles edx-i18n-tools==1.3.0 # via -r requirements/dev.in edx-lint==5.3.6 # via -r requirements/quality.txt +edx-opaque-keys==2.5.1 + # via -r requirements/quality.txt +edx-toggles==5.1.1 + # via -r requirements/quality.txt exceptiongroup==1.2.0 # via # -r requirements/quality.txt @@ 
-115,6 +187,10 @@ jinja2==3.1.3 # -r requirements/quality.txt # code-annotations # diff-cover +kombu==5.3.5 + # via + # -r requirements/quality.txt + # celery lxml==5.1.0 # via edx-i18n-tools markupsafe==2.1.5 @@ -125,6 +201,18 @@ mccabe==0.7.0 # via # -r requirements/quality.txt # pylint +model-bakery==1.17.0 + # via + # -r requirements/quality.txt + # django-mock-queries +mypy-extensions==1.0.0 + # via + # -r requirements/quality.txt + # black +newrelic==9.7.0 + # via + # -r requirements/quality.txt + # edx-django-utils oauthlib==3.2.2 # via # -r requirements/quality.txt @@ -138,12 +226,17 @@ packaging==23.2 # -r requirements/ci.txt # -r requirements/pip-tools.txt # -r requirements/quality.txt + # black # build # pyproject-api # pytest # tox path==16.10.0 # via edx-i18n-tools +pathspec==0.12.1 + # via + # -r requirements/quality.txt + # black pbr==6.0.0 # via # -r requirements/quality.txt @@ -154,6 +247,7 @@ platformdirs==4.2.0 # via # -r requirements/ci.txt # -r requirements/quality.txt + # black # pylint # tox # virtualenv @@ -166,8 +260,20 @@ pluggy==1.4.0 # tox polib==1.2.0 # via edx-i18n-tools +prompt-toolkit==3.0.43 + # via + # -r requirements/quality.txt + # click-repl +psutil==5.9.8 + # via + # -r requirements/quality.txt + # edx-django-utils pycodestyle==2.11.1 # via -r requirements/quality.txt +pycparser==2.21 + # via + # -r requirements/quality.txt + # cffi pydocstyle==6.3.0 # via -r requirements/quality.txt pygments==2.17.2 @@ -192,6 +298,14 @@ pylint-plugin-utils==0.8.2 # -r requirements/quality.txt # pylint-celery # pylint-django +pymongo==3.13.0 + # via + # -r requirements/quality.txt + # edx-opaque-keys +pynacl==1.5.0 + # via + # -r requirements/quality.txt + # edx-django-utils pyproject-api==1.6.1 # via # -r requirements/ci.txt @@ -210,29 +324,42 @@ pytest-cov==4.1.0 # via -r requirements/quality.txt pytest-django==4.8.0 # via -r requirements/quality.txt +python-dateutil==2.9.0.post0 + # via + # -r requirements/quality.txt + # celery python-slugify==8.0.4 # via # -r requirements/quality.txt # code-annotations +pytz==2024.1 + # via + # -r requirements/quality.txt + # djangorestframework pyyaml==6.0.1 # via # -r requirements/quality.txt # code-annotations # edx-i18n-tools + # responses # superset-api-client requests==2.31.0 # via # -r requirements/quality.txt # requests-oauthlib + # responses # superset-api-client requests-oauthlib==1.3.1 # via # -r requirements/quality.txt # superset-api-client +responses==0.25.0 + # via -r requirements/quality.txt six==1.16.0 # via # -r requirements/quality.txt # edx-lint + # python-dateutil snowballstemmer==2.2.0 # via # -r requirements/quality.txt @@ -245,6 +372,8 @@ stevedore==5.2.0 # via # -r requirements/quality.txt # code-annotations + # edx-django-utils + # edx-opaque-keys superset-api-client==0.6.0 # via -r requirements/quality.txt text-unidecode==1.3 @@ -256,6 +385,7 @@ tomli==2.0.1 # -r requirements/ci.txt # -r requirements/pip-tools.txt # -r requirements/quality.txt + # black # build # coverage # pip-tools @@ -275,15 +405,34 @@ typing-extensions==4.10.0 # -r requirements/quality.txt # asgiref # astroid + # black + # edx-opaque-keys + # kombu # pylint +tzdata==2024.1 + # via + # -r requirements/quality.txt + # backports-zoneinfo + # celery urllib3==2.2.1 # via # -r requirements/quality.txt # requests + # responses +vine==5.1.0 + # via + # -r requirements/quality.txt + # amqp + # celery + # kombu virtualenv==20.25.1 # via # -r requirements/ci.txt # tox +wcwidth==0.2.13 + # via + # -r requirements/quality.txt + # prompt-toolkit 
web-fragments==2.1.0 # via -r requirements/quality.txt wheel==0.42.0 diff --git a/requirements/doc.txt b/requirements/doc.txt index 33837bf..a1a6928 100644 --- a/requirements/doc.txt +++ b/requirements/doc.txt @@ -8,6 +8,10 @@ accessible-pygments==0.0.4 # via pydata-sphinx-theme alabaster==0.7.13 # via sphinx +amqp==5.2.0 + # via + # -r requirements/test.txt + # kombu asgiref==3.7.2 # via # -r requirements/test.txt @@ -16,20 +20,31 @@ babel==2.14.0 # via # pydata-sphinx-theme # sphinx -backports-zoneinfo==0.2.1 +backports-zoneinfo[tzdata]==0.2.1 # via # -r requirements/test.txt + # celery # django + # kombu beautifulsoup4==4.12.3 # via pydata-sphinx-theme +billiard==4.2.0 + # via + # -r requirements/test.txt + # celery build==1.1.1 # via -r requirements/doc.in +celery==5.3.6 + # via -r requirements/test.txt certifi==2024.2.2 # via # -r requirements/test.txt # requests cffi==1.16.0 - # via cryptography + # via + # -r requirements/test.txt + # cryptography + # pynacl charset-normalizer==3.3.2 # via # -r requirements/test.txt @@ -37,23 +52,67 @@ charset-normalizer==3.3.2 click==8.1.7 # via # -r requirements/test.txt + # celery + # click-didyoumean + # click-plugins + # click-repl # code-annotations + # edx-django-utils +click-didyoumean==0.3.0 + # via + # -r requirements/test.txt + # celery +click-plugins==1.1.1 + # via + # -r requirements/test.txt + # celery +click-repl==0.3.0 + # via + # -r requirements/test.txt + # celery code-annotations==1.6.0 - # via -r requirements/test.txt + # via + # -r requirements/test.txt + # edx-toggles coverage[toml]==7.4.3 # via # -r requirements/test.txt # pytest-cov cryptography==42.0.5 # via secretstorage -django==4.2.10 +ddt==1.7.2 + # via -r requirements/test.txt +django==4.2.11 # via # -c https://raw.githubusercontent.com/edx/edx-lint/master/edx_lint/files/common_constraints.txt # -r requirements/test.txt # django-crum + # django-mock-queries + # django-waffle + # djangorestframework + # edx-django-utils + # edx-toggles + # model-bakery # openedx-filters django-crum==0.7.9 + # via + # -r requirements/test.txt + # edx-django-utils + # edx-toggles +django-mock-queries==2.2.0 # via -r requirements/test.txt +django-rest-framework==0.1.0 + # via -r requirements/test.txt +django-waffle==4.1.0 + # via + # -r requirements/test.txt + # edx-django-utils + # edx-toggles +djangorestframework==3.14.0 + # via + # -r requirements/test.txt + # django-mock-queries + # django-rest-framework doc8==1.1.1 # via -r requirements/doc.in docutils==0.19 @@ -63,6 +122,14 @@ docutils==0.19 # readme-renderer # restructuredtext-lint # sphinx +edx-django-utils==5.10.1 + # via + # -r requirements/test.txt + # edx-toggles +edx-opaque-keys==2.5.1 + # via -r requirements/test.txt +edx-toggles==5.1.1 + # via -r requirements/test.txt exceptiongroup==1.2.0 # via # -r requirements/test.txt @@ -98,6 +165,10 @@ jinja2==3.1.3 # sphinx keyring==24.3.1 # via twine +kombu==5.3.5 + # via + # -r requirements/test.txt + # celery markdown-it-py==3.0.0 # via rich markupsafe==2.1.5 @@ -106,8 +177,16 @@ markupsafe==2.1.5 # jinja2 mdurl==0.1.2 # via markdown-it-py +model-bakery==1.17.0 + # via + # -r requirements/test.txt + # django-mock-queries more-itertools==10.2.0 # via jaraco-classes +newrelic==9.7.0 + # via + # -r requirements/test.txt + # edx-django-utils nh3==0.2.15 # via readme-renderer oauthlib==3.2.2 @@ -135,8 +214,18 @@ pluggy==1.4.0 # via # -r requirements/test.txt # pytest +prompt-toolkit==3.0.43 + # via + # -r requirements/test.txt + # click-repl +psutil==5.9.8 + # via + # -r 
requirements/test.txt + # edx-django-utils pycparser==2.21 - # via cffi + # via + # -r requirements/test.txt + # cffi pydata-sphinx-theme==0.14.4 # via sphinx-book-theme pygments==2.17.2 @@ -147,6 +236,14 @@ pygments==2.17.2 # readme-renderer # rich # sphinx +pymongo==3.13.0 + # via + # -r requirements/test.txt + # edx-opaque-keys +pynacl==1.5.0 + # via + # -r requirements/test.txt + # edx-django-utils pyproject-hooks==1.0.0 # via build pytest==8.1.0 @@ -158,16 +255,24 @@ pytest-cov==4.1.0 # via -r requirements/test.txt pytest-django==4.8.0 # via -r requirements/test.txt +python-dateutil==2.9.0.post0 + # via + # -r requirements/test.txt + # celery python-slugify==8.0.4 # via # -r requirements/test.txt # code-annotations pytz==2024.1 - # via babel + # via + # -r requirements/test.txt + # babel + # djangorestframework pyyaml==6.0.1 # via # -r requirements/test.txt # code-annotations + # responses # superset-api-client readme-renderer==43.0 # via twine @@ -176,6 +281,7 @@ requests==2.31.0 # -r requirements/test.txt # requests-oauthlib # requests-toolbelt + # responses # sphinx # superset-api-client # twine @@ -185,6 +291,8 @@ requests-oauthlib==1.3.1 # superset-api-client requests-toolbelt==1.0.0 # via twine +responses==0.25.0 + # via -r requirements/test.txt restructuredtext-lint==1.4.0 # via doc8 rfc3986==2.0.0 @@ -193,6 +301,10 @@ rich==13.7.1 # via twine secretstorage==3.3.3 # via keyring +six==1.16.0 + # via + # -r requirements/test.txt + # python-dateutil snowballstemmer==2.2.0 # via sphinx soupsieve==2.5 @@ -225,6 +337,8 @@ stevedore==5.2.0 # -r requirements/test.txt # code-annotations # doc8 + # edx-django-utils + # edx-opaque-keys superset-api-client==0.6.0 # via -r requirements/test.txt text-unidecode==1.3 @@ -245,13 +359,31 @@ typing-extensions==4.10.0 # via # -r requirements/test.txt # asgiref + # edx-opaque-keys + # kombu # pydata-sphinx-theme # rich +tzdata==2024.1 + # via + # -r requirements/test.txt + # backports-zoneinfo + # celery urllib3==2.2.1 # via # -r requirements/test.txt # requests + # responses # twine +vine==5.1.0 + # via + # -r requirements/test.txt + # amqp + # celery + # kombu +wcwidth==0.2.13 + # via + # -r requirements/test.txt + # prompt-toolkit web-fragments==2.1.0 # via -r requirements/test.txt zipp==3.17.0 diff --git a/requirements/quality.in b/requirements/quality.in index 93661d9..ec3e342 100644 --- a/requirements/quality.in +++ b/requirements/quality.in @@ -8,3 +8,4 @@ edx-lint # edX pylint rules and plugins isort # to standardize order of imports pycodestyle # PEP 8 compliance validation pydocstyle # PEP 257 compliance validation +black # code formatting diff --git a/requirements/quality.txt b/requirements/quality.txt index fb3b9f4..478f824 100644 --- a/requirements/quality.txt +++ b/requirements/quality.txt @@ -4,6 +4,10 @@ # # make upgrade # +amqp==5.2.0 + # via + # -r requirements/test.txt + # kombu asgiref==3.7.2 # via # -r requirements/test.txt @@ -12,14 +16,28 @@ astroid==3.1.0 # via # pylint # pylint-celery -backports-zoneinfo==0.2.1 +backports-zoneinfo[tzdata]==0.2.1 # via # -r requirements/test.txt + # celery # django + # kombu +billiard==4.2.0 + # via + # -r requirements/test.txt + # celery +black==24.2.0 + # via -r requirements/quality.in +celery==5.3.6 + # via -r requirements/test.txt certifi==2024.2.2 # via # -r requirements/test.txt # requests +cffi==1.16.0 + # via + # -r requirements/test.txt + # pynacl charset-normalizer==3.3.2 # via # -r requirements/test.txt @@ -27,31 +45,83 @@ charset-normalizer==3.3.2 click==8.1.7 # via # -r 
requirements/test.txt + # black + # celery + # click-didyoumean # click-log + # click-plugins + # click-repl # code-annotations + # edx-django-utils # edx-lint +click-didyoumean==0.3.0 + # via + # -r requirements/test.txt + # celery click-log==0.4.0 # via edx-lint +click-plugins==1.1.1 + # via + # -r requirements/test.txt + # celery +click-repl==0.3.0 + # via + # -r requirements/test.txt + # celery code-annotations==1.6.0 # via # -r requirements/test.txt # edx-lint + # edx-toggles coverage[toml]==7.4.3 # via # -r requirements/test.txt # pytest-cov +ddt==1.7.2 + # via -r requirements/test.txt dill==0.3.8 # via pylint -django==4.2.10 +django==4.2.11 # via # -c https://raw.githubusercontent.com/edx/edx-lint/master/edx_lint/files/common_constraints.txt # -r requirements/test.txt # django-crum + # django-mock-queries + # django-waffle + # djangorestframework + # edx-django-utils + # edx-toggles + # model-bakery # openedx-filters django-crum==0.7.9 + # via + # -r requirements/test.txt + # edx-django-utils + # edx-toggles +django-mock-queries==2.2.0 # via -r requirements/test.txt +django-rest-framework==0.1.0 + # via -r requirements/test.txt +django-waffle==4.1.0 + # via + # -r requirements/test.txt + # edx-django-utils + # edx-toggles +djangorestframework==3.14.0 + # via + # -r requirements/test.txt + # django-mock-queries + # django-rest-framework +edx-django-utils==5.10.1 + # via + # -r requirements/test.txt + # edx-toggles edx-lint==5.3.6 # via -r requirements/quality.in +edx-opaque-keys==2.5.1 + # via -r requirements/test.txt +edx-toggles==5.1.1 + # via -r requirements/test.txt exceptiongroup==1.2.0 # via # -r requirements/test.txt @@ -72,12 +142,26 @@ jinja2==3.1.3 # via # -r requirements/test.txt # code-annotations +kombu==5.3.5 + # via + # -r requirements/test.txt + # celery markupsafe==2.1.5 # via # -r requirements/test.txt # jinja2 mccabe==0.7.0 # via pylint +model-bakery==1.17.0 + # via + # -r requirements/test.txt + # django-mock-queries +mypy-extensions==1.0.0 + # via black +newrelic==9.7.0 + # via + # -r requirements/test.txt + # edx-django-utils oauthlib==3.2.2 # via # -r requirements/test.txt @@ -89,19 +173,36 @@ openedx-filters==1.6.0 packaging==23.2 # via # -r requirements/test.txt + # black # pytest +pathspec==0.12.1 + # via black pbr==6.0.0 # via # -r requirements/test.txt # stevedore platformdirs==4.2.0 - # via pylint + # via + # black + # pylint pluggy==1.4.0 # via # -r requirements/test.txt # pytest +prompt-toolkit==3.0.43 + # via + # -r requirements/test.txt + # click-repl +psutil==5.9.8 + # via + # -r requirements/test.txt + # edx-django-utils pycodestyle==2.11.1 # via -r requirements/quality.in +pycparser==2.21 + # via + # -r requirements/test.txt + # cffi pydocstyle==6.3.0 # via -r requirements/quality.in pylint==3.1.0 @@ -118,6 +219,14 @@ pylint-plugin-utils==0.8.2 # via # pylint-celery # pylint-django +pymongo==3.13.0 + # via + # -r requirements/test.txt + # edx-opaque-keys +pynacl==1.5.0 + # via + # -r requirements/test.txt + # edx-django-utils pytest==8.1.0 # via # -r requirements/test.txt @@ -127,26 +236,41 @@ pytest-cov==4.1.0 # via -r requirements/test.txt pytest-django==4.8.0 # via -r requirements/test.txt +python-dateutil==2.9.0.post0 + # via + # -r requirements/test.txt + # celery python-slugify==8.0.4 # via # -r requirements/test.txt # code-annotations +pytz==2024.1 + # via + # -r requirements/test.txt + # djangorestframework pyyaml==6.0.1 # via # -r requirements/test.txt # code-annotations + # responses # superset-api-client requests==2.31.0 # via # -r 
requirements/test.txt # requests-oauthlib + # responses # superset-api-client requests-oauthlib==1.3.1 # via # -r requirements/test.txt # superset-api-client +responses==0.25.0 + # via -r requirements/test.txt six==1.16.0 - # via edx-lint + # via + # -r requirements/test.txt + # edx-lint + # python-dateutil snowballstemmer==2.2.0 # via pydocstyle sqlparse==0.4.4 @@ -157,6 +281,8 @@ stevedore==5.2.0 # via # -r requirements/test.txt # code-annotations + # edx-django-utils + # edx-opaque-keys superset-api-client==0.6.0 # via -r requirements/test.txt text-unidecode==1.3 @@ -166,6 +292,7 @@ text-unidecode==1.3 tomli==2.0.1 # via # -r requirements/test.txt + # black # coverage # pylint # pytest @@ -176,10 +303,29 @@ typing-extensions==4.10.0 # -r requirements/test.txt # asgiref # astroid + # black + # edx-opaque-keys + # kombu # pylint +tzdata==2024.1 + # via + # -r requirements/test.txt + # backports-zoneinfo + # celery urllib3==2.2.1 # via # -r requirements/test.txt # requests + # responses +vine==5.1.0 + # via + # -r requirements/test.txt + # amqp + # celery + # kombu +wcwidth==0.2.13 + # via + # -r requirements/test.txt + # prompt-toolkit web-fragments==2.1.0 # via -r requirements/test.txt diff --git a/requirements/test.in b/requirements/test.in index 6797160..1bc4cb8 100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -6,3 +6,6 @@ pytest-cov # pytest extension for code coverage statistics pytest-django # pytest extension for better Django support code-annotations # provides commands used by the pii_check make target. +responses # mocks for the requests library +ddt +django-mock-queries diff --git a/requirements/test.txt b/requirements/test.txt index 91a9cc0..7e75b72 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -4,34 +4,105 @@ # # make upgrade # +amqp==5.2.0 + # via + # -r requirements/base.txt + # kombu asgiref==3.7.2 # via # -r requirements/base.txt # django -backports-zoneinfo==0.2.1 +backports-zoneinfo[tzdata]==0.2.1 # via # -r requirements/base.txt + # celery # django + # kombu +billiard==4.2.0 + # via + # -r requirements/base.txt + # celery +celery==5.3.6 + # via -r requirements/base.txt certifi==2024.2.2 # via # -r requirements/base.txt # requests +cffi==1.16.0 + # via + # -r requirements/base.txt + # pynacl charset-normalizer==3.3.2 # via # -r requirements/base.txt # requests click==8.1.7 - # via code-annotations + # via + # -r requirements/base.txt + # celery + # click-didyoumean + # click-plugins + # click-repl + # code-annotations + # edx-django-utils +click-didyoumean==0.3.0 + # via + # -r requirements/base.txt + # celery +click-plugins==1.1.1 + # via + # -r requirements/base.txt + # celery +click-repl==0.3.0 + # via + # -r requirements/base.txt + # celery code-annotations==1.6.0 - # via -r requirements/test.in + # via + # -r requirements/base.txt + # -r requirements/test.in + # edx-toggles coverage[toml]==7.4.3 # via pytest-cov +ddt==1.7.2 + # via -r requirements/test.in # via # -c https://raw.githubusercontent.com/edx/edx-lint/master/edx_lint/files/common_constraints.txt # -r requirements/base.txt # django-crum + # django-mock-queries + # django-waffle + # djangorestframework + # edx-django-utils + # edx-toggles + # model-bakery # openedx-filters django-crum==0.7.9 + # via + # -r requirements/base.txt + # edx-django-utils + # edx-toggles +django-mock-queries==2.2.0 + # via -r requirements/test.in +django-rest-framework==0.1.0 + # via -r requirements/base.txt +django-waffle==4.1.0 + # via + # -r requirements/base.txt + # edx-django-utils + # 
edx-toggles +djangorestframework==3.14.0 + # via + # -r requirements/base.txt + # django-mock-queries + # django-rest-framework +edx-django-utils==5.10.1 + # via + # -r requirements/base.txt + # edx-toggles +edx-opaque-keys==2.5.1 + # via -r requirements/base.txt +edx-toggles==5.1.1 # via -r requirements/base.txt exceptiongroup==1.2.0 # via pytest @@ -42,9 +113,23 @@ idna==3.6 iniconfig==2.0.0 # via pytest jinja2==3.1.3 - # via code-annotations + # via + # -r requirements/base.txt + # code-annotations +kombu==5.3.5 + # via + # -r requirements/base.txt + # celery markupsafe==2.1.5 - # via jinja2 + # via + # -r requirements/base.txt + # jinja2 +model-bakery==1.17.0 + # via django-mock-queries +newrelic==9.7.0 + # via + # -r requirements/base.txt + # edx-django-utils oauthlib==3.2.2 # via # -r requirements/base.txt @@ -56,9 +141,31 @@ openedx-filters==1.6.0 packaging==23.2 # via pytest pbr==6.0.0 - # via stevedore + # via + # -r requirements/base.txt + # stevedore pluggy==1.4.0 # via pytest +prompt-toolkit==3.0.43 + # via + # -r requirements/base.txt + # click-repl +psutil==5.9.8 + # via + # -r requirements/base.txt + # edx-django-utils +pycparser==2.21 + # via + # -r requirements/base.txt + # cffi +pymongo==3.13.0 + # via + # -r requirements/base.txt + # edx-opaque-keys +pynacl==1.5.0 + # via + # -r requirements/base.txt + # edx-django-utils pytest==8.1.0 # via # pytest-cov @@ -67,32 +174,56 @@ pytest-cov==4.1.0 # via -r requirements/test.in pytest-django==4.8.0 # via -r requirements/test.in +python-dateutil==2.9.0.post0 + # via + # -r requirements/base.txt + # celery python-slugify==8.0.4 - # via code-annotations + # via + # -r requirements/base.txt + # code-annotations +pytz==2024.1 + # via + # -r requirements/base.txt + # djangorestframework pyyaml==6.0.1 # via # -r requirements/base.txt # code-annotations + # responses # superset-api-client requests==2.31.0 # via # -r requirements/base.txt # requests-oauthlib + # responses # superset-api-client requests-oauthlib==1.3.1 # via # -r requirements/base.txt # superset-api-client +responses==0.25.0 + # via -r requirements/test.in +six==1.16.0 + # via + # -r requirements/base.txt + # python-dateutil sqlparse==0.4.4 # via # -r requirements/base.txt # django stevedore==5.2.0 - # via code-annotations + # via + # -r requirements/base.txt + # code-annotations + # edx-django-utils + # edx-opaque-keys superset-api-client==0.6.0 # via -r requirements/base.txt text-unidecode==1.3 - # via python-slugify + # via + # -r requirements/base.txt + # python-slugify tomli==2.0.1 # via # coverage @@ -101,9 +232,27 @@ typing-extensions==4.10.0 # via # -r requirements/base.txt # asgiref + # edx-opaque-keys + # kombu +tzdata==2024.1 + # via + # -r requirements/base.txt + # backports-zoneinfo + # celery urllib3==2.2.1 # via # -r requirements/base.txt # requests + # responses +vine==5.1.0 + # via + # -r requirements/base.txt + # amqp + # celery + # kombu +wcwidth==0.2.13 + # via + # -r requirements/base.txt + # prompt-toolkit web-fragments==2.1.0 # via -r requirements/base.txt diff --git a/setup.cfg b/setup.cfg index d782599..90e26ed 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,10 +1,11 @@ [isort] -include_trailing_comma = True indent = ' ' -line_length = 120 -multi_line_output = 3 skip= migrations +profile = black +multi_line_output = 3 +include_trailing_comma = True +line_length = 88 [wheel] universal = 1 diff --git a/setup.py b/setup.py index f173ec7..76bf498 100755 --- a/setup.py +++ b/setup.py @@ -19,11 +19,10 @@ def get_version(*file_paths): """ filename = 
os.path.join(os.path.dirname(__file__), *file_paths) version_file = open(filename, encoding="utf8").read() - version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", - version_file, re.M) + version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", version_file, re.M) if version_match: return version_match.group(1) - raise RuntimeError('Unable to find version string.') + raise RuntimeError("Unable to find version string.") def load_requirements(*requirements_paths): @@ -46,14 +45,14 @@ def check_name_consistent(package): with extras we don't constrain it without mentioning the extras (since that too would interfere with matching constraints.) """ - canonical = package.lower().replace('_', '-').split('[')[0] + canonical = package.lower().replace("_", "-").split("[")[0] seen_spelling = by_canonical_name.get(canonical) if seen_spelling is None: by_canonical_name[canonical] = package elif seen_spelling != package: raise Exception( f'Encountered both "{seen_spelling}" and "{package}" in requirements ' - 'and constraints files; please use just one or the other.' + "and constraints files; please use just one or the other." ) requirements = {} @@ -67,7 +66,9 @@ def check_name_consistent(package): % (re_package_name_base_chars, re_package_name_base_chars) ) - def add_version_constraint_or_raise(current_line, current_requirements, add_if_not_present): + def add_version_constraint_or_raise( + current_line, current_requirements, add_if_not_present + ): regex_match = requirement_line_regex.match(current_line) if regex_match: package = regex_match.group(1) @@ -76,11 +77,16 @@ def add_version_constraint_or_raise(current_line, current_requirements, add_if_n existing_version_constraints = current_requirements.get(package, None) # It's fine to add constraints to an unconstrained package, # but raise an error if there are already constraints in place. - if existing_version_constraints and existing_version_constraints != version_constraints: - raise BaseException(f'Multiple constraint definitions found for {package}:' - f' "{existing_version_constraints}" and "{version_constraints}".' - f'Combine constraints into one location with {package}' - f'{existing_version_constraints},{version_constraints}.') + if ( + existing_version_constraints + and existing_version_constraints != version_constraints + ): + raise BaseException( + f"Multiple constraint definitions found for {package}:" + f' "{existing_version_constraints}" and "{version_constraints}".' + f"Combine constraints into one location with {package}" + f"{existing_version_constraints},{version_constraints}." 
+ ) if add_if_not_present or package in current_requirements: current_requirements[package] = version_constraints @@ -91,8 +97,12 @@ def add_version_constraint_or_raise(current_line, current_requirements, add_if_n for line in reqs: if is_requirement(line): add_version_constraint_or_raise(line, requirements, True) - if line and line.startswith('-c') and not line.startswith('-c http'): - constraint_files.add(os.path.dirname(path) + '/' + line.split('#')[0].replace('-c', '').strip()) + if line and line.startswith("-c") and not line.startswith("-c http"): + constraint_files.add( + os.path.dirname(path) + + "/" + + line.split("#")[0].replace("-c", "").strip() + ) # process constraint files: add constraints to existing requirements for constraint_file in constraint_files: @@ -102,7 +112,9 @@ def add_version_constraint_or_raise(current_line, current_requirements, add_if_n add_version_constraint_or_raise(line, requirements, False) # process back into list of pkg><=constraints strings - constrained_requirements = [f'{pkg}{version or ""}' for (pkg, version) in sorted(requirements.items())] + constrained_requirements = [ + f'{pkg}{version or ""}' for (pkg, version) in sorted(requirements.items()) + ] return constrained_requirements @@ -114,55 +126,60 @@ def is_requirement(line): bool: True if the line is not blank, a comment, a URL, or an included file """ - return line and line.strip() and not line.startswith(("-r", "#", "-e", "git+", "-c")) + return ( + line and line.strip() and not line.startswith(("-r", "#", "-e", "git+", "-c")) + ) -VERSION = get_version('platform_plugin_aspects', '__init__.py') +VERSION = get_version("platform_plugin_aspects", "__init__.py") -if sys.argv[-1] == 'tag': +if sys.argv[-1] == "tag": print("Tagging the version on github:") os.system("git tag -a %s -m 'version %s'" % (VERSION, VERSION)) os.system("git push --tags") sys.exit() -README = open(os.path.join(os.path.dirname(__file__), 'README.rst'), encoding="utf8").read() -CHANGELOG = open(os.path.join(os.path.dirname(__file__), 'CHANGELOG.rst'), encoding="utf8").read() +README = open( + os.path.join(os.path.dirname(__file__), "README.rst"), encoding="utf8" +).read() +CHANGELOG = open( + os.path.join(os.path.dirname(__file__), "CHANGELOG.rst"), encoding="utf8" +).read() setup( - name='platform-plugin-aspects', + name="platform-plugin-aspects", version=VERSION, description="""Aspects plugins for edx-platform""", - long_description=README + '\n\n' + CHANGELOG, - author='Open edX Project', - author_email='oscm@openedx.org', - url='https://github.com/openedx/platform-plugin-aspects', + long_description=README + "\n\n" + CHANGELOG, + author="Open edX Project", + author_email="oscm@openedx.org", + url="https://github.com/openedx/platform-plugin-aspects", packages=find_packages( - include=['platform_plugin_aspects', 'platform_plugin_aspects.*'], + include=["platform_plugin_aspects", "platform_plugin_aspects.*"], exclude=["*tests"], ), - include_package_data=True, - install_requires=load_requirements('requirements/base.in'), + install_requires=load_requirements("requirements/base.in"), python_requires=">=3.8", license="AGPL 3.0", zip_safe=False, - keywords='Python edx', + keywords="Python edx", classifiers=[ - 'Development Status :: 3 - Alpha', - 'Framework :: Django', - 'Framework :: Django :: 3.2', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)', - 'Natural Language :: English', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python 
:: 3.8', + "Development Status :: 3 - Alpha", + "Framework :: Django", + "Framework :: Django :: 3.2", + "Intended Audience :: Developers", + "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)", + "Natural Language :: English", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", ], entry_points={ - 'lms.djangoapp': [ - 'platform_plugin_aspects = platform_plugin_aspects.apps:PlatformPluginAspectsConfig', + "lms.djangoapp": [ + "platform_plugin_aspects = platform_plugin_aspects.apps:PlatformPluginAspectsConfig", ], - 'cms.djangoapp': [ - 'platform_plugin_aspects = platform_plugin_aspects.apps:PlatformPluginAspectsConfig', + "cms.djangoapp": [ + "platform_plugin_aspects = platform_plugin_aspects.apps:PlatformPluginAspectsConfig", ], }, ) diff --git a/test_settings.py b/test_settings.py index d2e72bd..9fb85b1 100644 --- a/test_settings.py +++ b/test_settings.py @@ -5,59 +5,59 @@ Django applications, so these settings will not be used. """ -from os.path import abspath, dirname, join +DATABASES = { + "default": { + "ENGINE": "django.db.backends.sqlite3", + "NAME": "default.db", + "USER": "", + "PASSWORD": "", + "HOST": "", + "PORT": "", + }, + "read_replica": { + "ENGINE": "django.db.backends.sqlite3", + "NAME": "read_replica.db", + "USER": "", + "PASSWORD": "", + "HOST": "", + "PORT": "", + }, +} -def root(*args): - """ - Get the absolute path of the given path relative to the project root. - """ - return join(abspath(dirname(__file__)), *args) +INSTALLED_APPS = ("platform_plugin_aspects",) -DEBUG = True - -DATABASES = { - 'default': { - 'ENGINE': 'django.db.backends.sqlite3', - 'NAME': 'default.db', - 'USER': '', - 'PASSWORD': '', - 'HOST': '', - 'PORT': '', - } +EVENT_SINK_CLICKHOUSE_MODEL_CONFIG = { + "user_profile": { + "module": "common.djangoapps.student.models", + "model": "UserProfile", + }, + "course_overviews": { + "module": "openedx.core.djangoapps.content.course_overviews.models", + "model": "CourseOverview", + }, } -INSTALLED_APPS = ( - 'django.contrib.admin', - 'django.contrib.auth', - 'django.contrib.contenttypes', - 'django.contrib.messages', - 'django.contrib.sessions', - 'platform_plugin_aspects', -) - -LOCALE_PATHS = [ - root('platform_plugin_aspects', 'conf', 'locale'), -] +EVENT_SINK_CLICKHOUSE_COURSE_OVERVIEWS_ENABLED = True -SECRET_KEY = 'insecure-secret-key' +FEATURES = { + "CUSTOM_COURSES_EDX": True, +} -MIDDLEWARE = ( - 'django.contrib.auth.middleware.AuthenticationMiddleware', - 'django.contrib.messages.middleware.MessageMiddleware', - 'django.contrib.sessions.middleware.SessionMiddleware', -) +DEBUG = True -TEMPLATES = [{ - 'BACKEND': 'django.template.backends.django.DjangoTemplates', - 'APP_DIRS': False, - 'OPTIONS': { - 'context_processors': [ - 'django.contrib.auth.context_processors.auth', # this is required for admin - 'django.contrib.messages.context_processors.messages', # this is required for admin - ], - }, -}] +TEMPLATES = [ + { + "BACKEND": "django.template.backends.django.DjangoTemplates", + "APP_DIRS": False, + "OPTIONS": { + "context_processors": [ + "django.contrib.auth.context_processors.auth", # this is required for admin + "django.contrib.messages.context_processors.messages", # this is required for admin + ], + }, + } +] ASPECTS_INSTRUCTOR_DASHBOARD_UUID = "test-dashboard-uuid" diff --git a/test_utils/helpers.py b/test_utils/helpers.py new file mode 100644 index 0000000..8e09fa1 --- /dev/null +++ b/test_utils/helpers.py @@ -0,0 +1,363 @@ +""" +Helper functions for tests +""" + +import 
csv +import json +import random +import string +from collections import namedtuple +from datetime import datetime, timedelta +from io import StringIO +from unittest.mock import MagicMock, Mock + +from opaque_keys.edx.keys import CourseKey +from opaque_keys.edx.locator import BlockUsageLocator + +ORIG_IMPORT = __import__ +ORG = "testorg" +COURSE = "testcourse" +COURSE_RUN = "2023_Fall" + +FakeCourse = namedtuple("FakeCourse", ["id"]) +FakeCourseOverview = namedtuple( + "FakeCourseOverview", + [ + # Key fields we keep at the top level + "id", + "org", + "display_name", + "start", + "end", + "enrollment_start", + "enrollment_end", + "self_paced", + "created", + "modified", + # Fields we stuff in JSON + "advertised_start", + "announcement", + "lowest_passing_grade", + "invitation_only", + "max_student_enrollments_allowed", + "effort", + "enable_proctored_exams", + "entrance_exam_enabled", + "external_id", + "language", + ], +) + +FakeUser = namedtuple("FakeUser", ["id"]) + + +class FakeXBlock: + """ + Fakes the parameters of an XBlock that we care about. + """ + + def __init__( + self, identifier, block_type="vertical", graded=False, completion_mode="unknown" + ): + self.block_type = block_type + self.scope_ids = Mock() + self.scope_ids.usage_id.course_key = course_key_factory() + self.scope_ids.block_type = self.block_type + self.location = block_usage_locator_factory() + self.display_name_with_default = f"Display name {identifier}" + self.edited_on = datetime.now() + self.children = [] + self.graded = graded + self.completion_mode = completion_mode + + def get_children(self): + """ + Fakes the method of the same name from an XBlock. + """ + return self.children + + +def course_str_factory(course_id=None): + """ + Return a valid course key string. + """ + if not course_id: + return f"course-v1:{ORG}+{COURSE}+{COURSE_RUN}" + return f"course-v1:{ORG}+{course_id}+{COURSE_RUN}" + + +def course_key_factory(): + """ + Return a CourseKey object from our course key string. + """ + return CourseKey.from_string(course_str_factory()) + + +def block_usage_locator_factory(): + """ + Create a BlockUsageLocator with a random id. + """ + block_id = "".join(random.choices(string.ascii_letters, k=10)) + return BlockUsageLocator( + course_key_factory(), block_type="category", block_id=block_id, deprecated=True + ) + + +def fake_course_overview_factory(modified=None): + """ + Create a fake CourseOverview object with just the fields we care about. + + Modified is overridable, but can also be None. + """ + return FakeCourseOverview( + course_key_factory(), # id + ORG, # org + "Test Course", # display_name + datetime.now() - timedelta(days=90), # start + datetime.now() + timedelta(days=90), # end + datetime.now() - timedelta(days=90), # enrollment_start + datetime.now() + timedelta(days=90), # enrollment_end + False, # self_paced + datetime.now() - timedelta(days=180), # created + modified, # modified + datetime.now() - timedelta(days=90), # advertised_start + datetime.now() - timedelta(days=90), # announcement + 71.05, # lowest_passing_grade + False, # invitation_only + 1000, # max_student_enrollments_allowed + "Pretty easy", # effort + False, # enable_proctored_exams + True, # entrance_exam_enabled + "abcd1234", # external_id + "Polish", # language + ) + + +def fake_serialize_fake_course_overview(course_overview): + """ + Return a dict representation of a FakeCourseOverview. 
+ """ + json_fields = { + "advertised_start": str(course_overview.advertised_start), + "announcement": str(course_overview.announcement), + "lowest_passing_grade": float(course_overview.lowest_passing_grade), + "invitation_only": course_overview.invitation_only, + "max_student_enrollments_allowed": course_overview.max_student_enrollments_allowed, + "effort": course_overview.effort, + "enable_proctored_exams": course_overview.enable_proctored_exams, + "entrance_exam_enabled": course_overview.entrance_exam_enabled, + "external_id": course_overview.external_id, + "language": course_overview.language, + } + + return { + "org": course_overview.org, + "course_key": str(course_overview.id), + "display_name": course_overview.display_name, + "course_start": course_overview.start, + "course_end": course_overview.end, + "enrollment_start": course_overview.enrollment_start, + "enrollment_end": course_overview.enrollment_end, + "self_paced": course_overview.self_paced, + "course_data_json": json.dumps(json_fields), + "created": course_overview.created, + "modified": course_overview.modified, + "dump_id": "", + "time_last_dumped": "", + } + + +def mock_course_overview(): + """ + Create a fake CourseOverview object that supports just the things we care about. + """ + mock_overview = MagicMock() + mock_overview.return_value = fake_course_overview_factory(datetime.now()) + return mock_overview + + +def mock_detached_xblock_types(): + """ + Mock the return results of xmodule.modulestore.store_utilities.DETACHED_XBLOCK_TYPES + """ + # Current values as of 2023-05-01 + return {"static_tab", "about", "course_info"} + + +def get_clickhouse_http_params(): + """ + Get the params used in ClickHouse queries. + """ + overview_params = { + "input_format_allow_errors_num": 1, + "input_format_allow_errors_ratio": 0.1, + "query": "INSERT INTO cool_data.course_overviews FORMAT CSV", + } + blocks_params = { + "input_format_allow_errors_num": 1, + "input_format_allow_errors_ratio": 0.1, + "query": "INSERT INTO cool_data.course_blocks FORMAT CSV", + } + + return overview_params, blocks_params + + +def course_factory(): + """ + Return a fake course structure that exercises most of the serialization features. + """ + # Create a base block + top_block = FakeXBlock("top", block_type="course") + course = [ + top_block, + ] + + # Create a few sections + for i in range(3): + block = FakeXBlock(f"Section {i}", block_type="chapter") + course.append(block) + top_block.children.append(block) + + # Create some subsections + if i > 0: + for ii in range(3): + sub_block = FakeXBlock(f"Subsection {ii}", block_type="sequential") + course.append(sub_block) + block.children.append(sub_block) + + for iii in range(3): + # Create some units + unit_block = FakeXBlock(f"Unit {iii}", block_type="vertical") + course.append(unit_block) + sub_block.children.append(unit_block) + + # Create some detached blocks at the top level + for i in range(3): + course.append(FakeXBlock(f"Detached {i}", block_type="course_info")) + + # Create some graded blocks at the top level + for i in range(3): + course.append(FakeXBlock(f"Graded {i}", graded=True)) + + # Create some completable blocks at the top level + course.append(FakeXBlock("Completable", completion_mode="completable")) + course.append(FakeXBlock("Aggregator", completion_mode="aggregator")) + course.append(FakeXBlock("Excluded", completion_mode="excluded")) + + return course + + +def check_overview_csv_matcher(course_overview): + """ + Match the course overview CSV against the test course. 
+
+    This is a matcher for the "responses" library. It returns a function
+    that actually does the matching.
+    """
+
+    def match(request):
+        body = request.body
+
+        f = StringIO(body.decode("utf-8"))
+        reader = csv.reader(f)
+
+        i = 0
+        try:
+            # The CSV should be in the same order as our course, make sure
+            # everything matches
+            for row in reader:
+                assert row[0] == course_overview.org
+                assert row[1] == str(course_overview.id)
+                assert row[2] == course_overview.display_name
+                assert row[3] == str(course_overview.start)
+                assert row[4] == str(course_overview.end)
+                assert row[5] == str(course_overview.enrollment_start)
+                assert row[6] == str(course_overview.enrollment_end)
+                assert row[7] == str(course_overview.self_paced)
+
+                # Get our JSON string back out from the CSV, confirm that it's
+                # real JSON, compare values
+                dumped_json = json.loads(row[8])
+
+                assert dumped_json["advertised_start"] == str(
+                    course_overview.advertised_start
+                )
+                assert dumped_json["announcement"] == str(course_overview.announcement)
+                assert dumped_json["lowest_passing_grade"] == float(
+                    course_overview.lowest_passing_grade
+                )
+                assert dumped_json["invitation_only"] == course_overview.invitation_only
+                assert (
+                    dumped_json["max_student_enrollments_allowed"]
+                    == course_overview.max_student_enrollments_allowed
+                )
+                assert dumped_json["effort"] == course_overview.effort
+                assert (
+                    dumped_json["enable_proctored_exams"]
+                    == course_overview.enable_proctored_exams
+                )
+                assert (
+                    dumped_json["entrance_exam_enabled"]
+                    == course_overview.entrance_exam_enabled
+                )
+                assert dumped_json["external_id"] == course_overview.external_id
+                assert dumped_json["language"] == course_overview.language
+
+                assert row[9] == str(course_overview.created)
+                assert row[10] == str(course_overview.modified)
+
+                i += 1
+        # The asserts above raise AssertionError on a mismatch; catching
+        # EOFError here (as originally written) would be dead code and let
+        # the assertion escape the matcher.
+        except AssertionError as e:
+            return False, f"Mismatch in row {i}: {e}"
+        return True, ""
+
+    return match
+
+
+def check_block_csv_matcher(course):
+    """
+    Match the course structure CSV against the test course.
+
+    This is a matcher for the "responses" library. It returns a function
+    that actually does the matching.
+ """ + + def match(request): + body = request.body.decode("utf-8") + lines = body.split("\n")[:-1] + + # There should be one CSV line for each block in the test course + if len(lines) != len(course): + return False, f"Body has {len(lines)} lines, course has {len(course)}" + + f = StringIO(body) + reader = csv.reader(f) + + i = 0 + try: + # The CSV should be in the same order as our course, make sure + # everything matches + for row in reader: + block = course[i] + assert row[0] == block.location.org + assert row[1] == str(block.location.course_key) + assert row[2] == str(course[i].location) + assert row[3] == block.display_name_with_default + + block_json_data = { + "course": block.location.course, + "run": block.location.run, + "block_type": str(block.block_type), + } + csv_json = json.loads(row[4]) + + # Check some json data + assert block_json_data["course"] == csv_json["course"] + assert block_json_data["run"] == csv_json["run"] + assert block_json_data["block_type"] == csv_json["block_type"] + i += 1 + except AssertionError as e: + return False, f"Mismatch in row {i}: {e}" + return True, "" + + return match diff --git a/tox.ini b/tox.ini index acf45d4..e57b6ff 100644 --- a/tox.ini +++ b/tox.ini @@ -31,10 +31,12 @@ match-dir = (?!migrations) [pytest] DJANGO_SETTINGS_MODULE = test_settings -addopts = --cov platform_plugin_aspects --cov-report term-missing --cov-report xml +addopts = --cov platform_plugin_aspects --cov-report term-missing --cov-report xml --log-level=INFO norecursedirs = .* docs requirements site-packages [testenv] +setenv: + DJANGO_SETTINGS_MODULE = test_settings deps = django32: Django>=3.2,<4.0 django40: Django>=4.0,<4.1 @@ -68,13 +70,15 @@ allowlist_externals = make rm touch + black deps = -r{toxinidir}/requirements/quality.txt commands = pylint platform_plugin_aspects test_utils manage.py setup.py pycodestyle platform_plugin_aspects manage.py setup.py pydocstyle platform_plugin_aspects manage.py setup.py - isort --check-only --diff test_utils platform_plugin_aspects manage.py setup.py test_settings.py + isort --check-only --diff test_utils platform_plugin_aspects manage.py setup.py + black --check --diff platform_plugin_aspects test_utils manage.py setup.py make selfcheck [testenv:pii_check] @@ -84,4 +88,3 @@ deps = -r{toxinidir}/requirements/test.txt commands = code_annotations django_find_annotations --config_file .pii_annotations.yml --lint --report --coverage -