From d355a4f6f06e7a8c6f68878d6e0682b3fbc236fa Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Wed, 28 Aug 2024 15:34:49 -0500 Subject: [PATCH 1/4] feat: add tag and taxonomy dump fix: add lineage information for tag sink chore: quality fixes fix: serialize lineage data as json --- platform_plugin_aspects/settings/common.py | 9 +++ platform_plugin_aspects/signals.py | 81 ++++++++++++++++++++ platform_plugin_aspects/sinks/__init__.py | 1 + platform_plugin_aspects/sinks/serializers.py | 64 ++++++++++++++++ platform_plugin_aspects/sinks/tag_sink.py | 47 ++++++++++++ platform_plugin_aspects/tasks.py | 3 + 6 files changed, 205 insertions(+) create mode 100644 platform_plugin_aspects/sinks/tag_sink.py diff --git a/platform_plugin_aspects/settings/common.py b/platform_plugin_aspects/settings/common.py index 870e14b..b536195 100644 --- a/platform_plugin_aspects/settings/common.py +++ b/platform_plugin_aspects/settings/common.py @@ -94,4 +94,13 @@ def plugin_settings(settings): "module": "openedx.core.djangoapps.user_api.models", "model": "UserPreference", }, + "tag": {"module": "openedx_tagging.core.tagging.models", "model": "Tag"}, + "taxonomy": { + "module": "openedx_tagging.core.tagging.models", + "model": "Taxonomy", + }, + "object_tag": { + "module": "openedx_tagging.core.tagging.models", + "model": "ObjectTag", + }, } diff --git a/platform_plugin_aspects/signals.py b/platform_plugin_aspects/signals.py index 26bad3f..2122426 100644 --- a/platform_plugin_aspects/signals.py +++ b/platform_plugin_aspects/signals.py @@ -9,6 +9,9 @@ from platform_plugin_aspects.sinks import ( CourseEnrollmentSink, ExternalIdSink, + ObjectTagSink, + TagSink, + TaxonomySink, UserProfileSink, UserRetirementSink, ) @@ -143,3 +146,81 @@ def on_user_retirement( # pylint: disable=unused-argument # pragma: no cover sink_name=sink.__class__.__name__, object_id=str(user.id), ) + + +def on_tag_saved( # pylint: disable=unused-argument # pragma: no cover + sender, instance, **kwargs +): + """ + 
Receives post save signal and queues the dump job. + """ + # import here, because signal is registered at startup, but items in tasks are not yet able to be loaded + from platform_plugin_aspects.tasks import ( # pylint: disable=import-outside-toplevel + dump_data_to_clickhouse, + ) + + sink = TagSink(None, None) + dump_data_to_clickhouse.delay( + sink_module=sink.__module__, + sink_name=sink.__class__.__name__, + object_id=str(instance.id), + ) + + +# Connect the Tag.post_save signal handler only if we have a model to attach to. +# (prevents celery errors during tests) +_tag = get_model("tag") +if _tag: + post_save.connect(on_tag_saved, sender=_tag) # pragma: no cover + + +def on_taxonomy_saved( # pylint: disable=unused-argument # pragma: no cover + sender, instance, **kwargs +): + """ + Receives post save signal and queues the dump job. + """ + # import here, because signal is registered at startup, but items in tasks are not yet able to be loaded + from platform_plugin_aspects.tasks import ( # pylint: disable=import-outside-toplevel + dump_data_to_clickhouse, + ) + + sink = TaxonomySink(None, None) + dump_data_to_clickhouse.delay( + sink_module=sink.__module__, + sink_name=sink.__class__.__name__, + object_id=str(instance.id), + ) + + +# Connect the Taxonomy.post_save signal handler only if we have a model to attach to. +# (prevents celery errors during tests) +_taxonomy = get_model("taxonomy") +if _taxonomy: + post_save.connect(on_taxonomy_saved, sender=_taxonomy) # pragma: no cover + + +def on_object_tag_saved( # pylint: disable=unused-argument # pragma: no cover + sender, instance, **kwargs +): + """ + Receives post save signal and queues the dump job. 
+ """ + # import here, because signal is registered at startup, but items in tasks are not yet able to be loaded + from platform_plugin_aspects.tasks import ( # pylint: disable=import-outside-toplevel + dump_data_to_clickhouse, + ) + + sink = ObjectTagSink(None, None) + dump_data_to_clickhouse.delay( + sink_module=sink.__module__, + sink_name=sink.__class__.__name__, + object_id=str(instance.id), + ) + + +# Connect the ExternalId.post_save signal handler only if we have a model to attach to. +# (prevents celery errors during tests) +_object_tag = get_model("object_tag") +if _object_tag: + post_save.connect(on_object_tag_saved, sender=_object_tag) # pragma: no cover diff --git a/platform_plugin_aspects/sinks/__init__.py b/platform_plugin_aspects/sinks/__init__.py index a4c62d2..f3c059a 100644 --- a/platform_plugin_aspects/sinks/__init__.py +++ b/platform_plugin_aspects/sinks/__init__.py @@ -6,5 +6,6 @@ from .course_enrollment_sink import CourseEnrollmentSink from .course_overview_sink import CourseOverviewSink, XBlockSink from .external_id_sink import ExternalIdSink +from .tag_sink import ObjectTagSink, TagSink, TaxonomySink from .user_profile_sink import UserProfileSink from .user_retire_sink import UserRetirementSink diff --git a/platform_plugin_aspects/sinks/serializers.py b/platform_plugin_aspects/sinks/serializers.py index 32950e8..ca04686 100644 --- a/platform_plugin_aspects/sinks/serializers.py +++ b/platform_plugin_aspects/sinks/serializers.py @@ -206,3 +206,67 @@ class Meta: def get_course_key(self, obj): """Return the course key as a string.""" return str(obj.course_id) + + +class TagSerializer(BaseSinkSerializer, serializers.ModelSerializer): + """Serializer for the Tag model.""" + + lineage = serializers.SerializerMethodField() + + class Meta: + """Meta class for the TagSerializer.""" + + model = get_model("tag") + fields = [ + "id", + "taxonomy", + "parent", + "value", + "external_id", + "lineage", + "dump_id", + "time_last_dumped", + ] + + def 
get_lineage(self, instance): + return json.dumps(instance.get_lineage()) + + +class ObjectTagSerializer(BaseSinkSerializer, serializers.ModelSerializer): + """Serializer for the ObjectTag model.""" + + lineage = serializers.SerializerMethodField() + + class Meta: + """Meta class for the ObjectTagSerializer""" + + model = get_model("object_tag") + fields = [ + "id", + "object_id", + "taxonomy", + "tag", + "_value", + "_export_id", + "lineage", + "dump_id", + "time_last_dumped", + ] + + def get_lineage(self, instance): + return json.dumps(instance.get_lineage()) + + +class TaxonomySerializer(BaseSinkSerializer, serializers.ModelSerializer): + """Serializer for the Taxonomy model.""" + + class Meta: + """Meta class for the TaxonomySerializer.""" + + model = get_model("taxonomy") + fields = [ + "id", + "name", + "dump_id", + "time_last_dumped", + ] diff --git a/platform_plugin_aspects/sinks/tag_sink.py b/platform_plugin_aspects/sinks/tag_sink.py new file mode 100644 index 0000000..00511de --- /dev/null +++ b/platform_plugin_aspects/sinks/tag_sink.py @@ -0,0 +1,47 @@ +"""Tag sink""" + +from platform_plugin_aspects.sinks.base_sink import ModelBaseSink +from platform_plugin_aspects.sinks.serializers import ( + ObjectTagSerializer, + TagSerializer, + TaxonomySerializer, +) + + +class TagSink(ModelBaseSink): # pylint: disable=abstract-method + """ + Sink for content tags + """ + + model = "tag" + unique_key = "id" + clickhouse_table_name = "tag" + timestamp_field = "time_last_dumped" + name = "Tag" + serializer_class = TagSerializer + + +class TaxonomySink(ModelBaseSink): # pylint: disable=abstract-method + """ + Sink for content taxonomy + """ + + model = "taxonomy" + unique_key = "id" + clickhouse_table_name = "taxonomy" + timestamp_field = "time_last_dumped" + name = "Taxonomy" + serializer_class = TaxonomySerializer + + +class ObjectTagSink(ModelBaseSink): # pylint: disable=abstract-method + """ + Sink for tagged objects + """ + + model = "object_tag" + unique_key = 
"id" + clickhouse_table_name = "object_tag" + timestamp_field = "time_last_dumped" + name = "ObjectTag" + serializer_class = ObjectTagSerializer diff --git a/platform_plugin_aspects/tasks.py b/platform_plugin_aspects/tasks.py index 51c7379..b0b880e 100644 --- a/platform_plugin_aspects/tasks.py +++ b/platform_plugin_aspects/tasks.py @@ -59,3 +59,6 @@ def dump_data_to_clickhouse( if Sink.is_enabled(): sink = Sink(connection_overrides=connection_overrides, log=celery_log) sink.dump(object_id) + return "Dumped" + + return "Disabled" From cc90b17d15b5e28303f7a1d849d7812b41460132 Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Thu, 29 Aug 2024 15:07:07 -0500 Subject: [PATCH 2/4] fix: updating course tags trigger update on courses fix: deleting course tags trigger update on courses --- platform_plugin_aspects/signals.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/platform_plugin_aspects/signals.py b/platform_plugin_aspects/signals.py index 2122426..c323b3a 100644 --- a/platform_plugin_aspects/signals.py +++ b/platform_plugin_aspects/signals.py @@ -3,7 +3,7 @@ """ from django.db import transaction -from django.db.models.signals import post_save +from django.db.models.signals import post_save, post_delete from django.dispatch import Signal, receiver from platform_plugin_aspects.sinks import ( @@ -209,6 +209,7 @@ def on_object_tag_saved( # pylint: disable=unused-argument # pragma: no cover # import here, because signal is registered at startup, but items in tasks are not yet able to be loaded from platform_plugin_aspects.tasks import ( # pylint: disable=import-outside-toplevel dump_data_to_clickhouse, + dump_course_to_clickhouse ) sink = ObjectTagSink(None, None) @@ -218,9 +219,31 @@ def on_object_tag_saved( # pylint: disable=unused-argument # pragma: no cover object_id=str(instance.id), ) + on_object_tag_deleted(sender, instance, **kwargs) + + +def on_object_tag_deleted( # pylint: disable=unused-argument # pragma: no 
cover + sender, instance, **kwargs +): + """ + Queues a course dump when an object tag on a course is saved or deleted. + """ + # import here, because signal is registered at startup, but items in tasks are not yet able to be loaded + from platform_plugin_aspects.tasks import ( # pylint: disable=import-outside-toplevel + dump_course_to_clickhouse + ) + + CourseOverview = get_model("course_overviews") + if CourseOverview: + try: + CourseOverview.objects.get(id=instance.object_id) + dump_course_to_clickhouse.delay(instance.object_id) + except CourseOverview.DoesNotExist as exc: + pass # Connect the ExternalId.post_save signal handler only if we have a model to attach to. # (prevents celery errors during tests) _object_tag = get_model("object_tag") if _object_tag: post_save.connect(on_object_tag_saved, sender=_object_tag) # pragma: no cover + post_delete.connect(on_object_tag_deleted, sender=_object_tag) From e3fea7cf01767a51fcc6f57b8b29b1b63b5dcb91 Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Thu, 29 Aug 2024 17:02:44 -0500 Subject: [PATCH 3/4] fix: sink only object tag id fix: dump tags ids fix: add use set to store tags chore: quality fixes --- platform_plugin_aspects/signals.py | 16 +++++----- platform_plugin_aspects/tests/test_utils.py | 23 ++++++++------ platform_plugin_aspects/utils.py | 35 +++++++-------------- 3 files changed, 31 insertions(+), 43 deletions(-) diff --git a/platform_plugin_aspects/signals.py b/platform_plugin_aspects/signals.py index c323b3a..fc630f8 100644 --- a/platform_plugin_aspects/signals.py +++ b/platform_plugin_aspects/signals.py @@ -3,7 +3,7 @@ """ from django.db import transaction -from django.db.models.signals import post_save, post_delete +from django.db.models.signals import post_delete, post_save from django.dispatch import Signal, receiver from platform_plugin_aspects.sinks import ( @@ -200,16 +200,13 @@ def on_taxonomy_saved( # pylint: disable=unused-argument # pragma: no cover post_save.connect(on_taxonomy_saved, sender=_taxonomy) # 
pragma: no cover -def on_object_tag_saved( # pylint: disable=unused-argument # pragma: no cover - sender, instance, **kwargs -): +def on_object_tag_saved(sender, instance, **kwargs): # pragma: no cover """ Receives post save signal and queues the dump job. """ # import here, because signal is registered at startup, but items in tasks are not yet able to be loaded from platform_plugin_aspects.tasks import ( # pylint: disable=import-outside-toplevel dump_data_to_clickhouse, - dump_course_to_clickhouse ) sink = ObjectTagSink(None, None) @@ -230,7 +227,7 @@ def on_object_tag_deleted( # pylint: disable=unused-argument # pragma: no cove """ # import here, because signal is registered at startup, but items in tasks are not yet able to be loaded from platform_plugin_aspects.tasks import ( # pylint: disable=import-outside-toplevel - dump_course_to_clickhouse + dump_course_to_clickhouse, ) CourseOverview = get_model("course_overviews") @@ -238,12 +235,13 @@ def on_object_tag_deleted( # pylint: disable=unused-argument # pragma: no cove try: CourseOverview.objects.get(id=instance.object_id) dump_course_to_clickhouse.delay(instance.object_id) - except CourseOverview.DoesNotExist as exc: + except CourseOverview.DoesNotExist: pass + # Connect the ExternalId.post_save signal handler only if we have a model to attach to. 
# (prevents celery errors during tests) _object_tag = get_model("object_tag") -if _object_tag: - post_save.connect(on_object_tag_saved, sender=_object_tag) # pragma: no cover +if _object_tag: # pragma: no cover + post_save.connect(on_object_tag_saved, sender=_object_tag) post_delete.connect(on_object_tag_deleted, sender=_object_tag) diff --git a/platform_plugin_aspects/tests/test_utils.py b/platform_plugin_aspects/tests/test_utils.py index ee43729..86e9d42 100644 --- a/platform_plugin_aspects/tests/test_utils.py +++ b/platform_plugin_aspects/tests/test_utils.py @@ -244,27 +244,30 @@ def test_get_tags_for_block(self, mock_get_object_tags): mock_taxonomy2 = Mock() mock_taxonomy2.name = "Taxonomy Two" - def mock_tag(taxonomy, value, parent=None): + def mock_tag(taxonomy, value, i, parent=None): """ Returns a mock ObjectTag. """ mock_tag = Mock() + mock_tag.id = i mock_tag.taxonomy = taxonomy mock_tag.value = value mock_tag.tag = mock_tag mock_tag.tag.parent = parent return mock_tag - mock_tag11 = mock_tag(mock_taxonomy1, "tag1.1") - mock_tag12 = mock_tag(mock_taxonomy1, "tag1.2", mock_tag11.tag) - mock_tag13 = mock_tag(mock_taxonomy1, "tag1.3", mock_tag12.tag) - mock_tag21 = mock_tag(mock_taxonomy2, "tag2.1") - mock_tag22 = mock_tag(mock_taxonomy2, "tag2.2") + i = 1 + mock_tag11 = mock_tag(mock_taxonomy1, "tag1.1", i) + i += 1 + mock_tag12 = mock_tag(mock_taxonomy1, "tag1.2", i, mock_tag11.tag) + i += 1 + mock_tag13 = mock_tag(mock_taxonomy1, "tag1.3", i, mock_tag12.tag) + i += 1 + mock_tag21 = mock_tag(mock_taxonomy2, "tag2.1", i) + i += 1 + mock_tag22 = mock_tag(mock_taxonomy2, "tag2.2", i) mock_get_object_tags.return_value = [mock_tag13, mock_tag21, mock_tag22] course_tags = get_tags_for_block(course.location) - assert course_tags == { - "Taxonomy One": ["tag1.3", "tag1.2", "tag1.1"], - "Taxonomy Two": ["tag2.1", "tag2.2"], - } + assert course_tags == [1, 2, 3, 4, 5] mock_get_object_tags.assert_called_once_with(course.location) diff --git 
a/platform_plugin_aspects/utils.py b/platform_plugin_aspects/utils.py index d120bcf..f9b2147 100644 --- a/platform_plugin_aspects/utils.py +++ b/platform_plugin_aspects/utils.py @@ -294,31 +294,18 @@ def _get_object_tags(usage_key): # pragma: no cover return {} -def get_tags_for_block(usage_key) -> dict: +def get_tags_for_block(usage_key) -> set: """ Return all the tags (and their parent tags) applied to the given block. - Returns a dict of [taxonomy]: [tag, tag, tag] + Returns a list of unique tag ids, e.g. [1, 2, 3] """ - tags = _get_object_tags(usage_key) - serialized_tags = {} - - for explicit_tag in tags: - _add_tag(explicit_tag, serialized_tags) - implicit_tag = explicit_tag.tag.parent - - while implicit_tag: - _add_tag(implicit_tag, serialized_tags) - implicit_tag = implicit_tag.parent - - return serialized_tags - - -def _add_tag(tag, serialized_tags): - """ - Add a tag to our serialized list of tags. - """ - if tag.taxonomy.name not in serialized_tags: - serialized_tags[tag.taxonomy.name] = [tag.value] - else: - serialized_tags[tag.taxonomy.name].append(tag.value) + object_tags = _get_object_tags(usage_key) + serialized_tags = set() + for object_tag in object_tags: + tag = object_tag.tag + while tag: + serialized_tags.add(tag.id) + tag = tag.parent + + return list(serialized_tags) From 0dbd5d08d5710d056375c4059dd1df4dabdbbc91 Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Wed, 4 Sep 2024 07:47:41 -0500 Subject: [PATCH 4/4] chore: bump version to v0.11.0 --- CHANGELOG.rst | 8 ++++++++ platform_plugin_aspects/__init__.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index e7124ab..c603d8a 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -14,6 +14,14 @@ Change Log Unreleased ********** +0.11.0 - 2024-09-04 +******************* + +Added +===== + +* Sinks for object tags, tags and taxonomies. 
+ 0.10.0 - 2024-06-17 ******************* diff --git a/platform_plugin_aspects/__init__.py b/platform_plugin_aspects/__init__.py index 8b6f4ff..635cc19 100644 --- a/platform_plugin_aspects/__init__.py +++ b/platform_plugin_aspects/__init__.py @@ -5,6 +5,6 @@ import os from pathlib import Path -__version__ = "0.10.0" +__version__ = "0.11.0" ROOT_DIRECTORY = Path(os.path.dirname(os.path.abspath(__file__)))