Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add tag and taxonomy sink #82

Merged
merged 4 commits into from
Sep 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@ Change Log
Unreleased
**********

0.11.0 - 2024-09-04
*******************

Added
=====

* Sinks for object tags, tags and taxonomies.

0.10.0 - 2024-06-17
*******************

Expand Down
2 changes: 1 addition & 1 deletion platform_plugin_aspects/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@
import os
from pathlib import Path

__version__ = "0.10.0"
__version__ = "0.11.0"

ROOT_DIRECTORY = Path(os.path.dirname(os.path.abspath(__file__)))
9 changes: 9 additions & 0 deletions platform_plugin_aspects/settings/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,4 +94,13 @@ def plugin_settings(settings):
"module": "openedx.core.djangoapps.user_api.models",
"model": "UserPreference",
},
"tag": {"module": "openedx_tagging.core.tagging.models", "model": "Tag"},
"taxonomy": {
"module": "openedx_tagging.core.tagging.models",
"model": "Taxonomy",
},
"object_tag": {
"module": "openedx_tagging.core.tagging.models",
"model": "ObjectTag",
},
}
104 changes: 103 additions & 1 deletion platform_plugin_aspects/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@
"""

from django.db import transaction
from django.db.models.signals import post_save
from django.db.models.signals import post_delete, post_save
from django.dispatch import Signal, receiver

from platform_plugin_aspects.sinks import (
CourseEnrollmentSink,
ExternalIdSink,
ObjectTagSink,
TagSink,
TaxonomySink,
UserProfileSink,
UserRetirementSink,
)
Expand Down Expand Up @@ -143,3 +146,102 @@ def on_user_retirement( # pylint: disable=unused-argument # pragma: no cover
sink_name=sink.__class__.__name__,
object_id=str(user.id),
)


def on_tag_saved(  # pylint: disable=unused-argument  # pragma: no cover
    sender, instance, **kwargs
):
    """
    Handle a saved Tag by queueing a ``dump_data_to_clickhouse`` task for it.

    The task receives the module and class name of ``TagSink`` together with
    the id of the saved instance, converted to a string.
    """
    # Deferred import: this handler is registered at startup, when the
    # contents of the tasks module cannot be loaded yet.
    from platform_plugin_aspects.tasks import (  # pylint: disable=import-outside-toplevel
        dump_data_to_clickhouse,
    )

    tag_sink = TagSink(None, None)
    task_kwargs = {
        "sink_module": tag_sink.__module__,
        "sink_name": tag_sink.__class__.__name__,
        "object_id": str(instance.id),
    }
    dump_data_to_clickhouse.delay(**task_kwargs)


# Connect the Tag post_save signal handler only if we have a model to attach to.
# (prevents celery errors during tests)
_tag = get_model("tag")
if _tag:
post_save.connect(on_tag_saved, sender=_tag) # pragma: no cover


def on_taxonomy_saved(  # pylint: disable=unused-argument  # pragma: no cover
    sender, instance, **kwargs
):
    """
    Handle a saved Taxonomy by queueing a ``dump_data_to_clickhouse`` task.

    The sink is identified to the task by its module and class name; the
    saved instance is identified by its id rendered as a string.
    """
    # Imported lazily because the signal is registered at startup, before
    # items in the tasks module are able to be loaded.
    from platform_plugin_aspects.tasks import (  # pylint: disable=import-outside-toplevel
        dump_data_to_clickhouse,
    )

    taxonomy_sink = TaxonomySink(None, None)
    module_path = taxonomy_sink.__module__
    class_name = taxonomy_sink.__class__.__name__
    dump_data_to_clickhouse.delay(
        sink_module=module_path,
        sink_name=class_name,
        object_id=str(instance.id),
    )


# Connect the Taxonomy post_save signal handler only if we have a model to attach to.
# (prevents celery errors during tests)
_taxonomy = get_model("taxonomy")
if _taxonomy:
post_save.connect(on_taxonomy_saved, sender=_taxonomy) # pragma: no cover


def on_object_tag_saved(sender, instance, **kwargs):  # pragma: no cover
    """
    Handle a saved ObjectTag.

    Queues a ``dump_data_to_clickhouse`` task for the saved instance via
    ``ObjectTagSink``, then delegates to ``on_object_tag_deleted`` with the
    same arguments.
    """
    # Imported lazily because the signal is registered at startup, before
    # items in the tasks module are able to be loaded.
    from platform_plugin_aspects.tasks import (  # pylint: disable=import-outside-toplevel
        dump_data_to_clickhouse,
    )

    object_tag_sink = ObjectTagSink(None, None)
    task_kwargs = {
        "sink_module": object_tag_sink.__module__,
        "sink_name": object_tag_sink.__class__.__name__,
        "object_id": str(instance.id),
    }
    dump_data_to_clickhouse.delay(**task_kwargs)

    # Reuse the delete handler so the save path runs the same follow-up step.
    on_object_tag_deleted(sender, instance, **kwargs)


def on_object_tag_deleted(  # pylint: disable=unused-argument  # pragma: no cover
    sender, instance, **kwargs
):
    """
    Queue a course dump when an object tag on a known course changes.

    Connected to ``post_delete`` for the object tag model and also called
    from ``on_object_tag_saved``. If the ``course_overviews`` model is
    unavailable, or no CourseOverview has an id equal to
    ``instance.object_id``, nothing is queued.
    """
    # Imported lazily because the signal is registered at startup, before
    # items in the tasks module are able to be loaded.
    from platform_plugin_aspects.tasks import (  # pylint: disable=import-outside-toplevel
        dump_course_to_clickhouse,
    )

    CourseOverview = get_model("course_overviews")
    if not CourseOverview:
        return

    # Keep the try body to the lookup alone, so a DoesNotExist raised while
    # dispatching the task is not mistaken for "this object is not a course".
    try:
        CourseOverview.objects.get(id=instance.object_id)
    except CourseOverview.DoesNotExist:
        return

    dump_course_to_clickhouse.delay(instance.object_id)


# Connect the ObjectTag post_save and post_delete signal handlers only if we have a model to attach to.
# (prevents celery errors during tests)
_object_tag = get_model("object_tag")
if _object_tag: # pragma: no cover
post_save.connect(on_object_tag_saved, sender=_object_tag)
post_delete.connect(on_object_tag_deleted, sender=_object_tag)
1 change: 1 addition & 0 deletions platform_plugin_aspects/sinks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@
from .course_enrollment_sink import CourseEnrollmentSink
from .course_overview_sink import CourseOverviewSink, XBlockSink
from .external_id_sink import ExternalIdSink
from .tag_sink import ObjectTagSink, TagSink, TaxonomySink
from .user_profile_sink import UserProfileSink
from .user_retire_sink import UserRetirementSink
64 changes: 64 additions & 0 deletions platform_plugin_aspects/sinks/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,3 +206,67 @@
def get_course_key(self, obj):
"""Return the course key as a string."""
return str(obj.course_id)


class TagSerializer(BaseSinkSerializer, serializers.ModelSerializer):
    """Serialize Tag model instances, adding a JSON-encoded ``lineage`` field."""

    # Populated by ``get_lineage`` below.
    lineage = serializers.SerializerMethodField()

    class Meta:
        """Bind the serializer to the ``tag`` model and list its output fields."""

        model = get_model("tag")
        fields = [
            "id",
            "taxonomy",
            "parent",
            "value",
            "external_id",
            "lineage",
            "dump_id",
            "time_last_dumped",
        ]

    def get_lineage(self, instance):
        """Return ``instance.get_lineage()`` encoded as a JSON string."""
        lineage = instance.get_lineage()
        return json.dumps(lineage)

Check failure on line 232 in platform_plugin_aspects/sinks/serializers.py

View workflow job for this annotation

GitHub Actions / tests (ubuntu-24.04, 3.11, django42)

Missing coverage

Missing coverage on line 232


class ObjectTagSerializer(BaseSinkSerializer, serializers.ModelSerializer):
    """Serialize ObjectTag model instances, adding a JSON-encoded ``lineage`` field."""

    # Populated by ``get_lineage`` below.
    lineage = serializers.SerializerMethodField()

    class Meta:
        """Bind the serializer to the ``object_tag`` model and list its output fields."""

        model = get_model("object_tag")
        fields = [
            "id",
            "object_id",
            "taxonomy",
            "tag",
            "_value",
            "_export_id",
            "lineage",
            "dump_id",
            "time_last_dumped",
        ]

    def get_lineage(self, instance):
        """Return ``instance.get_lineage()`` encoded as a JSON string."""
        lineage = instance.get_lineage()
        return json.dumps(lineage)

Check failure on line 257 in platform_plugin_aspects/sinks/serializers.py

View workflow job for this annotation

GitHub Actions / tests (ubuntu-24.04, 3.11, django42)

Missing coverage

Missing coverage on line 257


class TaxonomySerializer(BaseSinkSerializer, serializers.ModelSerializer):
    """Serialize Taxonomy model instances."""

    class Meta:
        """Bind the serializer to the ``taxonomy`` model and list its output fields."""

        model = get_model("taxonomy")
        fields = ["id", "name", "dump_id", "time_last_dumped"]
47 changes: 47 additions & 0 deletions platform_plugin_aspects/sinks/tag_sink.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""Tag sink"""

from platform_plugin_aspects.sinks.base_sink import ModelBaseSink
from platform_plugin_aspects.sinks.serializers import (
ObjectTagSerializer,
TagSerializer,
TaxonomySerializer,
)


class TagSink(ModelBaseSink):  # pylint: disable=abstract-method
    """
    Sink configuration for content tags.
    """

    # Display name and source model key.
    name = "Tag"
    model = "tag"

    # Destination table and the columns used as key and timestamp.
    clickhouse_table_name = "tag"
    unique_key = "id"
    timestamp_field = "time_last_dumped"

    serializer_class = TagSerializer


class TaxonomySink(ModelBaseSink):  # pylint: disable=abstract-method
    """
    Sink configuration for content taxonomies.
    """

    # Display name and source model key.
    name = "Taxonomy"
    model = "taxonomy"

    # Destination table and the columns used as key and timestamp.
    clickhouse_table_name = "taxonomy"
    unique_key = "id"
    timestamp_field = "time_last_dumped"

    serializer_class = TaxonomySerializer


class ObjectTagSink(ModelBaseSink):  # pylint: disable=abstract-method
    """
    Sink configuration for tagged objects.
    """

    # Display name and source model key.
    name = "ObjectTag"
    model = "object_tag"

    # Destination table and the columns used as key and timestamp.
    clickhouse_table_name = "object_tag"
    unique_key = "id"
    timestamp_field = "time_last_dumped"

    serializer_class = ObjectTagSerializer
3 changes: 3 additions & 0 deletions platform_plugin_aspects/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,6 @@ def dump_data_to_clickhouse(
if Sink.is_enabled():
sink = Sink(connection_overrides=connection_overrides, log=celery_log)
sink.dump(object_id)
return "Dumped"

return "Disabled"
23 changes: 13 additions & 10 deletions platform_plugin_aspects/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,27 +244,30 @@ def test_get_tags_for_block(self, mock_get_object_tags):
mock_taxonomy2 = Mock()
mock_taxonomy2.name = "Taxonomy Two"

def mock_tag(taxonomy, value, parent=None):
def mock_tag(taxonomy, value, i, parent=None):
"""
Returns a mock ObjectTag.
"""
mock_tag = Mock()
mock_tag.id = i
mock_tag.taxonomy = taxonomy
mock_tag.value = value
mock_tag.tag = mock_tag
mock_tag.tag.parent = parent
return mock_tag

mock_tag11 = mock_tag(mock_taxonomy1, "tag1.1")
mock_tag12 = mock_tag(mock_taxonomy1, "tag1.2", mock_tag11.tag)
mock_tag13 = mock_tag(mock_taxonomy1, "tag1.3", mock_tag12.tag)
mock_tag21 = mock_tag(mock_taxonomy2, "tag2.1")
mock_tag22 = mock_tag(mock_taxonomy2, "tag2.2")
i = 1
mock_tag11 = mock_tag(mock_taxonomy1, "tag1.1", i)
i += 1
mock_tag12 = mock_tag(mock_taxonomy1, "tag1.2", i, mock_tag11.tag)
i += 1
mock_tag13 = mock_tag(mock_taxonomy1, "tag1.3", i, mock_tag12.tag)
i += 1
mock_tag21 = mock_tag(mock_taxonomy2, "tag2.1", i)
i += 1
mock_tag22 = mock_tag(mock_taxonomy2, "tag2.2", i)
mock_get_object_tags.return_value = [mock_tag13, mock_tag21, mock_tag22]

course_tags = get_tags_for_block(course.location)
assert course_tags == {
"Taxonomy One": ["tag1.3", "tag1.2", "tag1.1"],
"Taxonomy Two": ["tag2.1", "tag2.2"],
}
assert course_tags == [1, 2, 3, 4, 5]
mock_get_object_tags.assert_called_once_with(course.location)
35 changes: 11 additions & 24 deletions platform_plugin_aspects/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,31 +294,18 @@ def _get_object_tags(usage_key): # pragma: no cover
return {}


def get_tags_for_block(usage_key) -> dict:
def get_tags_for_block(usage_key) -> set:
"""
Return all the tags (and their parent tags) applied to the given block.

Returns a dict of [taxonomy]: [tag, tag, tag]
Returns a list of unique tag ids, e.g. [1, 2, 3]
"""
tags = _get_object_tags(usage_key)
serialized_tags = {}

for explicit_tag in tags:
_add_tag(explicit_tag, serialized_tags)
implicit_tag = explicit_tag.tag.parent

while implicit_tag:
_add_tag(implicit_tag, serialized_tags)
implicit_tag = implicit_tag.parent

return serialized_tags


def _add_tag(tag, serialized_tags):
"""
Add a tag to our serialized list of tags.
"""
if tag.taxonomy.name not in serialized_tags:
serialized_tags[tag.taxonomy.name] = [tag.value]
else:
serialized_tags[tag.taxonomy.name].append(tag.value)
object_tags = _get_object_tags(usage_key)
serialized_tags = set()
for object_tag in object_tags:
tag = object_tag.tag
while tag:
serialized_tags.add(tag.id)
tag = tag.parent

return list(serialized_tags)
Loading