From c847837ade2f6238c38dd7d1666943eee07d280c Mon Sep 17 00:00:00 2001 From: Adam Stankiewicz Date: Tue, 10 Sep 2024 08:47:24 -0400 Subject: [PATCH] feat: introduce normalized_metadata_by_run in persisted ContentMetadata course JSON (#930) --- enterprise_catalog/apps/api/tasks.py | 24 +++--- .../apps/api/tests/test_tasks.py | 74 +++++++++++------ .../apps/api/v1/export_utils.py | 1 - .../apps/catalog/serializers.py | 82 ++++++++----------- .../apps/catalog/tests/test_serializers.py | 80 +++++++++++++----- 5 files changed, 157 insertions(+), 104 deletions(-) diff --git a/enterprise_catalog/apps/api/tasks.py b/enterprise_catalog/apps/api/tasks.py index cfdf33336..b489418af 100644 --- a/enterprise_catalog/apps/api/tasks.py +++ b/enterprise_catalog/apps/api/tasks.py @@ -288,21 +288,21 @@ def _update_full_content_metadata_course(content_keys, dry_run=False): # Merge the full metadata from discovery's /api/v1/courses into the local metadata object. metadata_record.json_metadata.update(course_metadata_dict) - # Exec ed provides the start/end dates in additional_metadata, so we should copy those over to the keys that - # we use (inside the advertised course run). - if metadata_record.is_exec_ed_2u_course: - json_meta = metadata_record.json_metadata - start_date = json_meta.get('additional_metadata', {}).get('start_date') - end_date = json_meta.get('additional_metadata', {}).get('end_date') - course_run_uuid = json_meta.get('advertised_course_run_uuid') - for run in json_meta.get('course_runs', []): - if run.get('uuid') == course_run_uuid: - run.update({'start': start_date, 'end': end_date}) - # Perform more steps to normalize and move keys around # for more consistency across content types. + normalized_metadata_input = { + 'course': metadata_record, + } metadata_record.json_metadata['normalized_metadata'] =\ - NormalizedContentMetadataSerializer(metadata_record).data + NormalizedContentMetadataSerializer(normalized_metadata_input).data + metadata_record.json_metadata['normalized_metadata_by_run'] = {} + for run in metadata_record.json_metadata.get('course_runs', []): + metadata_record.json_metadata['normalized_metadata_by_run'].update({ + run['key']: NormalizedContentMetadataSerializer({ + 'course_run_metadata': run, + 'course': metadata_record, + }).data + }) if review := reviews_for_courses_dict.get(content_key): metadata_record.json_metadata['reviews_count'] = review.get('reviews_count') diff --git a/enterprise_catalog/apps/api/tests/test_tasks.py b/enterprise_catalog/apps/api/tests/test_tasks.py index cc6333f53..78d552528 100644 --- a/enterprise_catalog/apps/api/tests/test_tasks.py +++ b/enterprise_catalog/apps/api/tests/test_tasks.py @@ -422,7 +422,15 @@ def test_update_full_metadata(self, mock_oauth_client, mock_partition_course_key 'course_runs': [{ 'uuid': course_run_2_uuid, 'key': f'course-v1:{course_key_2}+1', + 'start': '2023-03-01T00:00:00Z', + 'end': '2023-03-01T00:00:00Z', 'first_enrollable_paid_seat_price': None, # should cause fallback to DEFAULT_NORMALIZED_PRICE + 'seats': [ + { + 'type': CourseMode.VERIFIED, + 'upgrade_deadline': '2023-02-01T00:00:00Z', + }, + ], }], } @@ -452,7 +460,7 @@ def test_update_full_metadata(self, mock_oauth_client, mock_partition_course_key 'advertised_course_run_uuid': course_run_3_uuid, } course_key_4 = 'edX+superDuperFakeX' - course_data_4 = {'key': course_key_4, 'full_course_only_field': 'test_4', 'programs': []} + course_data_4 = {'key': course_key_4, 'full_course_only_field': 'test_4', 'programs': [], 'course_runs': []} non_course_key = 'course-runX' @@ -505,6 +513,19 @@ def test_update_full_metadata(self, mock_oauth_client, mock_partition_course_key assert metadata_2.json_metadata['aggregation_key'] == f'course:{course_key_2}' assert metadata_2.json_metadata['full_course_only_field'] == 'test_2' assert metadata_2.json_metadata['normalized_metadata']['content_price'] == DEFAULT_NORMALIZED_PRICE + assert metadata_2.json_metadata['normalized_metadata']['start_date'] == '2023-03-01T00:00:00Z' + assert metadata_2.json_metadata['normalized_metadata']['end_date'] == '2023-03-01T00:00:00Z' + assert metadata_2.json_metadata['normalized_metadata']['enroll_by_date'] == '2023-02-01T00:00:00Z' + expected_normalized_metadata_by_run = { + course_run['key']: { + 'start_date': course_run['start'], + 'end_date': course_run['end'], + 'enroll_by_date': course_run['seats'][0]['upgrade_deadline'], + 'content_price': course_run['first_enrollable_paid_seat_price'] or DEFAULT_NORMALIZED_PRICE, + } + for course_run in metadata_2.json_metadata['course_runs'] + } + assert metadata_2.json_metadata['normalized_metadata_by_run'] == expected_normalized_metadata_by_run assert set(program_data.items()).issubset(set(metadata_2.json_metadata['programs'][0].items())) assert metadata_3.json_metadata['aggregation_key'] == f'course:{course_key_3}' @@ -512,6 +533,16 @@ def test_update_full_metadata(self, mock_oauth_client, mock_partition_course_key assert metadata_3.json_metadata['normalized_metadata']['end_date'] == '2023-03-01T00:00:00Z' assert metadata_3.json_metadata['normalized_metadata']['enroll_by_date'] == '2023-02-01T00:00:00Z' assert metadata_3.json_metadata['normalized_metadata']['content_price'] == 90 + expected_normalized_metadata_by_run = { + course_run['key']: { + 'start_date': course_run['start'], + 'end_date': course_run['end'], + 'enroll_by_date': course_run['seats'][0]['upgrade_deadline'], + 'content_price': course_run['first_enrollable_paid_seat_price'] or DEFAULT_NORMALIZED_PRICE, + } + for course_run in metadata_3.json_metadata['course_runs'] + } + assert metadata_3.json_metadata['normalized_metadata_by_run'] == expected_normalized_metadata_by_run assert metadata_4.json_metadata['aggregation_key'] == f'course:{course_key_4}' assert metadata_4.json_metadata['full_course_only_field'] == 'test_4' @@ -635,20 +666,17 @@ def test_update_full_metadata_exec_ed(self, mock_oauth_client, mock_partition_co course_data = { 'aggregation_key': f'course:{course_key}', 'key': course_key, - 'course_type': 'executive-education-2u', + 'course_type': EXEC_ED_2U_COURSE_TYPE, 'course_runs': [{ 'key': course_run_key, 'uuid': course_run_uuid, - # Use dummy 2022 dates that we will assert are overwritten. - 'start': '2022-03-01T00:00:00Z', - 'end': '2022-03-01T00:00:00Z', + # start/end dates are the same across course types + 'start': '2023-03-01T00:00:00Z', + 'end': '2023-04-09T23:59:59Z', + # `enrollment_end` in place of seat.upgrade_deadline for Exec Ed courses + 'enrollment_end': '2023-02-01T00:00:00Z', }], 'programs': [], - 'additional_metadata': { - 'start_date': '2023-03-01T00:00:00Z', - 'end_date': '2023-04-09T23:59:59Z', - 'registration_deadline': '2023-02-01T00:00:00Z', - }, 'entitlements': [ { 'price': 2900, @@ -685,22 +713,15 @@ def test_update_full_metadata_exec_ed(self, mock_oauth_client, mock_partition_co 'course_type': EXEC_ED_2U_COURSE_TYPE, 'course_runs': [{ 'key': course_run_key, - # Use dummy 2022 dates that we will assert are overwritten. - 'start': '2022-03-01T00:00:00Z', - 'end': '2022-03-01T00:00:00Z', + # start/end dates are the same across course types + 'start': '2023-03-01T00:00:00Z', + 'end': '2023-04-09T23:59:59Z', + # `enrollment_end` in place of seat.upgrade_deadline for Exec Ed courses + 'enrollment_end': '2023-02-01T00:00:00Z', }], 'programs': [], - # Intentionally exclude additional_metadata that we will assert is added by the - # update_full_content_metadata_task. - # - # 'additional_metadata': { - # 'start_date': '2023-03-01T00:00:00Z', - # 'end_date': '2023-04-09T23:59:59Z', - # 'registration_deadline': '2023-02-01T00:00:00Z', - # }, - - # Also `advertised_course_run_uuid` is ONLY in the output of /api/v1/courses/, not /api/v1/search/all/ + # `advertised_course_run_uuid` is ONLY in the output of /api/v1/courses/, not /api/v1/search/all/ # 'advertised_course_run_uuid': course_run_uuid, # Intentionally exclude net-new fields that we will assert are added by the @@ -724,11 +745,18 @@ def test_update_full_metadata_exec_ed(self, mock_oauth_client, mock_partition_co # Make sure the field normalization step caused the creation of expected net-new fields. course_cm = ContentMetadata.objects.get(content_key=course_key) assert course_cm.content_type == COURSE + assert course_cm.json_metadata['normalized_metadata']['start_date'] == '2023-03-01T00:00:00Z' assert course_cm.json_metadata['normalized_metadata']['end_date'] == '2023-04-09T23:59:59Z' assert course_cm.json_metadata['normalized_metadata']['enroll_by_date'] == '2023-02-01T00:00:00Z' assert course_cm.json_metadata['normalized_metadata']['content_price'] == 2900 + normalized_metadata_by_run = course_cm.json_metadata['normalized_metadata_by_run'] + assert normalized_metadata_by_run[course_run_key]['start_date'] == '2023-03-01T00:00:00Z' + assert normalized_metadata_by_run[course_run_key]['end_date'] == '2023-04-09T23:59:59Z' + assert normalized_metadata_by_run[course_run_key]['enroll_by_date'] == '2023-02-01T00:00:00Z' + assert normalized_metadata_by_run[course_run_key]['content_price'] == 2900 + # Make sure the start/end dates are copied from the additional_metadata into the course run dict of the course. # This checks that the dummy 2022 dates are overwritten. course_run_json = course_cm.json_metadata.get('course_runs')[0] diff --git a/enterprise_catalog/apps/api/v1/export_utils.py b/enterprise_catalog/apps/api/v1/export_utils.py index 154cae6eb..61332553f 100644 --- a/enterprise_catalog/apps/api/v1/export_utils.py +++ b/enterprise_catalog/apps/api/v1/export_utils.py @@ -169,7 +169,6 @@ def program_hit_to_row(hit): return csv_row -# pylint: disable=too-many-statements def course_hit_to_row(hit): """ Helper function to construct a CSV row according to a single Algolia result course hit. diff --git a/enterprise_catalog/apps/catalog/serializers.py b/enterprise_catalog/apps/catalog/serializers.py index 6cfeb0cf3..dd91887be 100644 --- a/enterprise_catalog/apps/catalog/serializers.py +++ b/enterprise_catalog/apps/catalog/serializers.py @@ -83,76 +83,66 @@ class NormalizedContentMetadataSerializer(ReadOnlySerializer): downstream consumers, who should be able to use this dictionary instead of doing their own independent normalization. + The serializer expects the following keys in the input dictionary: + - course: a ContentMetadata object representing the course + - [course_run_metadata]: Optionally, a dictionary representing a specific course run's metadata + + When no course run metadata is provided, the serializer will attempt to use the course's advertised course run + metadata. If that is not available, the serializer will return None for all fields. + Note that course-type-specific definitions of each of these keys may be more nuanced. """ + start_date = serializers.SerializerMethodField(help_text='When the course starts') end_date = serializers.SerializerMethodField(help_text='When the course ends') enroll_by_date = serializers.SerializerMethodField(help_text='The deadline for enrollment') content_price = serializers.SerializerMethodField(help_text='The price of a course in USD') @cached_property - def advertised_course_run(self): - advertised_course_run_uuid = self.instance.json_metadata.get('advertised_course_run_uuid') - return _get_course_run_by_uuid(self.instance.json_metadata, advertised_course_run_uuid) + def course(self): + return self.instance.get('course') @cached_property - def additional_metadata(self): - return self.instance.json_metadata.get('additional_metadata', {}) + def course_metadata(self): + return self.course.json_metadata - @extend_schema_field(serializers.DateTimeField) - def get_start_date(self, obj) -> str: - if obj.is_exec_ed_2u_course: - return self.additional_metadata.get('start_date') + @cached_property + def course_run_metadata(self): + run_metadata = self.instance.get('course_run_metadata') + if run_metadata: + return run_metadata + advertised_course_run_uuid = self.course_metadata.get('advertised_course_run_uuid') + return _get_course_run_by_uuid(self.course_metadata, advertised_course_run_uuid) - if not self.advertised_course_run: + @extend_schema_field(serializers.DateTimeField) + def get_start_date(self, obj) -> str: # pylint: disable=unused-argument + if not self.course_run_metadata: return None - - if start_date_string := self.advertised_course_run.get('start'): - return start_date_string - - return None + return self.course_run_metadata.get('start') @extend_schema_field(serializers.DateTimeField) - def get_end_date(self, obj) -> str: - if obj.is_exec_ed_2u_course: - return self.additional_metadata.get('end_date') - - if not self.advertised_course_run: + def get_end_date(self, obj) -> str: # pylint: disable=unused-argument + if not self.course_run_metadata: return None - - if end_date_string := self.advertised_course_run.get('end'): - return end_date_string - - return None + return self.course_run_metadata.get('end') @extend_schema_field(serializers.DateTimeField) - def get_enroll_by_date(self, obj) -> str: - if not self.advertised_course_run: + def get_enroll_by_date(self, obj) -> str: # pylint: disable=unused-argument + if not self.course_run_metadata: return None - - if obj.is_exec_ed_2u_course: - return ( - self.advertised_course_run.get('enrollment_end') - or self.additional_metadata.get('registration_deadline') - ) - - upgrade_deadline = None - - all_seats = self.advertised_course_run.get('seats', []) + all_seats = self.course_run_metadata.get('seats', []) seat = _find_best_mode_seat(all_seats) + upgrade_deadline = None if seat: upgrade_deadline = seat.get('upgrade_deadline_override') or seat.get('upgrade_deadline') - - return upgrade_deadline or self.advertised_course_run.get('enrollment_end') + return upgrade_deadline or self.course_run_metadata.get('enrollment_end') @extend_schema_field(serializers.FloatField) - def get_content_price(self, obj) -> float: - if obj.is_exec_ed_2u_course: - for entitlement in obj.json_metadata.get('entitlements', []): + def get_content_price(self, obj) -> float: # pylint: disable=unused-argument + if self.course.is_exec_ed_2u_course: + for entitlement in self.course_metadata.get('entitlements', []): if entitlement.get('mode') == CourseMode.PAID_EXECUTIVE_EDUCATION: return entitlement.get('price') or DEFAULT_NORMALIZED_PRICE - - if not self.advertised_course_run: + if not self.course_run_metadata: return None - - return self.advertised_course_run.get('first_enrollable_paid_seat_price') or DEFAULT_NORMALIZED_PRICE + return self.course_run_metadata.get('first_enrollable_paid_seat_price') or DEFAULT_NORMALIZED_PRICE diff --git a/enterprise_catalog/apps/catalog/tests/test_serializers.py b/enterprise_catalog/apps/catalog/tests/test_serializers.py index 0042b8fe2..e4d3d55ef 100644 --- a/enterprise_catalog/apps/catalog/tests/test_serializers.py +++ b/enterprise_catalog/apps/catalog/tests/test_serializers.py @@ -19,25 +19,30 @@ class NormalizedContentMetadataSerializerTests(TestCase): """ Tests for ``NormalizedContentMetadataSerializer``. """ + def test_enroll_by_date_no_advertised_run(self): course_content = factories.ContentMetadataFactory( content_type=COURSE, ) course_content.json_metadata['advertised_course_run_uuid'] = None - serialized_data = NormalizedContentMetadataSerializer(course_content).data + normalized_metadata_input = { + 'course': course_content, + } + serialized_data = NormalizedContentMetadataSerializer(normalized_metadata_input).data self.assertIsNone(serialized_data['enroll_by_date']) - def test_enroll_by_date_is_exec_ed_course_with_enrollment_end(self): + @ddt.data( + {'has_course_run': False}, + {'has_course_run': True}, + ) + @ddt.unpack + def test_enroll_by_date_is_exec_ed_course_with_enrollment_end(self, has_course_run): course_content = factories.ContentMetadataFactory( content_type=COURSE, ) course_content.json_metadata['course_type'] = EXEC_ED_2U_COURSE_TYPE - course_content.json_metadata['additional_metadata'] = { - 'will': 'be ignored', - 'registration_deadline': '1999-12-31T23:59:59Z', - } course_content.json_metadata['course_runs'] = [ { 'uuid': 'the-course-run-uuid', @@ -46,19 +51,25 @@ def test_enroll_by_date_is_exec_ed_course_with_enrollment_end(self): ] course_content.json_metadata['advertised_course_run_uuid'] = 'the-course-run-uuid' - serialized_data = NormalizedContentMetadataSerializer(course_content).data + normalized_metadata_input = { + 'course': course_content, + } + if has_course_run: + normalized_metadata_input['course_run_metadata'] = course_content.json_metadata['course_runs'][0] + serialized_data = NormalizedContentMetadataSerializer(normalized_metadata_input).data self.assertEqual(serialized_data['enroll_by_date'], '2024-01-01T00:00:00Z') - def test_enroll_by_date_is_exec_ed_course_no_enrollment_end(self): + @ddt.data( + {'has_course_run': False}, + {'has_course_run': True}, + ) + @ddt.unpack + def test_enroll_by_date_is_exec_ed_course_no_enrollment_end(self, has_course_run): course_content = factories.ContentMetadataFactory( content_type=COURSE, ) course_content.json_metadata['course_type'] = EXEC_ED_2U_COURSE_TYPE - course_content.json_metadata['additional_metadata'] = { - 'will': 'not be ignored', - 'registration_deadline': '1999-12-31T23:59:59Z', - } course_content.json_metadata['course_runs'] = [ { 'uuid': 'the-course-run-uuid', @@ -66,13 +77,23 @@ def test_enroll_by_date_is_exec_ed_course_no_enrollment_end(self): } ] course_content.json_metadata['advertised_course_run_uuid'] = 'the-course-run-uuid' - - serialized_data = NormalizedContentMetadataSerializer(course_content).data - - self.assertEqual(serialized_data['enroll_by_date'], '1999-12-31T23:59:59Z') - - @ddt.data(True, False) - def test_enroll_by_date_verified_course_with_seat(self, has_override): + normalized_metadata_input = { + 'course': course_content, + } + if has_course_run: + normalized_metadata_input['course_run_metadata'] = course_content.json_metadata['course_runs'][0] + serialized_data = NormalizedContentMetadataSerializer(normalized_metadata_input).data + + self.assertEqual(serialized_data['enroll_by_date'], None) + + @ddt.data( + {'has_override': True, 'has_course_run': False}, + {'has_override': False, 'has_course_run': False}, + {'has_override': True, 'has_course_run': True}, + {'has_override': False, 'has_course_run': True}, + ) + @ddt.unpack + def test_enroll_by_date_verified_course_with_seat(self, has_override, has_course_run): course_content = factories.ContentMetadataFactory( content_type=COURSE, ) @@ -92,14 +113,24 @@ def test_enroll_by_date_verified_course_with_seat(self, has_override): } ] - serialized_data = NormalizedContentMetadataSerializer(course_content).data + normalized_metadata_input = { + 'course': course_content, + } + if has_course_run: + normalized_metadata_input['course_run_metadata'] = course_content.json_metadata['course_runs'][0] + serialized_data = NormalizedContentMetadataSerializer(normalized_metadata_input).data if has_override: self.assertEqual(serialized_data['enroll_by_date'], actual_deadline_override) else: self.assertEqual(serialized_data['enroll_by_date'], actual_deadline) - def test_enroll_by_date_verified_course_no_seat(self): + @ddt.data( + {'has_course_run': False}, + {'has_course_run': True}, + ) + @ddt.unpack + def test_enroll_by_date_verified_course_no_seat(self, has_course_run): course_content = factories.ContentMetadataFactory( content_type=COURSE, ) @@ -107,6 +138,11 @@ def test_enroll_by_date_verified_course_no_seat(self): course_content.json_metadata['course_runs'][0]['seats'] = [] course_content.json_metadata['course_runs'][0]['enrollment_end'] = actual_deadline - serialized_data = NormalizedContentMetadataSerializer(course_content).data + normalized_metadata_input = { + 'course': course_content, + } + if has_course_run: + normalized_metadata_input['course_run_metadata'] = course_content.json_metadata['course_runs'][0] + serialized_data = NormalizedContentMetadataSerializer(normalized_metadata_input).data self.assertEqual(serialized_data['enroll_by_date'], actual_deadline)