Skip to content

Commit

Permalink
feat: introduce normalized_metadata_by_run in persisted ContentMetada…
Browse files Browse the repository at this point in the history
…ta course JSON (#930)
  • Loading branch information
adamstankiewicz authored Sep 10, 2024
1 parent db89535 commit c847837
Show file tree
Hide file tree
Showing 5 changed files with 157 additions and 104 deletions.
24 changes: 12 additions & 12 deletions enterprise_catalog/apps/api/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,21 +288,21 @@ def _update_full_content_metadata_course(content_keys, dry_run=False):
# Merge the full metadata from discovery's /api/v1/courses into the local metadata object.
metadata_record.json_metadata.update(course_metadata_dict)

# Exec ed provides the start/end dates in additional_metadata, so we should copy those over to the keys that
# we use (inside the advertised course run).
if metadata_record.is_exec_ed_2u_course:
json_meta = metadata_record.json_metadata
start_date = json_meta.get('additional_metadata', {}).get('start_date')
end_date = json_meta.get('additional_metadata', {}).get('end_date')
course_run_uuid = json_meta.get('advertised_course_run_uuid')
for run in json_meta.get('course_runs', []):
if run.get('uuid') == course_run_uuid:
run.update({'start': start_date, 'end': end_date})

# Perform more steps to normalize and move keys around
# for more consistency across content types.
normalized_metadata_input = {
'course': metadata_record,
}
metadata_record.json_metadata['normalized_metadata'] =\
NormalizedContentMetadataSerializer(metadata_record).data
NormalizedContentMetadataSerializer(normalized_metadata_input).data
metadata_record.json_metadata['normalized_metadata_by_run'] = {}
for run in metadata_record.json_metadata.get('course_runs', []):
metadata_record.json_metadata['normalized_metadata_by_run'].update({
run['key']: NormalizedContentMetadataSerializer({
'course_run_metadata': run,
'course': metadata_record,
}).data
})

if review := reviews_for_courses_dict.get(content_key):
metadata_record.json_metadata['reviews_count'] = review.get('reviews_count')
Expand Down
74 changes: 51 additions & 23 deletions enterprise_catalog/apps/api/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,15 @@ def test_update_full_metadata(self, mock_oauth_client, mock_partition_course_key
'course_runs': [{
'uuid': course_run_2_uuid,
'key': f'course-v1:{course_key_2}+1',
'start': '2023-03-01T00:00:00Z',
'end': '2023-03-01T00:00:00Z',
'first_enrollable_paid_seat_price': None, # should cause fallback to DEFAULT_NORMALIZED_PRICE
'seats': [
{
'type': CourseMode.VERIFIED,
'upgrade_deadline': '2023-02-01T00:00:00Z',
},
],
}],
}

Expand Down Expand Up @@ -452,7 +460,7 @@ def test_update_full_metadata(self, mock_oauth_client, mock_partition_course_key
'advertised_course_run_uuid': course_run_3_uuid,
}
course_key_4 = 'edX+superDuperFakeX'
course_data_4 = {'key': course_key_4, 'full_course_only_field': 'test_4', 'programs': []}
course_data_4 = {'key': course_key_4, 'full_course_only_field': 'test_4', 'programs': [], 'course_runs': []}

non_course_key = 'course-runX'

Expand Down Expand Up @@ -505,13 +513,36 @@ def test_update_full_metadata(self, mock_oauth_client, mock_partition_course_key
assert metadata_2.json_metadata['aggregation_key'] == f'course:{course_key_2}'
assert metadata_2.json_metadata['full_course_only_field'] == 'test_2'
assert metadata_2.json_metadata['normalized_metadata']['content_price'] == DEFAULT_NORMALIZED_PRICE
assert metadata_2.json_metadata['normalized_metadata']['start_date'] == '2023-03-01T00:00:00Z'
assert metadata_2.json_metadata['normalized_metadata']['end_date'] == '2023-03-01T00:00:00Z'
assert metadata_2.json_metadata['normalized_metadata']['enroll_by_date'] == '2023-02-01T00:00:00Z'
expected_normalized_metadata_by_run = {
course_run['key']: {
'start_date': course_run['start'],
'end_date': course_run['end'],
'enroll_by_date': course_run['seats'][0]['upgrade_deadline'],
'content_price': course_run['first_enrollable_paid_seat_price'] or DEFAULT_NORMALIZED_PRICE,
}
for course_run in metadata_2.json_metadata['course_runs']
}
assert metadata_2.json_metadata['normalized_metadata_by_run'] == expected_normalized_metadata_by_run
assert set(program_data.items()).issubset(set(metadata_2.json_metadata['programs'][0].items()))

assert metadata_3.json_metadata['aggregation_key'] == f'course:{course_key_3}'
assert metadata_3.json_metadata['normalized_metadata']['start_date'] == '2023-03-01T00:00:00Z'
assert metadata_3.json_metadata['normalized_metadata']['end_date'] == '2023-03-01T00:00:00Z'
assert metadata_3.json_metadata['normalized_metadata']['enroll_by_date'] == '2023-02-01T00:00:00Z'
assert metadata_3.json_metadata['normalized_metadata']['content_price'] == 90
expected_normalized_metadata_by_run = {
course_run['key']: {
'start_date': course_run['start'],
'end_date': course_run['end'],
'enroll_by_date': course_run['seats'][0]['upgrade_deadline'],
'content_price': course_run['first_enrollable_paid_seat_price'] or DEFAULT_NORMALIZED_PRICE,
}
for course_run in metadata_3.json_metadata['course_runs']
}
assert metadata_3.json_metadata['normalized_metadata_by_run'] == expected_normalized_metadata_by_run

assert metadata_4.json_metadata['aggregation_key'] == f'course:{course_key_4}'
assert metadata_4.json_metadata['full_course_only_field'] == 'test_4'
Expand Down Expand Up @@ -635,20 +666,17 @@ def test_update_full_metadata_exec_ed(self, mock_oauth_client, mock_partition_co
course_data = {
'aggregation_key': f'course:{course_key}',
'key': course_key,
'course_type': 'executive-education-2u',
'course_type': EXEC_ED_2U_COURSE_TYPE,
'course_runs': [{
'key': course_run_key,
'uuid': course_run_uuid,
# Use dummy 2022 dates that we will assert are overwritten.
'start': '2022-03-01T00:00:00Z',
'end': '2022-03-01T00:00:00Z',
# start/end dates are the same across course types
'start': '2023-03-01T00:00:00Z',
'end': '2023-04-09T23:59:59Z',
# `enrollment_end` in place of seat.upgrade_deadline for Exec Ed courses
'enrollment_end': '2023-02-01T00:00:00Z',
}],
'programs': [],
'additional_metadata': {
'start_date': '2023-03-01T00:00:00Z',
'end_date': '2023-04-09T23:59:59Z',
'registration_deadline': '2023-02-01T00:00:00Z',
},
'entitlements': [
{
'price': 2900,
Expand Down Expand Up @@ -685,22 +713,15 @@ def test_update_full_metadata_exec_ed(self, mock_oauth_client, mock_partition_co
'course_type': EXEC_ED_2U_COURSE_TYPE,
'course_runs': [{
'key': course_run_key,
# Use dummy 2022 dates that we will assert are overwritten.
'start': '2022-03-01T00:00:00Z',
'end': '2022-03-01T00:00:00Z',
# start/end dates are the same across course types
'start': '2023-03-01T00:00:00Z',
'end': '2023-04-09T23:59:59Z',
# `enrollment_end` in place of seat.upgrade_deadline for Exec Ed courses
'enrollment_end': '2023-02-01T00:00:00Z',
}],
'programs': [],

# Intentionally exclude additional_metadata that we will assert is added by the
# update_full_content_metadata_task.
#
# 'additional_metadata': {
# 'start_date': '2023-03-01T00:00:00Z',
# 'end_date': '2023-04-09T23:59:59Z',
# 'registration_deadline': '2023-02-01T00:00:00Z',
# },

# Also `advertised_course_run_uuid` is ONLY in the output of /api/v1/courses/, not /api/v1/search/all/
# `advertised_course_run_uuid` is ONLY in the output of /api/v1/courses/, not /api/v1/search/all/
# 'advertised_course_run_uuid': course_run_uuid,

# Intentionally exclude net-new fields that we will assert are added by the
Expand All @@ -724,11 +745,18 @@ def test_update_full_metadata_exec_ed(self, mock_oauth_client, mock_partition_co
# Make sure the field normalization step caused the creation of expected net-new fields.
course_cm = ContentMetadata.objects.get(content_key=course_key)
assert course_cm.content_type == COURSE

assert course_cm.json_metadata['normalized_metadata']['start_date'] == '2023-03-01T00:00:00Z'
assert course_cm.json_metadata['normalized_metadata']['end_date'] == '2023-04-09T23:59:59Z'
assert course_cm.json_metadata['normalized_metadata']['enroll_by_date'] == '2023-02-01T00:00:00Z'
assert course_cm.json_metadata['normalized_metadata']['content_price'] == 2900

normalized_metadata_by_run = course_cm.json_metadata['normalized_metadata_by_run']
assert normalized_metadata_by_run[course_run_key]['start_date'] == '2023-03-01T00:00:00Z'
assert normalized_metadata_by_run[course_run_key]['end_date'] == '2023-04-09T23:59:59Z'
assert normalized_metadata_by_run[course_run_key]['enroll_by_date'] == '2023-02-01T00:00:00Z'
assert normalized_metadata_by_run[course_run_key]['content_price'] == 2900

# Make sure the start/end dates are copied from the additional_metadata into the course run dict of the course.
# This checks that the dummy 2022 dates are overwritten.
course_run_json = course_cm.json_metadata.get('course_runs')[0]
Expand Down
1 change: 0 additions & 1 deletion enterprise_catalog/apps/api/v1/export_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,6 @@ def program_hit_to_row(hit):
return csv_row


# pylint: disable=too-many-statements
def course_hit_to_row(hit):
"""
Helper function to construct a CSV row according to a single Algolia result course hit.
Expand Down
82 changes: 36 additions & 46 deletions enterprise_catalog/apps/catalog/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,76 +83,66 @@ class NormalizedContentMetadataSerializer(ReadOnlySerializer):
downstream consumers, who should be able to use this dictionary
instead of doing their own independent normalization.
The serializer expects the following keys in the input dictionary:
- course: a ContentMetadata object representing the course
- [course_run_metadata]: Optionally, a dictionary representing a specific course run's metadata
When no course run metadata is provided, the serializer will attempt to use the course's advertised course run
metadata. If that is not available, the serializer will return None for all fields.
Note that course-type-specific definitions of each of these keys may be more nuanced.
"""

start_date = serializers.SerializerMethodField(help_text='When the course starts')
end_date = serializers.SerializerMethodField(help_text='When the course ends')
enroll_by_date = serializers.SerializerMethodField(help_text='The deadline for enrollment')
content_price = serializers.SerializerMethodField(help_text='The price of a course in USD')

@cached_property
def advertised_course_run(self):
advertised_course_run_uuid = self.instance.json_metadata.get('advertised_course_run_uuid')
return _get_course_run_by_uuid(self.instance.json_metadata, advertised_course_run_uuid)
def course(self):
return self.instance.get('course')

@cached_property
def additional_metadata(self):
return self.instance.json_metadata.get('additional_metadata', {})
def course_metadata(self):
return self.course.json_metadata

@extend_schema_field(serializers.DateTimeField)
def get_start_date(self, obj) -> str:
if obj.is_exec_ed_2u_course:
return self.additional_metadata.get('start_date')
@cached_property
def course_run_metadata(self):
run_metadata = self.instance.get('course_run_metadata')
if run_metadata:
return run_metadata
advertised_course_run_uuid = self.course_metadata.get('advertised_course_run_uuid')
return _get_course_run_by_uuid(self.course_metadata, advertised_course_run_uuid)

if not self.advertised_course_run:
@extend_schema_field(serializers.DateTimeField)
def get_start_date(self, obj) -> str: # pylint: disable=unused-argument
if not self.course_run_metadata:
return None

if start_date_string := self.advertised_course_run.get('start'):
return start_date_string

return None
return self.course_run_metadata.get('start')

@extend_schema_field(serializers.DateTimeField)
def get_end_date(self, obj) -> str:
if obj.is_exec_ed_2u_course:
return self.additional_metadata.get('end_date')

if not self.advertised_course_run:
def get_end_date(self, obj) -> str: # pylint: disable=unused-argument
if not self.course_run_metadata:
return None

if end_date_string := self.advertised_course_run.get('end'):
return end_date_string

return None
return self.course_run_metadata.get('end')

@extend_schema_field(serializers.DateTimeField)
def get_enroll_by_date(self, obj) -> str:
if not self.advertised_course_run:
def get_enroll_by_date(self, obj) -> str: # pylint: disable=unused-argument
if not self.course_run_metadata:
return None

if obj.is_exec_ed_2u_course:
return (
self.advertised_course_run.get('enrollment_end')
or self.additional_metadata.get('registration_deadline')
)

upgrade_deadline = None

all_seats = self.advertised_course_run.get('seats', [])
all_seats = self.course_run_metadata.get('seats', [])
seat = _find_best_mode_seat(all_seats)
upgrade_deadline = None
if seat:
upgrade_deadline = seat.get('upgrade_deadline_override') or seat.get('upgrade_deadline')

return upgrade_deadline or self.advertised_course_run.get('enrollment_end')
return upgrade_deadline or self.course_run_metadata.get('enrollment_end')

@extend_schema_field(serializers.FloatField)
def get_content_price(self, obj) -> float:
if obj.is_exec_ed_2u_course:
for entitlement in obj.json_metadata.get('entitlements', []):
def get_content_price(self, obj) -> float: # pylint: disable=unused-argument
if self.course.is_exec_ed_2u_course:
for entitlement in self.course_metadata.get('entitlements', []):
if entitlement.get('mode') == CourseMode.PAID_EXECUTIVE_EDUCATION:
return entitlement.get('price') or DEFAULT_NORMALIZED_PRICE

if not self.advertised_course_run:
if not self.course_run_metadata:
return None

return self.advertised_course_run.get('first_enrollable_paid_seat_price') or DEFAULT_NORMALIZED_PRICE
return self.course_run_metadata.get('first_enrollable_paid_seat_price') or DEFAULT_NORMALIZED_PRICE
Loading

0 comments on commit c847837

Please sign in to comment.