From 21d6c12c04dbd313aa5f902588a3721e3cd118e7 Mon Sep 17 00:00:00 2001 From: Alexander J Sheehan Date: Wed, 20 Dec 2023 22:15:06 +0000 Subject: [PATCH] feat: adding course review metrics to content metadata and algolia index --- enterprise_catalog/apps/api/tasks.py | 14 ++- .../apps/api/tests/test_tasks.py | 57 +++++++++- .../apps/api_client/constants.py | 1 + .../apps/api_client/discovery.py | 70 ++++++++++++ .../apps/api_client/tests/test_discovery.py | 100 ++++++++++++++++++ .../apps/catalog/algolia_utils.py | 30 ++++++ 6 files changed, 269 insertions(+), 3 deletions(-) diff --git a/enterprise_catalog/apps/api/tasks.py b/enterprise_catalog/apps/api/tasks.py index 87e1d7bc8..46e002c81 100644 --- a/enterprise_catalog/apps/api/tasks.py +++ b/enterprise_catalog/apps/api/tasks.py @@ -355,6 +355,9 @@ def _update_full_content_metadata_course(content_keys): # Build a dictionary of the metadata that corresponds to the fetched keys to avoid a query for every course fetched_course_keys = [course['key'] for course in full_course_dicts] + + reviews_for_courses_dict = DiscoveryApiClient().get_course_reviews(fetched_course_keys) + metadata_records_for_fetched_keys = ContentMetadata.objects.filter( content_key__in=fetched_course_keys, ) @@ -384,15 +387,22 @@ def _update_full_content_metadata_course(content_keys): start_date = json_meta.get('additional_metadata', {}).get('start_date') end_date = json_meta.get('additional_metadata', {}).get('end_date') course_run_uuid = json_meta.get('advertised_course_run_uuid') - for run in json_meta.get('course_runs'): + for run in json_meta.get('course_runs', []): if run.get('uuid') == course_run_uuid: run.update({'start': start_date, 'end': end_date}) # Perform more steps to normalize and move keys around for more consistency across content types. _normalize_course_metadata(metadata_record) + if review := reviews_for_courses_dict.get(content_key): + metadata_record.json_metadata['reviews_count'] = review.get('reviews_count') + metadata_record.json_metadata['avg_course_rating'] = review.get('avg_course_rating') + modified_content_metadata_records.append(metadata_record) - program_content_keys = create_course_associated_programs(course_metadata_dict['programs'], metadata_record) + program_content_keys = create_course_associated_programs( + course_metadata_dict.get('programs', []), + metadata_record, + ) _update_full_content_metadata_program(program_content_keys) ContentMetadata.objects.bulk_update( diff --git a/enterprise_catalog/apps/api/tests/test_tasks.py b/enterprise_catalog/apps/api/tests/test_tasks.py index e19021bc1..73f5fb6c9 100644 --- a/enterprise_catalog/apps/api/tests/test_tasks.py +++ b/enterprise_catalog/apps/api/tests/test_tasks.py @@ -440,12 +440,15 @@ def test_update_full_metadata(self, mock_oauth_client, mock_partition_course_key non_course_key = 'course-runX' + mock_oauth_client.return_value.get.return_value.status_code = 200 + # Mock out the data that should be returned from discovery's /api/v1/courses and /api/v1/programs endpoints mock_oauth_client.return_value.get.return_value.json.side_effect = [ # first call will be /api/v1/courses {'results': [course_data_1, course_data_2, course_data_3]}, # second call will be to /api/v1/programs {'results': [program_data]}, + {'results': []}, ] mock_partition_course_keys.return_value = ([], [],) @@ -588,9 +591,12 @@ def test_update_full_metadata_exec_ed(self, mock_oauth_client, mock_partition_co # } } + mock_oauth_client.return_value.get.return_value.status_code = 200 + # Mock out the data that should be returned from discovery's /api/v1/courses endpoint mock_oauth_client.return_value.get.return_value.json.side_effect = [ - {'results': [course_data]} + {'results': [course_data]}, + {'results': []}, ] mock_partition_course_keys.return_value = ([], [],) @@ -2003,3 +2009,52 @@ def test_index_algolia_dry_run(self, mock_search_client): '[ENTERPRISE_CATALOG_ALGOLIA_REINDEX] [DRY RUN] skipping algolia_client.replace_all_objects().' in record for record in info_logs.output ) + + @mock.patch('enterprise_catalog.apps.api.tasks._fetch_courses_by_keys') + @mock.patch('enterprise_catalog.apps.api.tasks.DiscoveryApiClient.get_course_reviews') + @mock.patch('enterprise_catalog.apps.api.tasks.ContentMetadata.objects.filter') + @mock.patch('enterprise_catalog.apps.api.tasks.create_course_associated_programs') + @mock.patch('enterprise_catalog.apps.api.tasks._update_full_content_metadata_program') + def test_update_full_content_metadata_course( + self, + mock_update_content_metadata_program, + mock_create_course_associated_programs, + mock_filter, + mock_get_course_reviews, + mock_fetch_courses_by_keys + ): + # Mock data + content_keys = ['course1', 'course2'] + full_course_dicts = [ + {'key': 'course1', 'title': 'Course 1'}, + {'key': 'course2', 'title': 'Course 2'} + ] + reviews_for_courses_dict = { + 'course1': {'reviews_count': 10, 'avg_course_rating': 4.5}, + 'course2': {'reviews_count': 5, 'avg_course_rating': 3.8} + } + content_metadata_1 = ContentMetadataFactory(content_type=COURSE, content_key='course1') + content_metadata_2 = ContentMetadataFactory(content_type=COURSE, content_key='course2') + metadata_records_for_fetched_keys = [content_metadata_1, content_metadata_2] + + # Configure mock objects + mock_fetch_courses_by_keys.return_value = full_course_dicts + mock_get_course_reviews.return_value = reviews_for_courses_dict + mock_filter.return_value = metadata_records_for_fetched_keys + + # Call the function + tasks._update_full_content_metadata_course(content_keys) # pylint: disable=protected-access + + mock_fetch_courses_by_keys.assert_called_once_with(content_keys) + mock_get_course_reviews.assert_called_once_with(['course1', 'course2']) + mock_filter.assert_called_once_with(content_key__in=['course1', 'course2']) + + content_metadata_1.refresh_from_db() + content_metadata_2.refresh_from_db() + assert content_metadata_1.json_metadata.get('reviews_count') == 10 + assert content_metadata_1.json_metadata.get('avg_course_rating') == 4.5 + assert content_metadata_2.json_metadata.get('reviews_count') == 5 + assert content_metadata_2.json_metadata.get('avg_course_rating') == 3.8 + + self.assertEqual(mock_update_content_metadata_program.call_count, 2) + self.assertEqual(mock_create_course_associated_programs.call_count, 2) diff --git a/enterprise_catalog/apps/api_client/constants.py b/enterprise_catalog/apps/api_client/constants.py index 0fe7f1e36..198ac176d 100644 --- a/enterprise_catalog/apps/api_client/constants.py +++ b/enterprise_catalog/apps/api_client/constants.py @@ -10,6 +10,7 @@ DISCOVERY_SEARCH_ALL_ENDPOINT = urljoin(settings.DISCOVERY_SERVICE_API_URL, 'search/all/') DISCOVERY_COURSES_ENDPOINT = urljoin(settings.DISCOVERY_SERVICE_API_URL, 'courses/') DISCOVERY_PROGRAMS_ENDPOINT = urljoin(settings.DISCOVERY_SERVICE_API_URL, 'programs/') +DISCOVERY_COURSE_REVIEWS_ENDPOINT = urljoin(settings.DISCOVERY_SERVICE_API_URL, 'course_review/') DISCOVERY_OFFSET_SIZE = 200 DISCOVERY_CATALOG_QUERY_CACHE_KEY_TPL = 'catalog_query:{id}' diff --git a/enterprise_catalog/apps/api_client/discovery.py b/enterprise_catalog/apps/api_client/discovery.py index d287eb5d8..89ef7d5bf 100644 --- a/enterprise_catalog/apps/api_client/discovery.py +++ b/enterprise_catalog/apps/api_client/discovery.py @@ -10,6 +10,7 @@ from .base_oauth import BaseOAuthClient from .constants import ( + DISCOVERY_COURSE_REVIEWS_ENDPOINT, DISCOVERY_COURSES_ENDPOINT, DISCOVERY_OFFSET_SIZE, DISCOVERY_PROGRAMS_ENDPOINT, @@ -80,6 +81,75 @@ def _retrieve_metadata_for_content_filter(self, content_filter, page, request_pa break return response.json() + def _retrieve_course_reviews(self, request_params): + """ + Makes a request to discovery's /api/v1/course_review/ paginated endpoint + """ + page = request_params.get('page', 1) + LOGGER.info(f'Retrieving course reviews from course-discovery for page {page}...') + attempts = 0 + while True: + attempts = attempts + 1 + successful = True + exception = None + try: + response = self.client.get( + DISCOVERY_COURSE_REVIEWS_ENDPOINT, + params=request_params, + timeout=self.HTTP_TIMEOUT, + ) + successful = response.status_code < 400 + elapsed_seconds = response.elapsed.total_seconds() + LOGGER.info( + f'Retrieved course review results from course-discovery for page {page} in ' + f'retrieve_course_reviews_seconds={elapsed_seconds} seconds.' + ) + except requests.exceptions.RequestException as err: + exception = err + LOGGER.exception(f'Error while retrieving course review results from course-discovery for page {page}') + successful = False + if attempts <= self.MAX_RETRIES and not successful: + sleep_seconds = self._calculate_backoff(attempts) + LOGGER.warning( + f'failed request detected from {DISCOVERY_SEARCH_ALL_ENDPOINT}, ' + 'backing-off before retrying, ' + f'sleeping {sleep_seconds} seconds...' + ) + time.sleep(sleep_seconds) + else: + if exception: + raise exception + break + return response.json() + + def get_course_reviews(self, course_keys=None): + """ + Return results from the discovery service's /course_review endpoint as an object of key = course key, value = + course review. If course_keys is specified, only return results for those course keys. + """ + page = 1 + results = [] + request_params = {'page': page} + try: + response = self._retrieve_course_reviews(request_params) + results += response.get('results', []) + # Traverse all pages and concatenate results + while response.get('next'): + page += 1 + response = self._retrieve_course_reviews(request_params) + results += response.get('results', []) + except Exception as exc: + LOGGER.exception(f'Could not retrieve course reviews from course-discovery (page {page}) {exc}') + raise exc + + results = { + result.get('course_key'): result for result in results if ( + course_keys is None or result.get('course_key') in course_keys + ) + } + + return results + def get_metadata_by_query(self, catalog_query): """ Return results from the discovery service's search/all endpoint. diff --git a/enterprise_catalog/apps/api_client/tests/test_discovery.py b/enterprise_catalog/apps/api_client/tests/test_discovery.py index 386f3e9d1..c61bac229 100644 --- a/enterprise_catalog/apps/api_client/tests/test_discovery.py +++ b/enterprise_catalog/apps/api_client/tests/test_discovery.py @@ -5,6 +5,9 @@ from django.test import TestCase from simplejson import JSONDecodeError +from enterprise_catalog.apps.api_client.constants import ( + DISCOVERY_COURSE_REVIEWS_ENDPOINT, +) from enterprise_catalog.apps.catalog.tests.factories import CatalogQueryFactory from ..discovery import DiscoveryApiClient @@ -116,3 +119,100 @@ def test_get_courses_with_error(self, mock_oauth_client): expected_response = [] self.assertEqual(actual_response, expected_response) + + @mock.patch('enterprise_catalog.apps.api_client.discovery.time.sleep') + @mock.patch('enterprise_catalog.apps.api_client.discovery.LOGGER') + @mock.patch('enterprise_catalog.apps.api_client.base_oauth.OAuthAPIClient') + def test_retrieve_course_reviews_with_successful_response( + self, + mock_oauth_client, + mock_logger, + mock_sleep + ): + """ + Test _retrieve_course_reviews method with successful response. + """ + mock_response = mock.Mock() + mock_response.status_code = 200 + mock_response_json = { + "count": 2, + "next": None, + "previous": None, + "results": [ + { + "course_key": "edX+DemoX", + "reviews_count": 150, + "avg_course_rating": "0.830000", + "confident_learners_percentage": "0.800000", + "most_common_goal": "Learn valuable skills", + "most_common_goal_learners_percentage": "0.400000", + "total_enrollments": 300 + }, + { + "course_key": "edX+E2E-101", + "reviews_count": 150, + "avg_course_rating": "0.830000", + "confident_learners_percentage": "0.800000", + "most_common_goal": "Learn valuable skills", + "most_common_goal_learners_percentage": "0.400000", + "total_enrollments": 300 + } + ] + } + mock_response.json.return_value = mock_response_json + mock_oauth_client.return_value.get.return_value = mock_response + + request_params = {'page': 1} + client = DiscoveryApiClient() + response = client.get_course_reviews() + + assert mock_logger.info.called + mock_oauth_client.return_value.get.assert_called_with( + DISCOVERY_COURSE_REVIEWS_ENDPOINT, + params=request_params, + timeout=client.HTTP_TIMEOUT, + ) + self.assertEqual(response, { + "edX+DemoX": { + "course_key": "edX+DemoX", + "reviews_count": 150, + "avg_course_rating": "0.830000", + "confident_learners_percentage": "0.800000", + "most_common_goal": "Learn valuable skills", + "most_common_goal_learners_percentage": "0.400000", + "total_enrollments": 300 + }, + "edX+E2E-101": { + "course_key": "edX+E2E-101", + "reviews_count": 150, + "avg_course_rating": "0.830000", + "confident_learners_percentage": "0.800000", + "most_common_goal": "Learn valuable skills", + "most_common_goal_learners_percentage": "0.400000", + "total_enrollments": 300 + } + }) + assert mock_sleep.not_called() + + @mock.patch('enterprise_catalog.apps.api_client.discovery.time.sleep') + @mock.patch('enterprise_catalog.apps.api_client.discovery.LOGGER') + @mock.patch('enterprise_catalog.apps.api_client.base_oauth.OAuthAPIClient') + def test_retrieve_course_reviews_with_failed_response(self, mock_oauth_client, mock_logger, mock_sleep): + """ + Test _retrieve_course_reviews method with failed response. + """ + mock_oauth_client.return_value.get.side_effect = requests.exceptions.RequestException('error') + + request_params = {'page': 1} + client = DiscoveryApiClient() + with self.assertRaises(requests.exceptions.RequestException): + client.get_course_reviews() + + assert mock_logger.info.called + mock_oauth_client.return_value.get.assert_called_with( + DISCOVERY_COURSE_REVIEWS_ENDPOINT, + params=request_params, + timeout=client.HTTP_TIMEOUT, + ) + assert mock_logger.exception.called + assert mock_sleep.called diff --git a/enterprise_catalog/apps/catalog/algolia_utils.py b/enterprise_catalog/apps/catalog/algolia_utils.py index b63552198..3f511f484 100644 --- a/enterprise_catalog/apps/catalog/algolia_utils.py +++ b/enterprise_catalog/apps/catalog/algolia_utils.py @@ -83,6 +83,8 @@ # transformed metadata to consistent schema across all course # types (e.g., start date, end date, enroll by date). 'normalized_metadata', + 'reviews_count', + 'avg_course_rating', ] # default configuration for the index @@ -1141,6 +1143,32 @@ def get_learning_type_v2(content): return content.get('content_type') +def get_reviews_count(content): + """ + Gets the content's reviews count + + Arguments: + course (dict): a dictionary representing a piece of content + + Returns: + int: the reviews count of the course. + """ + return content.get('reviews_count') + + +def get_avg_course_rating(content): + """ + Gets the content's average course rating + + Arguments: + course (dict): a dictionary representing a piece of content + + Returns: + float: the average course rating of the course. + """ + return content.get('avg_course_rating') + + def _algolia_object_from_product(product, algolia_fields): """ Transforms a course or program into an Algolia object. @@ -1174,6 +1202,8 @@ def _algolia_object_from_product(product, algolia_fields): 'prerequisites': get_course_prerequisites(searchable_product), 'learning_type': get_learning_type(searchable_product), 'learning_type_v2': get_learning_type_v2(searchable_product), + 'reviews_count': get_reviews_count(searchable_product), + 'avg_course_rating': get_avg_course_rating(searchable_product), }) elif searchable_product.get('content_type') == PROGRAM: searchable_product.update({