Skip to content

Commit

Permalink
feat: adding course review metrics to content metadata and algolia index
Browse files Browse the repository at this point in the history
  • Loading branch information
alex-sheehan-edx committed Jan 4, 2024
1 parent 4276916 commit 04457f3
Show file tree
Hide file tree
Showing 6 changed files with 250 additions and 3 deletions.
14 changes: 12 additions & 2 deletions enterprise_catalog/apps/api/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,9 @@ def _update_full_content_metadata_course(content_keys):

# Build a dictionary of the metadata that corresponds to the fetched keys to avoid a query for every course
fetched_course_keys = [course['key'] for course in full_course_dicts]

reviews_for_courses_dict = DiscoveryApiClient().get_course_reviews(fetched_course_keys)

metadata_records_for_fetched_keys = ContentMetadata.objects.filter(
content_key__in=fetched_course_keys,
)
Expand Down Expand Up @@ -384,15 +387,22 @@ def _update_full_content_metadata_course(content_keys):
start_date = json_meta.get('additional_metadata', {}).get('start_date')
end_date = json_meta.get('additional_metadata', {}).get('end_date')
course_run_uuid = json_meta.get('advertised_course_run_uuid')
for run in json_meta.get('course_runs'):
for run in json_meta.get('course_runs', []):
if run.get('uuid') == course_run_uuid:
run.update({'start': start_date, 'end': end_date})

# Perform more steps to normalize and move keys around for more consistency across content types.
_normalize_course_metadata(metadata_record)

if review := reviews_for_courses_dict.get(content_key):
metadata_record.json_metadata['reviews_count'] = review.get('reviews_count')
metadata_record.json_metadata['avg_course_rating'] = review.get('avg_course_rating')

modified_content_metadata_records.append(metadata_record)
program_content_keys = create_course_associated_programs(course_metadata_dict['programs'], metadata_record)
program_content_keys = create_course_associated_programs(
course_metadata_dict.get('programs', []),
metadata_record,
)
_update_full_content_metadata_program(program_content_keys)

ContentMetadata.objects.bulk_update(
Expand Down
57 changes: 56 additions & 1 deletion enterprise_catalog/apps/api/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,12 +440,15 @@ def test_update_full_metadata(self, mock_oauth_client, mock_partition_course_key

non_course_key = 'course-runX'

mock_oauth_client.return_value.get.return_value.status_code = 200

# Mock out the data that should be returned from discovery's /api/v1/courses and /api/v1/programs endpoints
mock_oauth_client.return_value.get.return_value.json.side_effect = [
# first call will be /api/v1/courses
{'results': [course_data_1, course_data_2, course_data_3]},
# second call will be to /api/v1/programs
{'results': [program_data]},
{'results': []},
]
mock_partition_course_keys.return_value = ([], [],)

Expand Down Expand Up @@ -588,9 +591,12 @@ def test_update_full_metadata_exec_ed(self, mock_oauth_client, mock_partition_co
# }
}

mock_oauth_client.return_value.get.return_value.status_code = 200

# Mock out the data that should be returned from discovery's /api/v1/courses endpoint
mock_oauth_client.return_value.get.return_value.json.side_effect = [
{'results': [course_data]}
{'results': [course_data]},
{'results': []},
]
mock_partition_course_keys.return_value = ([], [],)

Expand Down Expand Up @@ -2003,3 +2009,52 @@ def test_index_algolia_dry_run(self, mock_search_client):
'[ENTERPRISE_CATALOG_ALGOLIA_REINDEX] [DRY RUN] skipping algolia_client.replace_all_objects().' in record
for record in info_logs.output
)

@mock.patch('enterprise_catalog.apps.api.tasks._fetch_courses_by_keys')
@mock.patch('enterprise_catalog.apps.api.tasks.DiscoveryApiClient.get_course_reviews')
@mock.patch('enterprise_catalog.apps.api.tasks.ContentMetadata.objects.filter')
@mock.patch('enterprise_catalog.apps.api.tasks.create_course_associated_programs')
@mock.patch('enterprise_catalog.apps.api.tasks._update_full_content_metadata_program')
def test_update_full_content_metadata_course(
self,
mock_update_content_metadata_program,
mock_create_course_associated_programs,
mock_filter,
mock_get_course_reviews,
mock_fetch_courses_by_keys
):
# Mock data
content_keys = ['course1', 'course2']
full_course_dicts = [
{'key': 'course1', 'title': 'Course 1'},
{'key': 'course2', 'title': 'Course 2'}
]
reviews_for_courses_dict = {
'course1': {'reviews_count': 10, 'avg_course_rating': 4.5},
'course2': {'reviews_count': 5, 'avg_course_rating': 3.8}
}
content_metadata_1 = ContentMetadataFactory(content_type=COURSE, content_key='course1')
content_metadata_2 = ContentMetadataFactory(content_type=COURSE, content_key='course2')
metadata_records_for_fetched_keys = [content_metadata_1, content_metadata_2]

# Configure mock objects
mock_fetch_courses_by_keys.return_value = full_course_dicts
mock_get_course_reviews.return_value = reviews_for_courses_dict
mock_filter.return_value = metadata_records_for_fetched_keys

# Call the function
tasks._update_full_content_metadata_course(content_keys) # pylint: disable=protected-access

mock_fetch_courses_by_keys.assert_called_once_with(content_keys)
mock_get_course_reviews.assert_called_once_with(['course1', 'course2'])
mock_filter.assert_called_once_with(content_key__in=['course1', 'course2'])

content_metadata_1.refresh_from_db()
content_metadata_2.refresh_from_db()
assert content_metadata_1.json_metadata.get('reviews_count') == 10
assert content_metadata_1.json_metadata.get('avg_course_rating') == 4.5
assert content_metadata_2.json_metadata.get('reviews_count') == 5
assert content_metadata_2.json_metadata.get('avg_course_rating') == 3.8

self.assertEqual(mock_update_content_metadata_program.call_count, 2)
self.assertEqual(mock_create_course_associated_programs.call_count, 2)
1 change: 1 addition & 0 deletions enterprise_catalog/apps/api_client/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
DISCOVERY_SEARCH_ALL_ENDPOINT = urljoin(settings.DISCOVERY_SERVICE_API_URL, 'search/all/')
DISCOVERY_COURSES_ENDPOINT = urljoin(settings.DISCOVERY_SERVICE_API_URL, 'courses/')
DISCOVERY_PROGRAMS_ENDPOINT = urljoin(settings.DISCOVERY_SERVICE_API_URL, 'programs/')
DISCOVERY_COURSE_REVIEWS_ENDPOINT = urljoin(settings.DISCOVERY_SERVICE_API_URL, 'course_review/')
DISCOVERY_OFFSET_SIZE = 200
DISCOVERY_CATALOG_QUERY_CACHE_KEY_TPL = 'catalog_query:{id}'

Expand Down
70 changes: 70 additions & 0 deletions enterprise_catalog/apps/api_client/discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from .base_oauth import BaseOAuthClient
from .constants import (
DISCOVERY_COURSE_REVIEWS_ENDPOINT,
DISCOVERY_COURSES_ENDPOINT,
DISCOVERY_OFFSET_SIZE,
DISCOVERY_PROGRAMS_ENDPOINT,
Expand Down Expand Up @@ -80,6 +81,75 @@ def _retrieve_metadata_for_content_filter(self, content_filter, page, request_pa
break
return response.json()

def _retrieve_course_reviews(self, request_params):
"""
Makes a request to discovery's /api/v1/course_review/ paginated endpoint
"""
page = request_params.get('page', 1)
LOGGER.info(f'Retrieving course reviews from course-discovery for page {page}...')
attempts = 0
while True:
attempts = attempts + 1
successful = True
exception = None
try:
response = self.client.get(
DISCOVERY_COURSE_REVIEWS_ENDPOINT,
params=request_params,
timeout=self.HTTP_TIMEOUT,
)
successful = response.status_code < 400
elapsed_seconds = response.elapsed.total_seconds()
LOGGER.info(
f'Retrieved course review results from course-discovery for page {page} in '
f'retrieve_course_reviews_seconds={elapsed_seconds} seconds.'
)
except requests.exceptions.RequestException as err:
exception = err
LOGGER.exception(f'Error while retrieving course review results from course-discovery for page {page}')
successful = False
if attempts <= self.MAX_RETRIES and not successful:
sleep_seconds = self._calculate_backoff(attempts)
LOGGER.warning(
f'failed request detected from {DISCOVERY_SEARCH_ALL_ENDPOINT}, '
'backing-off before retrying, '
f'sleeping {sleep_seconds} seconds...'
)
time.sleep(sleep_seconds)
else:
if exception:
raise exception
break

Check warning on line 122 in enterprise_catalog/apps/api_client/discovery.py

View check run for this annotation

Codecov / codecov/patch

enterprise_catalog/apps/api_client/discovery.py#L122

Added line #L122 was not covered by tests
return response.json()

def get_course_reviews(self, course_keys=None):
"""
Return results from the discovery service's /course_review endpoint as an object of key = course key, value =
course review. If course_keys is specified, only return results for those course keys.
"""
page = 1
results = []
request_params = {'page': page}
try:
response = self._retrieve_course_reviews(request_params)
results += response.get('results', [])
# Traverse all pages and concatenate results
while response.get('next'):
page += 1
response = self._retrieve_course_reviews(request_params)
results += response.get('results', [])
except Exception as exc:
LOGGER.exception(f'Could not retrieve course reviews from course-discovery (page {page}) {exc}')
raise exc

Check warning on line 143 in enterprise_catalog/apps/api_client/discovery.py

View check run for this annotation

Codecov / codecov/patch

enterprise_catalog/apps/api_client/discovery.py#L138-L143

Added lines #L138 - L143 were not covered by tests

results = {
result.get('course_key'): result for result in results if (
course_keys is None or result.get('course_key') in course_keys
)
}

return results

def get_metadata_by_query(self, catalog_query):
"""
Return results from the discovery service's search/all endpoint.
Expand Down
81 changes: 81 additions & 0 deletions enterprise_catalog/apps/api_client/tests/test_discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
from django.test import TestCase
from simplejson import JSONDecodeError

from enterprise_catalog.apps.api_client.constants import (
DISCOVERY_COURSE_REVIEWS_ENDPOINT,
)
from enterprise_catalog.apps.catalog.tests.factories import CatalogQueryFactory

from ..discovery import DiscoveryApiClient
Expand Down Expand Up @@ -116,3 +119,81 @@ def test_get_courses_with_error(self, mock_oauth_client):

expected_response = []
self.assertEqual(actual_response, expected_response)

@mock.patch('enterprise_catalog.apps.api_client.discovery.time.sleep')
@mock.patch('enterprise_catalog.apps.api_client.discovery.LOGGER')
@mock.patch('enterprise_catalog.apps.api_client.base_oauth.OAuthAPIClient')
def test_retrieve_course_reviews_with_successful_response(
self,
mock_oauth_client,
mock_logger,
mock_sleep
):
"""
Test _retrieve_course_reviews method with successful response.
"""
mock_response = mock.Mock()
mock_response.status_code = 200
mock_response_json = {
"count": 2,
"next": None,
"previous": None,
"results": [
{
"course_key": "edX+DemoX",
"reviews_count": 150,
"avg_course_rating": "0.830000",
"confident_learners_percentage": "0.800000",
"most_common_goal": "Learn valuable skills",
"most_common_goal_learners_percentage": "0.400000",
"total_enrollments": 300
},
{
"course_key": "edX+E2E-101",
"reviews_count": 150,
"avg_course_rating": "0.830000",
"confident_learners_percentage": "0.800000",
"most_common_goal": "Learn valuable skills",
"most_common_goal_learners_percentage": "0.400000",
"total_enrollments": 300
}
]
}
mock_response.json.return_value = mock_response_json
mock_oauth_client.return_value.get.return_value = mock_response

request_params = {'page': 1}
client = DiscoveryApiClient()
response = client._retrieve_course_reviews(request_params) # pylint: disable=protected-access

assert mock_logger.info.called
mock_oauth_client.return_value.get.assert_called_with(
DISCOVERY_COURSE_REVIEWS_ENDPOINT,
params=request_params,
timeout=client.HTTP_TIMEOUT,
)
self.assertEqual(response, mock_response_json)
assert mock_sleep.not_called()

@mock.patch('enterprise_catalog.apps.api_client.discovery.time.sleep')
@mock.patch('enterprise_catalog.apps.api_client.discovery.LOGGER')
@mock.patch('enterprise_catalog.apps.api_client.base_oauth.OAuthAPIClient')
def test_retrieve_course_reviews_with_failed_response(self, mock_oauth_client, mock_logger, mock_sleep):
"""
Test _retrieve_course_reviews method with failed response.
"""
mock_oauth_client.return_value.get.side_effect = requests.exceptions.RequestException('error')

request_params = {'page': 1}
client = DiscoveryApiClient()
with self.assertRaises(requests.exceptions.RequestException):
client._retrieve_course_reviews(request_params) # pylint: disable=protected-access

assert mock_logger.info.called
mock_oauth_client.return_value.get.assert_called_with(
DISCOVERY_COURSE_REVIEWS_ENDPOINT,
params=request_params,
timeout=client.HTTP_TIMEOUT,
)
assert mock_logger.exception.called
assert mock_sleep.called
30 changes: 30 additions & 0 deletions enterprise_catalog/apps/catalog/algolia_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@
# transformed metadata to consistent schema across all course
# types (e.g., start date, end date, enroll by date).
'normalized_metadata',
'reviews_count',
'avg_course_rating',
]

# default configuration for the index
Expand Down Expand Up @@ -1141,6 +1143,32 @@ def get_learning_type_v2(content):
return content.get('content_type')


def get_reviews_count(content):
"""
Gets the content's reviews count
Arguments:
course (dict): a dictionary representing a piece of content
Returns:
int: the reviews count of the course.
"""
return content.get('reviews_count')


def get_avg_course_rating(content):
"""
Gets the content's average course rating
Arguments:
course (dict): a dictionary representing a piece of content
Returns:
float: the average course rating of the course.
"""
return content.get('avg_course_rating')


def _algolia_object_from_product(product, algolia_fields):
"""
Transforms a course or program into an Algolia object.
Expand Down Expand Up @@ -1174,6 +1202,8 @@ def _algolia_object_from_product(product, algolia_fields):
'prerequisites': get_course_prerequisites(searchable_product),
'learning_type': get_learning_type(searchable_product),
'learning_type_v2': get_learning_type_v2(searchable_product),
'reviews_count': get_reviews_count(searchable_product),
'avg_course_rating': get_avg_course_rating(searchable_product),
})
elif searchable_product.get('content_type') == PROGRAM:
searchable_product.update({
Expand Down

0 comments on commit 04457f3

Please sign in to comment.