Skip to content

Commit

Permalink
feat: add performance metrics commands (#657)
Browse files Browse the repository at this point in the history
* fix: only filter datasets on dashboards

* fix: restore query context for charts

* fix: use default locale on failed oauth remote connection

* feat: add performance metrics commands

* build: add performance metrics step in CI

* chore: quality fixes

* chore: use logger instead of print

* chore: restore instructor dashboard chart ids

* fix: inject datasource_id

* chore: only filter datasets on dashboard

* chore: address PR suggestions
  • Loading branch information
Cristhian Garcia authored Mar 18, 2024
1 parent 0b4caff commit 59139d6
Show file tree
Hide file tree
Showing 32 changed files with 507 additions and 44 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/integration-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ jobs:
tutor local do dump-data-to-clickhouse --options "--object course_overviews"
make extract_translations
tutor local do import-assets
- name: Performance metrics
run: tutor local do performance-metrics
- name: Tutor stop
run: tutor local stop

Expand Down Expand Up @@ -136,6 +138,8 @@ jobs:
tutor dev do dump-data-to-clickhouse --options "--object course_overviews"
make extract_translations
tutor dev do import-assets
- name: Performance metrics
run: tutor dev do performance-metrics
- name: Tutor stop
run: tutor dev stop

Expand Down Expand Up @@ -218,6 +222,8 @@ jobs:
tutor k8s do dump-data-to-clickhouse --options "--object course_overviews"
make extract_translations
tutor k8s do import-assets
- name: Performance metrics
run: tutor k8s do performance-metrics
- name: Check failure logs
if: failure()
run: |
Expand Down
3 changes: 1 addition & 2 deletions tutoraspects/asset_command_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,12 +157,11 @@ class ChartAsset(Asset):

path = "charts"
omitted_vars = [
"query_context",
"params.dashboards",
"params.datasource",
"params.slice_id",
]
raw_vars = ["sqlExpression"]
raw_vars = ["sqlExpression", "query_context"]


class DashboardAsset(Asset):
Expand Down
17 changes: 17 additions & 0 deletions tutoraspects/commands_v0.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,23 @@ def alembic(context, command) -> None:
runner.run_job("aspects", command)


# Ex: "tutor local do performance-metrics"
@click.command(context_settings={"ignore_unknown_options": True})
@click.pass_obj
def performance_metrics(context) -> None:
    """
    Run the chart performance report as a job in the "superset" service.

    The Tutor configuration is loaded from ``context.root`` and used to build
    the job runner, which then executes ``performance_metrics.py`` wrapped
    between two progress messages.
    """
    job_runner = context.job_runner(tutor_config.load(context.root))

    # Explicit "\n" escapes keep the shell script independent of how this
    # source file happens to be indented.
    shell_script = (
        "echo 'Performance...' &&\n"
        "python /app/pythonpath/performance_metrics.py &&\n"
        "echo 'Done!';\n"
    )
    job_runner.run_job("superset", shell_script)


# Ex: "tutor local do import-assets"
@click.command(context_settings={"ignore_unknown_options": True})
@click.pass_obj
Expand Down
17 changes: 17 additions & 0 deletions tutoraspects/commands_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,22 @@ def import_assets() -> list[tuple[str, str]]:
]


# Ex: "tutor local do performance-metrics"
@click.command(context_settings={"ignore_unknown_options": True})
def performance_metrics() -> list[tuple[str, str]]:
    """
    Describe the job that measures chart and query performance.

    Returns a single ``(service, command)`` pair: the "superset" service and
    the shell command that runs ``performance_metrics.py`` between two
    progress messages.
    """
    shell_command = (
        "echo 'Performance...' && "
        "python /app/pythonpath/performance_metrics.py &&"
        "echo 'Done!';"
    )
    return [("superset", shell_command)]


# Ex: "tutor local do dump-data-to-clickhouse"
@click.command(context_settings={"ignore_unknown_options": True})
@click.option(
Expand Down Expand Up @@ -311,6 +327,7 @@ def check_superset_assets():
dump_data_to_clickhouse,
transform_tracking_logs,
import_assets,
performance_metrics,
)

COMMANDS = (aspects,)
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,18 @@ def get_preferences(self, username):
openedx_apis = current_app.config["OPENEDX_API_URLS"]
url = openedx_apis["get_preference"].format(username=username)
oauth_remote = self.oauth_remotes.get("openedxsso")
response = oauth_remote.get(url, token=self.get_oauth_token()).json()
locale_preference = response.get("pref-lang", "en").replace("-", "_")
locale_preference = "en"
try:
response = oauth_remote.get(url, token=self.get_oauth_token()).json()
locale_preference = response.get("pref-lang", "en").replace("-", "_")
except Exception as e:
return locale_preference

if locale_preference not in current_app.config["DASHBOARD_LOCALES"]:
log.warning(
f"Language {locale_preference} is not supported by Superset"
)
locale_preference = "en"
log.warning(
f"Language {locale_preference} is not supported by Superset"
)
return locale_preference

return locale_preference

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
from superset.app import create_app

# Create the Superset application and push an application context at import
# time, before the remaining imports of this script are executed.
# NOTE(review): presumably the Superset modules imported below need an active
# app context to be importable/usable from a standalone script — confirm.
app = create_app()
app.app_context().push()


import json
import logging
import time
import uuid
from datetime import datetime
from unittest.mock import patch

import sqlparse
from flask import g
from superset import security_manager
from superset.charts.data.commands.get_data_command import ChartDataCommand
from superset.charts.schemas import ChartDataQueryContextSchema
from superset.extensions import db
from superset.models.dashboard import Dashboard
from superset.models.slice import Slice

logger = logging.getLogger("performance_metrics")

# Placeholder for the Aspects version.
# NOTE(review): presumably substituted by the template renderer before this
# script runs — confirm.
ASPECTS_VERSION = "{{ASPECTS_VERSION}}"
# Six-character random suffix that makes every run identifier unique.
UUID = uuid.uuid4().hex[:6]
# Identifier attached to this run; also used to find its queries afterwards.
RUN_ID = "aspects-{}-{}".format(ASPECTS_VERSION, UUID)

# Per-chart header of the report.
report_format = "{i}. {slice}\nSuperset time: {superset_time} (s).\n"

# Per-query section of the report.
query_format = "".join(
    [
        "Query duration: {query_duration_ms} (s).\n",
        "Result rows: {result_rows}\n",
        "Memory Usage (MB): {memory_usage_mb}\n",
        "Row count (superset) {rowcount:}\n",
        "Filters: {filters}\n\n",
    ]
)


def performance_metrics():
    """
    Measure every chart of the configured dashboards.

    Returns a list with one ``measure_chart`` result per chart; the ``data``
    entry of each query in a result is removed before it is collected.
    """
    # Replace the ClickHouse client name with RUN_ID so that the queries of
    # this run can be identified in the clickhouse system.query_log table
    # by the http_user_agent field.
    with patch("clickhouse_connect.common.build_client_name") as client_name_mock:
        client_name_mock.return_value = RUN_ID
        dashboard_slugs = {{SUPERSET_EMBEDDABLE_DASHBOARDS}}
        selected_dashboards = db.session.query(Dashboard).filter(
            Dashboard.slug.in_(dashboard_slugs)
        )

        chart_results = []
        for dashboard in selected_dashboards.all():
            logger.info(f"Dashboard: {dashboard.slug}")
            for chart in dashboard.slices:
                chart_result = measure_chart(chart)
                # Drop the result rows: only timings and metadata are needed,
                # and keeping the rows could exhaust memory on large datasets.
                for query_payload in chart_result["queries"]:
                    query_payload.pop("data")
                chart_results.append(chart_result)

    return chart_results


def measure_chart(slice, extra_filters=None):
    """
    Run a chart's saved query context through Superset and time it.

    Args:
        slice: Chart to measure. Its ``query_context`` attribute (a JSON
            string) and its ``datasource_id`` are read.
        extra_filters: Optional list of filter dicts appended to the
            ``filters`` list of every query in the query context.

    Returns:
        The value returned by ``ChartDataCommand.run()`` with two keys added:
        ``time_elapsed`` (seconds, float) and ``slice`` (the measured chart).
    """
    logger.info(f"Fetching slice data: {slice}")
    query_context = json.loads(slice.query_context)
    query_context.update(
        {
            "result_format": "json",
            "result_type": "full",
            # Bypass cached results so the query is really executed.
            "force": True,
            # The stored context carries the datasource id of the instance
            # it was exported from; point it at this chart's datasource.
            "datasource": {
                "type": "table",
                "id": slice.datasource_id,
            },
        }
    )

    if extra_filters:
        # Filters belong to each entry of "queries"; the query context has no
        # top-level "filters" key, so indexing it directly raises KeyError.
        for query in query_context.get("queries", []):
            query.setdefault("filters", []).extend(extra_filters)

    g.user = security_manager.find_user(username="{{SUPERSET_ADMIN_USERNAME}}")
    query_context = ChartDataQueryContextSchema().load(query_context)
    command = ChartDataCommand(query_context)

    # perf_counter is monotonic, so the measurement cannot be skewed by
    # wall-clock adjustments happening while the query runs.
    start_time = time.perf_counter()
    result = command.run()
    result["time_elapsed"] = time.perf_counter() - start_time
    result["slice"] = slice
    return result


def get_query_log_from_clickhouse(report):
    """
    Log a report combining Superset timings with ClickHouse query statistics.

    The chart identified by a fixed UUID is queried with an extra
    ``http_user_agent == RUN_ID`` filter, and its rows are indexed by their
    normalized ``query`` text. Each query in ``report`` is then matched
    against that index to pick up its ClickHouse statistics.
    NOTE(review): presumably that chart reads ClickHouse's query log — confirm.

    Args:
        report: List of ``measure_chart`` results. Each needs the keys
            ``time_elapsed``, ``slice`` and ``queries``; every query needs
            ``query``, ``rowcount`` and ``applied_filters``.

    Returns:
        None. The report, sorted from slowest to fastest chart, is written to
        the module logger. Statistics of queries without a matching ClickHouse
        row are reported as ``None``.
    """
    chart_uuid = "bb13bb31-c797-4ed3-a7f9-7825cc6dc482"

    log_chart = db.session.query(Slice).filter(Slice.uuid == chart_uuid).one()

    original_query_context = log_chart.query_context
    query_context = json.loads(original_query_context)
    query_context["queries"][0]["filters"].append(
        {"col": "http_user_agent", "op": "==", "val": RUN_ID}
    )
    log_chart.query_context = json.dumps(query_context)
    try:
        log_result = measure_chart(log_chart)
    finally:
        # The chart is a session-bound object: restore its stored context so
        # the run-specific filter is not left on it after this measurement.
        log_chart.query_context = original_query_context

    clickhouse_queries = {}
    for query in log_result["queries"]:
        for row in query["data"]:
            parsed_sql = str(sqlparse.parse(row.pop("query"))[0])
            clickhouse_queries[parsed_sql] = row

    # Sort report by slowest queries
    sorted_report = sorted(report, key=lambda x: x["time_elapsed"], reverse=True)

    parts = [f"\nSuperset Reports: {RUN_ID}\n\n"]
    for position, result in enumerate(sorted_report, start=1):
        parts.append(
            report_format.format(
                i=position, slice=result["slice"], superset_time=result["time_elapsed"]
            )
        )
        for query in result["queries"]:
            # Rebuild the statement the way it is looked up in the logged
            # rows: no semicolons and with the "FORMAT Native" suffix.
            parsed_sql = (
                str(sqlparse.parse(query["query"])[0]).replace(";", "")
                + "\n FORMAT Native"
            )
            clickhouse_report = clickhouse_queries.get(parsed_sql, {})
            # A query may have no logged row (not flushed yet, or its text
            # does not match); dividing None by 1000 would raise TypeError.
            duration_ms = clickhouse_report.get("query_duration_ms")
            duration_s = None if duration_ms is None else duration_ms / 1000
            parts.append(
                query_format.format(
                    query_duration_ms=duration_s,
                    memory_usage_mb=clickhouse_report.get("memory_usage_mb"),
                    result_rows=clickhouse_report.get("result_rows"),
                    rowcount=query["rowcount"],
                    filters=query["applied_filters"],
                )
            )
    logger.info("".join(parts))


if __name__ == "__main__":
    logger.info(f"Running performance metrics. RUN ID: {RUN_ID}")
    chart_results = performance_metrics()

    # The ClickHouse query log needs a few seconds before the queries that
    # were just executed show up in it, so pause before reading it back.
    logger.info("Waiting for clickhouse log...")
    time.sleep(10)

    get_query_log_from_clickhouse(chart_results)
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,16 @@ dataset_uuid: d777bd95-2110-46db-a1c4-8358be81a85a
description: The distribution of grades for a course, out of 100%. Grades are grouped
in ranges of 10%.
params:
adhoc_filters: []
adhoc_filters:
- clause: WHERE
comparator: No filter
expressionType: SIMPLE
operator: TEMPORAL_RANGE
subject: emission_time
bottom_margin: auto
color_scheme: supersetColors
columns: []
extra_form_data: {}
granularity_sqla: emission_time
groupby:
- grade_bucket
metrics:
Expand All @@ -29,6 +33,10 @@ params:
- null
y_axis_format: SMART_NUMBER
y_axis_label: Number Of Students
query_context: '{"datasource":{"id":236,"type":"table"},"force":false,"queries":[{"filters":[{"col":"emission_time","op":"TEMPORAL_RANGE","val":"No
filter"}],"extras":{"having":"","where":""},"applied_time_extras":{},"columns":["grade_bucket"],"metrics":["students"],"annotation_layers":[],"row_limit":10000,"series_limit":0,"order_desc":true,"url_params":{},"custom_params":{},"custom_form_data":{}}],"form_data":{"datasource":"236__table","viz_type":"dist_bar","slice_id":298,"metrics":["students"],"adhoc_filters":[{"clause":"WHERE","subject":"emission_time","operator":"TEMPORAL_RANGE","comparator":"No
filter","expressionType":"SIMPLE"}],"groupby":["grade_bucket"],"columns":[],"row_limit":10000,"order_desc":true,"color_scheme":"supersetColors","show_legend":false,"rich_tooltip":true,"order_bars":true,"y_axis_format":"SMART_NUMBER","y_axis_label":"Number
Of Students","y_axis_bounds":[null,null],"x_axis_label":"Course Grade (out of 100%)","bottom_margin":"auto","x_ticks_layout":"auto","extra_form_data":{},"dashboards":[148],"force":false,"result_format":"json","result_type":"full"},"result_format":"json","result_type":"full"}'
slice_name: Course Grade Distribution
uuid: f9adbc85-1f50-4c04-ace3-31ba7390de5e
version: 1.0.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,17 @@ params:
operatorId: EQUALS
sqlExpression: null
subject: enrollment_status
- clause: WHERE
comparator: No filter
expressionType: SIMPLE
operator: TEMPORAL_RANGE
subject: enrollment_status_date
color_picker:
a: 1
b: 135
g: 122
r: 0
extra_form_data: {}
granularity_sqla: enrollment_status_date
header_font_size: 0.4
metric: count
rolling_type: None
Expand All @@ -34,9 +38,12 @@ params:
subheader_font_size: 0.15
time_format: smart_date
time_grain_sqla: P1D
time_range: No filter
viz_type: big_number
x_axis: enrollment_status_date
y_axis_format: SMART_NUMBER
query_context: '{"datasource":{"id":188,"type":"table"},"force":false,"queries":[{"filters":[{"col":"enrollment_status","op":"==","val":"registered"},{"col":"enrollment_status_date","op":"TEMPORAL_RANGE","val":"No
filter"}],"extras":{"having":"","where":""},"applied_time_extras":{},"columns":[{"timeGrain":"P1D","columnType":"BASE_AXIS","sqlExpression":"enrollment_status_date","label":"enrollment_status_date","expressionType":"SQL"}],"metrics":["count"],"annotation_layers":[],"series_limit":0,"order_desc":true,"url_params":{},"custom_params":{},"custom_form_data":{},"post_processing":[{"operation":"pivot","options":{"index":["enrollment_status_date"],"columns":[],"aggregates":{"count":{"operator":"mean"}},"drop_missing_columns":true}},{"operation":"flatten"}]}],"form_data":{"datasource":"188__table","viz_type":"big_number","slice_id":344,"x_axis":"enrollment_status_date","time_grain_sqla":"P1D","metric":"count","adhoc_filters":[{"clause":"WHERE","comparator":"registered","expressionType":"SIMPLE","filterOptionName":"filter_hcnm4t7piq6_hfbtt65nqqs","isExtra":false,"isNew":false,"operator":"==","operatorId":"EQUALS","sqlExpression":null,"subject":"enrollment_status"},{"clause":"WHERE","subject":"enrollment_status_date","operator":"TEMPORAL_RANGE","comparator":"No
filter","expressionType":"SIMPLE"}],"show_trend_line":true,"start_y_axis_at_zero":true,"color_picker":{"a":1,"b":135,"g":122,"r":0},"header_font_size":0.4,"subheader_font_size":0.15,"y_axis_format":"SMART_NUMBER","time_format":"smart_date","rolling_type":"None","extra_form_data":{},"dashboards":[148],"force":false,"result_format":"json","result_type":"full"},"result_format":"json","result_type":"full"}'
slice_name: Currently Enrolled Learners Per Day
uuid: ed2fe731-6544-422f-bc55-42f399f48b2c
version: 1.0.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@ certified_by: null
dataset_uuid: 39d1e786-c0c8-4c56-81c8-56fb0df88001
description: null
params:
adhoc_filters: []
adhoc_filters:
- clause: WHERE
comparator: No filter
expressionType: SIMPLE
operator: TEMPORAL_RANGE
subject: emission_time
extra_form_data: {}
granularity_sqla: emission_time
header_font_size: 0.4
metric:
aggregate: COUNT_DISTINCT
Expand Down Expand Up @@ -36,9 +40,11 @@ params:
sqlExpression: null
subheader_font_size: 0.15
time_format: smart_date
time_range: No filter
viz_type: big_number_total
y_axis_format: SMART_NUMBER
query_context: '{"datasource":{"id":242,"type":"table"},"force":false,"queries":[{"filters":[{"col":"emission_time","op":"TEMPORAL_RANGE","val":"No
filter"}],"extras":{"having":"","where":""},"applied_time_extras":{},"columns":[],"metrics":[{"aggregate":"COUNT_DISTINCT","column":{"advanced_data_type":null,"certification_details":null,"certified_by":null,"column_name":"actor_id","description":null,"expression":null,"filterable":true,"groupby":true,"id":445,"is_certified":false,"is_dttm":false,"python_date_format":null,"type":"String","type_generic":1,"verbose_name":null,"warning_markdown":null},"datasourceWarning":false,"expressionType":"SIMPLE","hasCustomLabel":false,"label":"COUNT_DISTINCT(actor_id)","optionName":"metric_5y4uvwa13v4_f12i3twecs6","sqlExpression":null}],"annotation_layers":[],"series_limit":0,"order_desc":true,"url_params":{},"custom_params":{},"custom_form_data":{}}],"form_data":{"datasource":"242__table","viz_type":"big_number_total","slice_id":395,"metric":{"aggregate":"COUNT_DISTINCT","column":{"advanced_data_type":null,"certification_details":null,"certified_by":null,"column_name":"actor_id","description":null,"expression":null,"filterable":true,"groupby":true,"id":445,"is_certified":false,"is_dttm":false,"python_date_format":null,"type":"String","type_generic":1,"verbose_name":null,"warning_markdown":null},"datasourceWarning":false,"expressionType":"SIMPLE","hasCustomLabel":false,"label":"COUNT_DISTINCT(actor_id)","optionName":"metric_5y4uvwa13v4_f12i3twecs6","sqlExpression":null},"adhoc_filters":[{"clause":"WHERE","subject":"emission_time","operator":"TEMPORAL_RANGE","comparator":"No
filter","expressionType":"SIMPLE"}],"header_font_size":0.4,"subheader_font_size":0.15,"y_axis_format":"SMART_NUMBER","time_format":"smart_date","extra_form_data":{},"dashboards":[148],"force":false,"result_format":"json","result_type":"full"},"result_format":"json","result_type":"full"}'
slice_name: Distinct forum users
uuid: feb323ad-c819-49ca-a336-584bd9ff1a2e
version: 1.0.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ params:
- null
y_axis_format: SMART_NUMBER
y_axis_label: Students
query_context: '{"datasource":{"id":209,"type":"table"},"force":false,"queries":[{"filters":[{"col":"success","op":"==","val":"true"}],"extras":{"having":"","where":""},"applied_time_extras":{},"columns":["attempts"],"metrics":["students"],"annotation_layers":[],"row_limit":10000,"series_limit":0,"order_desc":true,"url_params":{},"custom_params":{},"custom_form_data":{}}],"form_data":{"datasource":"209__table","viz_type":"dist_bar","slice_id":331,"metrics":["students"],"adhoc_filters":[{"clause":"WHERE","comparator":"true","expressionType":"SIMPLE","filterOptionName":"filter_0fpmws3t1h6a_md2ud9xse7m","isExtra":false,"isNew":false,"operator":"==","operatorId":"EQUALS","sqlExpression":null,"subject":"success"}],"groupby":["attempts"],"columns":[],"row_limit":10000,"order_desc":true,"color_scheme":"supersetColors","show_legend":false,"rich_tooltip":true,"order_bars":true,"y_axis_format":"SMART_NUMBER","y_axis_label":"Students","y_axis_bounds":[null,null],"x_axis_label":"Number
Of Attempts To Find Correct Answer","bottom_margin":"auto","x_ticks_layout":"auto","extra_form_data":{},"dashboards":[148],"force":false,"result_format":"json","result_type":"full"},"result_format":"json","result_type":"full"}'
slice_name: Distribution Of Attempts
uuid: db90930f-f16e-4c32-8050-0e4abae28f4c
version: 1.0.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ params:
- null
y_axis_format: SMART_NUMBER
y_axis_label: Students
query_context: '{"datasource":{"id":191,"type":"table"},"force":false,"queries":[{"filters":[],"extras":{"having":"","where":""},"applied_time_extras":{},"columns":["total_hints"],"metrics":["students"],"annotation_layers":[],"row_limit":10000,"series_limit":0,"order_desc":true,"url_params":{},"custom_params":{},"custom_form_data":{}}],"form_data":{"datasource":"191__table","viz_type":"dist_bar","slice_id":307,"metrics":["students"],"adhoc_filters":[],"groupby":["total_hints"],"columns":[],"row_limit":10000,"order_desc":true,"color_scheme":"supersetColors","show_legend":false,"rich_tooltip":true,"y_axis_format":"SMART_NUMBER","y_axis_label":"Students","y_axis_bounds":[null,null],"x_axis_label":"Hints
/ Answer Displayed Before Correct Answer Chosen","bottom_margin":"auto","x_ticks_layout":"auto","extra_form_data":{},"dashboards":[148],"force":false,"result_format":"json","result_type":"full"},"result_format":"json","result_type":"full"}'
slice_name: Distribution Of Hints Per Correct Answer
uuid: ee94be4c-6fdd-4295-b43c-40890d6c549d
version: 1.0.0
Expand Down
Loading

0 comments on commit 59139d6

Please sign in to comment.