From db3cc91a0303a23c16b2677b71f0b8ead66bb684 Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Fri, 15 Mar 2024 13:26:58 -0500 Subject: [PATCH] feat: add performance metrics commands --- tutoraspects/commands_v0.py | 15 ++ tutoraspects/commands_v1.py | 16 +++ .../pythonpath/performance_metrics.py | 136 ++++++++++++++++++ .../assets/charts/Query_performance.yaml | 43 ++++++ .../assets/datasets/query_log.yaml | 125 ++++++++++++++++ .../queries/slowest_clickhouse_queries.sql | 2 +- 6 files changed, 336 insertions(+), 1 deletion(-) create mode 100644 tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py create mode 100644 tutoraspects/templates/aspects/build/aspects-superset/openedx-assets/assets/charts/Query_performance.yaml create mode 100644 tutoraspects/templates/aspects/build/aspects-superset/openedx-assets/assets/datasets/query_log.yaml diff --git a/tutoraspects/commands_v0.py b/tutoraspects/commands_v0.py index cbbcf2cde..7b3dec040 100644 --- a/tutoraspects/commands_v0.py +++ b/tutoraspects/commands_v0.py @@ -106,6 +106,21 @@ def alembic(context, command) -> None: """ runner.run_job("aspects", command) +# Ex: "tutor local do performance-metrics " +@click.command(context_settings={"ignore_unknown_options": True}) +@click.pass_obj +def performance_metrics(context) -> None: + """ + Job to import Superset assets. + """ + config = tutor_config.load(context.root) + runner = context.job_runner(config) + + command = """echo 'Performance...' && + python /app/pythonpath/performance_metrics.py && + echo 'Done!'; + """ + runner.run_job("superset", command) # Ex: "tutor local do import_assets " @click.command(context_settings={"ignore_unknown_options": True}) diff --git a/tutoraspects/commands_v1.py b/tutoraspects/commands_v1.py index c9d799da5..8540b44a5 100644 --- a/tutoraspects/commands_v1.py +++ b/tutoraspects/commands_v1.py @@ -129,6 +129,21 @@ def import_assets() -> list[tuple[str, str]]: ), ] +# Ex: "tutor local do import_assets " +@click.command(context_settings={"ignore_unknown_options": True}) +def performance_metrics() -> list[tuple[str, str]]: + """ + Job to import Superset assets. + """ + return [ + ( + "superset", + "echo 'Performance...' && " + "python /app/pythonpath/performance_metrics.py &&" + "echo 'Done!';", + ), + ] + # Ex: "tutor local do dump_data_to_clickhouse " @click.command(context_settings={"ignore_unknown_options": True}) @@ -311,6 +326,7 @@ def check_superset_assets(): dump_data_to_clickhouse, transform_tracking_logs, import_assets, + performance_metrics, ) COMMANDS = (aspects,) diff --git a/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py b/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py new file mode 100644 index 000000000..f08ac420f --- /dev/null +++ b/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py @@ -0,0 +1,136 @@ +from superset.app import create_app +app = create_app() +app.app_context().push() + +""" +The plan + +1. Load the instructor dashboard +2. Load the instructor dashboard datasets +3. Perform the following operations for every dataset: + a. Load the dataset + b. Perform queries on the dataset with/without filters + c. Perform aggregations on the dataset + d. Measure the time taken for each operation + e. Repeat 5 times and take metrics such as average, median, max, min, etc. +""" +from superset.extensions import db +from flask import g +from superset.models.dashboard import Dashboard +from superset.models.slice import Slice +from superset.charts.schemas import ChartDataQueryContextSchema +from superset.charts.data.commands.get_data_command import ChartDataCommand +from superset import security_manager +import json +from datetime import datetime +import sqlparse +from unittest.mock import patch +import uuid +import time + +ASPECTS_VERSION = "{{ASPECTS_VERSION}}" +UUID = str(uuid.uuid4())[0:6] +RUN_ID = f'aspects-{ASPECTS_VERSION}-{UUID}' + +def load_dashboards(): + """Load dashboards from the database.""" + embedable_dashboards = {{ SUPERSET_EMBEDDABLE_DASHBOARDS }} + dashboards = db.session.query(Dashboard).filter(Dashboard.slug.in_(embedable_dashboards)).all() + return dashboards + + +def performance_metrics(): + """Measure the performance of the dashboard.""" + with patch('clickhouse_connect.common.build_client_name') as mock_build_client_name: + mock_build_client_name.return_value = RUN_ID + dashboards = load_dashboards() + report = [] + for dashboard in dashboards: + print("Dashboard:", dashboard.slug) + for slice in dashboard.slices: + report.append(meassure_chart(slice)) + return report + + +report_format = """ + {i}. {slice} + Superset time: {superset_time}""" + +query_format = """ Query duration: {query_duration_ms} s. + Result rows: {result_rows} + Memory Usage (MB): {memory_usage_mb} + Row count Clickhouse {rowcount:} + Filters: {filters}""" + +def meassure_chart(slice, extra_filters=None): + """ + Meassure the performance of a chart and return the results. + """ + print("Slice:", slice) + query_context = json.loads(slice.query_context) + query_context.update({ + "result_format": "json", + "result_type": "full", + "force": True, + }) + if extra_filters: + query_context["filters"] = extra_filters + g.user = security_manager.find_user(username="admin") + query_context = ChartDataQueryContextSchema().load(query_context) + command = ChartDataCommand(query_context) + start_time = datetime.now() + result = command.run() + end_time = datetime.now() + result["time_elapsed"] = '{} s.'.format((end_time-start_time).total_seconds()) + result["slice"] = slice + return result + +def get_query_log_from_clickhouse(report): + chart_uuid = "bb13bb31-c797-4ed3-a7f9-7825cc6dc482" + + slice = db.session.query(Slice).filter(Slice.uuid == chart_uuid).one() + query_context = json.loads(slice.query_context) + query_context["queries"][0]["filters"].append( + { + 'col': 'http_user_agent', 'op': '==', 'val': RUN_ID + } + ) + slice.query_context = json.dumps(query_context) + result = meassure_chart(slice) + clickhouse_queries = {} + for query in result["queries"]: + for row in query["data"]: + #for field in ["query_duration_ms", "result_rows", "event_time", "memory_usage_mb"]: + #print(f"{field}: {row[field]}") + parsed_sql = str(sqlparse.parse(row.pop("query"))[0]) + clickhouse_queries[parsed_sql] = row + + + print("\nSuperset Reports") + for i, result in enumerate(report): + print(report_format.format( + i=(i+1), + slice=result["slice"], + superset_time=result["time_elapsed"] + )) + for i, query in enumerate(result["queries"]): + parsed_sql = str(sqlparse.parse(query["query"])[0]).replace(";", "") + "\n FORMAT Native" + clickhouse_report = clickhouse_queries.get(parsed_sql, {}) + print(query_format.format( + query_duration_ms=clickhouse_report.get("query_duration_ms") / 1000, + memory_usage_mb =clickhouse_report.get("memory_usage_mb"), + result_rows=clickhouse_report.get("result_rows"), + rowcount=query["rowcount"], + filters=query["applied_filters"], + )) + + + + +if __name__ == "__main__": + print("Running performance metrics. RUN ID:", RUN_ID) + report = performance_metrics() + # Clickhouse query log takes some seconds to log queries. + print("Waiting for clickhouse log...") + time.sleep(10) + get_query_log_from_clickhouse(report) diff --git a/tutoraspects/templates/aspects/build/aspects-superset/openedx-assets/assets/charts/Query_performance.yaml b/tutoraspects/templates/aspects/build/aspects-superset/openedx-assets/assets/charts/Query_performance.yaml new file mode 100644 index 000000000..32af356d8 --- /dev/null +++ b/tutoraspects/templates/aspects/build/aspects-superset/openedx-assets/assets/charts/Query_performance.yaml @@ -0,0 +1,43 @@ +_file_name: Query_performance.yaml +cache_timeout: null +certification_details: null +certified_by: null +dataset_uuid: 53099fd2-6739-45e5-9a93-fcb9fa70119f +description: null +params: + adhoc_filters: + - clause: WHERE + comparator: No filter + expressionType: SIMPLE + operator: TEMPORAL_RANGE + subject: event_time + all_columns: [] + color_pn: true + extra_form_data: {} + groupby: + - http_user_agent + - query_duration_ms + - result_rows + - event_time + - memory_usage_mb + - tables + - query + order_by_cols: [] + order_desc: true + percent_metrics: [] + query_mode: aggregate + row_limit: 1000 + server_page_length: 10 + show_cell_bars: true + table_timestamp_format: smart_date + temporal_columns_lookup: + event_time: true + time_grain_sqla: P1D + viz_type: table +query_context: '{"datasource":{"id":266,"type":"table"},"force":false,"queries":[{"filters":[{"col":"event_time","op":"TEMPORAL_RANGE","val":"No + filter"}],"extras":{"time_grain_sqla":"P1D","having":"","where":""},"applied_time_extras":{},"columns":["http_user_agent","query_duration_ms","result_rows",{"timeGrain":"P1D","columnType":"BASE_AXIS","sqlExpression":"event_time","label":"event_time","expressionType":"SQL"},"memory_usage_mb","tables","query"],"metrics":[],"orderby":[],"annotation_layers":[],"row_limit":1000,"series_limit":0,"order_desc":true,"url_params":{},"custom_params":{},"custom_form_data":{},"post_processing":[]}],"form_data":{"datasource":"266__table","viz_type":"table","slice_id":422,"query_mode":"aggregate","groupby":["http_user_agent","query_duration_ms","result_rows","event_time","memory_usage_mb","tables","query"],"time_grain_sqla":"P1D","temporal_columns_lookup":{"event_time":true},"all_columns":[],"percent_metrics":[],"adhoc_filters":[{"clause":"WHERE","comparator":"No + filter","expressionType":"SIMPLE","operator":"TEMPORAL_RANGE","subject":"event_time"}],"order_by_cols":[],"row_limit":1000,"server_page_length":10,"order_desc":true,"table_timestamp_format":"smart_date","show_cell_bars":true,"color_pn":true,"extra_form_data":{},"dashboards":[],"force":false,"result_format":"json","result_type":"full"},"result_format":"json","result_type":"full"}' +slice_name: Query performance +uuid: bb13bb31-c797-4ed3-a7f9-7825cc6dc482 +version: 1.0.0 +viz_type: table diff --git a/tutoraspects/templates/aspects/build/aspects-superset/openedx-assets/assets/datasets/query_log.yaml b/tutoraspects/templates/aspects/build/aspects-superset/openedx-assets/assets/datasets/query_log.yaml new file mode 100644 index 000000000..6ed95e148 --- /dev/null +++ b/tutoraspects/templates/aspects/build/aspects-superset/openedx-assets/assets/datasets/query_log.yaml @@ -0,0 +1,125 @@ +_file_name: query_log.yaml +cache_timeout: null +columns: +- advanced_data_type: null + column_name: event_time + description: null + expression: null + extra: {} + filterable: true + groupby: true + is_active: true + is_dttm: true + python_date_format: null + type: DateTime + verbose_name: Event Time +- advanced_data_type: null + column_name: query + description: null + expression: null + extra: {} + filterable: true + groupby: true + is_active: true + is_dttm: false + python_date_format: null + type: String + verbose_name: Query +- advanced_data_type: null + column_name: tables + description: null + expression: null + extra: {} + filterable: true + groupby: true + is_active: true + is_dttm: false + python_date_format: null + type: Array(LowCardinality(String)) + verbose_name: null +- advanced_data_type: null + column_name: http_user_agent + description: null + expression: null + extra: {} + filterable: true + groupby: true + is_active: true + is_dttm: false + python_date_format: null + type: LowCardinality(String) + verbose_name: null +- advanced_data_type: null + column_name: memory_usage_mb + description: null + expression: null + extra: {} + filterable: true + groupby: true + is_active: true + is_dttm: false + python_date_format: null + type: Float64 + verbose_name: null +- advanced_data_type: null + column_name: query_duration_ms + description: null + expression: null + extra: {} + filterable: true + groupby: true + is_active: true + is_dttm: false + python_date_format: null + type: UInt64 + verbose_name: null +- advanced_data_type: null + column_name: result_rows + description: null + expression: null + extra: {} + filterable: true + groupby: true + is_active: true + is_dttm: false + python_date_format: null + type: UInt64 + verbose_name: null +database_uuid: 21174b6c-4d40-4958-8161-d6c3cf5e77b6 +default_endpoint: null +description: null +extra: null +fetch_values_predicate: null +filter_select_enabled: true +main_dttm_col: null +metrics: +- currency: null + d3format: null + description: null + expression: COUNT(*) + extra: + warning_markdown: '' + metric_name: count + metric_type: count + verbose_name: COUNT(*) + warning_text: null +normalize_columns: false +offset: 0 +params: null +schema: main +sql: | + SELECT + query_duration_ms, + result_rows, + memory_usage / (1024 * 1024 ) AS memory_usage_mb, + event_time, + tables, + query, + http_user_agent + FROM system.query_log + WHERE (has(databases, '{{ASPECTS_XAPI_DATABASE}}') OR has(databases, '{{ASPECTS_EVENT_SINK_DATABASE}}') OR has(databases, '{{DBT_PROFILE_TARGET_DATABASE}}')) AND (http_user_agent LIKE 'aspects-%') AND (type = 'QueryFinish') + ORDER BY event_time DESC +table_name: query_log +template_params: null +uuid: 53099fd2-6739-45e5-9a93-fcb9fa70119f +version: 1.0.0 diff --git a/tutoraspects/templates/openedx-assets/queries/slowest_clickhouse_queries.sql b/tutoraspects/templates/openedx-assets/queries/slowest_clickhouse_queries.sql index 530da31f6..4a450f40d 100644 --- a/tutoraspects/templates/openedx-assets/queries/slowest_clickhouse_queries.sql +++ b/tutoraspects/templates/openedx-assets/queries/slowest_clickhouse_queries.sql @@ -7,6 +7,6 @@ SELECT FROM system.query_log WHERE - TYPE = 'QueryFinish' + type = 'QueryFinish' ORDER BY query_duration_ms DESC;