From a5906feb41f62e2703b7f9f0cfb0fba66aefc201 Mon Sep 17 00:00:00 2001 From: Brian Mesick Date: Mon, 18 Mar 2024 16:30:32 -0400 Subject: [PATCH 1/2] feat: Add support for data pipeline load testing --- tutoraspects/patches/openedx-common-settings | 3 + ...penedx-dockerfile-post-python-requirements | 4 +- tutoraspects/plugin.py | 7 ++- ...0032_partition_tables_by_year_and_month.py | 20 +++---- .../alembic/versions/0034_load_test_table.py | 60 +++++++++++++++++++ .../apps/vector/partials/common-post.toml | 55 +++++++++-------- .../apps/vector/partials/common-pre.toml | 2 +- 7 files changed, 110 insertions(+), 41 deletions(-) create mode 100644 tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0034_load_test_table.py diff --git a/tutoraspects/patches/openedx-common-settings b/tutoraspects/patches/openedx-common-settings index 3cbe0615e..9997627cd 100644 --- a/tutoraspects/patches/openedx-common-settings +++ b/tutoraspects/patches/openedx-common-settings @@ -13,6 +13,9 @@ SUPERSET_CONFIG = { } EVENT_SINK_CLICKHOUSE_PII_MODELS = {{ EVENT_SINK_PII_MODELS }} +EVENT_ROUTING_BACKEND_BATCH_SIZE = 10 +EVENT_ROUTING_BACKEND_BATCHING_ENABLED = False + ASPECTS_INSTRUCTOR_DASHBOARDS = {{ ASPECTS_INSTRUCTOR_DASHBOARDS }} SUPERSET_EXTRA_FILTERS_FORMAT = {{ ASPECTS_SUPERSET_EXTRA_FILTERS_FORMAT }} {% if ASPECTS_ENABLE_INSTRUCTOR_DASHBOARD_PLUGIN %} diff --git a/tutoraspects/patches/openedx-dockerfile-post-python-requirements b/tutoraspects/patches/openedx-dockerfile-post-python-requirements index 7896ed796..8decd527c 100644 --- a/tutoraspects/patches/openedx-dockerfile-post-python-requirements +++ b/tutoraspects/patches/openedx-dockerfile-post-python-requirements @@ -1,4 +1,4 @@ RUN --mount=type=cache,target=/openedx/.cache/pip,sharing=shared \ - pip install "platform-plugin-aspects==0.4.0" + pip install "platform-plugin-aspects==v0.5.0" RUN --mount=type=cache,target=/openedx/.cache/pip,sharing=shared \ - pip install "edx-event-routing-backends==v8.1.1" + pip install "edx-event-routing-backends==v8.3.1" diff --git a/tutoraspects/plugin.py b/tutoraspects/plugin.py index b43d9de70..b6290c54f 100644 --- a/tutoraspects/plugin.py +++ b/tutoraspects/plugin.py @@ -28,8 +28,9 @@ # Each new setting is a pair: (setting_name, default_value). # Prefix your setting names with 'ASPECTS_'. ("ASPECTS_VERSION", __version__), - # General tutor specific settings - ("RUN_VECTOR", True), + # For out default deployment we currently use Celery -> Ralph for transport, + # so Vector is off by default. + ("RUN_VECTOR", False), ("RUN_CLICKHOUSE", True), ("RUN_RALPH", True), ("RUN_SUPERSET", True), @@ -136,6 +137,8 @@ ("ASPECTS_EVENT_SINK_RECENT_BLOCKS_MV", "most_recent_course_blocks_mv"), # Vector settings ("ASPECTS_DOCKER_HOST_SOCK_PATH", "/var/run/docker.sock"), + ("ASPECTS_VECTOR_STORE_TRACKING_LOGS", False), + ("ASPECTS_VECTOR_STORE_XAPI", True), ("ASPECTS_VECTOR_DATABASE", "openedx"), ("ASPECTS_VECTOR_RAW_TRACKING_LOGS_TABLE", "_tracking"), ("ASPECTS_VECTOR_RAW_XAPI_TABLE", "xapi_events_all"), diff --git a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0032_partition_tables_by_year_and_month.py b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0032_partition_tables_by_year_and_month.py index d6e2bd355..5ad90cb7d 100644 --- a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0032_partition_tables_by_year_and_month.py +++ b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0032_partition_tables_by_year_and_month.py @@ -1,9 +1,5 @@ """ -Partition the event_sink.user_profile table - -.. pii: Stores Open edX user profile data. -.. pii_types: user_id, name, username, location, phone_number, email_address, birth_date, biography, gender -.. pii_retirement: local_api, consumer_api +Partition the xapi table by year and month """ from alembic import op @@ -15,7 +11,7 @@ on_cluster = " ON CLUSTER '{{CLICKHOUSE_CLUSTER_NAME}}' " if "{{CLICKHOUSE_CLUSTER_NAME}}" else "" engine = "ReplicatedReplacingMergeTree" if "{{CLICKHOUSE_CLUSTER_NAME}}" else "ReplacingMergeTree" -old_user_profile_table = "{{ASPECTS_XAPI_DATABASE}}.old_{{ASPECTS_RAW_XAPI_TABLE}}" +old_xapi_table = "{{ASPECTS_XAPI_DATABASE}}.old_{{ASPECTS_RAW_XAPI_TABLE}}" def upgrade(): # Partition event_sink.user_profile table @@ -23,7 +19,7 @@ def upgrade(): op.execute( f""" RENAME TABLE {{ ASPECTS_XAPI_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }} - TO {old_user_profile_table} + TO {old_xapi_table} {on_cluster} """ ) @@ -46,13 +42,13 @@ def upgrade(): op.execute( f""" INSERT INTO {{ ASPECTS_XAPI_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }} - SELECT * FROM {old_user_profile_table} + SELECT event_id, emission_time, event FROM {old_xapi_table} """ ) # 4. Drop the old table op.execute( f""" - DROP TABLE {old_user_profile_table} + DROP TABLE {old_xapi_table} {on_cluster} """ ) @@ -64,7 +60,7 @@ def downgrade(): op.execute( f""" RENAME TABLE {{ ASPECTS_XAPI_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }} - TO {old_user_profile_table} + TO {old_xapi_table} {on_cluster} """ ) @@ -87,14 +83,14 @@ def downgrade(): op.execute( f""" INSERT INTO {{ ASPECTS_XAPI_DATABASE }}.{{ ASPECTS_RAW_XAPI_TABLE }} - SELECT * FROM {old_user_profile_table} + SELECT * FROM {old_xapi_table} """ ) # 4. Drop the old table op.execute( f""" - DROP TABLE {old_user_profile_table} + DROP TABLE {old_xapi_table} {on_cluster} """ ) diff --git a/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0034_load_test_table.py b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0034_load_test_table.py new file mode 100644 index 000000000..aa0fc4494 --- /dev/null +++ b/tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/0034_load_test_table.py @@ -0,0 +1,60 @@ +""" +Create the load_test_stats table + +This table is always created, but it will only be populated if the load test +management commands are run from the platform_plugin_aspects app. +""" +from alembic import op + + +revision = "0034" +down_revision = "0033" +branch_labels = None +depends_on = None +on_cluster = " ON CLUSTER '{{CLICKHOUSE_CLUSTER_NAME}}' " if "{{CLICKHOUSE_CLUSTER_NAME}}" else "" +engine = "ReplicatedMergeTree" if "{{CLICKHOUSE_CLUSTER_NAME}}" else "MergeTree" + + +def upgrade(): + op.execute( + f""" + CREATE TABLE IF NOT EXISTS {{ ASPECTS_EVENT_SINK_DATABASE }}.load_test_runs + {on_cluster} + ( + run_id String, + timestamp DateTime default now(), + event_type String, + extra String + ) + engine = {engine} PRIMARY KEY (run_id, timestamp) + ORDER BY (run_id, timestamp) + SETTINGS index_granularity = 8192; + """ + ) + + op.execute( + f""" + CREATE TABLE IF NOT EXISTS {{ ASPECTS_EVENT_SINK_DATABASE }}.load_test_stats + {on_cluster} + ( + run_id String, + timestamp DateTime default now(), + stats String + ) + engine = {engine} PRIMARY KEY (run_id, timestamp) + ORDER BY (run_id, timestamp) + SETTINGS index_granularity = 8192; + """ + ) + + +def downgrade(): + op.execute( + "DROP TABLE IF EXISTS {{ ASPECTS_EVENT_SINK_DATABASE }}.load_test_stats" + f"{on_cluster}" + ) + + op.execute( + "DROP TABLE IF EXISTS {{ASPECTS_EVENT_SINK_DATABASE}}.load_test_runs" + f"{on_cluster}" + ) diff --git a/tutoraspects/templates/aspects/apps/vector/partials/common-post.toml b/tutoraspects/templates/aspects/apps/vector/partials/common-post.toml index c07692d70..f76053ab4 100644 --- a/tutoraspects/templates/aspects/apps/vector/partials/common-post.toml +++ b/tutoraspects/templates/aspects/apps/vector/partials/common-post.toml @@ -1,5 +1,6 @@ -### Transforms +### Tracking logs +{% if ASPECTS_VECTOR_STORE_TRACKING_LOGS %} # Parse tracking logs: extract time [transforms.tracking] type = "remap" @@ -30,6 +31,7 @@ if err_timestamp != null { drop_on_error = true drop_on_abort = true + [transforms.tracking_debug] type = "remap" inputs = ["tracking"] @@ -38,6 +40,32 @@ source = ''' .message = parse_json!(.message) ''' +# Log all events to stdout, for debugging +[sinks.out] +type = "console" +inputs = ["tracking_debug"] +encoding.codec = "json" +encoding.only_fields = ["time", "message.context.course_id", "message.context.user_id", "message.name"] + +# # Send logs to clickhouse +[sinks.clickhouse] +type = "clickhouse" +auth.strategy = "basic" +auth.user = "{{ ASPECTS_CLICKHOUSE_VECTOR_USER }}" +auth.password = "{{ ASPECTS_CLICKHOUSE_VECTOR_PASSWORD }}" +# Required: https://github.com/timberio/vector/issues/5797 +encoding.timestamp_format = "unix" +inputs = ["tracking"] +endpoint = "{% if CLICKHOUSE_SECURE_CONNECTION %}https{% else %}http{% endif %}://{{ CLICKHOUSE_HOST }}:{{ CLICKHOUSE_INTERNAL_HTTP_PORT }}" +database = "{{ ASPECTS_VECTOR_DATABASE }}" +table = "{{ ASPECTS_VECTOR_RAW_TRACKING_LOGS_TABLE }}" +healthcheck = true + +{% endif %} + +### xAPI +{% if ASPECTS_VECTOR_STORE_XAPI %} + [transforms.xapi] type = "remap" inputs = ["openedx_containers"] @@ -80,35 +108,12 @@ source = ''' .message = parse_json!(.event) ''' -### Sinks - -# Log all events to stdout, for debugging -[sinks.out] -type = "console" -inputs = ["tracking_debug"] -encoding.codec = "json" -encoding.only_fields = ["time", "message.context.course_id", "message.context.user_id", "message.name"] - [sinks.out_xapi] type = "console" inputs = ["xapi_debug"] encoding.codec = "json" encoding.only_fields = ["event_id", "emission_time", "event"] -# # Send logs to clickhouse -[sinks.clickhouse] -type = "clickhouse" -auth.strategy = "basic" -auth.user = "{{ ASPECTS_CLICKHOUSE_VECTOR_USER }}" -auth.password = "{{ ASPECTS_CLICKHOUSE_VECTOR_PASSWORD }}" -# Required: https://github.com/timberio/vector/issues/5797 -encoding.timestamp_format = "unix" -inputs = ["tracking"] -endpoint = "{% if CLICKHOUSE_SECURE_CONNECTION %}https{% else %}http{% endif %}://{{ CLICKHOUSE_HOST }}:{{ CLICKHOUSE_INTERNAL_HTTP_PORT }}" -database = "{{ ASPECTS_VECTOR_DATABASE }}" -table = "{{ ASPECTS_VECTOR_RAW_TRACKING_LOGS_TABLE }}" -healthcheck = true - [sinks.clickhouse_xapi] type = "clickhouse" auth.strategy = "basic" @@ -124,4 +129,6 @@ database = "{{ ASPECTS_VECTOR_DATABASE }}" table = "{{ ASPECTS_VECTOR_RAW_XAPI_TABLE }}" healthcheck = true +{% endif %} + {{ patch("vector-common-toml") }} diff --git a/tutoraspects/templates/aspects/apps/vector/partials/common-pre.toml b/tutoraspects/templates/aspects/apps/vector/partials/common-pre.toml index 917af5692..e6c917710 100644 --- a/tutoraspects/templates/aspects/apps/vector/partials/common-pre.toml +++ b/tutoraspects/templates/aspects/apps/vector/partials/common-pre.toml @@ -3,4 +3,4 @@ data_dir = "/vector-data-dir" # Vector's API for introspection [api] enabled = true -address = "127.0.0.1:8686" +address = "0.0.0.0:8686" From f6f5235fe8dabf6c95ad0023f2351f853b38b0db Mon Sep 17 00:00:00 2001 From: Brian Mesick Date: Mon, 1 Apr 2024 16:40:57 -0400 Subject: [PATCH 2/2] refactor: Remove ERB batching settings --- tutoraspects/patches/openedx-common-settings | 3 --- 1 file changed, 3 deletions(-) diff --git a/tutoraspects/patches/openedx-common-settings b/tutoraspects/patches/openedx-common-settings index 9997627cd..3cbe0615e 100644 --- a/tutoraspects/patches/openedx-common-settings +++ b/tutoraspects/patches/openedx-common-settings @@ -13,9 +13,6 @@ SUPERSET_CONFIG = { } EVENT_SINK_CLICKHOUSE_PII_MODELS = {{ EVENT_SINK_PII_MODELS }} -EVENT_ROUTING_BACKEND_BATCH_SIZE = 10 -EVENT_ROUTING_BACKEND_BATCHING_ENABLED = False - ASPECTS_INSTRUCTOR_DASHBOARDS = {{ ASPECTS_INSTRUCTOR_DASHBOARDS }} SUPERSET_EXTRA_FILTERS_FORMAT = {{ ASPECTS_SUPERSET_EXTRA_FILTERS_FORMAT }} {% if ASPECTS_ENABLE_INSTRUCTOR_DASHBOARD_PLUGIN %}