Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: allow to translate dataset text on chart #648

Merged
merged 16 commits into from
Mar 13, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/workflows/integration-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ name: Tutor Integration Test

on:
pull_request:
branches: [ main ]
paths-ignore:
- .github/**
- .ci/**
Expand Down
8 changes: 7 additions & 1 deletion scripts/translate_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,13 @@ class DatasetAsset(TranslatableAsset):
"table_name": ("datasets", DatasetAsset),
}

BASE_PATH = "tutoraspects/templates/aspects/build/aspects-superset/"

def get_text_for_translations(root_path):
assets_path = (
os.path.join(
root_path,
"tutoraspects/templates/aspects/build/aspects-superset/"
BASE_PATH,
"openedx-assets/assets/"
)
)
Expand All @@ -112,6 +113,11 @@ def get_text_for_translations(root_path):
asset = yaml.safe_load(asset_str)
strings.extend(mark_text_for_translation(asset))

with open(BASE_PATH + "localization/datasets_strings.yaml", 'r') as file:
dataset_strings = yaml.safe_load(file.read())
for key in dataset_strings:
strings.extend(dataset_strings[key])
print(f"Extracted {len(dataset_strings[key])} strings for dataset {key}")
return strings


Expand Down
25 changes: 23 additions & 2 deletions tutoraspects/asset_command_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ class Asset:
templated_vars = None
required_vars = None
omitted_vars = None
raw_vars = None

def __init__(self):
if not self.path:
Expand Down Expand Up @@ -92,6 +93,12 @@ def get_omitted_vars(self):
"""
return self.omitted_vars or []

def get_raw_vars(self):
"""
Returns a list of variables which should be omitted from the content.
"""
return self.raw_vars or []

def remove_content(self, content: dict):
"""
Remove any variables from the content which should be omitted.
Expand Down Expand Up @@ -122,18 +129,32 @@ def omit_templated_vars(self, content: dict, existing: dict):
continue
if isinstance(existing[key], str):
if "{{" in existing.get(key, "") or "{%" in existing.get(key, ""):
content[key] = existing[key]
if key in self.get_raw_vars():
raw_expression = "{% raw %}" + content[key] + "{% endraw %}"
content[key] = raw_expression
else:
content[key] = existing[key]

if isinstance(existing[key], dict):
self.omit_templated_vars(content[key], existing[key])

if isinstance(existing[key], list):
for i, item in enumerate(content[key]):
if isinstance(item, dict):
try:
existing[key][i]
self.omit_templated_vars(item, existing[key][i] or None)
except IndexError:
pass

class ChartAsset(Asset):
"""
Chart assets.
"""

path = "charts"
omitted_vars = ["query_context"]
omitted_vars = ["query_context", "params.dashboards", "params.datasource", "params.slice_id"]
raw_vars = ["sqlExpression"]


class DashboardAsset(Asset):
Expand Down
2 changes: 1 addition & 1 deletion tutoraspects/patches/local-docker-compose-dev-services
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ superset:
{% include 'base-docker-compose-services' %}
OPENEDX_LMS_ROOT_URL: "http://{{ LMS_HOST }}:8000"
OAUTH2_CLIENT_ID: {{ SUPERSET_OAUTH2_CLIENT_ID_DEV }}
command: ["bash", "/app/docker/docker-bootstrap.sh", "app-gunicorn"]
command: ["bash", "/app/docker/docker-bootstrap.sh", "app"]
ports:
- 8088:{{ SUPERSET_PORT }}
depends_on:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ elif [[ "${1}" == "beat" ]]; then
celery --app=superset.tasks.celery_app:app beat --pidfile /tmp/celerybeat.pid -l INFO -s "${SUPERSET_HOME}"/celerybeat-schedule
elif [[ "${1}" == "app" ]]; then
echo "Starting web app..."
flask run -p 8088 --with-threads --reload --debugger --host=0.0.0.0
flask run -p 8088 --with-threads --reload --debugger --debug --host=0.0.0.0
Copy link
Contributor

@SoryRawyer SoryRawyer Mar 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the --debug flag meant to stay in?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this only will affect dev environment, local en k8s works the same as before.

elif [[ "${1}" == "app-gunicorn" ]]; then
echo "Starting web app..."
/usr/bin/run-server.sh
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from superset.models.dashboard import Dashboard
from superset.utils.database import get_or_create_db
from superset.models.embedded_dashboard import EmbeddedDashboard
from pythonpath.localization import get_translation

BASE_DIR = "/app/assets/superset"

Expand All @@ -34,26 +35,9 @@

FILE_NAME_ATTRIBUTE = "_file_name"

TRANSLATIONS_FILE_PATH = "/app/localization/locale.yaml"
ASSETS_FILE_PATH = "/app/pythonpath/assets.yaml"
ASSETS_PATH = "/app/openedx-assets"

merged_data = {}
with open(TRANSLATIONS_FILE_PATH, "r") as file:
yaml_content = file.read()
yaml_documents = yaml_content.split("\n---\n")

for doc in yaml_documents:
data = yaml.safe_load(doc)
if data is not None:
for lang, translations in data.items():
if lang not in merged_data:
merged_data[lang] = {}
merged_data[lang].update(translations)


ASSETS_TRANSLATIONS = merged_data


def main():
create_assets()
Expand Down Expand Up @@ -297,14 +281,6 @@ def update_embeddable_uuids():
db.session.add(embedded_dashboard)
db.session.commit()

def get_translation(text, language):
"""Get a translation for a text in a language"""
default_text = f"{text}"
LANGUAGE = ASSETS_TRANSLATIONS.get(language, {})
if not LANGUAGE:
return default_text
return ASSETS_TRANSLATIONS.get(language, {}).get(text) or default_text


if __name__ == "__main__":
main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""
This file is used to load translations from the locale.yaml file and provide a function to get translations
"""
import yaml

TRANSLATIONS_FILE_PATH = "/app/localization/locale.yaml"
DATASET_STRINGS_FILE_PATH = "/app/localization/datasets_strings.yaml"

merged_data = {}
with open(TRANSLATIONS_FILE_PATH, "r") as file:
yaml_content = file.read()
yaml_documents = yaml_content.split("\n---\n")

for doc in yaml_documents:
data = yaml.safe_load(doc)
if data is not None:
for lang, translations in data.items():
if lang not in merged_data:
merged_data[lang] = {}
merged_data[lang].update(translations)


ASSETS_TRANSLATIONS = merged_data

DATASET_STRINGS = yaml.safe_load(open(DATASET_STRINGS_FILE_PATH, "r"))


def get_translation(text, language):
"""Get a translation for a text in a language"""
default_text = f"{text}"
LANGUAGE = ASSETS_TRANSLATIONS.get(language, {})
if not LANGUAGE:
return default_text
return ASSETS_TRANSLATIONS.get(language, {}).get(text) or default_text
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@
cf https://superset.apache.org/docs/installation/sql-templating/
"""
from superset.extensions import security_manager
from pythonpath.localization import get_translation, DATASET_STRINGS
from superset import security_manager
import logging

log = logging.getLogger(__name__)
ALL_COURSES = "1 = 1"
NO_COURSES = "1 = 0"

Expand Down Expand Up @@ -44,4 +48,31 @@ def can_view_courses(username, field_name="course_id", **kwargs):
return NO_COURSES


def translate_column(column_name):
"""
Translate a string to the given language.
"""
roles = security_manager.get_user_roles()
log.info(f"Roles: {roles}")
lang = "en"
if roles:
for role in roles:
role_str = role.name.split(" - ")
Ian2012 marked this conversation as resolved.
Show resolved Hide resolved
if len(role_str) > 1:
lang = role_str[1]
break

strings = DATASET_STRINGS.get(column_name, [])
case_format = """CASE \n {cases} \n ELSE {column_name} \n END"""
single_case_format = "WHEN {column_name} = '{string}' THEN '{translation}'"
cases = "\n".join(
single_case_format.format(column_name=column_name, string=string, translation=get_translation(string, lang))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When get_translation is called, localization.py will load all of the translations for all languages into memory. I'm not sure how long that will stick around for, but loading a bunch of files off disk repeatedly might cause performance issues. I doubt we'll have so many strings that it will make much of a dent in server memory, but we might want to compare before and after this change to see.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With this change, it's using: 347.1MiB. Before it was: 307Mi. We could load the file only when needed but the disk operation is slower. The file content will be permanently in memory when the first request ask for translations

for string in strings
)

log.info(f"Translating {column_name} to {lang}")
Ian2012 marked this conversation as resolved.
Show resolved Hide resolved
log.info(case_format.format(column_name=column_name, cases=cases))

return case_format.format(column_name=column_name, cases=cases)

{{patch("superset-jinja-filters")}}
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ def can_view_courses_wrapper(*args, **kwargs):

JINJA_CONTEXT_ADDONS = {
'can_view_courses': can_view_courses_wrapper,
'translate_column': translate_column,
{% for filter in SUPERSET_EXTRA_JINJA_FILTERS %}'{{ filter }}': {{filter}},{% endfor %}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
enrollment_mode:
- audit
- verified
- honor
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ params:
row_limit: 10000
show_legend: false
slice_id: 352
time_range: 'DATEADD(DATETIME("now"), -1, month) : DATEADD(DATETIME("now"), 1, day)'
viz_type: dist_bar
x_axis_label: Course Grade (out of 100%)
x_ticks_layout: auto
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ params:
row_limit: 10000
show_legend: false
slice_id: 30
time_range: 'DATEADD(DATETIME("now"), -1, month) : DATEADD(DATETIME("now"), 1, day)'
viz_type: dist_bar
x_axis_label: Number Of Attempts To Find Correct Answer
x_ticks_layout: auto
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ params:
row_limit: 10000
show_legend: false
slice_id: 166
time_range: 'DATEADD(DATETIME("now"), -1, month) : DATEADD(DATETIME("now"), 1, day)'
viz_type: dist_bar
x_axis_label: Hints / Answer Displayed Before Correct Answer Chosen
x_ticks_layout: auto
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ params:
row_limit: 10000
show_legend: false
slice_id: 345
time_range: 'DATEADD(DATETIME("now"), -1, month) : DATEADD(DATETIME("now"), 1, day)'
viz_type: dist_bar
x_axis_label: Percentage Grade
x_ticks_layout: auto
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ params:
row_limit: 10000
show_legend: false
slice_id: 314
time_range: 'DATEADD(DATETIME("now"), -1, month) : DATEADD(DATETIME("now"), 1, day)'
viz_type: dist_bar
x_axis_label: Answers Chosen
x_ticks_layout: auto
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,13 @@ params:
sqlExpression: null
subject: enrollment_status
color_scheme: supersetColors
dashboards:
- 23
datasource: 11__table
date_format: smart_date
extra_form_data: {}
granularity_sqla: emission_time
groupby:
- enrollment_mode
- expressionType: SQL
label: Enrollment Mode
sqlExpression: |-
{% raw %}{{translate_column('enrollment_mode')}}{% endraw %}
innerRadius: 30
label_type: key
labels_outside: true
Expand All @@ -39,11 +38,8 @@ params:
show_labels: true
show_labels_threshold: 5
show_legend: true
slice_id: 40
sort_by_metric: true
time_range: 'DATEADD(DATETIME("now"), -1, month) : DATEADD(DATETIME("now"), 1, day)'
viz_type: pie
query_context: null
slice_name: Enrollments By Enrollment Mode
uuid: 05ed7102-5464-4e2f-86ae-31700b787cc3
version: 1.0.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ params:
show_controls: false
show_legend: false
slice_id: 262
time_range: 'DATEADD(DATETIME("now"), -1, month) : DATEADD(DATETIME("now"), 1, day)'
viz_type: dist_bar
x_axis_label: Question Title
x_ticks_layout: auto
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ params:
row_limit: 10000
show_legend: true
slice_id: 47
time_range: 'DATEADD(DATETIME("now"), -1, month) : DATEADD(DATETIME("now"), 1, day)'
viz_type: dist_bar
x_axis_label: Video Title
x_ticks_layout: auto
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ params:
row_limit: 10000
show_legend: true
slice_id: 204
time_range: 'DATEADD(DATETIME("now"), -1, month) : DATEADD(DATETIME("now"), 1, day)'
viz_type: dist_bar
x_axis_label: Seconds Of Video
x_ticks_layout: auto
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ params:
row_limit: 10000
show_legend: true
slice_id: 25
time_range: 'DATEADD(DATETIME("now"), -1, month) : DATEADD(DATETIME("now"), 1, day)'
viz_type: dist_bar
x_axis_label: Video Title
x_ticks_layout: auto
Expand Down
Loading