From 6843f0f7eee812fd59f5734d4a721fe7940580dc Mon Sep 17 00:00:00 2001 From: JarbasAI <33701864+JarbasAl@users.noreply.github.com> Date: Fri, 11 Oct 2024 18:56:36 +0100 Subject: [PATCH] fix:langcodes (#241) * fix:langcodes the lang code standard assumes the region is upper case; however, mycroft forced a lower case lang code, which is now causing issues if the standard is used. besides handling this, dialect support is now improved by using the langcodes library distance function, ensuring the best dialect is selected * fix:update requirements * fix:remove_deprecated_test no longer applies when latest ovos-utils is installed --- ovos_workshop/app.py | 13 +++++++++---- ovos_workshop/resource_files.py | 22 +++++++++++++--------- requirements/requirements.txt | 3 ++- test/unittests/skills/test_base.py | 9 --------- 4 files changed, 24 insertions(+), 23 deletions(-) diff --git a/ovos_workshop/app.py b/ovos_workshop/app.py index 27951c3f..98e2acf5 100644 --- a/ovos_workshop/app.py +++ b/ovos_workshop/app.py @@ -2,6 +2,7 @@ from typing import Optional from ovos_config.locations import get_xdg_config_save_path from ovos_bus_client.util import get_mycroft_bus +from ovos_utils.lang import standardize_lang_tag from ovos_utils.log import log_deprecation from ovos_bus_client.apis.gui import GUIInterface from ovos_bus_client.client.client import MessageBusClient @@ -77,17 +78,21 @@ def get_language_dir(self, base_path: Optional[str] = None, base_path = base_path or self.res_dir lang = lang or self.lang - lang_path = join(base_path, lang) + lang = str(standardize_lang_tag(lang)) - # base_path/en-us - if isdir(lang_path): - return lang_path + # base_path/lang-CODE (region is upper case) + if isdir(join(base_path, lang)): + return join(base_path, lang) + # base_path/lang-code (lowercase) + if isdir(join(base_path, lang.lower())): + return join(base_path, lang.lower()) # check for subdialects of same language as a fallback # eg, language is set to en-au but only en-us resources are available 
similar_dialect_directories = locate_lang_directories(lang, base_path) for directory in similar_dialect_directories: if directory.exists(): + # NOTE: these are already sorted, the first is the best match return str(directory) def clear_intents(self): diff --git a/ovos_workshop/resource_files.py b/ovos_workshop/resource_files.py index 556ef9dd..4608cc5d 100644 --- a/ovos_workshop/resource_files.py +++ b/ovos_workshop/resource_files.py @@ -13,7 +13,6 @@ # limitations under the License. # """Handling of skill data such as intents and regular expressions.""" -import os import re from collections import namedtuple from os import walk @@ -21,12 +20,11 @@ from pathlib import Path from typing import List, Optional, Tuple +from langcodes import tag_distance from ovos_config.config import Configuration -from ovos_config.locations import get_xdg_data_dirs, \ - get_xdg_data_save_path -from ovos_config.meta import get_xdg_base -from ovos_utils.bracket_expansion import expand_options +from ovos_config.locations import get_xdg_data_save_path from ovos_utils import flatten_list +from ovos_utils.bracket_expansion import expand_options from ovos_utils.dialog import MustacheDialogRenderer, load_dialogs from ovos_utils.log import LOG, log_deprecation @@ -78,7 +76,6 @@ def locate_lang_directories(lang: str, skill_directory: str, @param resource_subdirectory: optional extra resource directory to prepend @return: list of existing skill resource directories for the given lang """ - base_lang = lang.split("-")[0] base_dirs = [Path(skill_directory, "locale"), Path(skill_directory, "text")] if resource_subdirectory: @@ -87,9 +84,16 @@ def locate_lang_directories(lang: str, skill_directory: str, for directory in base_dirs: if directory.exists(): for folder in directory.iterdir(): - if folder.name.startswith(base_lang): - candidates.append(folder) - return candidates + score = tag_distance(lang, folder.name) + # https://langcodes-hickford.readthedocs.io/en/sphinx/index.html#distance-values + # 
0 -> These codes represent the same language, possibly after filling in values and normalizing. + # 1- 3 -> These codes indicate a minor regional difference. + # 4 - 10 -> These codes indicate a significant but unproblematic regional difference. + if score < 10: + candidates.append((folder, score)) + # sort by distance to target lang code + candidates = sorted(candidates, key=lambda k: k[1]) + return [c[0] for c in candidates] def resolve_resource_file(res_name: str) -> Optional[str]: diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 2a94bfba..3c535adf 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -1,6 +1,7 @@ -ovos-utils>=0.0.38,<1.0.0 +ovos-utils>= 0.2.1,<1.0.0 ovos_bus_client>=0.0.8,<1.0.0 ovos-config>=0.0.12,<1.0.0 ovos-backend-client>=0.1.0,<2.0.0 ovos-lingua-franca>=0.4.6,<1.0.0 rapidfuzz +langcodes \ No newline at end of file diff --git a/test/unittests/skills/test_base.py b/test/unittests/skills/test_base.py index c81d3ef3..33fbf804 100644 --- a/test/unittests/skills/test_base.py +++ b/test/unittests/skills/test_base.py @@ -573,12 +573,3 @@ def test_skill_gui(self, interface_init): config=old_skill.config_core['gui'], ui_directories={"qt5": join(old_skill.root_dir, "ui")}) - # New skill with `gui` directory in root - new_skill = self.GuiSkill() - new_gui = SkillGUI(new_skill) - self.assertEqual(new_gui.skill, new_skill) - self.assertIsInstance(new_gui, GUIInterface) - interface_init.assert_called_with( - new_gui, skill_id=new_skill.skill_id, bus=new_skill.bus, - config=new_skill.config_core['gui'], - ui_directories={"all": join(new_skill.root_dir, "gui")})