Fix tests now that utils args are lowercase and make CLI function args explicit

scribe-org · Nov 8, 2024 · 24c0e8f
1 parent b72c928
commit 24c0e8f
Show file tree

Hide file tree

Showing 6 changed files with 60 additions and 51 deletions.
diff --git a/src/scribe_data/cli/convert.py b/src/scribe_data/cli/convert.py
@@ -389,7 +389,7 @@ def convert_to_sqlite(
 
     data_to_sqlite(languages, specific_tables)
 
-    source_file = f"{get_language_iso(language).upper()}LanguageData.sqlite"
+    source_file = f"{get_language_iso(language).capitalize()}LanguageData.sqlite"
     source_path = input_file.parent / source_file
     target_path = output_dir / source_file
 

diff --git a/src/scribe_data/cli/total.py b/src/scribe_data/cli/total.py
@@ -210,7 +210,9 @@ def print_total_header(language, dt, total_lexemes):
 
             first_row = True
             for dt in data_types:
-                total_lexemes = get_total_lexemes(lang, dt, False)
+                total_lexemes = get_total_lexemes(
+                    language=lang, data_type=dt, do_print=False
+                )
                 total_lexemes = f"{total_lexemes:,}"
                 if first_row:
                     print_total_header(lang, dt, total_lexemes)
@@ -233,7 +235,9 @@ def print_total_header(language, dt, total_lexemes):
             data_types = get_datatype_list(language)
 
         for dt in data_types:
-            total_lexemes = get_total_lexemes(language, dt, False)
+            total_lexemes = get_total_lexemes(
+                language=language, data_type=dt, do_print=False
+            )
             total_lexemes = f"{total_lexemes:,}"
             if first_row:
                 print_total_header(language, dt, total_lexemes)
@@ -248,7 +252,7 @@ def print_total_header(language, dt, total_lexemes):
 # MARK: Get Total
 
 
-def get_total_lexemes(language, data_type, doPrint=True):
+def get_total_lexemes(language, data_type, do_print=True):
     """
     Get the total number of lexemes for a given language and data type from Wikidata.
 
@@ -349,8 +353,8 @@ def get_total_lexemes(language, data_type, doPrint=True):
         if data_type:
             output_template += f"Data type: {data_type}\n"
 
-        output_template += f"Total number of lexemes: {total_lexemes}\n"
-        if doPrint:
+        output_template += f"Total number of lexemes: {total_lexemes:,}\n"
+        if do_print:
             print(output_template)
 
         return total_lexemes
@@ -399,7 +403,9 @@ def total_wrapper(
                 True  # flag to check if it's the first data type for the language
             )
             for dt in data_types:
-                total_lexemes = get_total_lexemes(lang, dt, False)
+                total_lexemes = get_total_lexemes(
+                    language=lang, data_type=dt, do_print=False
+                )
                 total_lexemes = (
                     f"{total_lexemes:,}" if total_lexemes is not None else "N/A"
                 )
@@ -413,16 +419,16 @@ def total_wrapper(
             print()
 
     elif language is not None and data_type is None:
-        print_total_lexemes(language)
+        print_total_lexemes(language=language)
 
     elif language is not None and not all_bool:
-        get_total_lexemes(language, data_type)
+        get_total_lexemes(language=language, data_type=data_type)
 
     elif language is not None:
         print(
             f"You have already specified language {language.capitalize()} and data type {data_type} - no need to specify --all."
         )
-        get_total_lexemes(language, data_type)
+        get_total_lexemes(language=language, data_type=data_type)
 
     else:
         raise ValueError("Invalid input or missing information")
diff --git a/src/scribe_data/utils.py b/src/scribe_data/utils.py
@@ -159,23 +159,24 @@ def _find(source_key: str, source_value: str, target_key: str, error_msg: str) -
     """
     # Check if we're searching by language name.
     if source_key == "language":
-        norm_source_value = source_value
-
         # First, check the main language entries (e.g., mandarin, french, etc.).
         for language, entry in _languages.items():
             # If the language name matches the top-level key, return the target value.
-            if language == norm_source_value:
+            if language == source_value:
                 if "sub_languages" in entry:
-                    sub_languages = ", ".join(entry["sub_languages"].keys())
+                    sub_languages = entry["sub_languages"].keys()
+                    sub_languages = ", ".join(
+                        lang.capitalize() for lang in sub_languages
+                    )
                     raise ValueError(
-                        f"'{language.capitalize()}' has sub-languages, but is not queryable directly. Available sub-languages: {sub_languages.capitalize()}"
+                        f"'{language.capitalize()}' has sub-languages, but is not queryable directly. Available sub-languages: {sub_languages}"
                     )
                 return entry.get(target_key)
 
             # If there are sub-languages, check them too.
             if "sub_languages" in entry:
                 for sub_language, sub_entry in entry["sub_languages"].items():
-                    if sub_language == norm_source_value:
+                    if sub_language == source_value:
                         return sub_entry.get(target_key)
 
     # If no match was found, raise an error.
@@ -197,10 +198,10 @@ def get_language_qid(language: str) -> str:
             The Wikidata QID for the language.
     """
     return _find(
-        "language",
-        language,
-        "qid",
-        f"{language.upper()} is currently not a supported language for QID conversion.",
+        source_key="language",
+        source_value=language,
+        target_key="qid",
+        error_msg=f"{language.capitalize()} is currently not a supported language for QID conversion.",
     )
 
 
@@ -220,10 +221,10 @@ def get_language_iso(language: str) -> str:
     """
 
     return _find(
-        "language",
-        language,
-        "iso",
-        f"{language.upper()} is currently not a supported language for ISO conversion.",
+        source_key="language",
+        source_value=language,
+        target_key="iso",
+        error_msg=f"{language.capitalize()} is currently not a supported language for ISO conversion.",
     )
 
 
@@ -597,7 +598,7 @@ def format_sublanguage_name(lang, language_metadata=_languages):
                     return f"{main_lang}/{sub_lang}"
 
     # Raise ValueError if no match is found.
-    raise ValueError(f"{lang.upper()} is not a valid language or sub-language.")
+    raise ValueError(f"{lang.capitalize()} is not a valid language or sub-language.")
 
 
 def list_all_languages(language_metadata=_languages):

diff --git a/tests/cli/test_list.py b/tests/cli/test_list.py
@@ -88,7 +88,7 @@ def test_list_data_types_all_languages(self, mock_print):
 
     @patch("builtins.print")
     def test_list_data_types_specific_language(self, mock_print):
-        list_data_types("English")
+        list_data_types("english")
 
         expected_calls = [
             call(),

diff --git a/tests/cli/test_total.py b/tests/cli/test_total.py
@@ -46,7 +46,7 @@ def test_get_total_lexemes_valid(self, mock_query, mock_get_qid):
         mock_query.return_value = mock_results
 
         with patch("builtins.print") as mock_print:
-            get_total_lexemes("English", "nouns")
+            get_total_lexemes(language="English", data_type="nouns")
 
         mock_print.assert_called_once_with(
             "\nLanguage: English\nData type: nouns\nTotal number of lexemes: 42\n"
@@ -63,7 +63,7 @@ def test_get_total_lexemes_no_results(self, mock_query, mock_get_qid):
         mock_query.return_value = mock_results
 
         with patch("builtins.print") as mock_print:
-            get_total_lexemes("English", "nouns")
+            get_total_lexemes(language="English", data_type="nouns")
 
         mock_print.assert_called_once_with("Total number of lexemes: Not found")
 
@@ -74,7 +74,7 @@ def test_get_total_lexemes_invalid_language(self, mock_query, mock_get_qid):
         mock_query.return_value = MagicMock()
 
         with patch("builtins.print") as mock_print:
-            get_total_lexemes("InvalidLanguage", "nouns")
+            get_total_lexemes(language="InvalidLanguage", data_type="nouns")
 
         mock_print.assert_called_once_with("Total number of lexemes: Not found")
 
@@ -86,7 +86,7 @@ def test_get_total_lexemes_empty_and_none_inputs(self, mock_query, mock_get_qid)
 
         # Call the function with empty and None inputs
         with patch("builtins.print") as mock_print:
-            get_total_lexemes("", "nouns")
+            get_total_lexemes(language="", data_type="nouns")
             get_total_lexemes(None, "verbs")
 
         expected_calls = [
@@ -102,7 +102,7 @@ def test_get_total_lexemes_nonexistent_language(self, mock_query, mock_get_qid):
         mock_query.return_value = MagicMock()
 
         with patch("builtins.print") as mock_print:
-            get_total_lexemes("Martian", "nouns")
+            get_total_lexemes(language="Martian", data_type="nouns")
 
         mock_print.assert_called_once_with("Total number of lexemes: Not found")
 
@@ -123,8 +123,8 @@ def test_get_total_lexemes_various_data_types(self, mock_query, mock_get_qid):
 
         # Call the function with different data types
         with patch("builtins.print") as mock_print:
-            get_total_lexemes("English", "verbs")
-            get_total_lexemes("English", "nouns")
+            get_total_lexemes(language="English", data_type="verbs")
+            get_total_lexemes(language="English", data_type="nouns")
 
         expected_calls = [
             call(
@@ -159,8 +159,8 @@ def test_get_total_lexemes_sub_languages(self, mock_dir, mock_query, mock_get_qi
         ]
 
         with patch("builtins.print") as mock_print:
-            get_total_lexemes("Norwegian", "verbs")
-            get_total_lexemes("Norwegian", "nouns")
+            get_total_lexemes(language="Norwegian", data_type="verbs")
+            get_total_lexemes(language="Norwegian", data_type="nouns")
 
         expected_calls = [
             call(
@@ -250,12 +250,14 @@ def test_total_wrapper_all_bool(self, mock_print_total_lexemes):
     @patch("scribe_data.cli.total.print_total_lexemes")
     def test_total_wrapper_language_only(self, mock_print_total_lexemes):
         total_wrapper(language="English")
-        mock_print_total_lexemes.assert_called_once_with("English")
+        mock_print_total_lexemes.assert_called_once_with(language="English")
 
     @patch("scribe_data.cli.total.get_total_lexemes")
     def test_total_wrapper_language_and_data_type(self, mock_get_total_lexemes):
         total_wrapper(language="English", data_type="nouns")
-        mock_get_total_lexemes.assert_called_once_with("English", "nouns")
+        mock_get_total_lexemes.assert_called_once_with(
+            language="English", data_type="nouns"
+        )
 
     def test_total_wrapper_invalid_input(self):
         with self.assertRaises(ValueError):

diff --git a/tests/load/test_update_utils.py b/tests/load/test_update_utils.py
@@ -33,11 +33,11 @@
 @pytest.mark.parametrize(
     "language, qid_code",
     [
-        ("English", "Q1860"),
+        ("english", "Q1860"),
         ("french", "Q150"),
-        ("GERMAN", "Q188"),
-        ("iTalian", "Q652"),
-        ("poRTUGuese", "Q5146"),
+        ("german", "Q188"),
+        ("italian", "Q652"),
+        ("portuguese", "Q5146"),
         ("russian", "Q7737"),
         ("spanish", "Q1321"),
         ("swedish", "Q9027"),
@@ -54,21 +54,21 @@ def test_get_language_qid_negative():
 
     assert (
         str(excp.value)
-        == "NEWSPEAK is currently not a supported language for QID conversion."
+        == "Newspeak is currently not a supported language for QID conversion."
     )
 
 
 @pytest.mark.parametrize(
     "language, iso_code",
     [
-        ("English", "en"),
+        ("english", "en"),
         ("french", "fr"),
-        ("GERMAN", "de"),
-        ("iTalian", "it"),
-        ("poRTUGuese", "pt"),
+        ("german", "de"),
+        ("italian", "it"),
+        ("portuguese", "pt"),
         ("russian", "ru"),
         ("spanish", "es"),
-        ("SwedisH", "sv"),
+        ("swedish", "sv"),
         ("bokmål", "nb"),
     ],
 )
@@ -78,11 +78,11 @@ def test_get_language_iso_positive(language, iso_code):
 
 def test_get_language_iso_negative():
     with pytest.raises(ValueError) as excp:
-        _ = utils.get_language_iso("gibberish")
+        _ = utils.get_language_iso("Gibberish")
 
     assert (
         str(excp.value)
-        == "GIBBERISH is currently not a supported language for ISO conversion."
+        == "Gibberish is currently not a supported language for ISO conversion."
     )
 
 
@@ -125,9 +125,9 @@ def test_format_sublanguage_name_positive(lang, expected_output):
 
 def test_format_sublanguage_name_negative():
     with pytest.raises(ValueError) as excp:
-        _ = utils.format_sublanguage_name("soccer")
+        _ = utils.format_sublanguage_name("Silence")
 
-    assert str(excp.value) == "SOCCER is not a valid language or sub-language."
+    assert str(excp.value) == "Silence is not a valid language or sub-language."
 
 
 def test_list_all_languages():