From 24c0e8f4395b572ba475b70976d5df3ba7569b51 Mon Sep 17 00:00:00 2001 From: Andrew Tavis McAllister Date: Fri, 8 Nov 2024 17:45:45 +0100 Subject: [PATCH] Fix tests now that utils args are lower and make cli fxn args explicit --- src/scribe_data/cli/convert.py | 2 +- src/scribe_data/cli/total.py | 24 +++++++++++++++--------- src/scribe_data/utils.py | 31 ++++++++++++++++--------------- tests/cli/test_list.py | 2 +- tests/cli/test_total.py | 24 +++++++++++++----------- tests/load/test_update_utils.py | 28 ++++++++++++++-------------- 6 files changed, 60 insertions(+), 51 deletions(-) diff --git a/src/scribe_data/cli/convert.py b/src/scribe_data/cli/convert.py index eee3862b..569e6e2b 100644 --- a/src/scribe_data/cli/convert.py +++ b/src/scribe_data/cli/convert.py @@ -389,7 +389,7 @@ def convert_to_sqlite( data_to_sqlite(languages, specific_tables) - source_file = f"{get_language_iso(language).upper()}LanguageData.sqlite" + source_file = f"{get_language_iso(language).capitalize()}LanguageData.sqlite" source_path = input_file.parent / source_file target_path = output_dir / source_file diff --git a/src/scribe_data/cli/total.py b/src/scribe_data/cli/total.py index 58dc1aba..eeafdf15 100644 --- a/src/scribe_data/cli/total.py +++ b/src/scribe_data/cli/total.py @@ -210,7 +210,9 @@ def print_total_header(language, dt, total_lexemes): first_row = True for dt in data_types: - total_lexemes = get_total_lexemes(lang, dt, False) + total_lexemes = get_total_lexemes( + language=lang, data_type=dt, do_print=False + ) total_lexemes = f"{total_lexemes:,}" if first_row: print_total_header(lang, dt, total_lexemes) @@ -233,7 +235,9 @@ def print_total_header(language, dt, total_lexemes): data_types = get_datatype_list(language) for dt in data_types: - total_lexemes = get_total_lexemes(language, dt, False) + total_lexemes = get_total_lexemes( + language=language, data_type=dt, do_print=False + ) total_lexemes = f"{total_lexemes:,}" if first_row: print_total_header(language, dt, total_lexemes) @@ -248,7 +252,7 @@ def print_total_header(language, dt, total_lexemes): # MARK: Get Total -def get_total_lexemes(language, data_type, doPrint=True): +def get_total_lexemes(language, data_type, do_print=True): """ Get the total number of lexemes for a given language and data type from Wikidata. @@ -349,8 +353,8 @@ def get_total_lexemes(language, data_type, doPrint=True): if data_type: output_template += f"Data type: {data_type}\n" - output_template += f"Total number of lexemes: {total_lexemes}\n" - if doPrint: + output_template += f"Total number of lexemes: {total_lexemes:,}\n" + if do_print: print(output_template) return total_lexemes @@ -399,7 +403,9 @@ def total_wrapper( True # flag to check if it's the first data type for the language ) for dt in data_types: - total_lexemes = get_total_lexemes(lang, dt, False) + total_lexemes = get_total_lexemes( + language=lang, data_type=dt, do_print=False + ) total_lexemes = ( f"{total_lexemes:,}" if total_lexemes is not None else "N/A" ) @@ -413,16 +419,16 @@ def total_wrapper( print() elif language is not None and data_type is None: - print_total_lexemes(language) + print_total_lexemes(language=language) elif language is not None and not all_bool: - get_total_lexemes(language, data_type) + get_total_lexemes(language=language, data_type=data_type) elif language is not None: print( f"You have already specified language {language.capitalize()} and data type {data_type} - no need to specify --all." ) - get_total_lexemes(language, data_type) + get_total_lexemes(language=language, data_type=data_type) else: raise ValueError("Invalid input or missing information") diff --git a/src/scribe_data/utils.py b/src/scribe_data/utils.py index 5ce3639c..08194a63 100644 --- a/src/scribe_data/utils.py +++ b/src/scribe_data/utils.py @@ -159,23 +159,24 @@ def _find(source_key: str, source_value: str, target_key: str, error_msg: str) - """ # Check if we're searching by language name. if source_key == "language": - norm_source_value = source_value - # First, check the main language entries (e.g., mandarin, french, etc.). for language, entry in _languages.items(): # If the language name matches the top-level key, return the target value. - if language == norm_source_value: + if language == source_value: if "sub_languages" in entry: - sub_languages = ", ".join(entry["sub_languages"].keys()) + sub_languages = entry["sub_languages"].keys() + sub_languages = ", ".join( + lang.capitalize() for lang in sub_languages + ) raise ValueError( - f"'{language.capitalize()}' has sub-languages, but is not queryable directly. Available sub-languages: {sub_languages.capitalize()}" + f"'{language.capitalize()}' has sub-languages, but is not queryable directly. Available sub-languages: {sub_languages}" ) return entry.get(target_key) # If there are sub-languages, check them too. if "sub_languages" in entry: for sub_language, sub_entry in entry["sub_languages"].items(): - if sub_language == norm_source_value: + if sub_language == source_value: return sub_entry.get(target_key) # If no match was found, raise an error. @@ -197,10 +198,10 @@ def get_language_qid(language: str) -> str: The Wikidata QID for the language. """ return _find( - "language", - language, - "qid", - f"{language.upper()} is currently not a supported language for QID conversion.", + source_key="language", + source_value=language, + target_key="qid", + error_msg=f"{language.capitalize()} is currently not a supported language for QID conversion.", ) @@ -220,10 +221,10 @@ def get_language_iso(language: str) -> str: """ return _find( - "language", - language, - "iso", - f"{language.upper()} is currently not a supported language for ISO conversion.", + source_key="language", + source_value=language, + target_key="iso", + error_msg=f"{language.capitalize()} is currently not a supported language for ISO conversion.", ) @@ -597,7 +598,7 @@ def format_sublanguage_name(lang, language_metadata=_languages): return f"{main_lang}/{sub_lang}" # Raise ValueError if no match is found. - raise ValueError(f"{lang.upper()} is not a valid language or sub-language.") + raise ValueError(f"{lang.capitalize()} is not a valid language or sub-language.") def list_all_languages(language_metadata=_languages): diff --git a/tests/cli/test_list.py b/tests/cli/test_list.py index 1e4d708a..16f34394 100644 --- a/tests/cli/test_list.py +++ b/tests/cli/test_list.py @@ -88,7 +88,7 @@ def test_list_data_types_all_languages(self, mock_print): @patch("builtins.print") def test_list_data_types_specific_language(self, mock_print): - list_data_types("English") + list_data_types("english") expected_calls = [ call(), diff --git a/tests/cli/test_total.py b/tests/cli/test_total.py index 0bbe340f..a8145f04 100644 --- a/tests/cli/test_total.py +++ b/tests/cli/test_total.py @@ -46,7 +46,7 @@ def test_get_total_lexemes_valid(self, mock_query, mock_get_qid): mock_query.return_value = mock_results with patch("builtins.print") as mock_print: - get_total_lexemes("English", "nouns") + get_total_lexemes(language="English", data_type="nouns") mock_print.assert_called_once_with( "\nLanguage: English\nData type: nouns\nTotal number of lexemes: 42\n" @@ -63,7 +63,7 @@ def test_get_total_lexemes_no_results(self, mock_query, mock_get_qid): mock_query.return_value = mock_results with patch("builtins.print") as mock_print: - get_total_lexemes("English", "nouns") + get_total_lexemes(language="English", data_type="nouns") mock_print.assert_called_once_with("Total number of lexemes: Not found") @@ -74,7 +74,7 @@ def test_get_total_lexemes_invalid_language(self, mock_query, mock_get_qid): mock_query.return_value = MagicMock() with patch("builtins.print") as mock_print: - get_total_lexemes("InvalidLanguage", "nouns") + get_total_lexemes(language="InvalidLanguage", data_type="nouns") mock_print.assert_called_once_with("Total number of lexemes: Not found") @@ -86,7 +86,7 @@ def test_get_total_lexemes_empty_and_none_inputs(self, mock_query, mock_get_qid) # Call the function with empty and None inputs with patch("builtins.print") as mock_print: - get_total_lexemes("", "nouns") + get_total_lexemes(language="", data_type="nouns") get_total_lexemes(None, "verbs") expected_calls = [ @@ -102,7 +102,7 @@ def test_get_total_lexemes_nonexistent_language(self, mock_query, mock_get_qid): mock_query.return_value = MagicMock() with patch("builtins.print") as mock_print: - get_total_lexemes("Martian", "nouns") + get_total_lexemes(language="Martian", data_type="nouns") mock_print.assert_called_once_with("Total number of lexemes: Not found") @@ -123,8 +123,8 @@ def test_get_total_lexemes_various_data_types(self, mock_query, mock_get_qid): # Call the function with different data types with patch("builtins.print") as mock_print: - get_total_lexemes("English", "verbs") - get_total_lexemes("English", "nouns") + get_total_lexemes(language="English", data_type="verbs") + get_total_lexemes(language="English", data_type="nouns") expected_calls = [ call( @@ -159,8 +159,8 @@ def test_get_total_lexemes_sub_languages(self, mock_dir, mock_query, mock_get_qi ] with patch("builtins.print") as mock_print: - get_total_lexemes("Norwegian", "verbs") - get_total_lexemes("Norwegian", "nouns") + get_total_lexemes(language="Norwegian", data_type="verbs") + get_total_lexemes(language="Norwegian", data_type="nouns") expected_calls = [ call( @@ -250,12 +250,14 @@ def test_total_wrapper_all_bool(self, mock_print_total_lexemes): @patch("scribe_data.cli.total.print_total_lexemes") def test_total_wrapper_language_only(self, mock_print_total_lexemes): total_wrapper(language="English") - mock_print_total_lexemes.assert_called_once_with("English") + mock_print_total_lexemes.assert_called_once_with(language="English") @patch("scribe_data.cli.total.get_total_lexemes") def test_total_wrapper_language_and_data_type(self, mock_get_total_lexemes): total_wrapper(language="English", data_type="nouns") - mock_get_total_lexemes.assert_called_once_with("English", "nouns") + mock_get_total_lexemes.assert_called_once_with( + language="English", data_type="nouns" + ) def test_total_wrapper_invalid_input(self): with self.assertRaises(ValueError): diff --git a/tests/load/test_update_utils.py b/tests/load/test_update_utils.py index 264fe94c..8a1e4f2f 100644 --- a/tests/load/test_update_utils.py +++ b/tests/load/test_update_utils.py @@ -33,11 +33,11 @@ @pytest.mark.parametrize( "language, qid_code", [ - ("English", "Q1860"), + ("english", "Q1860"), ("french", "Q150"), - ("GERMAN", "Q188"), - ("iTalian", "Q652"), - ("poRTUGuese", "Q5146"), + ("german", "Q188"), + ("italian", "Q652"), + ("portuguese", "Q5146"), ("russian", "Q7737"), ("spanish", "Q1321"), ("swedish", "Q9027"), @@ -54,21 +54,21 @@ def test_get_language_qid_negative(): assert ( str(excp.value) - == "NEWSPEAK is currently not a supported language for QID conversion." + == "Newspeak is currently not a supported language for QID conversion." ) @pytest.mark.parametrize( "language, iso_code", [ - ("English", "en"), + ("english", "en"), ("french", "fr"), - ("GERMAN", "de"), - ("iTalian", "it"), - ("poRTUGuese", "pt"), + ("german", "de"), + ("italian", "it"), + ("portuguese", "pt"), ("russian", "ru"), ("spanish", "es"), - ("SwedisH", "sv"), + ("swedish", "sv"), ("bokmål", "nb"), ], ) @@ -78,11 +78,11 @@ def test_get_language_iso_positive(language, iso_code): def test_get_language_iso_negative(): with pytest.raises(ValueError) as excp: - _ = utils.get_language_iso("gibberish") + _ = utils.get_language_iso("Gibberish") assert ( str(excp.value) - == "GIBBERISH is currently not a supported language for ISO conversion." + == "Gibberish is currently not a supported language for ISO conversion." ) @@ -125,9 +125,9 @@ def test_format_sublanguage_name_positive(lang, expected_output): def test_format_sublanguage_name_negative(): with pytest.raises(ValueError) as excp: - _ = utils.format_sublanguage_name("soccer") + _ = utils.format_sublanguage_name("Silence") - assert str(excp.value) == "SOCCER is not a valid language or sub-language." + assert str(excp.value) == "Silence is not a valid language or sub-language." def test_list_all_languages():