Skip to content

Commit

Permalink
Fix tests now that utils args are lowercase and make CLI function args explicit
Browse files: browse the repository at this point in the history
  • Loading branch information
andrewtavis committed Nov 8, 2024
1 parent b72c928 commit 24c0e8f
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 51 deletions.
2 changes: 1 addition & 1 deletion src/scribe_data/cli/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,7 @@ def convert_to_sqlite(

data_to_sqlite(languages, specific_tables)

source_file = f"{get_language_iso(language).upper()}LanguageData.sqlite"
source_file = f"{get_language_iso(language).capitalize()}LanguageData.sqlite"
source_path = input_file.parent / source_file
target_path = output_dir / source_file

Expand Down
24 changes: 15 additions & 9 deletions src/scribe_data/cli/total.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,9 @@ def print_total_header(language, dt, total_lexemes):

first_row = True
for dt in data_types:
total_lexemes = get_total_lexemes(lang, dt, False)
total_lexemes = get_total_lexemes(
language=lang, data_type=dt, do_print=False
)
total_lexemes = f"{total_lexemes:,}"
if first_row:
print_total_header(lang, dt, total_lexemes)
Expand All @@ -233,7 +235,9 @@ def print_total_header(language, dt, total_lexemes):
data_types = get_datatype_list(language)

for dt in data_types:
total_lexemes = get_total_lexemes(language, dt, False)
total_lexemes = get_total_lexemes(
language=language, data_type=dt, do_print=False
)
total_lexemes = f"{total_lexemes:,}"
if first_row:
print_total_header(language, dt, total_lexemes)
Expand All @@ -248,7 +252,7 @@ def print_total_header(language, dt, total_lexemes):
# MARK: Get Total


def get_total_lexemes(language, data_type, doPrint=True):
def get_total_lexemes(language, data_type, do_print=True):
"""
Get the total number of lexemes for a given language and data type from Wikidata.
Expand Down Expand Up @@ -349,8 +353,8 @@ def get_total_lexemes(language, data_type, doPrint=True):
if data_type:
output_template += f"Data type: {data_type}\n"

output_template += f"Total number of lexemes: {total_lexemes}\n"
if doPrint:
output_template += f"Total number of lexemes: {total_lexemes:,}\n"
if do_print:
print(output_template)

return total_lexemes
Expand Down Expand Up @@ -399,7 +403,9 @@ def total_wrapper(
True # flag to check if it's the first data type for the language
)
for dt in data_types:
total_lexemes = get_total_lexemes(lang, dt, False)
total_lexemes = get_total_lexemes(
language=lang, data_type=dt, do_print=False
)
total_lexemes = (
f"{total_lexemes:,}" if total_lexemes is not None else "N/A"
)
Expand All @@ -413,16 +419,16 @@ def total_wrapper(
print()

elif language is not None and data_type is None:
print_total_lexemes(language)
print_total_lexemes(language=language)

elif language is not None and not all_bool:
get_total_lexemes(language, data_type)
get_total_lexemes(language=language, data_type=data_type)

elif language is not None:
print(
f"You have already specified language {language.capitalize()} and data type {data_type} - no need to specify --all."
)
get_total_lexemes(language, data_type)
get_total_lexemes(language=language, data_type=data_type)

else:
raise ValueError("Invalid input or missing information")
31 changes: 16 additions & 15 deletions src/scribe_data/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,23 +159,24 @@ def _find(source_key: str, source_value: str, target_key: str, error_msg: str) -
"""
# Check if we're searching by language name.
if source_key == "language":
norm_source_value = source_value

# First, check the main language entries (e.g., mandarin, french, etc.).
for language, entry in _languages.items():
# If the language name matches the top-level key, return the target value.
if language == norm_source_value:
if language == source_value:
if "sub_languages" in entry:
sub_languages = ", ".join(entry["sub_languages"].keys())
sub_languages = entry["sub_languages"].keys()
sub_languages = ", ".join(
lang.capitalize() for lang in sub_languages
)
raise ValueError(
f"'{language.capitalize()}' has sub-languages, but is not queryable directly. Available sub-languages: {sub_languages.capitalize()}"
f"'{language.capitalize()}' has sub-languages, but is not queryable directly. Available sub-languages: {sub_languages}"
)
return entry.get(target_key)

# If there are sub-languages, check them too.
if "sub_languages" in entry:
for sub_language, sub_entry in entry["sub_languages"].items():
if sub_language == norm_source_value:
if sub_language == source_value:
return sub_entry.get(target_key)

# If no match was found, raise an error.
Expand All @@ -197,10 +198,10 @@ def get_language_qid(language: str) -> str:
The Wikidata QID for the language.
"""
return _find(
"language",
language,
"qid",
f"{language.upper()} is currently not a supported language for QID conversion.",
source_key="language",
source_value=language,
target_key="qid",
error_msg=f"{language.capitalize()} is currently not a supported language for QID conversion.",
)


Expand All @@ -220,10 +221,10 @@ def get_language_iso(language: str) -> str:
"""

return _find(
"language",
language,
"iso",
f"{language.upper()} is currently not a supported language for ISO conversion.",
source_key="language",
source_value=language,
target_key="iso",
error_msg=f"{language.capitalize()} is currently not a supported language for ISO conversion.",
)


Expand Down Expand Up @@ -597,7 +598,7 @@ def format_sublanguage_name(lang, language_metadata=_languages):
return f"{main_lang}/{sub_lang}"

# Raise ValueError if no match is found.
raise ValueError(f"{lang.upper()} is not a valid language or sub-language.")
raise ValueError(f"{lang.capitalize()} is not a valid language or sub-language.")


def list_all_languages(language_metadata=_languages):
Expand Down
2 changes: 1 addition & 1 deletion tests/cli/test_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def test_list_data_types_all_languages(self, mock_print):

@patch("builtins.print")
def test_list_data_types_specific_language(self, mock_print):
list_data_types("English")
list_data_types("english")

expected_calls = [
call(),
Expand Down
24 changes: 13 additions & 11 deletions tests/cli/test_total.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def test_get_total_lexemes_valid(self, mock_query, mock_get_qid):
mock_query.return_value = mock_results

with patch("builtins.print") as mock_print:
get_total_lexemes("English", "nouns")
get_total_lexemes(language="English", data_type="nouns")

mock_print.assert_called_once_with(
"\nLanguage: English\nData type: nouns\nTotal number of lexemes: 42\n"
Expand All @@ -63,7 +63,7 @@ def test_get_total_lexemes_no_results(self, mock_query, mock_get_qid):
mock_query.return_value = mock_results

with patch("builtins.print") as mock_print:
get_total_lexemes("English", "nouns")
get_total_lexemes(language="English", data_type="nouns")

mock_print.assert_called_once_with("Total number of lexemes: Not found")

Expand All @@ -74,7 +74,7 @@ def test_get_total_lexemes_invalid_language(self, mock_query, mock_get_qid):
mock_query.return_value = MagicMock()

with patch("builtins.print") as mock_print:
get_total_lexemes("InvalidLanguage", "nouns")
get_total_lexemes(language="InvalidLanguage", data_type="nouns")

mock_print.assert_called_once_with("Total number of lexemes: Not found")

Expand All @@ -86,7 +86,7 @@ def test_get_total_lexemes_empty_and_none_inputs(self, mock_query, mock_get_qid)

# Call the function with empty and None inputs
with patch("builtins.print") as mock_print:
get_total_lexemes("", "nouns")
get_total_lexemes(language="", data_type="nouns")
get_total_lexemes(None, "verbs")

expected_calls = [
Expand All @@ -102,7 +102,7 @@ def test_get_total_lexemes_nonexistent_language(self, mock_query, mock_get_qid):
mock_query.return_value = MagicMock()

with patch("builtins.print") as mock_print:
get_total_lexemes("Martian", "nouns")
get_total_lexemes(language="Martian", data_type="nouns")

mock_print.assert_called_once_with("Total number of lexemes: Not found")

Expand All @@ -123,8 +123,8 @@ def test_get_total_lexemes_various_data_types(self, mock_query, mock_get_qid):

# Call the function with different data types
with patch("builtins.print") as mock_print:
get_total_lexemes("English", "verbs")
get_total_lexemes("English", "nouns")
get_total_lexemes(language="English", data_type="verbs")
get_total_lexemes(language="English", data_type="nouns")

expected_calls = [
call(
Expand Down Expand Up @@ -159,8 +159,8 @@ def test_get_total_lexemes_sub_languages(self, mock_dir, mock_query, mock_get_qi
]

with patch("builtins.print") as mock_print:
get_total_lexemes("Norwegian", "verbs")
get_total_lexemes("Norwegian", "nouns")
get_total_lexemes(language="Norwegian", data_type="verbs")
get_total_lexemes(language="Norwegian", data_type="nouns")

expected_calls = [
call(
Expand Down Expand Up @@ -250,12 +250,14 @@ def test_total_wrapper_all_bool(self, mock_print_total_lexemes):
@patch("scribe_data.cli.total.print_total_lexemes")
def test_total_wrapper_language_only(self, mock_print_total_lexemes):
total_wrapper(language="English")
mock_print_total_lexemes.assert_called_once_with("English")
mock_print_total_lexemes.assert_called_once_with(language="English")

@patch("scribe_data.cli.total.get_total_lexemes")
def test_total_wrapper_language_and_data_type(self, mock_get_total_lexemes):
total_wrapper(language="English", data_type="nouns")
mock_get_total_lexemes.assert_called_once_with("English", "nouns")
mock_get_total_lexemes.assert_called_once_with(
language="English", data_type="nouns"
)

def test_total_wrapper_invalid_input(self):
with self.assertRaises(ValueError):
Expand Down
28 changes: 14 additions & 14 deletions tests/load/test_update_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@
@pytest.mark.parametrize(
"language, qid_code",
[
("English", "Q1860"),
("english", "Q1860"),
("french", "Q150"),
("GERMAN", "Q188"),
("iTalian", "Q652"),
("poRTUGuese", "Q5146"),
("german", "Q188"),
("italian", "Q652"),
("portuguese", "Q5146"),
("russian", "Q7737"),
("spanish", "Q1321"),
("swedish", "Q9027"),
Expand All @@ -54,21 +54,21 @@ def test_get_language_qid_negative():

assert (
str(excp.value)
== "NEWSPEAK is currently not a supported language for QID conversion."
== "Newspeak is currently not a supported language for QID conversion."
)


@pytest.mark.parametrize(
"language, iso_code",
[
("English", "en"),
("english", "en"),
("french", "fr"),
("GERMAN", "de"),
("iTalian", "it"),
("poRTUGuese", "pt"),
("german", "de"),
("italian", "it"),
("portuguese", "pt"),
("russian", "ru"),
("spanish", "es"),
("SwedisH", "sv"),
("swedish", "sv"),
("bokmål", "nb"),
],
)
Expand All @@ -78,11 +78,11 @@ def test_get_language_iso_positive(language, iso_code):

def test_get_language_iso_negative():
with pytest.raises(ValueError) as excp:
_ = utils.get_language_iso("gibberish")
_ = utils.get_language_iso("Gibberish")

assert (
str(excp.value)
== "GIBBERISH is currently not a supported language for ISO conversion."
== "Gibberish is currently not a supported language for ISO conversion."
)


Expand Down Expand Up @@ -125,9 +125,9 @@ def test_format_sublanguage_name_positive(lang, expected_output):

def test_format_sublanguage_name_negative():
with pytest.raises(ValueError) as excp:
_ = utils.format_sublanguage_name("soccer")
_ = utils.format_sublanguage_name("Silence")

assert str(excp.value) == "SOCCER is not a valid language or sub-language."
assert str(excp.value) == "Silence is not a valid language or sub-language."


def test_list_all_languages():
Expand Down

0 comments on commit 24c0e8f

Please sign in to comment.