From 362b5ee15af4076074976ddb6a34b36c7718c6f3 Mon Sep 17 00:00:00 2001 From: Andrew Tavis McAllister Date: Fri, 23 Feb 2024 17:52:15 +0100 Subject: [PATCH] #57 WIP fix of paths to queried language data and update files --- src/scribe_data/checkquery.py | 13 ++-- .../languages/English/nouns/format_nouns.py | 16 +++-- .../languages/English/verbs/format_verbs.py | 10 +-- .../languages/French/nouns/format_nouns.py | 10 +-- .../languages/French/verbs/format_verbs.py | 10 +-- .../languages/German/nouns/format_nouns.py | 10 +-- .../prepositions/format_prepositions.py | 8 +-- .../languages/German/verbs/format_verbs.py | 10 +-- .../languages/Italian/nouns/format_nouns.py | 10 +-- .../languages/Italian/verbs/format_verbs.py | 10 +-- .../Portuguese/nouns/format_nouns.py | 10 +-- .../Portuguese/verbs/format_verbs.py | 10 +-- .../languages/Russian/nouns/format_nouns.py | 10 +-- .../prepositions/format_prepositions.py | 8 +-- .../languages/Russian/verbs/format_verbs.py | 10 +-- .../languages/Spanish/nouns/format_nouns.py | 10 +-- .../languages/Spanish/verbs/format_verbs.py | 10 +-- .../languages/Swedish/nouns/format_nouns.py | 22 ++++--- .../languages/Swedish/verbs/format_verbs.py | 10 +-- .../extract_transform/update_data.py | 64 +++++++++++-------- 20 files changed, 157 insertions(+), 114 deletions(-) diff --git a/src/scribe_data/checkquery.py b/src/scribe_data/checkquery.py index 7a470738d..1714bcd9b 100755 --- a/src/scribe_data/checkquery.py +++ b/src/scribe_data/checkquery.py @@ -40,7 +40,6 @@ from SPARQLWrapper import SPARQLExceptions from tqdm.auto import tqdm - EXIT_SUCCESS = 0 EXIT_FAILURE = 1 EXIT_CLI_ERROR = 2 @@ -138,11 +137,11 @@ def all_queries() -> list[QueryFile]: queries: list[QueryFile] = [] - for root, _, fnames in os.walk(prj_root): - for fname in fnames: - fpath = Path(root, fname) - if fpath.suffix == ".sparql": - queries.append(QueryFile(fpath)) + for root, _, files in os.walk(prj_root): + for f in files: + file_path = Path(root, f) + if file_path.suffix == ".sparql": + queries.append(QueryFile(file_path)) return queries @@ -501,7 +500,7 @@ def error_report(failures: list[QueryExecutionException]) -> None: qword = "query" if len(failures) == 1 else "queries" print(f"\nFollowing {qword} failed:\n", file=sys.stderr) for failed_query in failures: - print(str(failed_query), file=sys.stderr) + print(failed_query, file=sys.stderr) def success_report(successes: list[tuple[QueryFile, dict]], display: bool) -> None: diff --git a/src/scribe_data/extract_transform/languages/English/nouns/format_nouns.py b/src/scribe_data/extract_transform/languages/English/nouns/format_nouns.py index 599680fa8..0cb5b2b1c 100644 --- a/src/scribe_data/extract_transform/languages/English/nouns/format_nouns.py +++ b/src/scribe_data/extract_transform/languages/English/nouns/format_nouns.py @@ -15,17 +15,19 @@ PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src" sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC) -from scribe_data.utils import get_path_from_et_dir +from scribe_data.utils import get_path_from_et_dir # noqa: E402 file_path = sys.argv[0] update_data_in_use = False # check if update_data.py is being used -if f"{LANGUAGE}/nouns/" not in file_path: +if f"languages/{LANGUAGE}/nouns/" not in file_path: with open("nouns_queried.json", encoding="utf-8") as f: nouns_list = json.load(f) else: update_data_in_use = True - with open(f"./{LANGUAGE}/nouns/nouns_queried.json", encoding="utf-8") as f: + with open( + f"./languages/{LANGUAGE}/nouns/nouns_queried.json", encoding="utf-8" + ) as f: nouns_list = json.load(f) nouns_formatted = {} @@ -94,12 +96,16 @@ export_dir = "../formatted_data/" export_path = os.path.join(export_dir, "nouns.json") if update_data_in_use: - export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/{LANGUAGE}/formatted_data/nouns.json" + export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/languages/{LANGUAGE}/formatted_data/nouns.json" if not os.path.exists(export_dir): os.makedirs(export_dir) -with open(export_path, "w", encoding="utf-8",) as file: +with open( + export_path, + "w", + encoding="utf-8", +) as file: json.dump(nouns_formatted, file, ensure_ascii=False, indent=0) print(f"Wrote file nouns.json with {len(nouns_formatted)} nouns.") diff --git a/src/scribe_data/extract_transform/languages/English/verbs/format_verbs.py b/src/scribe_data/extract_transform/languages/English/verbs/format_verbs.py index 2aea8efd9..296648aef 100644 --- a/src/scribe_data/extract_transform/languages/English/verbs/format_verbs.py +++ b/src/scribe_data/extract_transform/languages/English/verbs/format_verbs.py @@ -15,17 +15,19 @@ PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src" sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC) -from scribe_data.utils import get_path_from_et_dir +from scribe_data.utils import get_path_from_et_dir # noqa: E402 file_path = sys.argv[0] update_data_in_use = False # check if update_data.py is being used -if f"{LANGUAGE}/verbs/" not in file_path: +if f"languages/{LANGUAGE}/verbs/" not in file_path: with open("verbs_queried.json", encoding="utf-8") as f: verbs_list = json.load(f) else: update_data_in_use = True - with open(f"./{LANGUAGE}/verbs/verbs_queried.json", encoding="utf-8") as f: + with open( + f"./languages/{LANGUAGE}/verbs/verbs_queried.json", encoding="utf-8" + ) as f: verbs_list = json.load(f) verbs_formatted = {} @@ -81,7 +83,7 @@ export_dir = "../formatted_data/" export_path = os.path.join(export_dir, "verbs.json") if update_data_in_use: - export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/{LANGUAGE}/formatted_data/verbs.json" + export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/languages/{LANGUAGE}/formatted_data/verbs.json" if not os.path.exists(export_dir): os.makedirs(export_dir) diff --git a/src/scribe_data/extract_transform/languages/French/nouns/format_nouns.py b/src/scribe_data/extract_transform/languages/French/nouns/format_nouns.py index 31a1756ac..5e551ed7a 100644 --- a/src/scribe_data/extract_transform/languages/French/nouns/format_nouns.py +++ b/src/scribe_data/extract_transform/languages/French/nouns/format_nouns.py @@ -15,17 +15,19 @@ PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src" sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC) -from scribe_data.utils import get_path_from_et_dir +from scribe_data.utils import get_path_from_et_dir # noqa: E402 file_path = sys.argv[0] update_data_in_use = False # check if update_data.py is being used -if f"{LANGUAGE}/nouns/" not in file_path: +if f"languages/{LANGUAGE}/nouns/" not in file_path: with open("nouns_queried.json", encoding="utf-8") as f: nouns_list = json.load(f) else: update_data_in_use = True - with open(f"./{LANGUAGE}/nouns/nouns_queried.json", encoding="utf-8") as f: + with open( + f"./languages/{LANGUAGE}/nouns/nouns_queried.json", encoding="utf-8" + ) as f: nouns_list = json.load(f) @@ -124,7 +126,7 @@ def order_annotations(annotation): org_path = get_path_from_et_dir() export_path = "../formatted_data/nouns.json" if update_data_in_use: - export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/{LANGUAGE}/formatted_data/nouns.json" + export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/languages/{LANGUAGE}/formatted_data/nouns.json" with open( export_path, diff --git a/src/scribe_data/extract_transform/languages/French/verbs/format_verbs.py b/src/scribe_data/extract_transform/languages/French/verbs/format_verbs.py index 6ec3eab8e..ccff6731c 100644 --- a/src/scribe_data/extract_transform/languages/French/verbs/format_verbs.py +++ b/src/scribe_data/extract_transform/languages/French/verbs/format_verbs.py @@ -15,17 +15,19 @@ PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src" sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC) -from scribe_data.utils import get_path_from_et_dir +from scribe_data.utils import get_path_from_et_dir # noqa: E402 file_path = sys.argv[0] update_data_in_use = False # check if update_data.py is being used -if f"{LANGUAGE}/verbs/" not in file_path: +if f"languages/{LANGUAGE}/verbs/" not in file_path: with open("verbs_queried.json", encoding="utf-8") as f: verbs_list = json.load(f) else: update_data_in_use = True - with open(f"./{LANGUAGE}/verbs/verbs_queried.json", encoding="utf-8") as f: + with open( + f"./languages/{LANGUAGE}/verbs/verbs_queried.json", encoding="utf-8" + ) as f: verbs_list = json.load(f) verbs_formatted = {} @@ -77,7 +79,7 @@ org_path = get_path_from_et_dir() export_path = "../formatted_data/verbs.json" if update_data_in_use: - export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/{LANGUAGE}/formatted_data/verbs.json" + export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/languages/{LANGUAGE}/formatted_data/verbs.json" with open( export_path, diff --git a/src/scribe_data/extract_transform/languages/German/nouns/format_nouns.py b/src/scribe_data/extract_transform/languages/German/nouns/format_nouns.py index 2810c8428..abc700c7e 100644 --- a/src/scribe_data/extract_transform/languages/German/nouns/format_nouns.py +++ b/src/scribe_data/extract_transform/languages/German/nouns/format_nouns.py @@ -15,17 +15,19 @@ PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src" sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC) -from scribe_data.utils import get_path_from_et_dir +from scribe_data.utils import get_path_from_et_dir # noqa: E402 file_path = sys.argv[0] update_data_in_use = False # check if update_data.py is being used -if f"{LANGUAGE}/nouns/" not in file_path: +if f"languages/{LANGUAGE}/nouns/" not in file_path: with open("nouns_queried.json", encoding="utf-8") as f: nouns_list = json.load(f) else: update_data_in_use = True - with open(f"./{LANGUAGE}/nouns/nouns_queried.json", encoding="utf-8") as f: + with open( + f"./languages/{LANGUAGE}/nouns/nouns_queried.json", encoding="utf-8" + ) as f: nouns_list = json.load(f) @@ -193,7 +195,7 @@ def order_annotations(annotation): org_path = get_path_from_et_dir() export_path = "../formatted_data/nouns.json" if update_data_in_use: - export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/{LANGUAGE}/formatted_data/nouns.json" + export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/languages/{LANGUAGE}/formatted_data/nouns.json" with open( export_path, diff --git a/src/scribe_data/extract_transform/languages/German/prepositions/format_prepositions.py b/src/scribe_data/extract_transform/languages/German/prepositions/format_prepositions.py index f48e7d5e8..3f9a06a2d 100644 --- a/src/scribe_data/extract_transform/languages/German/prepositions/format_prepositions.py +++ b/src/scribe_data/extract_transform/languages/German/prepositions/format_prepositions.py @@ -15,18 +15,18 @@ PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src" sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC) -from scribe_data.utils import get_path_from_et_dir +from scribe_data.utils import get_path_from_et_dir # noqa: E402 file_path = sys.argv[0] update_data_in_use = False # check if update_data.py is being used -if f"{LANGUAGE}/prepositions/" not in file_path: +if f"languages/{LANGUAGE}/prepositions/" not in file_path: with open("prepositions_queried.json", encoding="utf-8") as f: prepositions_list = json.load(f) else: update_data_in_use = True with open( - f"./{LANGUAGE}/prepositions/prepositions_queried.json", + f"./languages/{LANGUAGE}/prepositions/prepositions_queried.json", encoding="utf-8", ) as f: prepositions_list = json.load(f) @@ -121,7 +121,7 @@ def order_annotations(annotation): org_path = get_path_from_et_dir() export_path = "../formatted_data/prepositions.json" if update_data_in_use: - export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/{LANGUAGE}/formatted_data/prepositions.json" + export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/languages/{LANGUAGE}/formatted_data/prepositions.json" with open( export_path, diff --git a/src/scribe_data/extract_transform/languages/German/verbs/format_verbs.py b/src/scribe_data/extract_transform/languages/German/verbs/format_verbs.py index a24cae5b1..de433517b 100644 --- a/src/scribe_data/extract_transform/languages/German/verbs/format_verbs.py +++ b/src/scribe_data/extract_transform/languages/German/verbs/format_verbs.py @@ -20,17 +20,19 @@ PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src" sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC) -from scribe_data.utils import get_path_from_et_dir +from scribe_data.utils import get_path_from_et_dir # noqa: E402 file_path = sys.argv[0] update_data_in_use = False # check if update_data.py is being used -if f"{LANGUAGE}/verbs/" not in file_path: +if f"languages/{LANGUAGE}/verbs/" not in file_path: with open("verbs_queried.json", encoding="utf-8") as f: verbs_list = json.load(f) else: update_data_in_use = True - with open(f"./{LANGUAGE}/verbs/verbs_queried.json", encoding="utf-8") as f: + with open( + f"./languages/{LANGUAGE}/verbs/verbs_queried.json", encoding="utf-8" + ) as f: verbs_list = json.load(f) verbs_formatted = {} @@ -156,7 +158,7 @@ def assign_past_participle(verb, tense): org_path = get_path_from_et_dir() export_path = "../formatted_data/verbs.json" if update_data_in_use: - export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/{LANGUAGE}/formatted_data/verbs.json" + export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/languages/{LANGUAGE}/formatted_data/verbs.json" with open( export_path, diff --git a/src/scribe_data/extract_transform/languages/Italian/nouns/format_nouns.py b/src/scribe_data/extract_transform/languages/Italian/nouns/format_nouns.py index 87d4abcfb..e213c98e7 100644 --- a/src/scribe_data/extract_transform/languages/Italian/nouns/format_nouns.py +++ b/src/scribe_data/extract_transform/languages/Italian/nouns/format_nouns.py @@ -15,17 +15,19 @@ PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src" sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC) -from scribe_data.utils import get_path_from_et_dir +from scribe_data.utils import get_path_from_et_dir # noqa: E402 file_path = sys.argv[0] update_data_in_use = False # check if update_data.py is being used -if f"{LANGUAGE}/nouns/" not in file_path: +if f"languages/{LANGUAGE}/nouns/" not in file_path: with open("nouns_queried.json", encoding="utf-8") as f: nouns_list = json.load(f) else: update_data_in_use = True - with open(f"./{LANGUAGE}/nouns/nouns_queried.json", encoding="utf-8") as f: + with open( + f"./languages/{LANGUAGE}/nouns/nouns_queried.json", encoding="utf-8" + ) as f: nouns_list = json.load(f) @@ -125,7 +127,7 @@ def order_annotations(annotation): org_path = get_path_from_et_dir() export_path = "../formatted_data/nouns.json" if update_data_in_use: - export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/{LANGUAGE}/formatted_data/nouns.json" + export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/languages/{LANGUAGE}/formatted_data/nouns.json" with open( export_path, diff --git a/src/scribe_data/extract_transform/languages/Italian/verbs/format_verbs.py b/src/scribe_data/extract_transform/languages/Italian/verbs/format_verbs.py index c908194ef..653ce98bf 100644 --- a/src/scribe_data/extract_transform/languages/Italian/verbs/format_verbs.py +++ b/src/scribe_data/extract_transform/languages/Italian/verbs/format_verbs.py @@ -15,17 +15,19 @@ PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src" sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC) -from scribe_data.utils import get_path_from_et_dir +from scribe_data.utils import get_path_from_et_dir # noqa: E402 file_path = sys.argv[0] update_data_in_use = False # check if update_data.py is being used -if f"{LANGUAGE}/verbs/" not in file_path: +if f"languages/{LANGUAGE}/verbs/" not in file_path: with open("verbs_queried.json", encoding="utf-8") as f: verbs_list = json.load(f) else: update_data_in_use = True - with open(f"./{LANGUAGE}/verbs/verbs_queried.json", encoding="utf-8") as f: + with open( + f"./languages/{LANGUAGE}/verbs/verbs_queried.json", encoding="utf-8" + ) as f: verbs_list = json.load(f) verbs_formatted = {} @@ -65,7 +67,7 @@ org_path = get_path_from_et_dir() export_path = "../formatted_data/verbs.json" if update_data_in_use: - export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/{LANGUAGE}/formatted_data/verbs.json" + export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/languages/{LANGUAGE}/formatted_data/verbs.json" with open( export_path, diff --git a/src/scribe_data/extract_transform/languages/Portuguese/nouns/format_nouns.py b/src/scribe_data/extract_transform/languages/Portuguese/nouns/format_nouns.py index 15c53ee08..9c438a392 100644 --- a/src/scribe_data/extract_transform/languages/Portuguese/nouns/format_nouns.py +++ b/src/scribe_data/extract_transform/languages/Portuguese/nouns/format_nouns.py @@ -15,17 +15,19 @@ PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src" sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC) -from scribe_data.utils import get_path_from_et_dir +from scribe_data.utils import get_path_from_et_dir # noqa: E402 file_path = sys.argv[0] update_data_in_use = False # check if update_data.py is being used -if f"{LANGUAGE}/nouns/" not in file_path: +if f"languages/{LANGUAGE}/nouns/" not in file_path: with open("nouns_queried.json", encoding="utf-8") as f: nouns_list = json.load(f) else: update_data_in_use = True - with open(f"./{LANGUAGE}/nouns/nouns_queried.json", encoding="utf-8") as f: + with open( + f"./languages/{LANGUAGE}/nouns/nouns_queried.json", encoding="utf-8" + ) as f: nouns_list = json.load(f) @@ -125,7 +127,7 @@ def order_annotations(annotation): org_path = get_path_from_et_dir() export_path = "../formatted_data/nouns.json" if update_data_in_use: - export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/{LANGUAGE}/formatted_data/nouns.json" + export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/languages/{LANGUAGE}/formatted_data/nouns.json" with open( export_path, diff --git a/src/scribe_data/extract_transform/languages/Portuguese/verbs/format_verbs.py b/src/scribe_data/extract_transform/languages/Portuguese/verbs/format_verbs.py index f96de4dae..a23c3b8a5 100644 --- a/src/scribe_data/extract_transform/languages/Portuguese/verbs/format_verbs.py +++ b/src/scribe_data/extract_transform/languages/Portuguese/verbs/format_verbs.py @@ -15,17 +15,19 @@ PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src" sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC) -from scribe_data.utils import get_path_from_et_dir +from scribe_data.utils import get_path_from_et_dir # noqa: E402 file_path = sys.argv[0] update_data_in_use = False # check if update_data.py is being used -if f"{LANGUAGE}/verbs/" not in file_path: +if f"languages/{LANGUAGE}/verbs/" not in file_path: with open("verbs_queried.json", encoding="utf-8") as f: verbs_list = json.load(f) else: update_data_in_use = True - with open(f"./{LANGUAGE}/verbs/verbs_queried.json", encoding="utf-8") as f: + with open( + f"./languages/{LANGUAGE}/verbs/verbs_queried.json", encoding="utf-8" + ) as f: verbs_list = json.load(f) verbs_formatted = {} @@ -71,7 +73,7 @@ org_path = get_path_from_et_dir() export_path = "../formatted_data/verbs.json" if update_data_in_use: - export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/{LANGUAGE}/formatted_data/verbs.json" + export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/languages/{LANGUAGE}/formatted_data/verbs.json" with open( export_path, diff --git a/src/scribe_data/extract_transform/languages/Russian/nouns/format_nouns.py b/src/scribe_data/extract_transform/languages/Russian/nouns/format_nouns.py index c983e38c6..9694feef8 100644 --- a/src/scribe_data/extract_transform/languages/Russian/nouns/format_nouns.py +++ b/src/scribe_data/extract_transform/languages/Russian/nouns/format_nouns.py @@ -15,17 +15,19 @@ PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src" sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC) -from scribe_data.utils import get_path_from_et_dir +from scribe_data.utils import get_path_from_et_dir # noqa: E402 file_path = sys.argv[0] update_data_in_use = False # check if update_data.py is being used -if f"{LANGUAGE}/nouns/" not in file_path: +if f"languages/{LANGUAGE}/nouns/" not in file_path: with open("nouns_queried.json", encoding="utf-8") as f: nouns_list = json.load(f) else: update_data_in_use = True - with open(f"./{LANGUAGE}/nouns/nouns_queried.json", encoding="utf-8") as f: + with open( + f"./languages/{LANGUAGE}/nouns/nouns_queried.json", encoding="utf-8" + ) as f: nouns_list = json.load(f) @@ -193,7 +195,7 @@ def order_annotations(annotation): org_path = get_path_from_et_dir() export_path = "../formatted_data/nouns.json" if update_data_in_use: - export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/{LANGUAGE}/formatted_data/nouns.json" + export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/languages/{LANGUAGE}/formatted_data/nouns.json" with open( export_path, diff --git a/src/scribe_data/extract_transform/languages/Russian/prepositions/format_prepositions.py b/src/scribe_data/extract_transform/languages/Russian/prepositions/format_prepositions.py index 254921411..239b40a19 100644 --- a/src/scribe_data/extract_transform/languages/Russian/prepositions/format_prepositions.py +++ b/src/scribe_data/extract_transform/languages/Russian/prepositions/format_prepositions.py @@ -15,18 +15,18 @@ PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src" sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC) -from scribe_data.utils import get_path_from_et_dir +from scribe_data.utils import get_path_from_et_dir # noqa: E402 file_path = sys.argv[0] update_data_in_use = False # check if update_data.py is being used -if f"{LANGUAGE}/prepositions/" not in file_path: +if f"languages/{LANGUAGE}/prepositions/" not in file_path: with open("prepositions_queried.json", encoding="utf-8") as f: prepositions_list = json.load(f) else: update_data_in_use = True with open( - f"./{LANGUAGE}/prepositions/prepositions_queried.json", + f"./languages/{LANGUAGE}/prepositions/prepositions_queried.json", encoding="utf-8", ) as f: prepositions_list = json.load(f) @@ -93,7 +93,7 @@ def order_annotations(annotation): org_path = get_path_from_et_dir() export_path = "../formatted_data/prepositions.json" if update_data_in_use: - export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/{LANGUAGE}/formatted_data/prepositions.json" + export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/languages/{LANGUAGE}/formatted_data/prepositions.json" with open( export_path, diff --git a/src/scribe_data/extract_transform/languages/Russian/verbs/format_verbs.py b/src/scribe_data/extract_transform/languages/Russian/verbs/format_verbs.py index 57f6b73d6..2d4bb530b 100644 --- a/src/scribe_data/extract_transform/languages/Russian/verbs/format_verbs.py +++ b/src/scribe_data/extract_transform/languages/Russian/verbs/format_verbs.py @@ -15,17 +15,19 @@ PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src" sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC) -from scribe_data.utils import get_path_from_et_dir +from scribe_data.utils import get_path_from_et_dir # noqa: E402 file_path = sys.argv[0] update_data_in_use = False # check if update_data.py is being used -if f"{LANGUAGE}/verbs/" not in file_path: +if f"languages/{LANGUAGE}/verbs/" not in file_path: with open("verbs_queried.json", encoding="utf-8") as f: verbs_list = json.load(f) else: update_data_in_use = True - with open(f"./{LANGUAGE}/verbs/verbs_queried.json", encoding="utf-8") as f: + with open( + f"./languages/{LANGUAGE}/verbs/verbs_queried.json", encoding="utf-8" + ) as f: verbs_list = json.load(f) verbs_formatted = {} @@ -57,7 +59,7 @@ org_path = get_path_from_et_dir() export_path = "../formatted_data/verbs.json" if update_data_in_use: - export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/{LANGUAGE}/formatted_data/verbs.json" + export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/languages/{LANGUAGE}/formatted_data/verbs.json" with open( export_path, diff --git a/src/scribe_data/extract_transform/languages/Spanish/nouns/format_nouns.py b/src/scribe_data/extract_transform/languages/Spanish/nouns/format_nouns.py index 20b170bf6..0c924ffcb 100644 --- a/src/scribe_data/extract_transform/languages/Spanish/nouns/format_nouns.py +++ b/src/scribe_data/extract_transform/languages/Spanish/nouns/format_nouns.py @@ -15,17 +15,19 @@ PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src" sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC) -from scribe_data.utils import get_path_from_et_dir +from scribe_data.utils import get_path_from_et_dir # noqa: E402 file_path = sys.argv[0] update_data_in_use = False # check if update_data.py is being used -if f"{LANGUAGE}/nouns/" not in file_path: +if f"languages/{LANGUAGE}/nouns/" not in file_path: with open("nouns_queried.json", encoding="utf-8") as f: nouns_list = json.load(f) else: update_data_in_use = True - with open(f"./{LANGUAGE}/nouns/nouns_queried.json", encoding="utf-8") as f: + with open( + f"./languages/{LANGUAGE}/nouns/nouns_queried.json", encoding="utf-8" + ) as f: nouns_list = json.load(f) @@ -125,7 +127,7 @@ def order_annotations(annotation): org_path = get_path_from_et_dir() export_path = "../formatted_data/nouns.json" if update_data_in_use: - export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/{LANGUAGE}/formatted_data/nouns.json" + export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/languages/{LANGUAGE}/formatted_data/nouns.json" with open( export_path, diff --git a/src/scribe_data/extract_transform/languages/Spanish/verbs/format_verbs.py b/src/scribe_data/extract_transform/languages/Spanish/verbs/format_verbs.py index 6a1ab91fb..2c4f200c1 100644 --- a/src/scribe_data/extract_transform/languages/Spanish/verbs/format_verbs.py +++ b/src/scribe_data/extract_transform/languages/Spanish/verbs/format_verbs.py @@ -15,17 +15,19 @@ PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src" sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC) -from scribe_data.utils import get_path_from_et_dir +from scribe_data.utils import get_path_from_et_dir # noqa: E402 file_path = sys.argv[0] update_data_in_use = False # check if update_data.py is being used -if f"{LANGUAGE}/verbs/" not in file_path: +if f"languages/{LANGUAGE}/verbs/" not in file_path: with open("verbs_queried.json", encoding="utf-8") as f: verbs_list = json.load(f) else: update_data_in_use = True - with open(f"./{LANGUAGE}/verbs/verbs_queried.json", encoding="utf-8") as f: + with open( + f"./languages/{LANGUAGE}/verbs/verbs_queried.json", encoding="utf-8" + ) as f: verbs_list = json.load(f) verbs_formatted = {} @@ -71,7 +73,7 @@ org_path = get_path_from_et_dir() export_path = "../formatted_data/verbs.json" if update_data_in_use: - export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/{LANGUAGE}/formatted_data/verbs.json" + export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/languages/{LANGUAGE}/formatted_data/verbs.json" with open( export_path, diff --git a/src/scribe_data/extract_transform/languages/Swedish/nouns/format_nouns.py b/src/scribe_data/extract_transform/languages/Swedish/nouns/format_nouns.py index 522fa4961..ab11ee2a9 100644 --- a/src/scribe_data/extract_transform/languages/Swedish/nouns/format_nouns.py +++ b/src/scribe_data/extract_transform/languages/Swedish/nouns/format_nouns.py @@ -15,17 +15,19 @@ PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src" sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC) -from scribe_data.utils import get_path_from_et_dir +from scribe_data.utils import get_path_from_et_dir # noqa: E402 file_path = sys.argv[0] update_data_in_use = False # check if update_data.py is being used -if f"{LANGUAGE}/nouns/" not in file_path: +if f"languages/{LANGUAGE}/nouns/" not in file_path: with open("nouns_queried.json", encoding="utf-8") as f: nouns_list = json.load(f) else: update_data_in_use = True - with open(f"./{LANGUAGE}/nouns/nouns_queried.json", encoding="utf-8") as f: + with open( + f"./languages/{LANGUAGE}/nouns/nouns_queried.json", encoding="utf-8" + ) as f: nouns_list = json.load(f) @@ -100,9 +102,9 @@ def order_annotations(annotation): nouns_formatted[noun_vals["nominativeSingular"]]["form"] != noun_vals["gender"] ): - nouns_formatted[noun_vals["nominativeSingular"]][ - "form" - ] += "/" + map_genders(noun_vals["gender"]) + nouns_formatted[noun_vals["nominativeSingular"]]["form"] += ( + "/" + map_genders(noun_vals["gender"]) + ) elif nouns_formatted[noun_vals["nominativeSingular"]]["gender"] == "": nouns_formatted[noun_vals["nominativeSingular"]][ @@ -147,9 +149,9 @@ def order_annotations(annotation): nouns_formatted[noun_vals["genitiveSingular"]]["form"] != noun_vals["gender"] ): - nouns_formatted[noun_vals["genitiveSingular"]][ - "form" - ] += "/" + map_genders(noun_vals["gender"]) + nouns_formatted[noun_vals["genitiveSingular"]]["form"] += ( + "/" + map_genders(noun_vals["gender"]) + ) elif nouns_formatted[noun_vals["genitiveSingular"]]["gender"] == "": nouns_formatted[noun_vals["genitiveSingular"]][ @@ -198,7 +200,7 @@ def order_annotations(annotation): org_path = get_path_from_et_dir() export_path = "../formatted_data/nouns.json" if update_data_in_use: - export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/{LANGUAGE}/formatted_data/nouns.json" + export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/languages/{LANGUAGE}/formatted_data/nouns.json" with open( export_path, diff --git a/src/scribe_data/extract_transform/languages/Swedish/verbs/format_verbs.py b/src/scribe_data/extract_transform/languages/Swedish/verbs/format_verbs.py index 6a6d9d406..0ecaadf63 100644 --- a/src/scribe_data/extract_transform/languages/Swedish/verbs/format_verbs.py +++ b/src/scribe_data/extract_transform/languages/Swedish/verbs/format_verbs.py @@ -15,17 +15,19 @@ PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src" sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC) -from scribe_data.utils import get_path_from_et_dir +from scribe_data.utils import get_path_from_et_dir # noqa: E402 file_path = sys.argv[0] update_data_in_use = False # check if update_data.py is being used -if f"{LANGUAGE}/verbs/" not in file_path: +if f"languages/{LANGUAGE}/verbs/" not in file_path: with open("verbs_queried.json", encoding="utf-8") as f: verbs_list = json.load(f) else: update_data_in_use = True - with open(f"./{LANGUAGE}/verbs/verbs_queried.json", encoding="utf-8") as f: + with open( + f"./languages/{LANGUAGE}/verbs/verbs_queried.json", encoding="utf-8" + ) as f: verbs_list = json.load(f) verbs_formatted = {} @@ -64,7 +66,7 @@ org_path = get_path_from_et_dir() export_path = "../formatted_data/verbs.json" if update_data_in_use: - export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/{LANGUAGE}/formatted_data/verbs.json" + export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/languages/{LANGUAGE}/formatted_data/verbs.json" with open( export_path, diff --git a/src/scribe_data/extract_transform/update_data.py b/src/scribe_data/extract_transform/update_data.py index d2f508054..c1898082d 100644 --- a/src/scribe_data/extract_transform/update_data.py +++ b/src/scribe_data/extract_transform/update_data.py @@ -31,10 +31,8 @@ PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src" sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC) -from scribe_data.utils import ( +from scribe_data.utils import ( # noqa: E402 check_and_return_command_line_args, - get_ios_data_path, - get_path_from_et_dir, ) PATH_TO_ET_FILES = "./" @@ -44,7 +42,7 @@ sparql.setReturnFormat(JSON) sparql.setMethod(POST) -with open("../load/_update_files/total_data.json", encoding="utf-8") as f: +with open("src/scribe_data/load/_update_files/total_data.json", encoding="utf-8") as f: current_data = json.load(f) current_languages = list(current_data.keys()) @@ -235,7 +233,9 @@ current_data[lang][target_type] = len(new_keyboard_data) # Update total_data.json. -with open("../load/_update_files/total_data.json", "w", encoding="utf-8") as f: +with open( + "src/scribe_data/load/_update_files/total_data.json", "w", encoding="utf-8" +) as f: json.dump(current_data, f, ensure_ascii=False, indent=0) @@ -257,14 +257,16 @@ # Get the current emoji data so that it can be appended at the end of the table. current_emoji_data_strings = [] -with open("../load/_update_files/data_table.txt", encoding="utf-8") as f: +with open("src/scribe_data/load/_update_files/data_table.txt", encoding="utf-8") as f: old_table_values = f.read() -for l in old_table_values.splitlines(): - current_emoji_data_strings.append(l.split("|")[-2] + "|") +for line in old_table_values.splitlines(): + current_emoji_data_strings.append(line.split("|")[-2] + "|") # Write the new values to the table, which overwrites the emoji keyword values. -with open("../load/_update_files/data_table.txt", "w+", encoding="utf-8") as f: +with open( + "src/scribe_data/load/_update_files/data_table.txt", "w+", encoding="utf-8" +) as f: table_string = str(current_data_df.to_markdown()).replace(" nan ", " - ") # Right justify the data and left justify the language indexes. table_string = ( @@ -276,16 +278,18 @@ # Get the new table values and then rewrite the file with the full table. new_table_value_strings = [] -with open("../load/_update_files/data_table.txt", encoding="utf-8") as f: +with open("src/scribe_data/load/_update_files/data_table.txt", encoding="utf-8") as f: new_table_values = f.read() -for l in new_table_values.splitlines(): +for line in new_table_values.splitlines(): # Replace headers while translation is still in beta and always for prepositions to annotate missing values. - l = l.replace("Translations", "Translations\*") - l = l.replace("Prepositions", "Prepositions†") - new_table_value_strings.append(l) + line = line.replace("Translations", "Translations\*") + line = line.replace("Prepositions", "Prepositions†") + new_table_value_strings.append(line) -with open("../load/_update_files/data_table.txt", "w+", encoding="utf-8") as f: +with open( + "src/scribe_data/load/_update_files/data_table.txt", "w+", encoding="utf-8" +) as f: for i in range(len(new_table_value_strings)): f.writelines(new_table_value_strings[i] + current_emoji_data_strings[i] + "\n") @@ -294,28 +298,32 @@ language_keys = sorted(list(data_added_dict.keys())) # Check if all data added values are 0 and remove if so. -for l in language_keys: - if all(v <= 0 for v in data_added_dict[l].values()): - language_keys.remove(l) +for lang in language_keys: + if all(v <= 0 for v in data_added_dict[lang].values()): + language_keys.remove(lang) -for l in language_keys: - if l == language_keys[0]: - data_added_string += f"- {l} (New):" if l in new_language_list else f"- {l}:" +for lang in language_keys: + if lang == language_keys[0]: + data_added_string += ( + f"- {lang} (New):" if lang in new_language_list else f"- {lang}:" + ) else: data_added_string += ( - f"\n- {l} (New):" if l in new_language_list else f"\n- {l}:" + f"\n- {lang} (New):" if lang in new_language_list else f"\n- {lang}:" ) for wt in word_types_update: - if wt in data_added_dict[l].keys(): - if data_added_dict[l][wt] <= 0: + if wt in data_added_dict[lang].keys(): + if data_added_dict[lang][wt] <= 0: pass - elif data_added_dict[l][wt] == 1: # remove the s for label - data_added_string += f" {data_added_dict[l][wt]} {wt[:-1]}," + elif data_added_dict[lang][wt] == 1: # remove the s for label + data_added_string += f" {data_added_dict[lang][wt]} {wt[:-1]}," else: - data_added_string += f" {data_added_dict[l][wt]:,} {wt}," + data_added_string += f" {data_added_dict[lang][wt]:,} {wt}," data_added_string = data_added_string[:-1] # remove the last comma -with open("../load/_update_files/data_updates.txt", "w+", encoding="utf-8") as f: +with open( + "src/scribe_data/load/_update_files/data_updates.txt", "w+", encoding="utf-8" +) as f: f.writelines(data_added_string)