#57 WIP fix of paths to queried language data and update files

scribe-org · Feb 23, 2024 · 362b5ee
1 parent ff7d3b5
commit 362b5ee
Show file tree

Hide file tree

Showing 20 changed files with 157 additions and 114 deletions.
diff --git a/src/scribe_data/checkquery.py b/src/scribe_data/checkquery.py
@@ -40,7 +40,6 @@
 from SPARQLWrapper import SPARQLExceptions
 from tqdm.auto import tqdm
 
-
 EXIT_SUCCESS = 0
 EXIT_FAILURE = 1
 EXIT_CLI_ERROR = 2
@@ -138,11 +137,11 @@ def all_queries() -> list[QueryFile]:
 
     queries: list[QueryFile] = []
 
-    for root, _, fnames in os.walk(prj_root):
-        for fname in fnames:
-            fpath = Path(root, fname)
-            if fpath.suffix == ".sparql":
-                queries.append(QueryFile(fpath))
+    for root, _, files in os.walk(prj_root):
+        for f in files:
+            file_path = Path(root, f)
+            if file_path.suffix == ".sparql":
+                queries.append(QueryFile(file_path))
 
     return queries
 
@@ -501,7 +500,7 @@ def error_report(failures: list[QueryExecutionException]) -> None:
     qword = "query" if len(failures) == 1 else "queries"
     print(f"\nFollowing {qword} failed:\n", file=sys.stderr)
     for failed_query in failures:
-        print(str(failed_query), file=sys.stderr)
+        print(failed_query, file=sys.stderr)
 
 
 def success_report(successes: list[tuple[QueryFile, dict]], display: bool) -> None:

diff --git a/src/scribe_data/extract_transform/languages/English/nouns/format_nouns.py b/src/scribe_data/extract_transform/languages/English/nouns/format_nouns.py
@@ -15,17 +15,19 @@
 PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
 sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)
 
-from scribe_data.utils import get_path_from_et_dir
+from scribe_data.utils import get_path_from_et_dir  # noqa: E402
 
 file_path = sys.argv[0]
 
 update_data_in_use = False  # check if update_data.py is being used
-if f"{LANGUAGE}/nouns/" not in file_path:
+if f"languages/{LANGUAGE}/nouns/" not in file_path:
     with open("nouns_queried.json", encoding="utf-8") as f:
         nouns_list = json.load(f)
 else:
     update_data_in_use = True
-    with open(f"./{LANGUAGE}/nouns/nouns_queried.json", encoding="utf-8") as f:
+    with open(
+        f"./languages/{LANGUAGE}/nouns/nouns_queried.json", encoding="utf-8"
+    ) as f:
         nouns_list = json.load(f)
 
 nouns_formatted = {}
@@ -94,12 +96,16 @@
 export_dir = "../formatted_data/"
 export_path = os.path.join(export_dir, "nouns.json")
 if update_data_in_use:
-    export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/{LANGUAGE}/formatted_data/nouns.json"
+    export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/languages/{LANGUAGE}/formatted_data/nouns.json"
 
 if not os.path.exists(export_dir):
     os.makedirs(export_dir)
 
-with open(export_path, "w", encoding="utf-8",) as file:
+with open(
+    export_path,
+    "w",
+    encoding="utf-8",
+) as file:
     json.dump(nouns_formatted, file, ensure_ascii=False, indent=0)
 
 print(f"Wrote file nouns.json with {len(nouns_formatted)} nouns.")
diff --git a/src/scribe_data/extract_transform/languages/English/verbs/format_verbs.py b/src/scribe_data/extract_transform/languages/English/verbs/format_verbs.py
@@ -15,17 +15,19 @@
 PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
 sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)
 
-from scribe_data.utils import get_path_from_et_dir
+from scribe_data.utils import get_path_from_et_dir  # noqa: E402
 
 file_path = sys.argv[0]
 
 update_data_in_use = False  # check if update_data.py is being used
-if f"{LANGUAGE}/verbs/" not in file_path:
+if f"languages/{LANGUAGE}/verbs/" not in file_path:
     with open("verbs_queried.json", encoding="utf-8") as f:
         verbs_list = json.load(f)
 else:
     update_data_in_use = True
-    with open(f"./{LANGUAGE}/verbs/verbs_queried.json", encoding="utf-8") as f:
+    with open(
+        f"./languages/{LANGUAGE}/verbs/verbs_queried.json", encoding="utf-8"
+    ) as f:
         verbs_list = json.load(f)
 
 verbs_formatted = {}
@@ -81,7 +83,7 @@
 export_dir = "../formatted_data/"
 export_path = os.path.join(export_dir, "verbs.json")
 if update_data_in_use:
-    export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/{LANGUAGE}/formatted_data/verbs.json"
+    export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/languages/{LANGUAGE}/formatted_data/verbs.json"
 
 if not os.path.exists(export_dir):
     os.makedirs(export_dir)

diff --git a/src/scribe_data/extract_transform/languages/French/nouns/format_nouns.py b/src/scribe_data/extract_transform/languages/French/nouns/format_nouns.py
@@ -15,17 +15,19 @@
 PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
 sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)
 
-from scribe_data.utils import get_path_from_et_dir
+from scribe_data.utils import get_path_from_et_dir  # noqa: E402
 
 file_path = sys.argv[0]
 
 update_data_in_use = False  # check if update_data.py is being used
-if f"{LANGUAGE}/nouns/" not in file_path:
+if f"languages/{LANGUAGE}/nouns/" not in file_path:
     with open("nouns_queried.json", encoding="utf-8") as f:
         nouns_list = json.load(f)
 else:
     update_data_in_use = True
-    with open(f"./{LANGUAGE}/nouns/nouns_queried.json", encoding="utf-8") as f:
+    with open(
+        f"./languages/{LANGUAGE}/nouns/nouns_queried.json", encoding="utf-8"
+    ) as f:
         nouns_list = json.load(f)
 
 
@@ -124,7 +126,7 @@ def order_annotations(annotation):
 org_path = get_path_from_et_dir()
 export_path = "../formatted_data/nouns.json"
 if update_data_in_use:
-    export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/{LANGUAGE}/formatted_data/nouns.json"
+    export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/languages/{LANGUAGE}/formatted_data/nouns.json"
 
 with open(
     export_path,

diff --git a/src/scribe_data/extract_transform/languages/French/verbs/format_verbs.py b/src/scribe_data/extract_transform/languages/French/verbs/format_verbs.py
@@ -15,17 +15,19 @@
 PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
 sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)
 
-from scribe_data.utils import get_path_from_et_dir
+from scribe_data.utils import get_path_from_et_dir  # noqa: E402
 
 file_path = sys.argv[0]
 
 update_data_in_use = False  # check if update_data.py is being used
-if f"{LANGUAGE}/verbs/" not in file_path:
+if f"languages/{LANGUAGE}/verbs/" not in file_path:
     with open("verbs_queried.json", encoding="utf-8") as f:
         verbs_list = json.load(f)
 else:
     update_data_in_use = True
-    with open(f"./{LANGUAGE}/verbs/verbs_queried.json", encoding="utf-8") as f:
+    with open(
+        f"./languages/{LANGUAGE}/verbs/verbs_queried.json", encoding="utf-8"
+    ) as f:
         verbs_list = json.load(f)
 
 verbs_formatted = {}
@@ -77,7 +79,7 @@
 org_path = get_path_from_et_dir()
 export_path = "../formatted_data/verbs.json"
 if update_data_in_use:
-    export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/{LANGUAGE}/formatted_data/verbs.json"
+    export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/languages/{LANGUAGE}/formatted_data/verbs.json"
 
 with open(
     export_path,

diff --git a/src/scribe_data/extract_transform/languages/German/nouns/format_nouns.py b/src/scribe_data/extract_transform/languages/German/nouns/format_nouns.py
@@ -15,17 +15,19 @@
 PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
 sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)
 
-from scribe_data.utils import get_path_from_et_dir
+from scribe_data.utils import get_path_from_et_dir  # noqa: E402
 
 file_path = sys.argv[0]
 
 update_data_in_use = False  # check if update_data.py is being used
-if f"{LANGUAGE}/nouns/" not in file_path:
+if f"languages/{LANGUAGE}/nouns/" not in file_path:
     with open("nouns_queried.json", encoding="utf-8") as f:
         nouns_list = json.load(f)
 else:
     update_data_in_use = True
-    with open(f"./{LANGUAGE}/nouns/nouns_queried.json", encoding="utf-8") as f:
+    with open(
+        f"./languages/{LANGUAGE}/nouns/nouns_queried.json", encoding="utf-8"
+    ) as f:
         nouns_list = json.load(f)
 
 
@@ -193,7 +195,7 @@ def order_annotations(annotation):
 org_path = get_path_from_et_dir()
 export_path = "../formatted_data/nouns.json"
 if update_data_in_use:
-    export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/{LANGUAGE}/formatted_data/nouns.json"
+    export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/languages/{LANGUAGE}/formatted_data/nouns.json"
 
 with open(
     export_path,

diff --git a/src/scribe_data/extract_transform/languages/German/prepositions/format_prepositions.py b/src/scribe_data/extract_transform/languages/German/prepositions/format_prepositions.py
@@ -15,18 +15,18 @@
 PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
 sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)
 
-from scribe_data.utils import get_path_from_et_dir
+from scribe_data.utils import get_path_from_et_dir  # noqa: E402
 
 file_path = sys.argv[0]
 
 update_data_in_use = False  # check if update_data.py is being used
-if f"{LANGUAGE}/prepositions/" not in file_path:
+if f"languages/{LANGUAGE}/prepositions/" not in file_path:
     with open("prepositions_queried.json", encoding="utf-8") as f:
         prepositions_list = json.load(f)
 else:
     update_data_in_use = True
     with open(
-        f"./{LANGUAGE}/prepositions/prepositions_queried.json",
+        f"./languages/{LANGUAGE}/prepositions/prepositions_queried.json",
         encoding="utf-8",
     ) as f:
         prepositions_list = json.load(f)
@@ -121,7 +121,7 @@ def order_annotations(annotation):
 org_path = get_path_from_et_dir()
 export_path = "../formatted_data/prepositions.json"
 if update_data_in_use:
-    export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/{LANGUAGE}/formatted_data/prepositions.json"
+    export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/languages/{LANGUAGE}/formatted_data/prepositions.json"
 
 with open(
     export_path,

diff --git a/src/scribe_data/extract_transform/languages/German/verbs/format_verbs.py b/src/scribe_data/extract_transform/languages/German/verbs/format_verbs.py
@@ -20,17 +20,19 @@
 PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
 sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)
 
-from scribe_data.utils import get_path_from_et_dir
+from scribe_data.utils import get_path_from_et_dir  # noqa: E402
 
 file_path = sys.argv[0]
 
 update_data_in_use = False  # check if update_data.py is being used
-if f"{LANGUAGE}/verbs/" not in file_path:
+if f"languages/{LANGUAGE}/verbs/" not in file_path:
     with open("verbs_queried.json", encoding="utf-8") as f:
         verbs_list = json.load(f)
 else:
     update_data_in_use = True
-    with open(f"./{LANGUAGE}/verbs/verbs_queried.json", encoding="utf-8") as f:
+    with open(
+        f"./languages/{LANGUAGE}/verbs/verbs_queried.json", encoding="utf-8"
+    ) as f:
         verbs_list = json.load(f)
 
 verbs_formatted = {}
@@ -156,7 +158,7 @@ def assign_past_participle(verb, tense):
 org_path = get_path_from_et_dir()
 export_path = "../formatted_data/verbs.json"
 if update_data_in_use:
-    export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/{LANGUAGE}/formatted_data/verbs.json"
+    export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/languages/{LANGUAGE}/formatted_data/verbs.json"
 
 with open(
     export_path,

diff --git a/src/scribe_data/extract_transform/languages/Italian/nouns/format_nouns.py b/src/scribe_data/extract_transform/languages/Italian/nouns/format_nouns.py
@@ -15,17 +15,19 @@
 PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
 sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)
 
-from scribe_data.utils import get_path_from_et_dir
+from scribe_data.utils import get_path_from_et_dir  # noqa: E402
 
 file_path = sys.argv[0]
 
 update_data_in_use = False  # check if update_data.py is being used
-if f"{LANGUAGE}/nouns/" not in file_path:
+if f"languages/{LANGUAGE}/nouns/" not in file_path:
     with open("nouns_queried.json", encoding="utf-8") as f:
         nouns_list = json.load(f)
 else:
     update_data_in_use = True
-    with open(f"./{LANGUAGE}/nouns/nouns_queried.json", encoding="utf-8") as f:
+    with open(
+        f"./languages/{LANGUAGE}/nouns/nouns_queried.json", encoding="utf-8"
+    ) as f:
         nouns_list = json.load(f)
 
 
@@ -125,7 +127,7 @@ def order_annotations(annotation):
 org_path = get_path_from_et_dir()
 export_path = "../formatted_data/nouns.json"
 if update_data_in_use:
-    export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/{LANGUAGE}/formatted_data/nouns.json"
+    export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/languages/{LANGUAGE}/formatted_data/nouns.json"
 
 with open(
     export_path,

diff --git a/src/scribe_data/extract_transform/languages/Italian/verbs/format_verbs.py b/src/scribe_data/extract_transform/languages/Italian/verbs/format_verbs.py
@@ -15,17 +15,19 @@
 PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
 sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)
 
-from scribe_data.utils import get_path_from_et_dir
+from scribe_data.utils import get_path_from_et_dir  # noqa: E402
 
 file_path = sys.argv[0]
 
 update_data_in_use = False  # check if update_data.py is being used
-if f"{LANGUAGE}/verbs/" not in file_path:
+if f"languages/{LANGUAGE}/verbs/" not in file_path:
     with open("verbs_queried.json", encoding="utf-8") as f:
         verbs_list = json.load(f)
 else:
     update_data_in_use = True
-    with open(f"./{LANGUAGE}/verbs/verbs_queried.json", encoding="utf-8") as f:
+    with open(
+        f"./languages/{LANGUAGE}/verbs/verbs_queried.json", encoding="utf-8"
+    ) as f:
         verbs_list = json.load(f)
 
 verbs_formatted = {}
@@ -65,7 +67,7 @@
 org_path = get_path_from_et_dir()
 export_path = "../formatted_data/verbs.json"
 if update_data_in_use:
-    export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/{LANGUAGE}/formatted_data/verbs.json"
+    export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/languages/{LANGUAGE}/formatted_data/verbs.json"
 
 with open(
     export_path,

diff --git a/src/scribe_data/extract_transform/languages/Portuguese/nouns/format_nouns.py b/src/scribe_data/extract_transform/languages/Portuguese/nouns/format_nouns.py
@@ -15,17 +15,19 @@
 PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
 sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)
 
-from scribe_data.utils import get_path_from_et_dir
+from scribe_data.utils import get_path_from_et_dir  # noqa: E402
 
 file_path = sys.argv[0]
 
 update_data_in_use = False  # check if update_data.py is being used
-if f"{LANGUAGE}/nouns/" not in file_path:
+if f"languages/{LANGUAGE}/nouns/" not in file_path:
     with open("nouns_queried.json", encoding="utf-8") as f:
         nouns_list = json.load(f)
 else:
     update_data_in_use = True
-    with open(f"./{LANGUAGE}/nouns/nouns_queried.json", encoding="utf-8") as f:
+    with open(
+        f"./languages/{LANGUAGE}/nouns/nouns_queried.json", encoding="utf-8"
+    ) as f:
         nouns_list = json.load(f)
 
 
@@ -125,7 +127,7 @@ def order_annotations(annotation):
 org_path = get_path_from_et_dir()
 export_path = "../formatted_data/nouns.json"
 if update_data_in_use:
-    export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/{LANGUAGE}/formatted_data/nouns.json"
+    export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/languages/{LANGUAGE}/formatted_data/nouns.json"
 
 with open(
     export_path,

diff --git a/src/scribe_data/extract_transform/languages/Portuguese/verbs/format_verbs.py b/src/scribe_data/extract_transform/languages/Portuguese/verbs/format_verbs.py
@@ -15,17 +15,19 @@
 PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
 sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)
 
-from scribe_data.utils import get_path_from_et_dir
+from scribe_data.utils import get_path_from_et_dir  # noqa: E402
 
 file_path = sys.argv[0]
 
 update_data_in_use = False  # check if update_data.py is being used
-if f"{LANGUAGE}/verbs/" not in file_path:
+if f"languages/{LANGUAGE}/verbs/" not in file_path:
     with open("verbs_queried.json", encoding="utf-8") as f:
         verbs_list = json.load(f)
 else:
     update_data_in_use = True
-    with open(f"./{LANGUAGE}/verbs/verbs_queried.json", encoding="utf-8") as f:
+    with open(
+        f"./languages/{LANGUAGE}/verbs/verbs_queried.json", encoding="utf-8"
+    ) as f:
         verbs_list = json.load(f)
 
 verbs_formatted = {}
@@ -71,7 +73,7 @@
 org_path = get_path_from_et_dir()
 export_path = "../formatted_data/verbs.json"
 if update_data_in_use:
-    export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/{LANGUAGE}/formatted_data/verbs.json"
+    export_path = f"{org_path}/Scribe-Data/src/scribe_data/extract_transform/languages/{LANGUAGE}/formatted_data/verbs.json"
 
 with open(
     export_path,