Skip to content

Commit

Permalink
#125 further path fixes and correct changes to data query process
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewtavis committed Aug 18, 2024
1 parent c146355 commit b9fa981
Show file tree
Hide file tree
Showing 36 changed files with 126 additions and 335 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ Emojis for the following are chosen based on [gitmoji](https://gitmoji.dev/).
- Case and gender map reference functions were updated to use hashmaps.
- The SPARQLWrapper access method has been extracted to the Wikidata utils and is imported into the files that need it ([#164](https://github.com/scribe-org/Scribe-Data/issues/164)).
- Export data paths have been converted to centrally saved variables to reduce hard-coded string repetition.
- Many files were renamed, including `update_data.py`, which is now `query_data.py`.
- Paths within the package have been updated to work for all operating systems via `pathlib` ([#125](https://github.com/scribe-org/Scribe-Data/issues/125)).
- The language formatting scripts have been dramatically simplified now that all export paths are the same.

## Scribe-Data 3.3.0

Expand Down
13 changes: 6 additions & 7 deletions docs/source/scribe_data/cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -113,14 +113,13 @@ Behavior and Output:
.. code-block:: text
Existing file(s) found for English verbs:
1. verbs_2024_07_07_00_28_44.json
2. verbs_2024_07_07_00_29_20.json
1. verbs.json
Choose an option:
1. Keep existing (skip update)
2. Overwrite existing
3. Keep both
4. Cancel
Enter your choice (1-4):
1. Overwrite existing data (press 'o')
2. Skip process (press anything else)
Enter your choice:
3. After making a selection, the get process begins:

Expand Down
2 changes: 1 addition & 1 deletion docs/source/scribe_data/wikidata/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ wikidata
:maxdepth: 1

query_profanity
update_data
query_data
8 changes: 8 additions & 0 deletions docs/source/scribe_data/wikidata/query_data.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
query_data.py
=============

`View code on GitHub <https://github.com/scribe-org/Scribe-Data/tree/main/src/scribe_data/wikidata/query_data.py>`_

.. automodule:: scribe_data.wikidata.query_data
:members:
:private-members:
8 changes: 0 additions & 8 deletions docs/source/scribe_data/wikidata/update_data.rst

This file was deleted.

35 changes: 3 additions & 32 deletions src/scribe_data/cli/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

from scribe_data.cli.convert import convert_to_csv_or_tsv, export_json
from scribe_data.utils import DEFAULT_JSON_EXPORT_DIR
from scribe_data.wikidata.update_data import update_data
from scribe_data.wikidata.query_data import query_data

DATA_DIR = Path(DEFAULT_JSON_EXPORT_DIR)

Expand All @@ -53,7 +53,7 @@ def get_data(

if all:
print("Updating all languages and data types ...")
update_data()
query_data()

elif data_type in ["emoji-keywords", "emoji_keywords"]:
for lang in languages:
Expand Down Expand Up @@ -82,7 +82,7 @@ def get_data(
elif language or data_type:
data_type = [data_type] if data_type else None
print(f"Updating data for language: {language}, data type: {data_type}")
update_data(languages, data_type)
query_data(languages, data_type)

else:
raise ValueError(
Expand Down Expand Up @@ -111,32 +111,3 @@ def get_data(
print(
f"No output directory specified for exporting results. Updated data was saved in: {Path(DEFAULT_JSON_EXPORT_DIR).resolve()}."
)

# Check if data was actually updated.
data_path = Path(DEFAULT_JSON_EXPORT_DIR)
if language:
lang_path = data_path / language.capitalize()
if not lang_path.exists():
print(f"Warning: No data directory found for language '{language}'")

elif data_type:
dt_file = lang_path / f"{data_type.replace('-', '_')}.json"
if not dt_file.exists():
print(
f"Warning: No data file found for '{language}' {data_type}. The command must not have worked."
)

else:
print(f"Data updated for language: {language}")

elif data_type:
dt_updated = any(
lang_dir.is_dir() and (lang_dir / f"{data_type}.json").exists()
for lang_dir in data_path.iterdir()
)

if not dt_updated:
print(f"Warning: No data files found for data type '{data_type}'")

else:
print(f"Data updated for data type: {data_type}")
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

export_formatted_data(
formatted_data=emoji_keywords_dict,
update_data_in_use=True,
query_data_in_use=True,
language=LANGUAGE,
data_type=DATA_TYPE,
)
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,13 @@
"""

import collections
import os
import sys

from scribe_data.utils import export_formatted_data, load_queried_data

LANGUAGE = "English"
DATA_TYPE = "nouns"
file_path = sys.argv[0]

nouns_list, update_data_in_use, data_path = load_queried_data(
file_path=file_path, language=LANGUAGE, data_type=DATA_TYPE
)
nouns_list, data_path = load_queried_data(language=LANGUAGE, data_type=DATA_TYPE)

nouns_formatted = {}

Expand Down Expand Up @@ -98,9 +93,6 @@

export_formatted_data(
formatted_data=nouns_formatted,
update_data_in_use=update_data_in_use,
language=LANGUAGE,
data_type=DATA_TYPE,
)

os.remove(data_path)
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,13 @@
"""

import collections
import os
import sys

from scribe_data.utils import export_formatted_data, load_queried_data

LANGUAGE = "English"
DATA_TYPE = "verbs"
file_path = sys.argv[0]

verbs_list, update_data_in_use, data_path = load_queried_data(
file_path=file_path, language=LANGUAGE, data_type=DATA_TYPE
)
verbs_list, data_path = load_queried_data(language=LANGUAGE, data_type=DATA_TYPE)

verbs_formatted = {}

Expand Down Expand Up @@ -141,9 +136,6 @@

export_formatted_data(
formatted_data=verbs_formatted,
update_data_in_use=update_data_in_use,
language=LANGUAGE,
data_type=DATA_TYPE,
)

os.remove(data_path)
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

export_formatted_data(
formatted_data=emoji_keywords_dict,
update_data_in_use=True,
query_data_in_use=True,
language=LANGUAGE,
data_type=DATA_TYPE,
)
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@
"""

import collections
import os
import sys

from scribe_data.utils import (
export_formatted_data,
Expand All @@ -33,11 +31,8 @@

LANGUAGE = "French"
DATA_TYPE = "nouns"
file_path = sys.argv[0]

nouns_list, update_data_in_use, data_path = load_queried_data(
file_path=file_path, language=LANGUAGE, data_type=DATA_TYPE
)
nouns_list, data_path = load_queried_data(language=LANGUAGE, data_type=DATA_TYPE)

nouns_formatted = {}

Expand Down Expand Up @@ -103,9 +98,6 @@

export_formatted_data(
formatted_data=nouns_formatted,
update_data_in_use=update_data_in_use,
language=LANGUAGE,
data_type=DATA_TYPE,
)

os.remove(data_path)
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,13 @@
"""

import collections
import os
import sys

from scribe_data.utils import export_formatted_data, load_queried_data

LANGUAGE = "French"
DATA_TYPE = "verbs"
file_path = sys.argv[0]

verbs_list, update_data_in_use, data_path = load_queried_data(
file_path=file_path, language=LANGUAGE, data_type=DATA_TYPE
)
verbs_list, data_path = load_queried_data(language=LANGUAGE, data_type=DATA_TYPE)

verbs_formatted = {}

Expand Down Expand Up @@ -82,9 +77,6 @@

export_formatted_data(
formatted_data=verbs_formatted,
update_data_in_use=update_data_in_use,
language=LANGUAGE,
data_type=DATA_TYPE,
)

os.remove(data_path)
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

export_formatted_data(
formatted_data=emoji_keywords_dict,
update_data_in_use=True,
query_data_in_use=True,
language=LANGUAGE,
data_type=DATA_TYPE,
)
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@
"""

import collections
import os
import sys

from scribe_data.utils import (
export_formatted_data,
Expand All @@ -33,11 +31,8 @@

LANGUAGE = "German"
DATA_TYPE = "nouns"
file_path = sys.argv[0]

nouns_list, update_data_in_use, data_path = load_queried_data(
file_path=file_path, language=LANGUAGE, data_type=DATA_TYPE
)
nouns_list, data_path = load_queried_data(language=LANGUAGE, data_type=DATA_TYPE)

nouns_formatted = {}

Expand Down Expand Up @@ -165,9 +160,6 @@

export_formatted_data(
formatted_data=nouns_formatted,
update_data_in_use=update_data_in_use,
language=LANGUAGE,
data_type=DATA_TYPE,
)

os.remove(data_path)
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@
"""

import collections
import os
import sys

from scribe_data.utils import (
export_formatted_data,
Expand All @@ -33,11 +31,8 @@

LANGUAGE = "German"
DATA_TYPE = "prepositions"
file_path = sys.argv[0]

prepositions_list, update_data_in_use, data_path = load_queried_data(
file_path=file_path, language=LANGUAGE, data_type=DATA_TYPE
)
prepositions_list, data_path = load_queried_data(language=LANGUAGE, data_type=DATA_TYPE)

prepositions_formatted = {}

Expand Down Expand Up @@ -94,9 +89,6 @@

export_formatted_data(
formatted_data=prepositions_formatted,
update_data_in_use=update_data_in_use,
language=LANGUAGE,
data_type=DATA_TYPE,
)

os.remove(data_path)
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,13 @@
"""

import collections
import os
import sys

from scribe_data.utils import export_formatted_data, load_queried_data

LANGUAGE = "German"
DATA_TYPE = "verbs"
file_path = sys.argv[0]

verbs_list, update_data_in_use, data_path = load_queried_data(
file_path=file_path, language=LANGUAGE, data_type=DATA_TYPE
)
verbs_list, data_path = load_queried_data(language=LANGUAGE, data_type=DATA_TYPE)

verbs_formatted = {}

Expand Down Expand Up @@ -161,9 +156,6 @@ def assign_past_participle(verb, tense):

export_formatted_data(
formatted_data=verbs_formatted,
update_data_in_use=update_data_in_use,
language=LANGUAGE,
data_type=DATA_TYPE,
)

os.remove(data_path)
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

export_formatted_data(
formatted_data=emoji_keywords_dict,
update_data_in_use=True,
query_data_in_use=True,
language=LANGUAGE,
data_type=DATA_TYPE,
)
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@
"""

import collections
import os
import sys

from scribe_data.utils import (
export_formatted_data,
Expand All @@ -33,11 +31,8 @@

LANGUAGE = "Italian"
DATA_TYPE = "nouns"
file_path = sys.argv[0]

nouns_list, update_data_in_use, data_path = load_queried_data(
file_path=file_path, language=LANGUAGE, data_type=DATA_TYPE
)
nouns_list, data_path = load_queried_data(language=LANGUAGE, data_type=DATA_TYPE)

nouns_formatted = {}

Expand Down Expand Up @@ -104,9 +99,6 @@

export_formatted_data(
formatted_data=nouns_formatted,
update_data_in_use=update_data_in_use,
language=LANGUAGE,
data_type=DATA_TYPE,
)

os.remove(data_path)
Loading

0 comments on commit b9fa981

Please sign in to comment.