From 264c4cc0be8f7fa4180d9383bbee996de597c84b Mon Sep 17 00:00:00 2001 From: MdSayemkhan Date: Sun, 19 Jan 2025 18:39:52 +0600 Subject: [PATCH 1/2] fix: resolve numpydoc docstring warnings - Convert function summaries to infinitive verb form - Fix parameter and return value descriptions - Make docstrings more concise and accurate Fixes #547 --- src/scribe_data/check/check_query_forms.py | 32 +++++------ .../check/check_query_identifiers.py | 8 +-- src/scribe_data/cli/cli_utils.py | 25 ++++++--- src/scribe_data/cli/convert.py | 56 +++++++------------ src/scribe_data/cli/download.py | 36 ++++++------ src/scribe_data/cli/interactive.py | 6 +- src/scribe_data/cli/list.py | 28 +++++++--- src/scribe_data/cli/total.py | 16 +++--- src/scribe_data/load/data_to_sqlite.py | 21 ++++--- src/scribe_data/load/send_dbs_to_scribe.py | 5 +- .../unicode/generate_emoji_keywords.py | 3 +- src/scribe_data/unicode/process_unicode.py | 2 +- src/scribe_data/unicode/unicode_utils.py | 5 +- src/scribe_data/utils.py | 3 +- src/scribe_data/wikidata/check_query/check.py | 15 +++-- src/scribe_data/wikidata/check_query/query.py | 16 +++--- src/scribe_data/wikidata/format_data.py | 9 ++- src/scribe_data/wikidata/query_data.py | 4 +- src/scribe_data/wikipedia/process_wiki.py | 2 +- 19 files changed, 147 insertions(+), 145 deletions(-) diff --git a/src/scribe_data/check/check_query_forms.py b/src/scribe_data/check/check_query_forms.py index af936b75b..cd9b0a1eb 100644 --- a/src/scribe_data/check/check_query_forms.py +++ b/src/scribe_data/check/check_query_forms.py @@ -175,7 +175,7 @@ def extract_form_qids(form_text: str): def check_form_label(form_text: str): """ - Checks that the label of the form matches the representation label. + Check that the label of the form matches the representation label. 
Parameters ---------- @@ -217,11 +217,11 @@ def check_form_label(form_text: str): def check_query_formatting(form_text: str): """ - Checks the formatting of the given SPARQL query text for common formatting issues. + Check the formatting of the given SPARQL query text for common formatting issues. Parameters ---------- - query_text : str + form_text : str The SPARQL query text to check. Returns @@ -245,7 +245,7 @@ def check_query_formatting(form_text: str): def return_correct_form_label(qids: list): """ - Returns the correct label for a lexeme form representation given the QIDs that compose it. + Return the correct label for a lexeme form representation given the QIDs that compose it. Parameters ---------- @@ -254,8 +254,8 @@ def return_correct_form_label(qids: list): Returns ------- - correct_label : str - The label for the representation given the QIDs. + str + The label for the representation given the QIDs. """ if not qids: return "Invalid query formatting found" @@ -281,22 +281,17 @@ def return_correct_form_label(qids: list): def validate_forms(query_text: str) -> str: """ - Validates the SPARQL query by checking: - 1. Order of variables in SELECT and WHERE clauses - 2. Presence and correct definition of forms - 3. Form labels and representations - 4. Query formatting + Validate a SPARQL query by checking variable order in SELECT and WHERE clauses, presence and correct definition of forms, form labels and representations, and query formatting. Parameters ---------- - query_file : str + query_text : str The SPARQL query text as a string. Returns ------- str - Error message if there are any issues with the order of variables or forms, - otherwise an empty string. + Error message if there are any issues with the order of variables or forms, otherwise an empty string. 
""" select_pattern = r"SELECT\s+(.*?)\s+WHERE" @@ -377,7 +372,7 @@ def validate_forms(query_text: str) -> str: def check_docstring(query_text: str) -> bool: """ - Checks the docstring of a SPARQL query text to ensure it follows the standard format. + Check the docstring of a SPARQL query text to ensure it follows the standard format. Parameters ---------- @@ -419,7 +414,7 @@ def check_docstring(query_text: str) -> bool: def check_forms_order(query_text): """ - Parses and orders variable names from a SPARQL query text based on a lexeme_form_metadata.json. + Parse and order variable names from a SPARQL query text based on a lexeme_form_metadata.json. Parameters ---------- @@ -496,8 +491,7 @@ def compare_key(components): def check_optional_qid_order(query_file: str) -> str: """ - Checks the order of QIDs in optional statements within a SPARQL query file to ensure they - align with the expected sequence based on label features. + Check the order of QIDs in optional statements within a SPARQL query file to ensure they align with the expected sequence based on label features. Parameters ---------- @@ -538,7 +532,7 @@ def check_optional_qid_order(query_file: str) -> str: def check_query_forms() -> None: """ - Validates SPARQL queries in the language data directory to check for correct form QIDs and formatting. + Validate SPARQL queries in the language data directory to check for correct form QIDs and formatting. """ error_output = "" index = 0 diff --git a/src/scribe_data/check/check_query_identifiers.py b/src/scribe_data/check/check_query_identifiers.py index 5337b972c..d85e96b1b 100644 --- a/src/scribe_data/check/check_query_identifiers.py +++ b/src/scribe_data/check/check_query_identifiers.py @@ -37,7 +37,7 @@ def is_valid_language(query_file: Path, lang_qid: str) -> bool: """ - Validates the language QID against the expected QID for the directory. + Validate the language QID against the expected QID for the directory. 
Parameters ---------- @@ -75,7 +75,7 @@ def is_valid_language(query_file: Path, lang_qid: str) -> bool: def is_valid_data_type(query_file: Path, data_type_qid: str) -> bool: """ - Validates the data type QID against the expected QID for the directory. + Validate the data type QID against the expected QID for the directory. Parameters ---------- @@ -103,7 +103,7 @@ def is_valid_data_type(query_file: Path, data_type_qid: str) -> bool: def extract_qid_from_sparql(file_path: Path, pattern: str) -> str: """ - Extracts the QID from a SPARQL query file based on the provided pattern. + Extract the QID from a SPARQL query file based on the provided pattern. Parameters ---------- @@ -137,7 +137,7 @@ def extract_qid_from_sparql(file_path: Path, pattern: str) -> str: def check_query_identifiers() -> None: """ - Validates SPARQL queries in the language data directory to check for correct language and data type QIDs. + Validate SPARQL queries in the language data directory to check for correct language and data type QIDs. """ language_pattern = r"\?lexeme dct:language wd:Q\d+" data_type_pattern = r"wikibase:lexicalCategory\s+wd:Q\d+" diff --git a/src/scribe_data/cli/cli_utils.py b/src/scribe_data/cli/cli_utils.py index 6fa466515..cc129a9ab 100644 --- a/src/scribe_data/cli/cli_utils.py +++ b/src/scribe_data/cli/cli_utils.py @@ -30,7 +30,7 @@ def correct_data_type(data_type: str) -> str: """ - Corrects common versions of data type arguments so users can choose between them. + Correct common versions of data type arguments to their standardized form. Parameters ---------- @@ -39,7 +39,8 @@ def correct_data_type(data_type: str) -> str: Returns ------- - The data_type value or a corrected version of it. + str + The data_type value or a corrected version of it. 
""" all_data_types = data_type_metadata.keys() @@ -56,7 +57,14 @@ def correct_data_type(data_type: str) -> str: def print_formatted_data(data: Union[dict, list], data_type: str) -> None: """ - Prints a formatted output from the Scribe-Data CLI. + Print formatted output from the Scribe-Data CLI. + + Parameters + ---------- + data : Union[dict, list] + The data to format and print. + data_type : str + The type of data being printed, used to determine formatting style. """ if not data: print(f"No data available for data type '{data_type}'.") @@ -118,7 +126,7 @@ def validate_language_and_data_type( data_type: Union[str, List[str], bool, None], ): """ - Validates that the language and data type QIDs are not None. + Validate that the language and data type QIDs are not None. Parameters ---------- @@ -128,6 +136,11 @@ def validate_language_and_data_type( data_type : str or list The data type(s) to validate. + Returns + ------- + bool + True if validation passes, otherwise raises ValueError. + Raises ------ ValueError @@ -136,16 +149,14 @@ def validate_language_and_data_type( def validate_single_item(item, valid_options, item_type): """ - Validates a single item against a list of valid options, providing error messages and suggestions. + Validate a single item against a list of valid options, providing error messages and suggestions. Parameters ---------- item : str The item to validate. - valid_options : list A list of valid options against which the item will be validated. - item_type : str A description of the item type (e.g., "language", "data-type") used in error messages. diff --git a/src/scribe_data/cli/convert.py b/src/scribe_data/cli/convert.py index 275182444..aba4db445 100644 --- a/src/scribe_data/cli/convert.py +++ b/src/scribe_data/cli/convert.py @@ -53,28 +53,23 @@ def convert_to_json( ---------- language : str The language of the file to convert. - data_type : Union[str, List[str]] The data type of the file to convert. 
- output_type : str The output format, should be "json". - input_file : str The input CSV/TSV file path. - output_dir : Path The output directory path for results. - overwrite : bool Whether to overwrite existing files. - identifier_case : str The case format for identifiers. Default is "camel". Returns ------- None + A JSON file. """ if not language: raise ValueError(f"Language '{language.capitalize()}' is not recognized.") @@ -207,13 +202,10 @@ def convert_to_csv_or_tsv( ---------- language : str The language of the file to convert. - data_type : Union[str, List[str]] The data type of the file to convert. - output_type : str The output format, should be "csv" or "tsv". - input_file : str The input JSON file path. @@ -229,6 +221,7 @@ def convert_to_csv_or_tsv( Returns ------- None + A CSV/TSV file. """ if not language: raise ValueError(f"Language '{language.capitalize()}' is not recognized.") @@ -387,34 +380,29 @@ def convert_to_sqlite( identifier_case: str = "camel", ) -> None: """ - Converts a Scribe-Data output file to an SQLite file. + Convert a Scribe-Data output file to SQLite format. Parameters ---------- language : str The language of the file to convert. - data_type : str The data type of the file to convert. - output_type : str The output format, should be "sqlite". - - input_file : Path + input_file : str, optional The input file path for the data to be converted. - - output_dir : Path + output_dir : str, optional The output directory path for results. - - overwrite : bool + overwrite : bool, optional Whether to overwrite existing files. - - identifier_case : str + identifier_case : str, optional The case format for identifiers. Default is "camel". Returns ------- - A SQLite file saved in the given location. + None + A SQLite file saved in the given location. 
""" if input_file: input_file = Path(input_file) @@ -461,33 +449,27 @@ def convert_wrapper( Parameters ---------- - language : Union[str, List[str]] + languages : Union[str, List[str]] The language(s) of the data to convert. - - data_type : Union[str, List[str]] + data_types : Union[str, List[str]] The data type(s) of the data to convert. - output_type : str - The desired output format. It can be 'json', 'csv', 'tsv', or 'sqlite'. - - input_file : Union[str, List[str]] + The desired output format. Can be 'json', 'csv', 'tsv', or 'sqlite'. + input_files : Union[str, List[str]] The path(s) to the input file(s). - - output_dir : str, optional - The output directory where converted files will be stored. Defaults to None. - + output_dir : str + The output directory where converted files will be stored. overwrite : bool, optional - Whether to overwrite existing output files. Defaults to False. - - identifier_case : str + Whether to overwrite existing output files. + identifier_case : str, optional The case format for identifiers. Default is "camel". - - all : bool + all : bool, optional Convert all languages and data types. Returns ------- None + This function does not return any value; it performs a conversion operation. """ output_type = output_type.lower() diff --git a/src/scribe_data/cli/download.py b/src/scribe_data/cli/download.py index f7f29adf9..69bd42fa4 100644 --- a/src/scribe_data/cli/download.py +++ b/src/scribe_data/cli/download.py @@ -37,12 +37,7 @@ def parse_date(date_string): """ - Parses a date string into a `datetime.date` object. - - Supported formats: - - YYYYMMDD - - YYYY/MM/DD - - YYYY-MM-DD + Parse a date string into a datetime.date object (formats: YYYYMMDD, YYYY/MM/DD, YYYY-MM-DD). Parameters ---------- @@ -53,7 +48,6 @@ def parse_date(date_string): ------- datetime.date Parsed date object if the format is valid. - None If the date format is invalid. 
""" @@ -75,16 +69,14 @@ def available_closest_lexeme_dumpfile( target_entity: str, other_old_dumps: str, check_wd_dump_exists ): """ - Finds the closest available dump file based on the target date. + Find the closest available dump file based on the target date. Parameters ---------- target_entity : str The target date for which the dump is requested (format: YYYY/MM/DD or similar). - other_old_dumps : list List of available dump folders as strings. - check_wd_dump_exists : function A function to validate if the dump file exists. @@ -92,7 +84,6 @@ def available_closest_lexeme_dumpfile( ------- str The closest available dump file date (as a string). - None If no suitable dump is found. """ @@ -124,13 +115,12 @@ def available_closest_lexeme_dumpfile( def download_wd_lexeme_dump(target_entity: str = "latest-lexemes"): """ - Downloads a Wikimedia lexeme dump based on the specified target entity or date. + Download a Wikimedia lexeme dump based on the specified target entity or date. Parameters ---------- target_entity : str, optional The target dump to download. Defaults to "latest-lexemes". - - If "latest-lexemes", downloads the latest dump. - If a valid date (e.g., YYYYMMDD), attempts to download the dump for that date. @@ -138,7 +128,6 @@ def download_wd_lexeme_dump(target_entity: str = "latest-lexemes"): ------- str The URL of the requested or closest available dump. - None If no suitable dump is found or the request fails. """ @@ -146,17 +135,19 @@ def download_wd_lexeme_dump(target_entity: str = "latest-lexemes"): def check_wd_dump_exists(target_entity): """ - Checks if the specified dump file exists for a target entity. + Check if the specified dump file exists for a target entity. Parameters ---------- - target_entity : str - The target entity or date folder to check. + target_entity : str + The target entity or date folder to check. Returns ------- - str : The URL of the dump file if it exists. - None : If the dump file does not exist. 
+ str + The URL of the dump file if it exists. + None + If the dump file does not exist. """ entity_url = f"{base_url}/{target_entity}/" entity_response = requests.get(entity_url) @@ -234,6 +225,13 @@ def wd_lexeme_dump_download_wrapper( output_dir : str Optional directory path for the downloaded file. Defaults to 'scribe_data_wikidata_dumps_export' directory. + + Returns + ------- + str or None + - If successful and a dump is downloaded, returns the file path to the downloaded dump. + - If an existing usable dump is detected, returns the path to the existing dump. + - Returns None if the user chooses not to proceed with the download or no valid dump URL is found. """ dump_url = download_wd_lexeme_dump(wikidata_dump or "latest-lexemes") diff --git a/src/scribe_data/cli/interactive.py b/src/scribe_data/cli/interactive.py index 5e5dec74b..539006090 100644 --- a/src/scribe_data/cli/interactive.py +++ b/src/scribe_data/cli/interactive.py @@ -209,7 +209,11 @@ def configure_settings(): def run_request(): """ - Runs the interactive mode request given the configuration. + Execute the interactive mode request based on current configuration. + + Returns + ------- + None """ if not config.selected_languages or not config.selected_data_types: rprint("[bold red]Error: Please configure languages and data types.[/bold red]") diff --git a/src/scribe_data/cli/list.py b/src/scribe_data/cli/list.py index 1e10eec7e..134c56c94 100644 --- a/src/scribe_data/cli/list.py +++ b/src/scribe_data/cli/list.py @@ -38,7 +38,11 @@ def list_languages() -> None: """ - Generates a table of languages, their ISO-2 codes and their Wikidata QIDs. + Generate a table of languages with their ISO-2 codes and Wikidata QIDs. + + Returns + ------- + None """ languages = list_all_languages(language_metadata) @@ -64,7 +68,7 @@ def list_languages() -> None: def list_data_types(language: str = None) -> None: """ - Lists all data types or those available for a given language. 
+ List all data types or those available for a given language. Parameters ---------- @@ -128,7 +132,11 @@ def list_data_types(language: str = None) -> None: def list_all() -> None: """ - Lists all available languages and data types. + List all available languages and data types. + + Returns + ------- + None """ list_languages() list_data_types() @@ -136,12 +144,16 @@ def list_all() -> None: def list_languages_for_data_type(data_type: str) -> None: """ - Lists the available languages for a given data type. + List the available languages for a given data type. Parameters ---------- data_type : str The data type to check for. + + Returns + ------- + None """ data_type = correct_data_type(data_type=data_type) all_languages = list_languages_with_metadata_for_data_type(language_metadata) @@ -179,12 +191,14 @@ def list_wrapper( ---------- language : str The language to potentially list data types for. - data_type : str The data type to check for. - - all_bool : boolean + all_bool : bool Whether all languages and data types should be listed. + + Returns + ------- + None """ if (not language and not data_type) or all_bool: list_all() diff --git a/src/scribe_data/cli/total.py b/src/scribe_data/cli/total.py index 89396f724..17d63fd56 100644 --- a/src/scribe_data/cli/total.py +++ b/src/scribe_data/cli/total.py @@ -64,7 +64,7 @@ def get_qid_by_input(input_str): def get_datatype_list(language): """ - Get the data types for a given language based on the project directory structure, including handling sub-languages. + Get the data types for a given language based on the project directory structure. Parameters ---------- @@ -73,7 +73,7 @@ def get_datatype_list(language): Returns ------- - data_types : list[str] or None + list[str] A list of the corresponding data types. 
""" language_key = language.strip().lower() # normalize input @@ -129,17 +129,16 @@ def get_datatype_list(language): def print_total_lexemes(language: str = None): """ - Displays the total number of available entities for all data types for a given language or all the languages. + Print the total number of available entities for all data types. Parameters ---------- - language : str (Default=None) + language : str, optional The language to display data type entity counts for. Outputs ------- - str - A formatted string indicating the language, data type, and total number of lexemes for all the languages, if found. + A formatted string indicating the language, data type, and total number of lexemes for all the languages, if found. """ if language is None: print("Returning total counts for all languages and data types...\n") @@ -159,7 +158,7 @@ def print_total_lexemes(language: str = None): def print_total_header(language, dt, total_lexemes): """ - Prints the header of the total command output. + Print the header of the total command output. """ language_display = ( "All Languages" if language is None else language.capitalize() @@ -357,8 +356,7 @@ def total_wrapper( data_type : Union[str, List[str]] The data type(s) to check for. - - all_bool : boolean + all_bool : bool Whether all languages and data types should be listed. wikidata_dump : Union[str, bool] diff --git a/src/scribe_data/load/data_to_sqlite.py b/src/scribe_data/load/data_to_sqlite.py index 6693c2498..4bd80988f 100644 --- a/src/scribe_data/load/data_to_sqlite.py +++ b/src/scribe_data/load/data_to_sqlite.py @@ -110,15 +110,15 @@ def data_to_sqlite( def create_table(data_type, cols): """ - Creates a table in the language database given a data type for its title and column names. + Create a table in the language database. Parameters ---------- - data_type : str - The name of the table to be created. + data_type : str + The name of the table to be created. 
+ cols : list of str + The names of columns for the new table. - cols : list of strings - The names of columns for the new table. """ # Convert column names to snake_case if requested. cols = [ @@ -131,15 +131,14 @@ def create_table(data_type, cols): def table_insert(data_type, keys): """ - Inserts a row into a language database table. + Insert a row into a language database table. Parameters ---------- - data_type : str - The name of the table to be inserted into - - keys : list of strings - The values to be inserted into the table row + data_type : str + The name of the table to be inserted into. + keys : list of str + The values to be inserted into the table row. """ insert_question_marks = ", ".join(["?"] * len(keys)) cursor.execute( diff --git a/src/scribe_data/load/send_dbs_to_scribe.py b/src/scribe_data/load/send_dbs_to_scribe.py index f6796774d..f57bfc698 100644 --- a/src/scribe_data/load/send_dbs_to_scribe.py +++ b/src/scribe_data/load/send_dbs_to_scribe.py @@ -1,8 +1,9 @@ """ -Updates Scribe apps with the SQLite language databases that are found in the SQLite data export directory. +Update Scribe apps with SQLite language databases that are found in the SQLite data export directory. + Example -------- +-------- python3 src/scribe_data/load/send_dbs_to_scribe.py .. raw:: html diff --git a/src/scribe_data/unicode/generate_emoji_keywords.py b/src/scribe_data/unicode/generate_emoji_keywords.py index 1d33b1587..938d682b9 100644 --- a/src/scribe_data/unicode/generate_emoji_keywords.py +++ b/src/scribe_data/unicode/generate_emoji_keywords.py @@ -36,7 +36,7 @@ def generate_emoji(language, output_dir: str = None): """ - Generates emoji keywords for a specified language and exports the data to the given directory. + Generate emoji keywords for a specified language. This function first checks and installs the PyICU package, which is necessary for the script to run. 
If the installation is successful, it proceeds with generating emoji keywords based on the specified language. @@ -46,7 +46,6 @@ def generate_emoji(language, output_dir: str = None): ---------- language : str The ISO code of the language for which to generate emoji keywords. - output_dir : str, optional The directory where the generated data will be saved. If not specified, the data will be saved in a default directory. diff --git a/src/scribe_data/unicode/process_unicode.py b/src/scribe_data/unicode/process_unicode.py index ad978a529..8a25f9a79 100644 --- a/src/scribe_data/unicode/process_unicode.py +++ b/src/scribe_data/unicode/process_unicode.py @@ -48,7 +48,7 @@ def gen_emoji_lexicon( emojis_per_keyword: int, ): """ - Generates a dictionary of keywords (keys) and emoji unicode(s) associated with them (values). + Generate a dictionary of keywords (keys) and emoji unicode(s) associated with them (values). Parameters ---------- diff --git a/src/scribe_data/unicode/unicode_utils.py b/src/scribe_data/unicode/unicode_utils.py index b03f13c8b..4d857869f 100644 --- a/src/scribe_data/unicode/unicode_utils.py +++ b/src/scribe_data/unicode/unicode_utils.py @@ -25,7 +25,10 @@ # See: https://getemoji.com/ def get_emojis_to_ignore(): """ - Returns a list of emojis based on relationships that we want to remove from autosuggestions. + Returns a list of emojis based on relationships that we want to remove from autosuggestions. 
+ + Returns + ------- """ return """ ๐Ÿคฐ ๐Ÿซƒ ๐Ÿคฑ ๐Ÿ‘ฉโ€๐Ÿผ ๐Ÿ‘จโ€๐Ÿผ ๐Ÿ‘ญ ๐Ÿ‘ฌ ๐Ÿ‘ซ ๐Ÿ‘ฉโ€โค๏ธโ€๐Ÿ‘ฉ ๐Ÿ’‘ ๐Ÿ‘จโ€โค๏ธโ€๐Ÿ‘จ ๐Ÿ‘ฉโ€โค๏ธโ€๐Ÿ‘จ ๐Ÿ‘ฉโ€โค๏ธโ€๐Ÿ’‹โ€๐Ÿ‘ฉ ๐Ÿ’ ๐Ÿ‘จโ€โค๏ธโ€๐Ÿ’‹โ€๐Ÿ‘จ ๐Ÿ‘ฉโ€โค๏ธโ€๐Ÿ’‹โ€๐Ÿ‘จ ๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘ฆ ๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘ง ๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ ๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘ฆโ€๐Ÿ‘ฆ ๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ง ๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘ฆ ๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘ง ๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘งโ€๐Ÿ‘ฆ ๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘ฆโ€๐Ÿ‘ฆ ๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘งโ€๐Ÿ‘ง ๐Ÿ‘ฉโ€๐Ÿ‘ฉโ€๐Ÿ‘ฆ ๐Ÿ‘ฉโ€๐Ÿ‘ฉโ€๐Ÿ‘ง ๐Ÿ‘ฉโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ ๐Ÿ‘ฉโ€๐Ÿ‘ฉโ€๐Ÿ‘ฆโ€๐Ÿ‘ฆ ๐Ÿ‘ฉโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ง ๐Ÿ‘จโ€๐Ÿ‘ฆ ๐Ÿ‘จโ€๐Ÿ‘ฆโ€๐Ÿ‘ฆ ๐Ÿ‘จโ€๐Ÿ‘ง ๐Ÿ‘จโ€๐Ÿ‘งโ€๐Ÿ‘ฆ ๐Ÿ‘จโ€๐Ÿ‘งโ€๐Ÿ‘ง ๐Ÿ‘ฉโ€๐Ÿ‘ฆ ๐Ÿ‘ฉโ€๐Ÿ‘ฆโ€๐Ÿ‘ฆ ๐Ÿ‘ฉโ€๐Ÿ‘ง ๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ ๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ง diff --git a/src/scribe_data/utils.py b/src/scribe_data/utils.py index 153fc2938..5cecb6728 100644 --- a/src/scribe_data/utils.py +++ b/src/scribe_data/utils.py @@ -120,13 +120,12 @@ def _load_json(package_path: str, file_name: str) -> Any: """ - Loads a JSON resource from a package into a python entity. + Load a JSON resource from a package into a python entity. Parameters ---------- package_path : str The fully qualified package that contains the resource. - file_name : str The name of the file (resource) that contains the JSON data. diff --git a/src/scribe_data/wikidata/check_query/check.py b/src/scribe_data/wikidata/check_query/check.py index 955168b5a..cdc31b638 100644 --- a/src/scribe_data/wikidata/check_query/check.py +++ b/src/scribe_data/wikidata/check_query/check.py @@ -51,13 +51,13 @@ def ping(url: str, timeout: int) -> bool: ---------- url : str The URL to test. - timeout : int The maximum number of seconds to wait for a reply. Returns ------- - bool : True if connectivity is established or False otherwise. + bool + True if connectivity is established, False otherwise. 
""" try: with urllib.request.urlopen(url, timeout=timeout) as response: @@ -74,7 +74,8 @@ def all_queries() -> list[QueryFile]: Returns ------- - list[QueryFile] : the SPARQL query files. + list[QueryFile] + List of SPARQL query files. """ parts = Path(__file__).resolve().parts prj_root_idx = parts.index(PROJECT_ROOT) @@ -99,7 +100,8 @@ def changed_queries() -> Optional[list[QueryFile]]: Returns ------- - Optional[list[QueryFile]] : list of changed/new SPARQL queries or None if there's an error. + Optional[list[QueryFile]] + List of changed/new SPARQL queries, or None if there's an error. """ result = subprocess.run( ( @@ -137,7 +139,8 @@ def check_sparql_file(fpath: str) -> Path: Returns ------- - Path : the validated file. + Path + The validated file path. """ path = Path(fpath) @@ -233,7 +236,7 @@ def main(argv=None) -> int: If set to None then argparse will use sys.argv as the arguments. Returns - -------- + ------- int The exit status - 0 - success; any other value - failure. """ diff --git a/src/scribe_data/wikidata/check_query/query.py b/src/scribe_data/wikidata/check_query/query.py index 6e5e6fc87..6743c4396 100644 --- a/src/scribe_data/wikidata/check_query/query.py +++ b/src/scribe_data/wikidata/check_query/query.py @@ -39,12 +39,13 @@ def load(self, limit: int) -> str: Parameters ---------- - limit : int - The maximum number of results a query should return. + limit : int + The maximum number of results a query should return. Returns ------- - str : the SPARQL query. + str + the SPARQL query. """ with open(self.path, encoding="utf-8") as in_stream: return f"{in_stream.read()}\nLIMIT {limit}\n" @@ -62,11 +63,10 @@ def __init__(self, message: str, query: QueryFile) -> None: """ Parameters ---------- - message : str - Why the query failed. - - query : QueryFile - The query that failed. + message : str + The error message. + query : QueryFile + The query that failed. 
""" self.message = message self.query = query diff --git a/src/scribe_data/wikidata/format_data.py b/src/scribe_data/wikidata/format_data.py index 2aa2db970..2e823ccb9 100644 --- a/src/scribe_data/wikidata/format_data.py +++ b/src/scribe_data/wikidata/format_data.py @@ -42,22 +42,21 @@ def format_data( data_type: str = args.data_type, ): """ - Formats data that has been queried from the Wikidata Query Service. + Format data queried from the Wikidata Query Service. Parameters ---------- dir_path : str The output directory path for results. - language : str The language for which the data is being loaded. - data_type : str The type of data being loaded (e.g. 'nouns', 'verbs'). Returns - _______ - A saved and formatted data file for the given language and data type. + ------- + None + Saves and formatted data file for the given language and data type. """ data_list, data_path = load_queried_data( dir_path=dir_path, language=language, data_type=data_type diff --git a/src/scribe_data/wikidata/query_data.py b/src/scribe_data/wikidata/query_data.py index 7476bc619..486684361 100644 --- a/src/scribe_data/wikidata/query_data.py +++ b/src/scribe_data/wikidata/query_data.py @@ -49,16 +49,14 @@ def execute_formatting_script(output_dir: str, language: str, data_type: str): ---------- output_dir : str The output directory path for results. - language : str The language for which the data is being loaded. - data_type : str The type of data being loaded (e.g. 'nouns', 'verbs'). Returns ------- - The results of the formatting script saved in the given output directory. + The results of the formatting script are saved in the given output directory. 
""" formatting_file_path = Path(__file__).parent / "format_data.py" diff --git a/src/scribe_data/wikipedia/process_wiki.py b/src/scribe_data/wikipedia/process_wiki.py index 047a4ccd7..16acda8c2 100644 --- a/src/scribe_data/wikipedia/process_wiki.py +++ b/src/scribe_data/wikipedia/process_wiki.py @@ -47,7 +47,7 @@ def clean( verbose=True, ): """ - Cleans text body to prepare it for analysis. + Clean text body to prepare it for analysis. Parameters ---------- From d630543a0756559fe4feb66038973bd9ae49ad3e Mon Sep 17 00:00:00 2001 From: Andrew Tavis McAllister Date: Sun, 19 Jan 2025 18:41:00 +0100 Subject: [PATCH 2/2] Minor formatting and edits to docstrings - wip remove validation --- .pre-commit-config.yaml | 12 +++++----- src/scribe_data/cli/cli_utils.py | 1 + src/scribe_data/cli/convert.py | 22 +++++++++++++++++++ src/scribe_data/cli/download.py | 1 + src/scribe_data/load/data_to_sqlite.py | 3 ++- src/scribe_data/load/send_dbs_to_scribe.py | 2 +- src/scribe_data/unicode/unicode_utils.py | 4 +++- src/scribe_data/wikidata/check_query/query.py | 1 + 8 files changed, 37 insertions(+), 9 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c4ebf8cad..ee04375e5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -26,9 +26,9 @@ repos: - id: ruff-format - - repo: https://github.com/numpy/numpydoc - rev: v1.8.0 - hooks: - - id: numpydoc-validation - files: ^src/ - exclude: ^tests/ + # - repo: https://github.com/numpy/numpydoc + # rev: v1.8.0 + # hooks: + # - id: numpydoc-validation + # files: ^src/ + # exclude: ^tests/ diff --git a/src/scribe_data/cli/cli_utils.py b/src/scribe_data/cli/cli_utils.py index cc129a9ab..5262c14d7 100644 --- a/src/scribe_data/cli/cli_utils.py +++ b/src/scribe_data/cli/cli_utils.py @@ -63,6 +63,7 @@ def print_formatted_data(data: Union[dict, list], data_type: str) -> None: ---------- data : Union[dict, list] The data to format and print. 
+ data_type : str The type of data being printed, used to determine formatting style. """ diff --git a/src/scribe_data/cli/convert.py b/src/scribe_data/cli/convert.py index aba4db445..afab2fb32 100644 --- a/src/scribe_data/cli/convert.py +++ b/src/scribe_data/cli/convert.py @@ -53,16 +53,22 @@ def convert_to_json( ---------- language : str The language of the file to convert. + data_type : Union[str, List[str]] The data type of the file to convert. + output_type : str The output format, should be "json". + input_file : str The input CSV/TSV file path. + output_dir : Path The output directory path for results. + overwrite : bool Whether to overwrite existing files. + identifier_case : str The case format for identifiers. Default is "camel". @@ -202,10 +208,13 @@ def convert_to_csv_or_tsv( ---------- language : str The language of the file to convert. + data_type : Union[str, List[str]] The data type of the file to convert. + output_type : str The output format, should be "csv" or "tsv". + input_file : str The input JSON file path. @@ -386,16 +395,22 @@ def convert_to_sqlite( ---------- language : str The language of the file to convert. + data_type : str The data type of the file to convert. + output_type : str The output format, should be "sqlite". + input_file : str, optional The input file path for the data to be converted. + output_dir : str, optional The output directory path for results. + overwrite : bool, optional Whether to overwrite existing files. + identifier_case : str, optional The case format for identifiers. Default is "camel". @@ -451,18 +466,25 @@ def convert_wrapper( ---------- languages : Union[str, List[str]] The language(s) of the data to convert. + data_types : Union[str, List[str]] The data type(s) of the data to convert. + output_type : str The desired output format. Can be 'json', 'csv', 'tsv', or 'sqlite'. + input_files : Union[str, List[str]] The path(s) to the input file(s). 
+ output_dir : str The output directory where converted files will be stored. + overwrite : bool, optional Whether to overwrite existing output files. + identifier_case : str, optional The case format for identifiers. Default is "camel". + all : bool, optional Convert all languages and data types. diff --git a/src/scribe_data/cli/download.py b/src/scribe_data/cli/download.py index 69bd42fa4..67d2d1d5d 100644 --- a/src/scribe_data/cli/download.py +++ b/src/scribe_data/cli/download.py @@ -146,6 +146,7 @@ def check_wd_dump_exists(target_entity): ------- str The URL of the dump file if it exists. + None If the dump file does not exist. """ diff --git a/src/scribe_data/load/data_to_sqlite.py b/src/scribe_data/load/data_to_sqlite.py index 4bd80988f..8de6848b3 100644 --- a/src/scribe_data/load/data_to_sqlite.py +++ b/src/scribe_data/load/data_to_sqlite.py @@ -116,9 +116,9 @@ def create_table(data_type, cols): ---------- data_type : str The name of the table to be created. + cols : list of str The names of columns for the new table. - """ # Convert column names to snake_case if requested. cols = [ @@ -137,6 +137,7 @@ def table_insert(data_type, keys): ---------- data_type : str The name of the table to be inserted into. + keys : list of str The values to be inserted into the table row. """ diff --git a/src/scribe_data/load/send_dbs_to_scribe.py b/src/scribe_data/load/send_dbs_to_scribe.py index f57bfc698..21fb56710 100644 --- a/src/scribe_data/load/send_dbs_to_scribe.py +++ b/src/scribe_data/load/send_dbs_to_scribe.py @@ -3,7 +3,7 @@ Example --------- +------- python3 src/scribe_data/load/send_dbs_to_scribe.py .. 
raw:: html diff --git a/src/scribe_data/unicode/unicode_utils.py b/src/scribe_data/unicode/unicode_utils.py index 4d857869f..1ceaf506c 100644 --- a/src/scribe_data/unicode/unicode_utils.py +++ b/src/scribe_data/unicode/unicode_utils.py @@ -25,10 +25,12 @@ # See: https://getemoji.com/ def get_emojis_to_ignore(): """ - Returns a list of emojis based on relationships that we want to remove from autosuggestions. + Returns a list of emojis based on relationships that we want to remove from autosuggestions. Returns ------- + str + All emojis that should not be included in emoji suggestions. """ return """ 🤰 🫃 🤱 👩‍🍼 👨‍🍼 👭 👬 👫 👩‍❤️‍👩 💑 👨‍❤️‍👨 👩‍❤️‍👨 👩‍❤️‍💋‍👩 💏 👨‍❤️‍💋‍👨 👩‍❤️‍💋‍👨 👨‍👩‍👦 👨‍👩‍👧 👨‍👩‍👧‍👦 👨‍👩‍👦‍👦 👨‍👩‍👧‍👧 👨‍👨‍👦 👨‍👨‍👧 👨‍👨‍👧‍👦 👨‍👨‍👦‍👦 👨‍👨‍👧‍👧 👩‍👩‍👦 👩‍👩‍👧 👩‍👩‍👧‍👦 👩‍👩‍👦‍👦 👩‍👩‍👧‍👧 👨‍👦 👨‍👦‍👦 👨‍👧 👨‍👧‍👦 👨‍👧‍👧 👩‍👦 👩‍👦‍👦 👩‍👧 👩‍👧‍👦 👩‍👧‍👧 diff --git a/src/scribe_data/wikidata/check_query/query.py b/src/scribe_data/wikidata/check_query/query.py index 6743c4396..d6aa722ce 100644 --- a/src/scribe_data/wikidata/check_query/query.py +++ b/src/scribe_data/wikidata/check_query/query.py @@ -65,6 +65,7 @@ def __init__(self, message: str, query: QueryFile) -> None: ---------- message : str The error message. + query : QueryFile The query that failed. """