diff --git a/src/scribe_data/check/check_pyICU.py b/src/scribe_data/check/check_pyICU.py
new file mode 100644
index 00000000..a30e7e8e
--- /dev/null
+++ b/src/scribe_data/check/check_pyICU.py
@@ -0,0 +1,273 @@
+"""
+Check whether PyICU is installed and offer to install it when it is missing.
+"""
+
+import importlib.metadata
+import os
+import platform
+import subprocess
+import sys
+import tempfile
+
+import requests
+
+PACKAGE_NAME = "PyICU"
+RELEASES_API_URL = "https://api.github.com/repos/cgohlke/pyicu-build/releases"
+RELEASES_PAGE_URL = "https://github.com/cgohlke/pyicu-build/releases"
+REQUEST_TIMEOUT = 30  # Seconds to wait for GitHub before giving up.
+BYTES_PER_MB = 1024 * 1024
+
+
+def check_if_pyicu_installed():
+    """
+    Check whether PyICU is installed in the current environment.
+
+    Returns
+    -------
+    bool
+        True if the PyICU distribution is found, False otherwise.
+    """
+    # importlib.metadata looks the distribution up on each call, so a package
+    # installed by this very process is detected without restarting Python.
+    try:
+        importlib.metadata.version(PACKAGE_NAME)
+    except importlib.metadata.PackageNotFoundError:
+        return False
+
+    return True
+
+
+def get_python_version_and_architecture():
+    """
+    Get the wheel tags for the running Python version and Windows architecture.
+
+    Returns
+    -------
+    python_version : str
+        The Python tag in the format 'cpXY' (e.g. 'cp311').
+    architecture : str
+        'win_amd64' for a 64-bit interpreter, 'win32' otherwise.
+    """
+    version = sys.version_info
+    python_version = f"cp{version.major}{version.minor}"
+    architecture = "win_amd64" if sys.maxsize > 2**32 else "win32"
+    return python_version, architecture
+
+
+def _fetch_wheel_assets():
+    """
+    Fetch every PyICU wheel asset listed in the GitHub releases.
+
+    Returns
+    -------
+    list
+        Tuples of (file name, download URL, size in bytes), one per wheel.
+
+    Raises
+    ------
+    requests.RequestException
+        If the request fails, times out or returns an error status.
+    """
+    response = requests.get(RELEASES_API_URL, timeout=REQUEST_TIMEOUT)
+    response.raise_for_status()
+
+    return [
+        (asset["name"], asset["browser_download_url"], asset["size"])
+        for release in response.json()
+        for asset in release["assets"]
+        if asset["name"].endswith(".whl")
+    ]
+
+
+def fetch_wheel_releases():
+    """
+    Fetch the release data for PyICU from GitHub.
+
+    Returns
+    -------
+    available_wheels : list
+        A list of tuples containing wheel file names and their download URLs.
+    total_size_mb : float
+        The total size of all available wheels in MB.
+    """
+    assets = _fetch_wheel_assets()
+    available_wheels = [(name, url) for name, url, _ in assets]
+    total_size_mb = sum(size for _, _, size in assets) / BYTES_PER_MB
+    return available_wheels, total_size_mb
+
+
+def download_wheel_file(wheel_url, output_dir):
+    """
+    Download the wheel file from the given URL.
+
+    Parameters
+    ----------
+    wheel_url : str
+        The URL of the wheel file to download.
+    output_dir : str
+        The directory to save the downloaded file.
+
+    Returns
+    -------
+    str
+        The path to the downloaded wheel file.
+    """
+    response = requests.get(wheel_url, timeout=REQUEST_TIMEOUT)
+    response.raise_for_status()
+
+    wheel_path = os.path.join(output_dir, os.path.basename(wheel_url))
+    with open(wheel_path, "wb") as wheel_file:
+        wheel_file.write(response.content)
+
+    return wheel_path
+
+
+def find_matching_wheel(wheels, python_version, architecture):
+    """
+    Find the matching wheel file based on Python version and architecture.
+
+    Parameters
+    ----------
+    wheels : list
+        The available wheels as (file name, download URL) tuples.
+    python_version : str
+        The Python version (e.g., 'cp311').
+    architecture : str
+        The architecture type (e.g., 'win_amd64').
+
+    Returns
+    -------
+    str or None
+        The download URL of the first matching wheel, or None if not found.
+    """
+    for name, download_url in wheels:
+        if python_version in name and architecture in name:
+            return download_url
+
+    return None
+
+
+def _confirm_installation(download_source, size_mb=None):
+    """
+    Ask the user to confirm the installation; an empty answer counts as yes.
+
+    Parameters
+    ----------
+    download_source : str
+        Where the package will be downloaded from, shown in the prompt.
+    size_mb : float, optional
+        The download size in MB; left out of the prompt when not given.
+
+    Returns
+    -------
+    bool
+        True if the user agreed to install, False otherwise.
+    """
+    size_note = (
+        f"\nApproximately {size_mb:.2f} MB will be downloaded."
+        if size_mb is not None
+        else ""
+    )
+    print(
+        f"{PACKAGE_NAME} is not installed.\nIt will be downloaded from "
+        f"'{download_source}'{size_note}\nDo you want to proceed? (Y/n)?"
+    )
+
+    if input().strip().lower() in {"", "y", "yes"}:
+        print("Proceeding with installation...")
+        return True
+
+    print("Installation aborted by the user.")
+    return False
+
+
+def _pip_install(requirement):
+    """
+    Install a requirement with pip and report the outcome.
+
+    Parameters
+    ----------
+    requirement : str
+        A package name or the path to a wheel file.
+
+    Returns
+    -------
+    bool
+        True if pip exited successfully, False otherwise.
+    """
+    try:
+        subprocess.run(
+            [sys.executable, "-m", "pip", "install", requirement], check=True
+        )
+    except subprocess.CalledProcessError as e:
+        print(f"Error occurred while installing {PACKAGE_NAME}: {e}")
+        return False
+
+    print(f"{PACKAGE_NAME} has been installed successfully.")
+    return True
+
+
+def _install_from_windows_wheel():
+    """
+    Install PyICU on Windows from a prebuilt wheel matching this interpreter.
+
+    Returns
+    -------
+    bool
+        True if the wheel was installed, False if no wheel matched, the user
+        declined, or the download or installation failed.
+    """
+    try:
+        assets = _fetch_wheel_assets()
+    except requests.RequestException as e:
+        print(f"Could not retrieve the list of {PACKAGE_NAME} wheels: {e}")
+        return False
+
+    python_version, architecture = get_python_version_and_architecture()
+    wheel_url = find_matching_wheel(
+        [(name, url) for name, url, _ in assets], python_version, architecture
+    )
+    if not wheel_url:
+        print(
+            "No matching wheel file found for your Python version and architecture."
+        )
+        return False
+
+    # Report the size of the one wheel that is downloaded, not of all wheels.
+    wheel_size = next(size for _, url, size in assets if url == wheel_url)
+    if not _confirm_installation(RELEASES_PAGE_URL, wheel_size / BYTES_PER_MB):
+        return False
+
+    # The temporary directory removes the wheel even when installation fails.
+    with tempfile.TemporaryDirectory() as download_dir:
+        try:
+            wheel_path = download_wheel_file(wheel_url, download_dir)
+        except (requests.RequestException, OSError) as e:
+            print(f"Error occurred while downloading {PACKAGE_NAME}: {e}")
+            return False
+
+        return _pip_install(wheel_path)
+
+
+def check_and_install_pyicu():
+    """
+    Make sure PyICU is installed, offering to install it when it is missing.
+
+    Returns
+    -------
+    bool
+        True if PyICU was already installed or has just been installed, False
+        if the user declined or the installation failed.
+    """
+    if check_if_pyicu_installed():
+        return True
+
+    if platform.system() == "Windows":
+        return _install_from_windows_wheel()
+
+    # Other platforms install with pip from the package index, so the GitHub
+    # release listing is not needed.
+    if not _confirm_installation("PyPI"):
+        return False
+
+    return _pip_install(PACKAGE_NAME)
diff --git a/src/scribe_data/cli/get.py b/src/scribe_data/cli/get.py
index 7bf54453..0ce19d9b 100644
--- a/src/scribe_data/cli/get.py
+++ b/src/scribe_data/cli/get.py
@@ -139,10 +139,10 @@ def get_data(
         return True
 
     # The emoji keywords process has failed.
-    elif data_type in {"emoji-keywords", "emoji_keywords"}:
-        print(
-            "\nThe Scribe-Data emoji functionality is powered by PyICU, which is currently not installed."
-        )
-        print(
-            "Please check the installation guide at https://github.com/scribe-org/Scribe-Data/blob/main/src/scribe_data/unicode/UNICODE_INSTALLTION.md for more information.\n"
-        )
+    # elif data_type in {"emoji-keywords", "emoji_keywords"}:
+    #     print(
+    #         "\nThe Scribe-Data emoji functionality is powered by PyICU, which is currently not installed."
+    #     )
+    #     print(
+    #         "Please check the installation guide at https://github.com/scribe-org/Scribe-Data/blob/main/src/scribe_data/unicode/UNICODE_INSTALLTION.md for more information.\n"
+    #     )
diff --git a/src/scribe_data/unicode/generate_emoji_keywords.py b/src/scribe_data/unicode/generate_emoji_keywords.py
index 756f06b3..6dbdcc5a 100644
--- a/src/scribe_data/unicode/generate_emoji_keywords.py
+++ b/src/scribe_data/unicode/generate_emoji_keywords.py
@@ -25,35 +25,67 @@
 
+from scribe_data.check.check_pyICU import (
+    check_and_install_pyicu,
+    check_if_pyicu_installed,
+)
 from scribe_data.unicode.process_unicode import gen_emoji_lexicon
 from scribe_data.utils import export_formatted_data, get_language_iso
 
 DATA_TYPE = "emoji-keywords"
 EMOJI_KEYWORDS_DICT = 3
 
 
 def generate_emoji(language, output_dir: str = None):
+    """
+    Generate emoji keywords for a language and export them to a directory.
+
+    PyICU is required: if it is missing, the user is offered an installation
+    and no data is generated during this call.
+
+    Parameters
+    ----------
+    language : str
+        The language to generate emoji keywords for; its ISO code is looked up
+        with get_language_iso.
+    output_dir : str
+        The directory the data is exported to. A path must be passed even
+        though the parameter defaults to None.
+
+    Returns
+    -------
+    None
+        The generated keywords are handed to export_formatted_data.
+    """
+    if not check_if_pyicu_installed():
+        # Nothing is generated in the call that installs PyICU.
+        # NOTE(review): presumably the modules importing PyICU were loaded
+        # before it was installed, so the command must be run again - confirm.
+        if check_and_install_pyicu():
+            print("Thank you.")
+
+        return
+
     iso = get_language_iso(language=language)
     path_to_cldr_annotations = (
         Path(__file__).parent / "cldr-annotations-full" / "annotations"
     )
-    if iso in os.listdir(path_to_cldr_annotations):
-        print(f"Emoji Generation for language {language} is supported")
-
-    else:
+    if iso not in os.listdir(path_to_cldr_annotations):
         print(f"Emoji Generation for language {language} is not supported")
         return
 
+    print(f"Emoji Generation for language {language} is supported")
+
     updated_path = output_dir[2:] if output_dir.startswith("./") else output_dir
     export_dir = Path(updated_path) / language.capitalize()
     export_dir.mkdir(parents=True, exist_ok=True)
 
     if emoji_keywords_dict := gen_emoji_lexicon(
         language=language,
         emojis_per_keyword=EMOJI_KEYWORDS_DICT,
     ):
         export_formatted_data(
             file_path=output_dir,
             formatted_data=emoji_keywords_dict,
             query_data_in_use=True,
-            language=language,
+            language=language.capitalize(),
             data_type=DATA_TYPE,
         )