Skip to content

Commit

Permalink
ubuntu pyicu fix
Browse files Browse the repository at this point in the history
  • Loading branch information
axif0 committed Oct 24, 2024
1 parent c386f63 commit 94b639a
Show file tree
Hide file tree
Showing 3 changed files with 235 additions and 32 deletions.
180 changes: 180 additions & 0 deletions src/scribe_data/check/check_pyICU.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
import importlib.metadata
import os
import platform  # Added to check the OS
import subprocess
import sys
from pathlib import Path

import pkg_resources
import requests


def check_if_pyicu_installed():
    """
    Check whether the PyICU distribution is installed in the current environment.

    The lookup is performed through ``importlib.metadata`` on every call, so a
    package that was installed by a subprocess (e.g. ``pip``) after this module
    was imported is still detected.

    Returns
    -------
    bool
        True if PyICU is installed, False otherwise.
    """
    # Drop cached finder state so that a just-completed pip install is visible.
    importlib.invalidate_caches()
    try:
        importlib.metadata.distribution("PyICU")
    except importlib.metadata.PackageNotFoundError:
        return False
    return True


def get_python_version_and_architecture():
    """
    Describe the running interpreter using Windows wheel tag conventions.

    Returns
    -------
    str : python_version
        The CPython tag built from the major and minor version, e.g. 'cp311'.
    str : architecture
        'win_amd64' when ``sys.maxsize`` exceeds 2**32 (64-bit interpreter),
        otherwise 'win32'.
    """
    major, minor = sys.version_info[:2]

    # sys.maxsize only exceeds 2**32 on a 64-bit build of the interpreter.
    if sys.maxsize > 2**32:
        wheel_platform = "win_amd64"
    else:
        wheel_platform = "win32"

    return f"cp{major}{minor}", wheel_platform


def fetch_wheel_releases(timeout=30):
    """
    Fetch the release data for PyICU from GitHub.

    Parameters
    ----------
    timeout : float, optional
        Seconds to wait for the GitHub API before giving up (default 30).
        Without a timeout the request could block forever on a stalled
        connection.

    Returns
    -------
    list : available_wheels
        A list of tuples containing wheel file names and their download URLs.
    float : total_size_mb
        The total size of all available wheels in MB.

    Raises
    ------
    requests.RequestException
        If the request fails, times out, or returns an error status.
    """
    url = "https://api.github.com/repos/cgohlke/pyicu-build/releases"
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # Raise an error for bad responses

    available_wheels = []
    total_size_bytes = 0

    for release in response.json():
        # Tolerate a release entry that carries no asset list.
        for asset in release.get("assets", []):
            if asset["name"].endswith(".whl"):
                available_wheels.append(
                    (asset["name"], asset["browser_download_url"])
                )
                total_size_bytes += asset["size"]

    total_size_mb = total_size_bytes / (1024 * 1024)  # Convert bytes to MB
    return available_wheels, total_size_mb


def download_wheel_file(wheel_url, output_dir, timeout=60):
    """
    Download the wheel file from the given URL.

    The response body is streamed to disk in chunks rather than being held
    in memory as a whole.

    Parameters
    ----------
    wheel_url : str
        The URL of the wheel file to download.
    output_dir : str or os.PathLike
        The directory to save the downloaded file.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 60).

    Returns
    -------
    str : path to the downloaded wheel file.

    Raises
    ------
    requests.RequestException
        If the request fails, times out, or returns an error status.
    """
    wheel_filename = os.path.basename(wheel_url)
    wheel_path = os.path.join(output_dir, wheel_filename)

    # The context manager releases the connection even if writing fails.
    with requests.get(wheel_url, stream=True, timeout=timeout) as response:
        # Check the status before creating the file so that an HTTP error
        # does not leave an empty wheel behind.
        response.raise_for_status()

        with open(wheel_path, "wb") as wheel_file:
            for chunk in response.iter_content(chunk_size=64 * 1024):
                wheel_file.write(chunk)

    return wheel_path


def find_matching_wheel(wheels, python_version, architecture):
    """
    Find the matching wheel file based on Python version and architecture.

    Wheel file names follow the pattern
    ``{name}-{version}[-{build}]-{python tag}-{abi tag}-{platform tag}.whl``.
    The three trailing tags are compared exactly instead of by substring, so
    that e.g. 'cp313' does not select a free-threaded 'cp313t' build and
    'cp31' does not select a 'cp310' build.

    Parameters
    ----------
    wheels : list
        The list of available wheels as (file name, download URL) tuples.
    python_version : str
        The Python version (e.g., 'cp311').
    architecture : str
        The architecture type (e.g., 'win_amd64').

    Returns
    -------
    str : The download URL of the first matching wheel or None if not found.
    """
    # ABI tags that are usable by the requested (non free-threaded) interpreter.
    compatible_abis = {python_version, "abi3", "none"}

    for name, download_url in wheels:
        stem = name[:-4] if name.endswith(".whl") else name
        fields = stem.split("-")
        if len(fields) < 5:
            # Not a well-formed wheel file name; it cannot be matched safely.
            continue

        # Each tag field may be a '.'-separated set of compressed tags.
        python_tags, abi_tags, platform_tags = (
            set(field.split(".")) for field in fields[-3:]
        )
        if (
            python_version in python_tags
            and architecture in platform_tags
            and abi_tags & compatible_abis
        ):
            return download_url

    return None


def check_and_install_pyicu():
    """
    Make sure PyICU is installed, asking the user before installing it.

    On Windows a prebuilt wheel matching the running interpreter is looked up
    in the cgohlke/pyicu-build GitHub releases, downloaded into the current
    working directory, installed with pip and removed again. On every other
    platform PyICU is installed straight from the package index with pip, so
    GitHub is not contacted at all.

    Returns
    -------
    bool
        True if PyICU was already installed or has been installed
        successfully. False if the user declined, no matching wheel exists,
        a network request failed, or pip reported an error.
    """
    package_name = "PyICU"

    if check_if_pyicu_installed():
        return True

    if platform.system() != "Windows":
        # pip resolves the package itself here; no wheel lookup is needed.
        if not _confirm_pyicu_install(
            f"{package_name} is not installed.\nIt will be installed with pip."
            "\nDo you want to proceed? (Y/n)?"
        ):
            return False
        return _pip_install_pyicu(package_name, package_name)

    # Windows-specific installation using wheel files.
    try:
        # Fetch available wheels from GitHub to estimate download size.
        wheels, total_size_mb = fetch_wheel_releases()
    except requests.RequestException as e:
        print(f"Error occurred while fetching {package_name} releases: {e}")
        return False

    if not _confirm_pyicu_install(
        f"{package_name} is not installed.\nIt will be downloaded from 'https://github.com/cgohlke/pyicu-build'"
        f"\nApproximately {total_size_mb:.2f} MB will be downloaded.\nDo you want to proceed? (Y/n)?"
    ):
        return False

    # Find the matching wheel for the current Python version and architecture.
    python_version, architecture = get_python_version_and_architecture()
    wheel_url = find_matching_wheel(wheels, python_version, architecture)
    if not wheel_url:
        print("No matching wheel file found for your Python version and architecture.")
        return False

    # Download the wheel file into the current directory for simplicity.
    try:
        wheel_path = download_wheel_file(wheel_url, Path.cwd())
    except (requests.RequestException, OSError) as e:
        print(f"Error occurred while downloading {package_name}: {e}")
        return False

    try:
        return _pip_install_pyicu(wheel_path, package_name)
    finally:
        # Remove the downloaded wheel whether or not pip succeeded.
        if os.path.exists(wheel_path):
            os.remove(wheel_path)
            print(f"Removed temporary file: {wheel_path}")


def _confirm_pyicu_install(message):
    """Print *message* and return True if the user accepts (empty input means yes)."""
    print(message)
    if input().strip().lower() in {"", "y", "yes"}:
        print("Proceeding with installation...")
        return True
    print("Installation aborted by the user.")
    return False


def _pip_install_pyicu(target, package_name):
    """Install *target* with pip under the current interpreter; return True on success."""
    try:
        subprocess.run(
            [sys.executable, "-m", "pip", "install", str(target)], check=True
        )
    except subprocess.CalledProcessError as e:
        print(f"Error occurred while installing {package_name}: {e}")
        return False
    print(f"{package_name} has been installed successfully.")
    return True
14 changes: 7 additions & 7 deletions src/scribe_data/cli/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,10 +139,10 @@ def get_data(
return True

# The emoji keywords process has failed.
elif data_type in {"emoji-keywords", "emoji_keywords"}:
print(
"\nThe Scribe-Data emoji functionality is powered by PyICU, which is currently not installed."
)
print(
"Please check the installation guide at https://github.com/scribe-org/Scribe-Data/blob/main/src/scribe_data/unicode/UNICODE_INSTALLTION.md for more information.\n"
)
# elif data_type in {"emoji-keywords", "emoji_keywords"}:
# print(
# "\nThe Scribe-Data emoji functionality is powered by PyICU, which is currently not installed."
# )
# print(
# "Please check the installation guide at https://github.com/scribe-org/Scribe-Data/blob/main/src/scribe_data/unicode/UNICODE_INSTALLTION.md for more information.\n"
# )
73 changes: 48 additions & 25 deletions src/scribe_data/unicode/generate_emoji_keywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,35 +25,58 @@

from scribe_data.unicode.process_unicode import gen_emoji_lexicon
from scribe_data.utils import export_formatted_data, get_language_iso
from scribe_data.check.check_pyICU import (
check_and_install_pyicu,
check_if_pyicu_installed,
)

DATA_TYPE = "emoji-keywords"
EMOJI_KEYWORDS_DICT = 3


def generate_emoji(language, output_dir: str = None):
iso = get_language_iso(language=language)
path_to_cldr_annotations = (
Path(__file__).parent / "cldr-annotations-full" / "annotations"
)
if iso in os.listdir(path_to_cldr_annotations):
print(f"Emoji Generation for language {language} is supported")

else:
print(f"Emoji Generation for language {language} is not supported")
return

updated_path = output_dir[2:] if output_dir.startswith("./") else output_dir
export_dir = Path(updated_path) / language.capitalize()
export_dir.mkdir(parents=True, exist_ok=True)

if emoji_keywords_dict := gen_emoji_lexicon(
language=language,
emojis_per_keyword=EMOJI_KEYWORDS_DICT,
):
export_formatted_data(
file_path=output_dir,
formatted_data=emoji_keywords_dict,
query_data_in_use=True,
language=language,
data_type=DATA_TYPE,
"""
Generates emoji keywords for a specified language and exports the data to the given directory.
This function first checks and installs the PyICU package, which is necessary for the script to run.
If the installation is successful, it proceeds with generating emoji keywords based on the specified language.
The results are then exported to the provided output directory.
Parameters:
- language (str): The ISO code of the language for which to generate emoji keywords.
- output_dir (str, optional): The directory where the generated data will be saved. If not specified,
the data will be saved in a default directory.
Returns:
- None: The function does not return any value but outputs data to the specified directory.
"""
if check_and_install_pyicu() and check_if_pyicu_installed() is False:
print("Thank you.")

if check_if_pyicu_installed():
iso = get_language_iso(language=language)
path_to_cldr_annotations = (
Path(__file__).parent / "cldr-annotations-full" / "annotations"
)
if iso in os.listdir(path_to_cldr_annotations):
print(f"Emoji Generation for language {language} is supported")

else:
print(f"Emoji Generation for language {language} is not supported")
return

updated_path = output_dir[2:] if output_dir.startswith("./") else output_dir
export_dir = Path(updated_path) / language.capitalize()
export_dir.mkdir(parents=True, exist_ok=True)

if emoji_keywords_dict := gen_emoji_lexicon(
language=language,
emojis_per_keyword=EMOJI_KEYWORDS_DICT,
):
export_formatted_data(
file_path=output_dir,
formatted_data=emoji_keywords_dict,
query_data_in_use=True,
language=language.capitalize(),
data_type=DATA_TYPE,
)

0 comments on commit 94b639a

Please sign in to comment.