diff --git a/README.md b/README.md index add7f34..c9bc9bb 100644 --- a/README.md +++ b/README.md @@ -111,6 +111,28 @@ from kss import split_sentences output = split_sentences("YOUR_INPUT_STRING", **kwargs) ``` +### 6. Alias of module names +Because there are so many modules in Kss, users may have difficulty remembering the name of each module. +Kss provides aliases for some modules to make it easier to use them. +```python +from kss import Kss + +module_1 = Kss("split_morphemes") +module_2 = Kss("tokenize") +# For example, the 'split_morphemes' module can be loaded by using the alias named 'tokenize'. +``` + +You can check the alias of each module by using the `alias()` function. +```python +from kss import Kss + +Kss.alias() +``` + +```python +{'aug': 'augment', 'augmentation': 'augment', 'collocation': 'collocate', 'hangulization': 'hangulize', 'hangulisation': 'hangulize', 'hangulise': 'hangulize', 'hanja': 'hanja2hangul', 'hangul2jamo': 'h2j', 'hangul2hcj': 'h2hcj', 'jamo2hangul': 'j2h', 'jamo2hcj': 'j2hcj', 'hcj2hangul': 'hcj2h', 'hcj2jamo': 'hcj2j', 'josa': 'select_josa', 'keyword': 'extract_keywords', 'keywords': 'extract_keywords', 'morpheme': 'split_morphemes', 'morphemes': 'split_morphemes', 'annonymization': 'anonymize', 'news_cleaning': 'clean_news', 'news': 'clean_news', 'completed_form': 'is_completed_form', 'completed': 'is_completed_form', 'filter': 'filter_out', 'reduce_repeats': 'reduce_char_repeats', 'reduce_char': 'reduce_char_repeats', 'reduce_chars': 'reduce_char_repeats', 'reduce_emoticon': 'reduce_emoticon_repeats', 'reduce_emoticons': 'reduce_emoticon_repeats', 'reduce_emo': 'reduce_emoticon_repeats', 'remove_invisible': 'remove_invisible_chars', 'invisible_chars': 'remove_invisible_chars', 'invisible': 'remove_invisible_chars', 'normalization': 'normalize', 'normalisation': 'normalize', 'normalise': 'normalize', 'preprocessing': 'preprocess', 'prep': 'preprocess', 'romanization': 'romanize', 'romanisation': 'romanize', 'romanise': 'romanize', 
'safety': 'is_unsafe', 'check_safety': 'is_unsafe', 'sentence': 'split_sentences', 'sentences': 'split_sentences', 'sent_split': 'split_sentences', 'sent_splits': 'split_sentences', 'sents_split': 'split_sentences', 'split_sent': 'split_sentences', 'split_sents': 'split_sentences', 'spacing': 'correct_spacing', 'space': 'correct_spacing', 'spaces': 'correct_spacing', 'summarization': 'summarize_sentences', 'summarize': 'summarize_sentences', 'summ': 'summarize_sentences', 'morph': 'split_morphemes', 'morphs': 'split_morphemes', 'tokenize': 'split_morphemes', 'tokenization': 'split_morphemes', 'split_morph': 'split_morphemes', 'split_morphs': 'split_morphemes', 'morph_split': 'split_morphemes', 'morph_splits': 'split_morphemes', 'morphs_split': 'split_morphemes'} +``` + ## Supported Modules Kss supports the following modules and there are the simple usages of each module in the following sections. @@ -186,7 +208,7 @@ Args: - text (`Union[str, List[str], Tuple[str]]`): single text or list of texts - descriptive (`bool`): return descriptive pronunciation, the 'descriptive' means a real-life pronunciation - group_vowels (`bool`): If True, the vowels of the identical sound are normalized. (e.g. ㅒ -> ㅖ) -- to_syllable: If True, hangul letters or jamo are assembled to form syllables. +- to_syllable (`bool`): If True, hangul letters or jamo are assembled to form syllables. 
- convert_english_to_hangul_phonemes (`bool`): If True, convert English to Hangul phonemes - convert_numbers_to_hangul_phonemes (`bool`): If True, convert numbers to Hangul phonemes - num_workers (`Union[int, str]`): the number of multiprocessing workers @@ -691,6 +713,9 @@ Args: - noun_only (`bool`): whether to extract only nouns or not - num_workers (`Union[int, str]`): the number of multiprocessing workers +Returns: +- `Union[List[str], List[Tuple[str, float]]]`: list of keywords or list of tuples of keywords and scores + Examples: ```python >>> from kss import Kss diff --git a/kss/__init__.py b/kss/__init__.py index 689a444..d9f9386 100644 --- a/kss/__init__.py +++ b/kss/__init__.py @@ -100,8 +100,12 @@ "filter": "filter_out", "reduce_repeats": "reduce_char_repeats", "reduce_char": "reduce_char_repeats", + "reduce_chars": "reduce_char_repeats", "reduce_emoticon": "reduce_emoticon_repeats", + "reduce_emoticons": "reduce_emoticon_repeats", + "reduce_emo": "reduce_emoticon_repeats", "remove_invisible": "remove_invisible_chars", + "invisible_chars": "remove_invisible_chars", "invisible": "remove_invisible_chars", "normalization": "normalize", "normalisation": "normalize", @@ -117,12 +121,24 @@ "sentences": "split_sentences", "sent_split": "split_sentences", "sent_splits": "split_sentences", + "sents_split": "split_sentences", + "split_sent": "split_sentences", + "split_sents": "split_sentences", "spacing": "correct_spacing", "space": "correct_spacing", "spaces": "correct_spacing", "summarization": "summarize_sentences", "summarize": "summarize_sentences", "summ": "summarize_sentences", + "morph": "split_morphemes", + "morphs": "split_morphemes", + "tokenize": "split_morphemes", + "tokenization": "split_morphemes", + "split_morph": "split_morphemes", + "split_morphs": "split_morphemes", + "morph_split": "split_morphemes", + "morph_splits": "split_morphemes", + "morphs_split": "split_morphemes", } @@ -140,6 +156,10 @@ def help(self): def available(): return 
list(supported_modules.keys()) + @staticmethod + def alias(): + return alias + def _check_module(self, module: str, supported_modules, alias): from kss._utils.sanity_checks import _check_type @@ -181,4 +201,4 @@ def _find_closest_module(module, min_distance=0.5): __ALL__ = list(supported_modules.keys()) + ["Kss"] -__version__ = "6.0.1" +__version__ = "6.0.2" diff --git a/kss/_modules/g2p/g2p.py b/kss/_modules/g2p/g2p.py index 219f74d..77d7e6a 100644 --- a/kss/_modules/g2p/g2p.py +++ b/kss/_modules/g2p/g2p.py @@ -54,7 +54,7 @@ def g2p( text (Union[str, List[str], Tuple[str]]): single text or list of texts descriptive (bool): return descriptive pronunciation, the 'descriptive' means a real-life pronunciation group_vowels (bool): If True, the vowels of the identical sound are normalized. (e.g. ㅒ -> ㅖ) - to_syllable: If True, hangul letters or jamo are assembled to form syllables. + to_syllable (bool): If True, hangul letters or jamo are assembled to form syllables. convert_english_to_hangul_phonemes (bool): If True, convert English to Hangul phonemes convert_numbers_to_hangul_phonemes (bool): If True, convert numbers to Hangul phonemes num_workers (Union[int, str]): the number of multiprocessing workers diff --git a/kss/_modules/keywords/extract_keywords.py b/kss/_modules/keywords/extract_keywords.py index bbe2c3e..9bbe033 100644 --- a/kss/_modules/keywords/extract_keywords.py +++ b/kss/_modules/keywords/extract_keywords.py @@ -1,6 +1,6 @@ # This code was copied from KR-WordRank [https://github.com/lovit/KR-WordRank] # And modified by Hyunwoong Ko [https://github.com/hyuwoongko] -from typing import List, Union +from typing import List, Union, Tuple from kss._modules.keywords.utils import KRWordRank from kss._utils.sanity_checks import _check_text, _check_type, _check_backend_mecab_pecab_only @@ -15,7 +15,7 @@ def extract_keywords( backend: str = "auto", noun_only: bool = True, num_workers: Union[int, str] = "auto", -): +) -> Union[List[str], List[Tuple[str, float]]]: 
""" This extracts keywords from the given text. This uses TextRank algorithm to extract keywords. @@ -30,6 +30,9 @@ def extract_keywords( noun_only (bool): whether to extract only nouns or not num_workers (Union[int, str]): the number of multiprocessing workers + Returns: + Union[List[str], List[Tuple[str, float]]]: list of keywords or list of tuples of keywords and scores + Examples: >>> from kss import Kss >>> extract_keywords = Kss("extract_keywords") @@ -50,8 +53,8 @@ def extract_keywords( >>> print(output) ['너무', '정말', '마지막', '영화', '음악'] - References: - This was copied from [KR-WordRank](https://github.com/lovit/KR-WordRank) and modified by Kss + References: + This was copied from [KR-WordRank](https://github.com/lovit/KR-WordRank) and modified by Kss """ text, finish = _check_text(text)