Skip to content

Commit

Permalink
Add alias() function
Browse files Browse the repository at this point in the history
  • Loading branch information
hyunwoongko committed Apr 28, 2024
1 parent 0010fda commit a183292
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 7 deletions.
27 changes: 26 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,28 @@ from kss import split_sentences
output = split_sentences("YOUR_INPUT_STRING", **kwargs)
```

### 6. Alias of module names
Because Kss has so many modules, users may have difficulty remembering the name of each one.
Kss provides aliases for some modules to make it easier to use them.
```python
from kss import Kss

module_1 = Kss("split_morphemes")
module_2 = Kss("tokenize")
# For example, the 'split_morphemes' module can also be loaded through its alias 'tokenize'.
```

You can list every alias and the module it maps to by calling the `alias()` function.
```python
from kss import Kss

Kss.alias()
```

```python
{'aug': 'augment', 'augmentation': 'augment', 'collocation': 'collocate', 'hangulization': 'hangulize', 'hangulisation': 'hangulize', 'hangulise': 'hangulize', 'hanja': 'hanja2hangul', 'hangul2jamo': 'h2j', 'hangul2hcj': 'h2hcj', 'jamo2hangul': 'j2h', 'jamo2hcj': 'j2hcj', 'hcj2hangul': 'hcj2h', 'hcj2jamo': 'hcj2j', 'josa': 'select_josa', 'keyword': 'extract_keywords', 'keywords': 'extract_keywords', 'morpheme': 'split_morphemes', 'morphemes': 'split_morphemes', 'annonymization': 'anonymize', 'news_cleaning': 'clean_news', 'news': 'clean_news', 'completed_form': 'is_completed_form', 'completed': 'is_completed_form', 'filter': 'filter_out', 'reduce_repeats': 'reduce_char_repeats', 'reduce_char': 'reduce_char_repeats', 'reduce_chars': 'reduce_char_repeats', 'reduce_emoticon': 'reduce_emoticon_repeats', 'reduce_emoticons': 'reduce_emoticon_repeats', 'reduce_emo': 'reduce_emoticon_repeats', 'remove_invisible': 'remove_invisible_chars', 'invisible_chars': 'remove_invisible_chars', 'invisible': 'remove_invisible_chars', 'normalization': 'normalize', 'normalisation': 'normalize', 'normalise': 'normalize', 'preprocessing': 'preprocess', 'prep': 'preprocess', 'romanization': 'romanize', 'romanisation': 'romanize', 'romanise': 'romanize', 'safety': 'is_unsafe', 'check_safety': 'is_unsafe', 'sentence': 'split_sentences', 'sentences': 'split_sentences', 'sent_split': 'split_sentences', 'sent_splits': 'split_sentences', 'sents_split': 'split_sentences', 'split_sent': 'split_sentences', 'split_sents': 'split_sentences', 'spacing': 'correct_spacing', 'space': 'correct_spacing', 'spaces': 'correct_spacing', 'summarization': 'summarize_sentences', 'summarize': 'summarize_sentences', 'summ': 'summarize_sentences', 'morph': 'split_morphemes', 'morphs': 'split_morphemes', 'tokenize': 'split_morphemes', 'tokenization': 'split_morphemes', 'split_morph': 'split_morphemes', 'split_morphs': 'split_morphemes', 'morph_split': 'split_morphemes', 'morph_splits': 'split_morphemes', 
'morphs_split': 'split_morphemes'}
```

## Supported Modules
Kss supports the following modules; simple usage examples for each module are given in the following sections.

Expand Down Expand Up @@ -186,7 +208,7 @@ Args:
- text (`Union[str, List[str], Tuple[str]]`): single text or list of texts
- descriptive (`bool`): return the descriptive pronunciation, i.e. the pronunciation actually used in real life
- group_vowels (`bool`): If True, the vowels of the identical sound are normalized. (e.g. ㅒ -> ㅖ)
- to_syllable: If True, hangul letters or jamo are assembled to form syllables.
- to_syllable (`bool`): If True, hangul letters or jamo are assembled to form syllables.
- convert_english_to_hangul_phonemes (`bool`): If True, convert English to Hangul phonemes
- convert_numbers_to_hangul_phonemes (`bool`): If True, convert numbers to Hangul phonemes
- num_workers (`Union[int, str]`): the number of multiprocessing workers
Expand Down Expand Up @@ -691,6 +713,9 @@ Args:
- noun_only (`bool`): whether to extract only nouns or not
- num_workers (`Union[int, str]`): the number of multiprocessing workers

Returns:
- `Union[List[str], List[Tuple[str, float]]]`: list of keywords or list of tuples of keywords and scores

Examples:
```python
>>> from kss import Kss
Expand Down
22 changes: 21 additions & 1 deletion kss/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,12 @@
"filter": "filter_out",
"reduce_repeats": "reduce_char_repeats",
"reduce_char": "reduce_char_repeats",
"reduce_chars": "reduce_char_repeats",
"reduce_emoticon": "reduce_emoticon_repeats",
"reduce_emoticons": "reduce_emoticon_repeats",
"reduce_emo": "reduce_emoticon_repeats",
"remove_invisible": "remove_invisible_chars",
"invisible_chars": "remove_invisible_chars",
"invisible": "remove_invisible_chars",
"normalization": "normalize",
"normalisation": "normalize",
Expand All @@ -117,12 +121,24 @@
"sentences": "split_sentences",
"sent_split": "split_sentences",
"sent_splits": "split_sentences",
"sents_split": "split_sentences",
"split_sent": "split_sentences",
"split_sents": "split_sentences",
"spacing": "correct_spacing",
"space": "correct_spacing",
"spaces": "correct_spacing",
"summarization": "summarize_sentences",
"summarize": "summarize_sentences",
"summ": "summarize_sentences",
"morph": "split_morphemes",
"morphs": "split_morphemes",
"tokenize": "split_morphemes",
"tokenization": "split_morphemes",
"split_morph": "split_morphemes",
"split_morphs": "split_morphemes",
"morph_split": "split_morphemes",
"morph_splits": "split_morphemes",
"morphs_split": "split_morphemes",
}


Expand All @@ -140,6 +156,10 @@ def help(self):
def available():
return list(supported_modules.keys())

@staticmethod
def alias():
    """
    Return the module-level ``alias`` object.

    Inside this function the name ``alias`` resolves to the module global,
    not to this method, because a class body is not an enclosing scope for
    name lookup in its methods.

    The object is returned as-is (no copy is made), so any mutation of the
    result is visible to every other user of the same object.

    NOTE(review): presumably this is the alias-name -> module-name dict
    defined earlier in this file -- confirm against the full module.

    Returns:
        the module-level ``alias`` object
    """
    return alias

def _check_module(self, module: str, supported_modules, alias):
from kss._utils.sanity_checks import _check_type

Expand Down Expand Up @@ -181,4 +201,4 @@ def _find_closest_module(module, min_distance=0.5):


__ALL__ = list(supported_modules.keys()) + ["Kss"]
__version__ = "6.0.1"
__version__ = "6.0.2"
2 changes: 1 addition & 1 deletion kss/_modules/g2p/g2p.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def g2p(
text (Union[str, List[str], Tuple[str]]): single text or list of texts
descriptive (bool): return descriptive pronunciation, the 'descriptive' means a real-life pronunciation
group_vowels (bool): If True, the vowels of the identical sound are normalized. (e.g. ㅒ -> ㅖ)
to_syllable: If True, hangul letters or jamo are assembled to form syllables.
to_syllable (bool): If True, hangul letters or jamo are assembled to form syllables.
convert_english_to_hangul_phonemes (bool): If True, convert English to Hangul phonemes
convert_numbers_to_hangul_phonemes (bool): If True, convert numbers to Hangul phonemes
num_workers (Union[int, str]): the number of multiprocessing workers
Expand Down
11 changes: 7 additions & 4 deletions kss/_modules/keywords/extract_keywords.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# This code was copied from KR-WordRank [https://github.com/lovit/KR-WordRank]
# And modified by Hyunwoong Ko [https://github.com/hyunwoongko]
from typing import List, Union
from typing import List, Union, Tuple

from kss._modules.keywords.utils import KRWordRank
from kss._utils.sanity_checks import _check_text, _check_type, _check_backend_mecab_pecab_only
Expand All @@ -15,7 +15,7 @@ def extract_keywords(
backend: str = "auto",
noun_only: bool = True,
num_workers: Union[int, str] = "auto",
):
) -> Union[List[str], List[Tuple[str, float]]]:
"""
This extracts keywords from the given text.
This uses TextRank algorithm to extract keywords.
Expand All @@ -30,6 +30,9 @@ def extract_keywords(
noun_only (bool): whether to extract only nouns or not
num_workers (Union[int, str]): the number of multiprocessing workers
Returns:
Union[List[str], List[Tuple[str, float]]]: list of keywords or list of tuples of keywords and scores
Examples:
>>> from kss import Kss
>>> extract_keywords = Kss("extract_keywords")
Expand All @@ -50,8 +53,8 @@ def extract_keywords(
>>> print(output)
['너무', '정말', '마지막', '영화', '음악']
References:
This was copied from [KR-WordRank](https://github.com/lovit/KR-WordRank) and modified by Kss
References:
This was copied from [KR-WordRank](https://github.com/lovit/KR-WordRank) and modified by Kss
"""

text, finish = _check_text(text)
Expand Down

0 comments on commit a183292

Please sign in to comment.