From 0477ed8d908699348260b0c6c74caa4c431725a9 Mon Sep 17 00:00:00 2001 From: hankcs Date: Mon, 27 Nov 2023 20:15:55 -0800 Subject: [PATCH] Translate documents to Chinese --- .github/ISSUE_TEMPLATE/bug_report.md | 11 +- .github/ISSUE_TEMPLATE/config.yml | 4 +- .github/ISSUE_TEMPLATE/feature_request.md | 13 +- README.md | 321 ++++-- .../abstractive_summarization_restful.ipynb | 160 +++ .../hanlp_demo/zh/amr_restful.ipynb | 432 ++++++++ .../hanlp_demo/hanlp_demo/zh/amr_stl.ipynb | 361 +++++++ .../zh/classification_restful.ipynb | 259 +++++ .../hanlp_demo/hanlp_demo/zh/con_mtl.ipynb | 355 +++++++ .../hanlp_demo/zh/con_restful.ipynb | 280 +++++ .../hanlp_demo/hanlp_demo/zh/con_stl.ipynb | 607 +++++++++++ .../hanlp_demo/zh/cor_restful.ipynb | 221 ++++ .../hanlp_demo/hanlp_demo/zh/dep_mtl.ipynb | 379 +++++++ .../hanlp_demo/zh/dep_restful.ipynb | 314 ++++++ .../hanlp_demo/hanlp_demo/zh/dep_stl.ipynb | 430 ++++++++ .../zh/extractive_summarization_restful.ipynb | 277 +++++ .../hanlp_demo/zh/gec_restful.ipynb | 149 +++ .../hanlp_demo/zh/keyphrase_restful.ipynb | 243 +++++ .../hanlp_demo/zh/lid_restful.ipynb | 245 +++++ .../hanlp_demo/hanlp_demo/zh/lid_stl.ipynb | 281 +++++ .../hanlp_demo/hanlp_demo/zh/ner_mtl.ipynb | 523 +++++++++ .../hanlp_demo/zh/ner_restful.ipynb | 335 ++++++ .../hanlp_demo/hanlp_demo/zh/ner_stl.ipynb | 325 ++++++ .../hanlp_demo/hanlp_demo/zh/pos_mtl.ipynb | 403 +++++++ .../hanlp_demo/zh/pos_restful.ipynb | 309 ++++++ .../hanlp_demo/hanlp_demo/zh/pos_stl.ipynb | 319 ++++++ .../hanlp_demo/hanlp_demo/zh/sdp_mtl.ipynb | 335 ++++++ .../hanlp_demo/zh/sdp_restful.ipynb | 261 +++++ .../hanlp_demo/hanlp_demo/zh/sdp_stl.ipynb | 401 +++++++ .../hanlp_demo/zh/sentiment_restful.ipynb | 272 +++++ .../hanlp_demo/hanlp_demo/zh/srl_mtl.ipynb | 366 +++++++ .../hanlp_demo/zh/srl_restful.ipynb | 312 ++++++ .../hanlp_demo/hanlp_demo/zh/srl_stl.ipynb | 218 ++++ .../hanlp_demo/zh/sts_restful.ipynb | 138 +++ .../hanlp_demo/hanlp_demo/zh/sts_stl.ipynb | 151 +++ 
.../hanlp_demo/hanlp_demo/zh/tok_mtl.ipynb | 630 +++++++++++ .../hanlp_demo/zh/tok_restful.ipynb | 324 ++++++ .../hanlp_demo/hanlp_demo/zh/tok_stl.ipynb | 722 +++++++++++++ .../hanlp_demo/zh/train/finetune_ner.py | 25 +- .../hanlp_demo/zh/tst_restful.ipynb | 135 +++ .../hanlp_demo/hanlp_demo/zh/tutorial.ipynb | 993 ++++++++++++++++++ 41 files changed, 12732 insertions(+), 107 deletions(-) create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/abstractive_summarization_restful.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/amr_restful.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/amr_stl.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/classification_restful.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/con_mtl.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/con_restful.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/con_stl.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/cor_restful.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/dep_mtl.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/dep_restful.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/dep_stl.ipynb create mode 100755 plugins/hanlp_demo/hanlp_demo/zh/extractive_summarization_restful.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/gec_restful.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/keyphrase_restful.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/lid_restful.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/lid_stl.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/ner_mtl.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/ner_restful.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/ner_stl.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/pos_mtl.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/pos_restful.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/pos_stl.ipynb create mode 100644 
plugins/hanlp_demo/hanlp_demo/zh/sdp_mtl.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/sdp_restful.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/sdp_stl.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/sentiment_restful.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/srl_mtl.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/srl_restful.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/srl_stl.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/sts_restful.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/sts_stl.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/tok_mtl.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/tok_restful.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/tok_stl.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/tst_restful.ipynb create mode 100644 plugins/hanlp_demo/hanlp_demo/zh/tutorial.ipynb diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index fa2917c38..105e8a551 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -1,6 +1,6 @@ --- -name: ๐Ÿ›Bug report -about: Create a report to help us improve +name: ๐Ÿ›ๅ‘็Žฐไธ€ไธชbug +about: ้œ€ๆไบค็‰ˆๆœฌๅทใ€่งฆๅ‘ไปฃ็ ใ€้”™่ฏฏๆ—ฅๅฟ— title: '' labels: bug assignees: hankcs @@ -8,9 +8,7 @@ assignees: hankcs --- **Describe the bug** @@ -37,3 +35,6 @@ A clear and concise description of what you expected to happen. Include any logs or source code that would be helpful to diagnose the problem. If including tracebacks, please include the full traceback. Large logs and files should be attached. * [ ] I've completed this form and searched the web for solutions. 
+ + + \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 3798e2d93..ec9fbc54f 100755 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,5 +1,5 @@ blank_issues_enabled: false contact_links: - - name: ⁉️ Need help with HanLP? + - name: ⁉️ 提问求助请上论坛 url: https://bbs.hankcs.com/ - about: Join our multilingual forum and have a free discussion. + about: 欢迎前往蝴蝶效应论坛求助 diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 7fe9ac744..6f16d2594 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -1,6 +1,6 @@ --- -name: 🚀Feature request -about: Suggest an idea for this project +name: 🚀新功能请愿 +about: 建议增加一个新功能 title: '' labels: feature request assignees: hankcs @@ -8,8 +8,10 @@ assignees: hankcs --- @@ -29,3 +31,6 @@ Please fill in the template below to bypass our spam filter. **Any other info** * [ ] I've carefully completed this form. + + + \ No newline at end of file diff --git a/README.md b/README.md index 5d60625e4..9269a14a4 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,3 @@ -
-

HanLP: Han Language Processing

@@ -15,117 +13,260 @@ Downloads - - Open In Colab + + ๅœจ็บฟ่ฟ่กŒ
-

- ไธญๆ–‡ | + English | ๆ—ฅๆœฌ่ชž | - Docs | - Forum + ๆ–‡ๆกฃ | + ่ฎบๆ–‡ | + ่ฎบๅ› | + docker | + โ–ถ๏ธๅœจ็บฟ่ฟ่กŒ

-The multilingual NLP library for researchers and companies, built on PyTorch and TensorFlow 2.x, for advancing -state-of-the-art deep learning techniques in both academia and industry. HanLP was designed from day one to be -efficient, user-friendly and extendable. -Thanks to open-access corpora like Universal Dependencies and OntoNotes, HanLP 2.1 now offers 10 joint tasks on [130 -languages](https://hanlp.hankcs.com/docs/api/hanlp/pretrained/mtl.html#hanlp.pretrained.mtl.UD_ONTONOTES_TOK_POS_LEM_FEA_NER_SRL_DEP_SDP_CON_MMINILMV2L6): tokenization, lemmatization, part-of-speech tagging, token feature extraction, dependency parsing, -constituency parsing, semantic role labeling, semantic dependency parsing, abstract meaning representation (AMR) -parsing. -For end users, HanLP offers light-weighted RESTful APIs and native Python APIs. +้ขๅ‘็”Ÿไบง็Žฏๅขƒ็š„ๅคš่ฏญ็ง่‡ช็„ถ่ฏญ่จ€ๅค„็†ๅทฅๅ…ทๅŒ…๏ผŒๅŸบไบŽPyTorchๅ’ŒTensorFlow 2.xๅŒๅผ•ๆ“Ž๏ผŒ็›ฎๆ ‡ๆ˜ฏๆ™ฎๅŠ่ฝๅœฐๆœ€ๅ‰ๆฒฟ็š„NLPๆŠ€ๆœฏใ€‚HanLPๅ…ทๅค‡ๅŠŸ่ƒฝๅฎŒๅ–„ใ€็ฒพๅบฆๅ‡†็กฎใ€ๆ€ง่ƒฝ้ซ˜ๆ•ˆใ€่ฏญๆ–™ๆ—ถๆ–ฐใ€ๆžถๆž„ๆธ…ๆ™ฐใ€ๅฏ่‡ชๅฎšไน‰็š„็‰น็‚นใ€‚ -## RESTful APIs +[![demo](https://raw.githubusercontent.com/hankcs/OpenCC-to-HanLP/img/demo.gif)](https://mybinder.org/v2/gh/hankcs/HanLP/doc-zh?filepath=plugins%2Fhanlp_demo%2Fhanlp_demo%2Fzh%2Ftutorial.ipynb) -Tiny packages in several KBs for agile development and mobile applications. Although anonymous users are welcomed, an -auth key is suggested -and [a free one can be applied here](https://bbs.hankcs.com/t/apply-for-free-hanlp-restful-apis/3178) under -the [CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) license. 
+ๅ€ŸๅŠฉไธ–็•ŒไธŠๆœ€ๅคง็š„ๅคš่ฏญ็ง่ฏญๆ–™ๅบ“๏ผŒHanLP2.1ๆ”ฏๆŒๅŒ…ๆ‹ฌ็ฎ€็นไธญ่‹ฑๆ—ฅไฟ„ๆณ•ๅพทๅœจๅ†…็š„[130็ง่ฏญ่จ€](https://hanlp.hankcs.com/docs/api/hanlp/pretrained/mtl.html#hanlp.pretrained.mtl.UD_ONTONOTES_TOK_POS_LEM_FEA_NER_SRL_DEP_SDP_CON_MMINILMV2L6)ไธŠ็š„10็ง่”ๅˆไปปๅŠกไปฅๅŠๅคš็งๅ•ไปปๅŠกใ€‚HanLP้ข„่ฎญ็ปƒไบ†ๅๅ‡ ็งไปปๅŠกไธŠ็š„ๆ•ฐๅไธชๆจกๅž‹ๅนถไธ”ๆญฃๅœจๆŒ็ปญ่ฟญไปฃ่ฏญๆ–™ๅบ“ไธŽๆจกๅž‹๏ผš -
- Click to expand tutorials for RESTful APIs +
- ### Python +| ๅŠŸ่ƒฝ | RESTful | ๅคšไปปๅŠก | ๅ•ไปปๅŠก | ๆจกๅž‹ | ๆ ‡ๆณจๆ ‡ๅ‡† | +| ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | +| [ๅˆ†่ฏ](https://hanlp.hankcs.com/demos/tok.html) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/tok_restful.ipynb) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/tok_mtl.ipynb) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/tok_stl.ipynb) | [tok](https://hanlp.hankcs.com/docs/api/hanlp/pretrained/tok.html) | [็ฒ—ๅˆ†](https://hanlp.hankcs.com/docs/annotations/tok/msr.html)ใ€[็ป†ๅˆ†](https://hanlp.hankcs.com/docs/annotations/tok/ctb.html) | +| [่ฏๆ€งๆ ‡ๆณจ](https://hanlp.hankcs.com/demos/pos.html) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/pos_restful.ipynb) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/pos_mtl.ipynb) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/pos_stl.ipynb) | [pos](https://hanlp.hankcs.com/docs/api/hanlp/pretrained/pos.html) | [CTB](https://hanlp.hankcs.com/docs/annotations/pos/ctb.html)ใ€[PKU](https://hanlp.hankcs.com/docs/annotations/pos/pku.html)ใ€[863](https://hanlp.hankcs.com/docs/annotations/pos/863.html) | +| [ๅ‘ฝๅๅฎžไฝ“่ฏ†ๅˆซ](https://hanlp.hankcs.com/demos/ner.html) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/ner_restful.ipynb) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/ner_mtl.ipynb) | 
[ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/ner_stl.ipynb) | [ner](https://hanlp.hankcs.com/docs/api/hanlp/pretrained/ner.html) | [PKU](https://hanlp.hankcs.com/docs/annotations/ner/pku.html)ใ€[MSRA](https://hanlp.hankcs.com/docs/annotations/ner/msra.html)ใ€[OntoNotes](https://hanlp.hankcs.com/docs/annotations/ner/ontonotes.html) | +| [ไพๅญ˜ๅฅๆณ•ๅˆ†ๆž](https://hanlp.hankcs.com/demos/dep.html) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/dep_restful.ipynb) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/dep_mtl.ipynb) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/dep_stl.ipynb) | [dep](https://hanlp.hankcs.com/docs/api/hanlp/pretrained/dep.html) | [SD](https://hanlp.hankcs.com/docs/annotations/dep/sd_zh.html)ใ€[UD](https://hanlp.hankcs.com/docs/annotations/dep/ud.html#chinese)ใ€[PMT](https://hanlp.hankcs.com/docs/annotations/dep/pmt.html) | +| [ๆˆๅˆ†ๅฅๆณ•ๅˆ†ๆž](https://hanlp.hankcs.com/demos/con.html) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/con_restful.ipynb) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/con_mtl.ipynb) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/con_stl.ipynb) | [con](https://hanlp.hankcs.com/docs/api/hanlp/pretrained/constituency.html) | [Chinese Tree Bank](https://hanlp.hankcs.com/docs/annotations/constituency/ctb.html) | +| [่ฏญไน‰ไพๅญ˜ๅˆ†ๆž](https://hanlp.hankcs.com/demos/sdp.html) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/sdp_restful.ipynb) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/sdp_mtl.ipynb) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/sdp_stl.ipynb) | 
[sdp](https://hanlp.hankcs.com/docs/api/hanlp/pretrained/sdp.html) | [CSDP](https://hanlp.hankcs.com/docs/annotations/sdp/semeval16.html#) | +| [่ฏญไน‰่ง’่‰ฒๆ ‡ๆณจ](https://hanlp.hankcs.com/demos/srl.html) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/srl_restful.ipynb) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/srl_mtl.ipynb) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/srl_stl.ipynb) | [srl](https://hanlp.hankcs.com/docs/api/hanlp/pretrained/srl.html) | [Chinese Proposition Bank](https://hanlp.hankcs.com/docs/annotations/srl/cpb.html) | +| [ๆŠฝ่ฑกๆ„ไน‰่กจ็คบ](https://hanlp.hankcs.com/demos/amr.html) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/amr_restful.ipynb) | ๆš‚ๆ—  | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/amr_stl.ipynb) | [amr](https://hanlp.hankcs.com/docs/api/hanlp/pretrained/amr.html) | [CAMR](https://www.hankcs.com/nlp/corpus/introduction-to-chinese-abstract-meaning-representation.html) | +| [ๆŒ‡ไปฃๆถˆ่งฃ](https://hanlp.hankcs.com/demos/cor.html) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/cor_restful.ipynb) | ๆš‚ๆ—  | ๆš‚ๆ—  | ๆš‚ๆ—  | OntoNotes | +| [่ฏญไน‰ๆ–‡ๆœฌ็›ธไผผๅบฆ](https://hanlp.hankcs.com/demos/sts.html) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/sts_restful.ipynb) | ๆš‚ๆ—  | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/sts_stl.ipynb) | [sts](https://hanlp.hankcs.com/docs/api/hanlp/pretrained/sts.html) | ๆš‚ๆ—  | +| [ๆ–‡ๆœฌ้ฃŽๆ ผ่ฝฌๆข](https://hanlp.hankcs.com/demos/tst.html) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/tst_restful.ipynb) | ๆš‚ๆ—  | ๆš‚ๆ—  | ๆš‚ๆ—  | ๆš‚ๆ—  | +| [ๅ…ณ้”ฎ่ฏ็Ÿญ่ฏญๆๅ–](https://hanlp.hankcs.com/demos/keyphrase.html) | 
[ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/keyphrase_restful.ipynb) | ๆš‚ๆ—  | ๆš‚ๆ—  | ๆš‚ๆ—  | ๆš‚ๆ—  | +| [ๆŠฝๅ–ๅผ่‡ชๅŠจๆ‘˜่ฆ](https://hanlp.hankcs.com/demos/exsum.html) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/extractive_summarization_restful.ipynb) | ๆš‚ๆ—  | ๆš‚ๆ—  | ๆš‚ๆ—  | ๆš‚ๆ—  | +| [็”Ÿๆˆๅผ่‡ชๅŠจๆ‘˜่ฆ](https://hanlp.hankcs.com/demos/absum.html) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/abstractive_summarization_restful.ipynb) | ๆš‚ๆ—  | ๆš‚ๆ—  | ๆš‚ๆ—  | ๆš‚ๆ—  | +| [ๆ–‡ๆœฌ่ฏญๆณ•็บ ้”™](https://hanlp.hankcs.com/demos/gec.html) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/gec_restful.ipynb) | ๆš‚ๆ—  | ๆš‚ๆ—  | ๆš‚ๆ—  | ๆš‚ๆ—  | +| [ๆ–‡ๆœฌๅˆ†็ฑป](https://hanlp.hankcs.com/demos/classification.html) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/classification_restful.ipynb) | ๆš‚ๆ—  | ๆš‚ๆ—  | ๆš‚ๆ—  | ๆš‚ๆ—  | +| [ๆƒ…ๆ„Ÿๅˆ†ๆž](https://hanlp.hankcs.com/demos/sentiment.html) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/sentiment_restful.ipynb) | ๆš‚ๆ—  | ๆš‚ๆ—  | ๆš‚ๆ—  | `[-1,+1]` | +| [่ฏญ็งๆฃ€ๆต‹](https://hanlp.hankcs.com/demos/classification.html) | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/lid_restful.ipynb) | ๆš‚ๆ—  | [ๆ•™็จ‹](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/lid_stl.ipynb) | ๆš‚ๆ—  | [ISO 639-1็ผ–็ ](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) | - ```bash - pip install hanlp_restful - ``` +
- Create a client with our API endpoint and your auth. +- ่ฏๅนฒๆๅ–ใ€่ฏๆณ•่ฏญๆณ•็‰นๅพๆๅ–่ฏทๅ‚่€ƒ[่‹ฑๆ–‡ๆ•™็จ‹](https://hanlp.hankcs.com/docs/tutorial.html)๏ผ›[่ฏๅ‘้‡](https://hanlp.hankcs.com/docs/api/hanlp/pretrained/word2vec.html)ๅ’Œ[ๅฎŒๅฝขๅกซ็ฉบ](https://hanlp.hankcs.com/docs/api/hanlp/pretrained/mlm.html)่ฏทๅ‚่€ƒ็›ธๅบ”ๆ–‡ๆกฃใ€‚ +- ็ฎ€็น่ฝฌๆขใ€ๆ‹ผ้Ÿณใ€ๆ–ฐ่ฏๅ‘็Žฐใ€ๆ–‡ๆœฌ่š็ฑป่ฏทๅ‚่€ƒ[1.xๆ•™็จ‹](https://github.com/hankcs/HanLP/tree/1.x)ใ€‚ - ```python - from hanlp_restful import HanLPClient - HanLP = HanLPClient('https://hanlp.hankcs.com/api', auth=None, language='mul') # mul: multilingual, zh: Chinese - ``` +้‡ไฝ“่ฃ่กฃ๏ผŒHanLPๆไพ›**RESTful**ๅ’Œ**native**ไธค็งAPI๏ผŒๅˆ†ๅˆซ้ขๅ‘่ฝป้‡็บงๅ’Œๆตท้‡็บงไธค็งๅœบๆ™ฏใ€‚ๆ— ่ฎบไฝ•็งAPIไฝ•็ง่ฏญ่จ€๏ผŒHanLPๆŽฅๅฃๅœจ่ฏญไน‰ไธŠไฟๆŒไธ€่‡ด๏ผŒๅœจไปฃ็ ไธŠๅšๆŒๅผ€ๆบใ€‚ๅฆ‚ๆžœๆ‚จๅœจ็ ”็ฉถไธญไฝฟ็”จไบ†HanLP๏ผŒ่ฏทๅผ•็”จๆˆ‘ไปฌ็š„[EMNLP่ฎบๆ–‡](https://aclanthology.org/2021.emnlp-main.451/)ใ€‚ - ### Java +### ่ฝป้‡็บงRESTful API - Insert the following dependency into your `pom.xml`. +ไป…ๆ•ฐKB๏ผŒ้€‚ๅˆๆ•ๆทๅผ€ๅ‘ใ€็งปๅŠจAPP็ญ‰ๅœบๆ™ฏใ€‚็ฎ€ๅ•ๆ˜“็”จ๏ผŒๆ— ้œ€GPU้…็Žฏๅขƒ๏ผŒ็ง’้€Ÿๅฎ‰่ฃ…ใ€‚่ฏญๆ–™ๆ›ดๅคšใ€ๆจกๅž‹ๆ›ดๅคงใ€็ฒพๅบฆๆ›ด้ซ˜๏ผŒ**ๅผบ็ƒˆๆŽจ่**ใ€‚ๆœๅŠกๅ™จGPU็ฎ—ๅŠ›ๆœ‰้™๏ผŒๅŒฟๅ็”จๆˆท้…้ข่พƒๅฐ‘๏ผŒ[ๅปบ่ฎฎ็”ณ่ฏท**ๅ…่ดนๅ…ฌ็›Š**API็ง˜้’ฅ`auth`](https://bbs.hanlp.com/t/hanlp2-1-restful-api/53)ใ€‚ - ```xml - - com.hankcs.hanlp.restful - hanlp-restful - 0.0.15 - - ``` +#### Python - Create a client with our API endpoint and your auth. 
+```shell +pip install hanlp_restful +``` - ```java - HanLPClient HanLP = new HanLPClient("https://hanlp.hankcs.com/api", null, "mul"); // mul: multilingual, zh: Chinese +ๅˆ›ๅปบๅฎขๆˆท็ซฏ๏ผŒๅกซๅ…ฅๆœๅŠกๅ™จๅœฐๅ€ๅ’Œ็ง˜้’ฅ๏ผš - ``` +```python +from hanlp_restful import HanLPClient +HanLP = HanLPClient('https://www.hanlp.com/api', auth=None, language='zh') # authไธๅกซๅˆ™ๅŒฟๅ๏ผŒzhไธญๆ–‡๏ผŒmulๅคš่ฏญ็ง +``` - ### Quick Start +#### Golang - No matter which language you use, the same interface can be used to parse a document. +ๅฎ‰่ฃ… `go get -u github.com/hankcs/gohanlp@main` ๏ผŒๅˆ›ๅปบๅฎขๆˆท็ซฏ๏ผŒๅกซๅ…ฅๆœๅŠกๅ™จๅœฐๅ€ๅ’Œ็ง˜้’ฅ๏ผš - ```python - HanLP.parse( - "In 2021, HanLPv2.1 delivers state-of-the-art multilingual NLP techniques to production environments. 2021ๅนดใ€HanLPv2.1ใฏๆฌกไธ–ไปฃใฎๆœ€ๅ…ˆ็ซฏๅคš่จ€่ชžNLPๆŠ€่ก“ใ‚’ๆœฌ็•ช็’ฐๅขƒใซๅฐŽๅ…ฅใ—ใพใ™ใ€‚2021ๅนด HanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚") - ``` +```go +HanLP := hanlp.HanLPClient(hanlp.WithAuth(""),hanlp.WithLanguage("zh")) // authไธๅกซๅˆ™ๅŒฟๅ๏ผŒzhไธญๆ–‡๏ผŒmulๅคš่ฏญ็ง +``` - See [docs](https://hanlp.hankcs.com/docs/tutorial.html) for visualization, annotation guidelines and more details. +#### Java -
+ๅœจ`pom.xml`ไธญๆทปๅŠ ไพ่ต–๏ผš +```xml + + com.hankcs.hanlp.restful + hanlp-restful + 0.0.12 + +``` + +ๅˆ›ๅปบๅฎขๆˆท็ซฏ๏ผŒๅกซๅ…ฅๆœๅŠกๅ™จๅœฐๅ€ๅ’Œ็ง˜้’ฅ๏ผš + +```java +HanLPClient HanLP = new HanLPClient("https://www.hanlp.com/api", null, "zh"); // authไธๅกซๅˆ™ๅŒฟๅ๏ผŒzhไธญๆ–‡๏ผŒmulๅคš่ฏญ็ง +``` + +#### ๅฟซ้€ŸไธŠๆ‰‹ -## Native APIs +ๆ— ่ฎบไฝ•็งๅผ€ๅ‘่ฏญ่จ€๏ผŒ่ฐƒ็”จ`parse`ๆŽฅๅฃ๏ผŒไผ ๅ…ฅไธ€็ฏ‡ๆ–‡็ซ ๏ผŒๅพ—ๅˆฐHanLP็ฒพๅ‡†็š„ๅˆ†ๆž็ป“ๆžœใ€‚ + +```java +HanLP.parse("2021ๅนดHanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธใ€‚") +``` + +ๆ›ดๅคšๅŠŸ่ƒฝๅŒ…ๆ‹ฌ่ฏญไน‰็›ธไผผๅบฆใ€้ฃŽๆ ผ่ฝฌๆขใ€ๆŒ‡ไปฃๆถˆ่งฃ็ญ‰๏ผŒ่ฏทๅ‚่€ƒ[ๆ–‡ๆกฃ](https://hanlp.hankcs.com/docs/api/restful.html)ๅ’Œ[ๆต‹่ฏ•็”จไพ‹](https://github.com/hankcs/HanLP/blob/master/plugins/hanlp_restful/tests/test_client.py)ใ€‚ + +### ๆตท้‡็บงnative API + +ไพ่ต–PyTorchใ€TensorFlow็ญ‰ๆทฑๅบฆๅญฆไน ๆŠ€ๆœฏ๏ผŒ้€‚ๅˆ**ไธ“ไธš**NLPๅทฅ็จ‹ๅธˆใ€็ ”็ฉถ่€…ไปฅๅŠๆœฌๅœฐๆตท้‡ๆ•ฐๆฎๅœบๆ™ฏใ€‚่ฆๆฑ‚Python 3.6่‡ณ3.10๏ผŒๆ”ฏๆŒWindows๏ผŒๆŽจ่*nixใ€‚ๅฏไปฅๅœจCPUไธŠ่ฟ่กŒ๏ผŒๆŽจ่GPU/TPUใ€‚ๅฎ‰่ฃ…PyTorch็‰ˆ๏ผš ```bash pip install hanlp ``` -HanLP requires Python 3.6 or later. GPU/TPU is suggested but not mandatory. 
+- HanLPๆฏๆฌกๅ‘ๅธƒ้ƒฝ้€š่ฟ‡ไบ†Linuxใ€macOSๅ’ŒWindowsไธŠPython3.6่‡ณ3.10็š„[ๅ•ๅ…ƒๆต‹่ฏ•](https://github.com/hankcs/HanLP/actions?query=branch%3Amaster)๏ผŒไธๅญ˜ๅœจๅฎ‰่ฃ…้—ฎ้ข˜ใ€‚ + +HanLPๅ‘ๅธƒ็š„ๆจกๅž‹ๅˆ†ไธบๅคšไปปๅŠกๅ’Œๅ•ไปปๅŠกไธค็ง๏ผŒๅคšไปปๅŠก้€Ÿๅบฆๅฟซ็œๆ˜พๅญ˜๏ผŒๅ•ไปปๅŠก็ฒพๅบฆ้ซ˜ๆ›ด็ตๆดปใ€‚ + +#### ๅคšไปปๅŠกๆจกๅž‹ + +HanLP็š„ๅทฅไฝœๆต็จ‹ไธบๅŠ ่ฝฝๆจกๅž‹็„ถๅŽๅฐ†ๅ…ถๅฝ“ไฝœๅ‡ฝๆ•ฐ่ฐƒ็”จ๏ผŒไพ‹ๅฆ‚ไธ‹ๅˆ—่”ๅˆๅคšไปปๅŠกๆจกๅž‹๏ผš + +```python +import hanlp +HanLP = hanlp.load(hanlp.pretrained.mtl.CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_SMALL_ZH) # ไธ–็•Œๆœ€ๅคงไธญๆ–‡่ฏญๆ–™ๅบ“ +HanLP(['2021ๅนดHanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚', '้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธใ€‚']) +``` + +Native API็š„่พ“ๅ…ฅๅ•ไฝไธบๅฅๅญ๏ผŒ้œ€ไฝฟ็”จ[ๅคš่ฏญ็งๅˆ†ๅฅๆจกๅž‹](https://github.com/hankcs/HanLP/blob/master/plugins/hanlp_demo/hanlp_demo/sent_split.py)ๆˆ–[ๅŸบไบŽ่ง„ๅˆ™็š„ๅˆ†ๅฅๅ‡ฝๆ•ฐ](https://github.com/hankcs/HanLP/blob/master/hanlp/utils/rules.py#L19)ๅ…ˆ่กŒๅˆ†ๅฅใ€‚RESTfulๅ’Œnativeไธค็งAPI็š„่ฏญไน‰่ฎพ่ฎกๅฎŒๅ…จไธ€่‡ด๏ผŒ็”จๆˆทๅฏไปฅๆ— ็ผไบ’ๆขใ€‚็ฎ€ๆด็š„ๆŽฅๅฃไนŸๆ”ฏๆŒ็ตๆดป็š„ๅ‚ๆ•ฐ๏ผŒๅธธ็”จ็š„ๆŠ€ๅทงๆœ‰๏ผš + +- ็ตๆดป็š„`tasks`ไปปๅŠก่ฐƒๅบฆ๏ผŒไปปๅŠก่ถŠๅฐ‘๏ผŒ้€Ÿๅบฆ่ถŠๅฟซ๏ผŒ่ฏฆ่ง[ๆ•™็จ‹](https://mybinder.org/v2/gh/hankcs/HanLP/doc-zh?filepath=plugins%2Fhanlp_demo%2Fhanlp_demo%2Fzh%2Ftutorial.ipynb)ใ€‚ๅœจๅ†…ๅญ˜ๆœ‰้™็š„ๅœบๆ™ฏไธ‹๏ผŒ็”จๆˆท่ฟ˜ๅฏไปฅ[ๅˆ ้™คไธ้œ€่ฆ็š„ไปปๅŠก](https://github.com/hankcs/HanLP/blob/master/plugins/hanlp_demo/hanlp_demo/zh/demo_del_tasks.py)่พพๅˆฐๆจกๅž‹็˜ฆ่บซ็š„ๆ•ˆๆžœใ€‚ +- ้ซ˜ๆ•ˆ็š„trieๆ ‘่‡ชๅฎšไน‰่ฏๅ…ธ๏ผŒไปฅๅŠๅผบๅˆถใ€ๅˆๅนถใ€ๆ กๆญฃ3็ง่ง„ๅˆ™๏ผŒ่ฏทๅ‚่€ƒ[demo](https://github.com/hankcs/HanLP/blob/doc-zh/plugins/hanlp_demo/hanlp_demo/zh/tok_mtl.ipynb)ๅ’Œ[ๆ–‡ๆกฃ](https://hanlp.hankcs.com/docs/api/hanlp/components/tokenizers/transformer.html)ใ€‚่ง„ๅˆ™็ณป็ปŸ็š„ๆ•ˆๆžœๅฐ†ๆ— ็ผๅบ”็”จๅˆฐๅŽ็ปญ็ปŸ่ฎกๆจกๅž‹๏ผŒไปŽ่€Œๅฟซ้€Ÿ้€‚ๅบ”ๆ–ฐ้ข†ๅŸŸใ€‚ + +#### ๅ•ไปปๅŠกๆจกๅž‹ -### Quick Start +ๆ 
นๆฎๆˆ‘ไปฌ็š„[ๆœ€ๆ–ฐ็ ”็ฉถ](https://aclanthology.org/2021.emnlp-main.451)๏ผŒๅคšไปปๅŠกๅญฆไน ็š„ไผ˜ๅŠฟๅœจไบŽ้€Ÿๅบฆๅ’Œๆ˜พๅญ˜๏ผŒ็„ถ่€Œ็ฒพๅบฆๅพ€ๅพ€ไธๅฆ‚ๅ•ไปปๅŠกๆจกๅž‹ใ€‚ๆ‰€ไปฅ๏ผŒHanLP้ข„่ฎญ็ปƒไบ†่ฎธๅคšๅ•ไปปๅŠกๆจกๅž‹ๅนถ่ฎพ่ฎกไบ†ไผ˜้›…็š„[ๆตๆฐด็บฟๆจกๅผ](https://hanlp.hankcs.com/docs/api/hanlp/components/pipeline.html#hanlp.components.pipeline.Pipeline)ๅฐ†ๅ…ถ็ป„่ฃ…่ตทๆฅใ€‚ ```python import hanlp +HanLP = hanlp.pipeline() \ + .append(hanlp.utils.rules.split_sentence, output_key='sentences') \ + .append(hanlp.load('FINE_ELECTRA_SMALL_ZH'), output_key='tok') \ + .append(hanlp.load('CTB9_POS_ELECTRA_SMALL'), output_key='pos') \ + .append(hanlp.load('MSRA_NER_ELECTRA_SMALL_ZH'), output_key='ner', input_key='tok') \ + .append(hanlp.load('CTB9_DEP_ELECTRA_SMALL', conll=0), output_key='dep', input_key='tok')\ + .append(hanlp.load('CTB9_CON_ELECTRA_SMALL'), output_key='con', input_key='tok') +HanLP('2021ๅนดHanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธใ€‚') +``` -HanLP = hanlp.load(hanlp.pretrained.mtl.UD_ONTONOTES_TOK_POS_LEM_FEA_NER_SRL_DEP_SDP_CON_XLMR_BASE) -print(HanLP(['In 2021, HanLPv2.1 delivers state-of-the-art multilingual NLP techniques to production environments.', - '2021ๅนดใ€HanLPv2.1ใฏๆฌกไธ–ไปฃใฎๆœ€ๅ…ˆ็ซฏๅคš่จ€่ชžNLPๆŠ€่ก“ใ‚’ๆœฌ็•ช็’ฐๅขƒใซๅฐŽๅ…ฅใ—ใพใ™ใ€‚', - '2021ๅนด HanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚'])) +ๆ›ดๅคšๅŠŸ่ƒฝ๏ผŒ่ฏทๅ‚่€ƒ[demo](https://github.com/hankcs/HanLP/tree/doc-zh/plugins/hanlp_demo/hanlp_demo/zh)ๅ’Œ[ๆ–‡ๆกฃ](https://hanlp.hankcs.com/docs/api/hanlp/pretrained/index.html)ไบ†่งฃๆ›ดๅคšๆจกๅž‹ไธŽ็”จๆณ•ใ€‚ + +### ่พ“ๅ‡บๆ ผๅผ + +ๆ— ่ฎบไฝ•็งAPIไฝ•็งๅผ€ๅ‘่ฏญ่จ€ไฝ•็ง่‡ช็„ถ่ฏญ่จ€๏ผŒHanLP็š„่พ“ๅ‡บ็ปŸไธ€ไธบ`json`ๆ ผๅผๅ…ผๅฎน`dict`็š„[`Document`](https://hanlp.hankcs.com/docs/api/common/document.html): + +```json +{ + "tok/fine": [ + ["2021ๅนด", "HanLPv2.1", "ไธบ", "็”Ÿไบง", "็Žฏๅขƒ", "ๅธฆๆฅ", "ๆฌก", "ไธ–ไปฃ", "ๆœ€", 
"ๅ…ˆ่ฟ›", "็š„", "ๅคš", "่ฏญ็ง", "NLP", "ๆŠ€ๆœฏ", "ใ€‚"], + ["้˜ฟๅฉ†ไธป", "ๆฅๅˆฐ", "ๅŒ—ไบฌ", "็ซ‹ๆ–นๅบญ", "ๅ‚่ง‚", "่‡ช็„ถ", "่ฏญไน‰", "็ง‘ๆŠ€", "ๅ…ฌๅธ", "ใ€‚"] + ], + "tok/coarse": [ + ["2021ๅนด", "HanLPv2.1", "ไธบ", "็”Ÿไบง", "็Žฏๅขƒ", "ๅธฆๆฅ", "ๆฌกไธ–ไปฃ", "ๆœ€", "ๅ…ˆ่ฟ›", "็š„", "ๅคš่ฏญ็ง", "NLP", "ๆŠ€ๆœฏ", "ใ€‚"], + ["้˜ฟๅฉ†ไธป", "ๆฅๅˆฐ", "ๅŒ—ไบฌ็ซ‹ๆ–นๅบญ", "ๅ‚่ง‚", "่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ", "ใ€‚"] + ], + "pos/ctb": [ + ["NT", "NR", "P", "NN", "NN", "VV", "JJ", "NN", "AD", "JJ", "DEG", "CD", "NN", "NR", "NN", "PU"], + ["NN", "VV", "NR", "NR", "VV", "NN", "NN", "NN", "NN", "PU"] + ], + "pos/pku": [ + ["t", "nx", "p", "vn", "n", "v", "b", "n", "d", "a", "u", "a", "n", "nx", "n", "w"], + ["n", "v", "ns", "ns", "v", "n", "n", "n", "n", "w"] + ], + "pos/863": [ + ["nt", "w", "p", "v", "n", "v", "a", "nt", "d", "a", "u", "a", "n", "ws", "n", "w"], + ["n", "v", "ns", "n", "v", "n", "n", "n", "n", "w"] + ], + "ner/pku": [ + [], + [["ๅŒ—ไบฌ็ซ‹ๆ–นๅบญ", "ns", 2, 4], ["่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ", "nt", 5, 9]] + ], + "ner/msra": [ + [["2021ๅนด", "DATE", 0, 1], ["HanLPv2.1", "ORGANIZATION", 1, 2]], + [["ๅŒ—ไบฌ", "LOCATION", 2, 3], ["็ซ‹ๆ–นๅบญ", "LOCATION", 3, 4], ["่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ", "ORGANIZATION", 5, 9]] + ], + "ner/ontonotes": [ + [["2021ๅนด", "DATE", 0, 1], ["HanLPv2.1", "ORG", 1, 2]], + [["ๅŒ—ไบฌ็ซ‹ๆ–นๅบญ", "FAC", 2, 4], ["่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ", "ORG", 5, 9]] + ], + "srl": [ + [[["2021ๅนด", "ARGM-TMP", 0, 1], ["HanLPv2.1", "ARG0", 1, 2], ["ไธบ็”Ÿไบง็Žฏๅขƒ", "ARG2", 2, 5], ["ๅธฆๆฅ", "PRED", 5, 6], ["ๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏ", "ARG1", 6, 15]], [["ๆœ€", "ARGM-ADV", 8, 9], ["ๅ…ˆ่ฟ›", "PRED", 9, 10], ["ๆŠ€ๆœฏ", "ARG0", 14, 15]]], + [[["้˜ฟๅฉ†ไธป", "ARG0", 0, 1], ["ๆฅๅˆฐ", "PRED", 1, 2], ["ๅŒ—ไบฌ็ซ‹ๆ–นๅบญ", "ARG1", 2, 4]], [["้˜ฟๅฉ†ไธป", "ARG0", 0, 1], ["ๅ‚่ง‚", "PRED", 4, 5], ["่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ", "ARG1", 5, 9]]] + ], + "dep": [ + [[6, "tmod"], [6, "nsubj"], [6, "prep"], [5, "nn"], [3, "pobj"], [0, "root"], [8, "amod"], [15, 
"nn"], [10, "advmod"], [15, "rcmod"], [10, "assm"], [13, "nummod"], [15, "nn"], [15, "nn"], [6, "dobj"], [6, "punct"]], + [[2, "nsubj"], [0, "root"], [4, "nn"], [2, "dobj"], [2, "conj"], [9, "nn"], [9, "nn"], [9, "nn"], [5, "dobj"], [2, "punct"]] + ], + "sdp": [ + [[[6, "Time"]], [[6, "Exp"]], [[5, "mPrep"]], [[5, "Desc"]], [[6, "Datv"]], [[13, "dDesc"]], [[0, "Root"], [8, "Desc"], [13, "Desc"]], [[15, "Time"]], [[10, "mDegr"]], [[15, "Desc"]], [[10, "mAux"]], [[8, "Quan"], [13, "Quan"]], [[15, "Desc"]], [[15, "Nmod"]], [[6, "Pat"]], [[6, "mPunc"]]], + [[[2, "Agt"], [5, "Agt"]], [[0, "Root"]], [[4, "Loc"]], [[2, "Lfin"]], [[2, "ePurp"]], [[8, "Nmod"]], [[9, "Nmod"]], [[9, "Nmod"]], [[5, "Datv"]], [[5, "mPunc"]]] + ], + "con": [ + ["TOP", [["IP", [["NP", [["NT", ["2021ๅนด"]]]], ["NP", [["NR", ["HanLPv2.1"]]]], ["VP", [["PP", [["P", ["ไธบ"]], ["NP", [["NN", ["็”Ÿไบง"]], ["NN", ["็Žฏๅขƒ"]]]]]], ["VP", [["VV", ["ๅธฆๆฅ"]], ["NP", [["ADJP", [["NP", [["ADJP", [["JJ", ["ๆฌก"]]]], ["NP", [["NN", ["ไธ–ไปฃ"]]]]]], ["ADVP", [["AD", ["ๆœ€"]]]], ["VP", [["JJ", ["ๅ…ˆ่ฟ›"]]]]]], ["DEG", ["็š„"]], ["NP", [["QP", [["CD", ["ๅคš"]]]], ["NP", [["NN", ["่ฏญ็ง"]]]]]], ["NP", [["NR", ["NLP"]], ["NN", ["ๆŠ€ๆœฏ"]]]]]]]]]], ["PU", ["ใ€‚"]]]]]], + ["TOP", [["IP", [["NP", [["NN", ["้˜ฟๅฉ†ไธป"]]]], ["VP", [["VP", [["VV", ["ๆฅๅˆฐ"]], ["NP", [["NR", ["ๅŒ—ไบฌ"]], ["NR", ["็ซ‹ๆ–นๅบญ"]]]]]], ["VP", [["VV", ["ๅ‚่ง‚"]], ["NP", [["NN", ["่‡ช็„ถ"]], ["NN", ["่ฏญไน‰"]], ["NN", ["็ง‘ๆŠ€"]], ["NN", ["ๅ…ฌๅธ"]]]]]]]], ["PU", ["ใ€‚"]]]]]] + ] +} ``` -- In particular, the Python `HanLPClient` can also be used as a callable function following the same semantics. - See [docs](https://hanlp.hankcs.com/docs/tutorial.html) for visualization, annotation guidelines and more details. -- To process Chinese or Japanese, HanLP provides mono-lingual models in each language which significantly outperform the - multi-lingual model. 
See [docs](https://hanlp.hankcs.com/docs/api/hanlp/pretrained/index.html) for the list of models. +็‰นๅˆซๅœฐ๏ผŒPython RESTfulๅ’Œnative APIๆ”ฏๆŒๅŸบไบŽ็ญ‰ๅฎฝๅญ—ไฝ“็š„[ๅฏ่ง†ๅŒ–](https://hanlp.hankcs.com/docs/tutorial.html#visualization)๏ผŒ่ƒฝๅคŸ็›ดๆŽฅๅฐ†่ฏญ่จ€ๅญฆ็ป“ๆž„ๅœจๆŽงๅˆถๅฐๅ†…ๅฏ่ง†ๅŒ–ๅ‡บๆฅ๏ผš -## Train Your Own Models +```python +HanLP(['2021ๅนดHanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚', '้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธใ€‚']).pretty_print() + +Dep Tree Token Relati PoS Tok NER Type Tok SRL PA1 Tok SRL PA2 Tok PoS 3 4 5 6 7 8 9 +โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ 2021ๅนด tmod NT 2021ๅนด โ”€โ”€โ”€โ–บDATE 2021ๅนด โ”€โ”€โ”€โ–บARGM-TMP 2021ๅนด 2021ๅนด NT โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ” + โ”‚โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ HanLPv2.1 nsubj NR HanLPv2.1 โ”€โ”€โ”€โ–บORGANIZATION HanLPv2.1 โ”€โ”€โ”€โ–บARG0 HanLPv2.1 HanLPv2.1 NR โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNPโ”€โ”€โ”€โ”€โ”ค + โ”‚โ”‚โ”Œโ”€โ–บโ”Œโ”€โ”€โ”€โ”€โ”€ ไธบ prep P ไธบ ไธบ โ—„โ”€โ” ไธบ ไธบ P โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚ + โ”‚โ”‚โ”‚ โ”‚ โ”Œโ”€โ–บ ็”Ÿไบง nn NN ็”Ÿไบง ็”Ÿไบง โ”œโ–บARG2 ็”Ÿไบง ็”Ÿไบง NN โ”€โ”€โ” โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บPP โ”€โ”€โ”€โ” โ”‚ + โ”‚โ”‚โ”‚ 
โ””โ”€โ–บโ””โ”€โ”€ ็Žฏๅขƒ pobj NN ็Žฏๅขƒ ็Žฏๅขƒ โ—„โ”€โ”˜ ็Žฏๅขƒ ็Žฏๅขƒ NN โ”€โ”€โ”ดโ–บNP โ”€โ”€โ”€โ”˜ โ”‚ โ”‚ +โ”Œโ”ผโ”ดโ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ ๅธฆๆฅ root VV ๅธฆๆฅ ๅธฆๆฅ โ•Ÿโ”€โ”€โ–บPRED ๅธฆๆฅ ๅธฆๆฅ VV โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚ โ”‚ +โ”‚โ”‚ โ”Œโ”€โ–บ ๆฌก amod JJ ๆฌก ๆฌก โ—„โ”€โ” ๆฌก ๆฌก JJ โ”€โ”€โ”€โ–บADJPโ”€โ”€โ” โ”‚ โ”œโ–บVPโ”€โ”€โ”€โ”€โ”ค +โ”‚โ”‚ โ”Œโ”€โ”€โ”€โ–บโ””โ”€โ”€ ไธ–ไปฃ nn NN ไธ–ไปฃ ไธ–ไปฃ โ”‚ ไธ–ไปฃ ไธ–ไปฃ NN โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”ดโ–บNP โ”€โ”€โ”€โ” โ”‚ โ”‚ โ”‚ +โ”‚โ”‚ โ”‚ โ”Œโ”€โ–บ ๆœ€ advmod AD ๆœ€ ๆœ€ โ”‚ ๆœ€ โ”€โ”€โ”€โ–บARGM-ADV ๆœ€ AD โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บADVPโ”€โ”€โ”ผโ–บADJPโ”€โ”€โ” โ”œโ–บVP โ”€โ”€โ”€โ”˜ โ”œโ–บIP +โ”‚โ”‚ โ”‚โ”Œโ”€โ”€โ–บโ”œโ”€โ”€ ๅ…ˆ่ฟ› rcmod JJ ๅ…ˆ่ฟ› ๅ…ˆ่ฟ› โ”‚ ๅ…ˆ่ฟ› โ•Ÿโ”€โ”€โ–บPRED ๅ…ˆ่ฟ› JJ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บVP โ”€โ”€โ”€โ”˜ โ”‚ โ”‚ โ”‚ +โ”‚โ”‚ โ”‚โ”‚ โ””โ”€โ–บ ็š„ assm DEG ็š„ ็š„ โ”œโ–บARG1 ็š„ ็š„ DEGโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค โ”‚ โ”‚ +โ”‚โ”‚ โ”‚โ”‚ โ”Œโ”€โ–บ ๅคš nummod CD ๅคš ๅคš โ”‚ ๅคš ๅคš CD โ”€โ”€โ”€โ–บQP โ”€โ”€โ”€โ” โ”œโ–บNP โ”€โ”€โ”€โ”˜ โ”‚ +โ”‚โ”‚ โ”‚โ”‚โ”Œโ”€โ–บโ””โ”€โ”€ ่ฏญ็ง nn NN ่ฏญ็ง ่ฏญ็ง โ”‚ ่ฏญ็ง ่ฏญ็ง NN โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNPโ”€โ”€โ”€โ”€โ”ค โ”‚ +โ”‚โ”‚ โ”‚โ”‚โ”‚ โ”Œโ”€โ–บ NLP nn NR NLP NLP โ”‚ NLP NLP NR โ”€โ”€โ” โ”‚ โ”‚ +โ”‚โ””โ”€โ–บโ””โ”ดโ”ดโ”€โ”€โ”ดโ”€โ”€ ๆŠ€ๆœฏ dobj NN ๆŠ€ๆœฏ ๆŠ€ๆœฏ โ—„โ”€โ”˜ ๆŠ€ๆœฏ โ”€โ”€โ”€โ–บARG0 ๆŠ€ๆœฏ NN โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”˜ โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ ใ€‚ punct PU ใ€‚ ใ€‚ ใ€‚ ใ€‚ PU โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ + +Dep Tree Tok Relat Po Tok NER Type Tok SRL PA1 Tok SRL PA2 Tok Po 3 4 5 6 +โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€ โ”€โ”€ 
โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + โ”Œโ”€โ–บ ้˜ฟๅฉ†ไธป nsubj NN ้˜ฟๅฉ†ไธป ้˜ฟๅฉ†ไธป โ”€โ”€โ”€โ–บARG0 ้˜ฟๅฉ†ไธป โ”€โ”€โ”€โ–บARG0 ้˜ฟๅฉ†ไธป NNโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ” +โ”Œโ”ฌโ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”ดโ”€โ”€ ๆฅๅˆฐ root VV ๆฅๅˆฐ ๆฅๅˆฐ โ•Ÿโ”€โ”€โ–บPRED ๆฅๅˆฐ ๆฅๅˆฐ VVโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚ +โ”‚โ”‚ โ”‚ โ”Œโ”€โ–บ ๅŒ—ไบฌ nn NR ๅŒ—ไบฌ โ”€โ”€โ”€โ–บLOCATION ๅŒ—ไบฌ โ—„โ”€โ” ๅŒ—ไบฌ ๅŒ—ไบฌ NRโ”€โ”€โ” โ”œโ–บVP โ”€โ”€โ”€โ” โ”‚ +โ”‚โ”‚ โ””โ”€โ–บโ””โ”€โ”€ ็ซ‹ๆ–นๅบญ dobj NR ็ซ‹ๆ–นๅบญ โ”€โ”€โ”€โ–บLOCATION ็ซ‹ๆ–นๅบญ โ—„โ”€โ”ดโ–บARG1 ็ซ‹ๆ–นๅบญ ็ซ‹ๆ–นๅบญ NRโ”€โ”€โ”ดโ–บNP โ”€โ”€โ”€โ”˜ โ”‚ โ”‚ +โ”‚โ””โ”€โ–บโ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€ ๅ‚่ง‚ conj VV ๅ‚่ง‚ ๅ‚่ง‚ ๅ‚่ง‚ โ•Ÿโ”€โ”€โ–บPRED ๅ‚่ง‚ VVโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”œโ–บVPโ”€โ”€โ”€โ”€โ”ค +โ”‚ โ”‚ โ”Œโ”€โ”€โ”€โ–บ ่‡ช็„ถ nn NN ่‡ช็„ถ โ—„โ”€โ” ่‡ช็„ถ ่‡ช็„ถ โ—„โ”€โ” ่‡ช็„ถ NNโ”€โ”€โ” โ”‚ โ”‚ โ”œโ–บIP +โ”‚ โ”‚ โ”‚โ”Œโ”€โ”€โ–บ ่ฏญไน‰ nn NN ่ฏญไน‰ โ”‚ ่ฏญไน‰ ่ฏญไน‰ โ”‚ ่ฏญไน‰ NN โ”‚ โ”œโ–บVP โ”€โ”€โ”€โ”˜ โ”‚ +โ”‚ โ”‚ โ”‚โ”‚โ”Œโ”€โ–บ ็ง‘ๆŠ€ nn NN ็ง‘ๆŠ€ โ”œโ–บORGANIZATION ็ง‘ๆŠ€ ็ง‘ๆŠ€ โ”œโ–บARG1 ็ง‘ๆŠ€ NN โ”œโ–บNP โ”€โ”€โ”€โ”˜ โ”‚ +โ”‚ โ””โ”€โ–บโ””โ”ดโ”ดโ”€โ”€ ๅ…ฌๅธ dobj NN ๅ…ฌๅธ โ—„โ”€โ”˜ ๅ…ฌๅธ ๅ…ฌๅธ โ—„โ”€โ”˜ ๅ…ฌๅธ NNโ”€โ”€โ”˜ โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ ใ€‚ punct PU ใ€‚ ใ€‚ ใ€‚ ใ€‚ PUโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ +``` + +ๅ…ณไบŽๆ ‡ๆณจ้›†ๅซไน‰๏ผŒ่ฏทๅ‚่€ƒ[ใ€Š่ฏญ่จ€ๅญฆๆ ‡ๆณจ่ง„่Œƒใ€‹](https://hanlp.hankcs.com/docs/annotations/index.html)ๅŠ[ใ€Šๆ ผๅผ่ง„่Œƒใ€‹](https://hanlp.hankcs.com/docs/data_format.html)ใ€‚ๆˆ‘ไปฌ่ดญไนฐใ€ๆ ‡ๆณจๆˆ–้‡‡็”จไบ†ไธ–็•ŒไธŠ้‡็บงๆœ€ๅคงใ€็ง็ฑปๆœ€ๅคš็š„่ฏญๆ–™ๅบ“็”จไบŽ่”ๅˆๅคš่ฏญ็งๅคšไปปๅŠกๅญฆไน ๏ผŒๆ‰€ไปฅHanLP็š„ๆ ‡ๆณจ้›†ไนŸๆ˜ฏ่ฆ†็›–้ขๆœ€ๅนฟ็š„ใ€‚ + +## ่ฎญ็ปƒไฝ 
่‡ชๅทฑ็š„้ข†ๅŸŸๆจกๅž‹ -To write DL models is not hard, the real hard thing is to write a model able to reproduce the scores in papers. The -snippet below shows how to surpass the state-of-the-art tokenizer in 6 minutes. +ๅ†™ๆทฑๅบฆๅญฆไน ๆจกๅž‹ไธ€็‚น้ƒฝไธ้šพ๏ผŒ้šพ็š„ๆ˜ฏๅค็Žฐ่พƒ้ซ˜็š„ๅ‡†็กฎ็Ž‡ใ€‚ไธ‹ๅˆ—[ไปฃ็ ](https://github.com/hankcs/HanLP/blob/master/plugins/hanlp_demo/hanlp_demo/zh/train_sota_bert_pku.py)ๅฑ•็คบไบ†ๅฆ‚ไฝ•ๅœจsighan2005 PKU่ฏญๆ–™ๅบ“ไธŠ่Šฑ6ๅˆ†้’Ÿ่ฎญ็ปƒไธ€ไธช่ถ…่ถŠๅญฆๆœฏ็•Œstate-of-the-art็š„ไธญๆ–‡ๅˆ†่ฏๆจกๅž‹ใ€‚ ```python tokenizer = TransformerTaggingTokenizer() -save_dir = 'data/model/cws/sighan2005_pku_bert_base_96.7' +save_dir = 'data/model/cws/sighan2005_pku_bert_base_96.73' tokenizer.fit( SIGHAN2005_PKU_TRAIN_ALL, SIGHAN2005_PKU_TEST, # Conventionally, no devset is used. See Tian et al. (2020). @@ -145,25 +286,23 @@ tokenizer.fit( tokenizer.evaluate(SIGHAN2005_PKU_TEST, save_dir) ``` -The result is guaranteed to be `96.73` as the random seed is fixed. Different from some overclaiming papers and -projects, HanLP promises every single digit in our scores is reproducible. Any issues on reproducibility will be treated -and solved as a top-priority fatal bug. +ๅ…ถไธญ๏ผŒ็”ฑไบŽๆŒ‡ๅฎšไบ†้šๆœบๆ•ฐ็งๅญ๏ผŒ็ป“ๆžœไธ€ๅฎšๆ˜ฏ`96.73`ใ€‚ไธๅŒไบŽ้‚ฃไบ›่™šๅ‡ๅฎฃไผ ็š„ๅญฆๆœฏ่ฎบๆ–‡ๆˆ–ๅ•†ไธš้กน็›ฎ๏ผŒHanLPไฟ่ฏๆ‰€ๆœ‰็ป“ๆžœๅฏๅค็Žฐใ€‚ๅฆ‚ๆžœไฝ ๆœ‰ไปปไฝ•่ดจ็–‘๏ผŒๆˆ‘ไปฌๅฐ†ๅฝ“ไฝœๆœ€้ซ˜ไผ˜ๅ…ˆ็บง็š„่‡ดๅ‘ฝๆ€งbug็ฌฌไธ€ๆ—ถ้—ดๆŽ’ๆŸฅ้—ฎ้ข˜ใ€‚ -## Performance +่ฏทๅ‚่€ƒ[demo](https://github.com/hankcs/HanLP/tree/master/plugins/hanlp_demo/hanlp_demo/zh/train)ไบ†่งฃๆ›ดๅคš่ฎญ็ปƒ่„šๆœฌใ€‚ -The performance of multi-task learning models is shown in the following table. +## ๆ€ง่ƒฝ
langcorporamodeltokposnerdepconsrlsdplemfeaamr
finecoarsectbpku863udpkumsraontonotesSemEval16DMPASPSD
mulUD2.7
OntoNotes5
small98.62----93.23--74.4279.1076.8570.63-91.1993.6785.3487.7184.51-
base98.97----90.32--80.3278.7471.2373.63-92.6096.0481.1985.0882.13-
zhopensmall97.25-96.66-----95.0084.5787.6273.4084.57------
base97.50-97.07-----96.0487.1189.8477.7887.11------
closesmall96.7095.9396.8797.5695.05-96.2295.7476.7984.4488.1375.8174.28------
base97.5296.4496.9997.5995.29-96.4895.7277.7785.2988.5776.5273.76------
ernie96.9597.2996.7697.6495.22-97.3196.4777.9585.6789.1778.5174.10------
-- Multi-task learning models often under-perform their single-task learning counterparts according to our latest - research. Similarly, mono-lingual models often outperform multi-lingual models. Therefore, we strongly recommend the - use of [a single-task mono-lingual model](https://hanlp.hankcs.com/docs/api/hanlp/pretrained/index.html) if you are - targeting at high accuracy instead of faster speed. -- A state-of-the-art [AMR model](https://hanlp.hankcs.com/docs/api/hanlp/pretrained/amr.html) has been released. +- ๆ นๆฎๆˆ‘ไปฌ็š„[ๆœ€ๆ–ฐ็ ”็ฉถ](https://aclanthology.org/2021.emnlp-main.451)๏ผŒๅ•ไปปๅŠกๅญฆไน ็š„ๆ€ง่ƒฝๅพ€ๅพ€ไผ˜ไบŽๅคšไปปๅŠกๅญฆไน ใ€‚ๅœจไนŽ็ฒพๅบฆ็”šไบŽ้€Ÿๅบฆ็š„่ฏ๏ผŒๅปบ่ฎฎไฝฟ็”จ[ๅ•ไปปๅŠกๆจกๅž‹](https://hanlp.hankcs.com/docs/api/hanlp/pretrained/index.html)ใ€‚ -## Citing +HanLP้‡‡็”จ็š„ๆ•ฐๆฎ้ข„ๅค„็†ไธŽๆ‹†ๅˆ†ๆฏ”ไพ‹ไธŽๆต่กŒๆ–นๆณ•ๆœชๅฟ…็›ธๅŒ๏ผŒๆฏ”ๅฆ‚HanLP้‡‡็”จไบ†[ๅฎŒๆ•ด็‰ˆ็š„MSRAๅ‘ฝๅๅฎžไฝ“่ฏ†ๅˆซ่ฏญๆ–™](https://bbs.hankcs.com/t/topic/3033)๏ผŒ่€Œ้žๅคงไผ—ไฝฟ็”จ็š„้˜‰ๅ‰ฒ็‰ˆ๏ผ›HanLPไฝฟ็”จไบ†่ฏญๆณ•่ฆ†็›–ๆ›ดๅนฟ็š„[Stanford Dependenciesๆ ‡ๅ‡†](https://hanlp.hankcs.com/docs/annotations/dep/sd_zh.html)๏ผŒ่€Œ้žๅญฆๆœฏ็•Œๆฒฟ็”จ็š„Zhang and Clark (2008)ๆ ‡ๅ‡†๏ผ›HanLPๆๅ‡บไบ†[ๅ‡ๅŒ€ๅˆ†ๅ‰ฒCTB็š„ๆ–นๆณ•](https://bbs.hankcs.com/t/topic/3024)๏ผŒ่€Œไธ้‡‡็”จๅญฆๆœฏ็•Œไธๅ‡ๅŒ€ไธ”้—ๆผไบ†51ไธช้ป„้‡‘ๆ–‡ไปถ็š„ๆ–นๆณ•ใ€‚HanLPๅผ€ๆบไบ†[ไธ€ๆ•ดๅฅ—่ฏญๆ–™้ข„ๅค„็†่„šๆœฌไธŽ็›ธๅบ”่ฏญๆ–™ๅบ“](https://github.com/hankcs/HanLP/blob/master/plugins/hanlp_demo/hanlp_demo/zh/train/open_small.py)๏ผŒๅŠ›ๅ›พๆŽจๅŠจไธญๆ–‡NLP็š„้€ๆ˜ŽๅŒ–ใ€‚ -If you use HanLP in your research, please cite [our EMNLP paper](https://aclanthology.org/2021.emnlp-main.451): +ๆ€ปไน‹๏ผŒHanLPๅชๅšๆˆ‘ไปฌ่ฎคไธบๆญฃ็กฎใ€ๅ…ˆ่ฟ›็š„ไบ‹ๆƒ…๏ผŒ่€Œไธไธ€ๅฎšๆ˜ฏๆต่กŒใ€ๆƒๅจ็š„ไบ‹ๆƒ…ใ€‚ + +## ๅผ•็”จ + +ๅฆ‚ๆžœไฝ ๅœจ็ ”็ฉถไธญไฝฟ็”จไบ†HanLP๏ผŒ่ฏทๆŒ‰ๅฆ‚ไธ‹ๆ ผๅผๅผ•็”จ๏ผš ```bibtex @inproceedings{he-choi-2021-stem, @@ -182,15 +321,25 @@ If you use HanLP in your research, please cite [our EMNLP paper](https://aclanth ## License -### 
Codes +### ๆบไปฃ็  + +HanLPๆบไปฃ็ ็š„ๆŽˆๆƒๅ่ฎฎไธบ **Apache License 2.0**๏ผŒๅฏๅ…่ดน็”จๅšๅ•†ไธš็”จ้€”ใ€‚่ฏทๅœจไบงๅ“่ฏดๆ˜Žไธญ้™„ๅŠ HanLP็š„้“พๆŽฅๅ’ŒๆŽˆๆƒๅ่ฎฎใ€‚HanLPๅ—็‰ˆๆƒๆณ•ไฟๆŠค๏ผŒไพตๆƒๅฟ…็ฉถใ€‚ + +##### ่‡ช็„ถ่ฏญไน‰๏ผˆ้’ๅฒ›๏ผ‰็ง‘ๆŠ€ๆœ‰้™ๅ…ฌๅธ + +HanLPไปŽv1.7็‰ˆ่ตท็‹ฌ็ซ‹่ฟไฝœ๏ผŒ็”ฑ่‡ช็„ถ่ฏญไน‰๏ผˆ้’ๅฒ›๏ผ‰็ง‘ๆŠ€ๆœ‰้™ๅ…ฌๅธไฝœไธบ้กน็›ฎไธปไฝ“๏ผŒไธปๅฏผๅŽ็ปญ็‰ˆๆœฌ็š„ๅผ€ๅ‘๏ผŒๅนถๆ‹ฅๆœ‰ๅŽ็ปญ็‰ˆๆœฌ็š„็‰ˆๆƒใ€‚ + +##### ๅคงๅฟซๆœ็ดข + +HanLP v1.3~v1.65็‰ˆ็”ฑๅคงๅฟซๆœ็ดขไธปๅฏผๅผ€ๅ‘๏ผŒ็ปง็ปญๅฎŒๅ…จๅผ€ๆบ๏ผŒๅคงๅฟซๆœ็ดขๆ‹ฅๆœ‰็›ธๅ…ณ็‰ˆๆƒใ€‚ + +##### ไธŠๆตทๆž—ๅŽŸๅ…ฌๅธ -HanLP is licensed under **Apache License 2.0**. You can use HanLP in your commercial products for free. We would -appreciate it if you add a link to HanLP on your website. +HanLP ๆ—ฉๆœŸๅพ—ๅˆฐไบ†ไธŠๆตทๆž—ๅŽŸๅ…ฌๅธ็š„ๅคงๅŠ›ๆ”ฏๆŒ๏ผŒๅนถๆ‹ฅๆœ‰1.28ๅŠๅ‰ๅบ็‰ˆๆœฌ็š„็‰ˆๆƒ๏ผŒ็›ธๅ…ณ็‰ˆๆœฌไนŸๆ›พๅœจไธŠๆตทๆž—ๅŽŸๅ…ฌๅธ็ฝ‘็ซ™ๅ‘ๅธƒใ€‚ -### Models +### ้ข„่ฎญ็ปƒๆจกๅž‹ -Unless otherwise specified, all models in HanLP are licensed -under [CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/). +ๆœบๅ™จๅญฆไน ๆจกๅž‹็š„ๆŽˆๆƒๅœจๆณ•ๅพ‹ไธŠๆฒกๆœ‰ๅฎš่ฎบ๏ผŒไฝ†ๆœฌ็€ๅฐŠ้‡ๅผ€ๆบ่ฏญๆ–™ๅบ“ๅŽŸๅง‹ๆŽˆๆƒ็š„็ฒพ็ฅž๏ผŒๅฆ‚ไธ็‰นๅˆซ่ฏดๆ˜Ž๏ผŒHanLP็š„ๅคš่ฏญ็งๆจกๅž‹ๆŽˆๆƒๆฒฟ็”จ[CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/)๏ผŒไธญๆ–‡ๆจกๅž‹ๆŽˆๆƒไธบไป…ไพ›็ ”็ฉถไธŽๆ•™ๅญฆไฝฟ็”จใ€‚ ## References diff --git a/plugins/hanlp_demo/hanlp_demo/zh/abstractive_summarization_restful.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/abstractive_summarization_restful.ipynb new file mode 100644 index 000000000..d13d73692 --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/abstractive_summarization_restful.ipynb @@ -0,0 +1,160 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IYwV-UkNNzFp" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1Uf_u7ddMhUt" + }, + "outputs": [], + "source": [ + "!pip install hanlp_restful -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pp-1KqEOOJ4t" + }, + "source": [ + "## ๅˆ›ๅปบๅฎขๆˆท็ซฏ" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4M7ka0K5OMWU", + "outputId": "d74f0749-0587-454a-d7c9-7418d45ce534" + }, + "outputs": [], + "source": [ + "from hanlp_restful import HanLPClient\n", + "HanLP = HanLPClient('https://www.hanlp.com/api', auth=None, language='zh') # authไธๅกซๅˆ™ๅŒฟๅ๏ผŒzhไธญๆ–‡๏ผŒmulๅคš่ฏญ็ง" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BMW528wGNulM" + }, + "source": [ + "#### ็”ณ่ฏท็ง˜้’ฅ\n", + "็”ฑไบŽๆœๅŠกๅ™จ็ฎ—ๅŠ›ๆœ‰้™๏ผŒๅŒฟๅ็”จๆˆทๆฏๅˆ†้’Ÿ้™2ๆฌก่ฐƒ็”จใ€‚ๅฆ‚ๆžœไฝ ้œ€่ฆๆ›ดๅคš่ฐƒ็”จๆฌกๆ•ฐ๏ผŒ[ๅปบ่ฎฎ็”ณ่ฏทๅ…่ดนๅ…ฌ็›ŠAPI็ง˜้’ฅauth](https://bbs.hanlp.com/t/hanlp2-1-restful-api/53)ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ็”Ÿๆˆๅผ่‡ชๅŠจๆ‘˜่ฆ\n", + "็”Ÿๆˆๅผ่‡ชๅŠจๆ‘˜่ฆ๏ผˆAbstractive Summarization๏ผ‰ไปปๅŠก็š„็›ฎๆ ‡ๆ˜ฏไธบๆ–‡็ซ ็”Ÿๆˆไธ€ๆฎต็ฎ€็Ÿญ็š„ๆฆ‚ๆ‹ฌๆ€งๆ‘˜่ฆใ€‚ ็”Ÿๆˆ็š„ๆ‘˜่ฆๆœ‰ๅฏ่ƒฝๅ‡บ็ŽฐๅŽŸๆ–‡ไธญไธๅญ˜ๅœจ็š„ๆ–ฐ็Ÿญ่ฏญๆˆ–ๆ–ฐๅฅๅญ๏ผŒๅนถไธ”ๆ•ดไฝ“ๆต็•…ๆ€ง่พƒ้ซ˜ใ€‚\n", + "### ไธญๆ–‡\n", + "็”Ÿๆˆๅผ่‡ชๅŠจๆ‘˜่ฆไปปๅŠก็š„่พ“ๅ…ฅไธบไธ€ๆฎตๆ–‡ๆœฌ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "BqEmDMGGOtk3", + "outputId": "936d439a-e1ff-4308-d2aa-775955558594" + }, + "outputs": [ + { + "data": { + "text/plain": [ + 
"'้•ฟๆฑŸ่ฏๅˆธ๏ผš็œ‹ๅฅฝๅคง้‡‘ๅฑžๅ“็งไธญ็š„้“œ้“้’ข'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "HanLP.abstractive_summarization('''\n", + "ๆฏ็ปAIๅฟซ่ฎฏ๏ผŒ2ๆœˆ4ๆ—ฅ๏ผŒ้•ฟๆฑŸ่ฏๅˆธ็ ”็ฉถๆ‰€้‡‘ๅฑž่กŒไธš้ฆ–ๅธญๅˆ†ๆžๅธˆ็Ž‹้นคๆถ›่กจ็คบ๏ผŒ2023ๅนดๆตทๅค–็ปๆตŽ่กฐ้€€๏ผŒ็พŽๅ€บ็Žฐๅค„ไบŽๅŽ†ๅฒ้ซ˜ไฝ๏ผŒ\n", + "้ป„้‡‘็š„่ถ‹ๅŠฟๆ˜ฏๅ€ผๅพ—ๅ…ณๆณจ็š„๏ผ›ๅœจๅ›ฝๅ†…้œ€ๆฑ‚ไฟฎๅค็š„่ฟ‡็จ‹ไธญ๏ผŒ็œ‹ๅฅฝๅคง้‡‘ๅฑžๅ“็งไธญ็š„้“œ้“้’ขใ€‚\n", + "ๆญคๅค–๏ผŒๅœจ็ป†ๅˆ†็š„ๅฐๅ“็ง้‡Œ๏ผŒๅปบ่ฎฎๅ…ณๆณจไธคๆกไธป็บฟ๏ผŒไธ€ๆ˜ฏๆ–ฐ่ƒฝๆบ๏ผŒๆฏ”ๅฆ‚้”‚ใ€้’ดใ€้•ใ€็จ€ๅœŸ๏ผŒไบŒๆ˜ฏไธ“็ฒพ็‰นๆ–ฐไธป็บฟใ€‚๏ผˆๅคฎ่ง†่ดข็ป๏ผ‰\n", + "''')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jj1Jk-2sPHYx" + }, + "source": [ + "่ฟ”ๅ›žๅ€ผไธบไธ€ๆฎตๆ‘˜่ฆใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ่‹ฑๆ–‡\n", + "ๆŒ‰็…งHanLPไธ€่ดฏ็š„ๅคš่ฏญ็ง่ฎพ่ฎก๏ผŒไปปไฝ•่ฏญ่จ€้ƒฝๆ”ฏๆŒใ€‚็”ฑไบŽๆœๅŠกๅ™จGPU่ต„ๆบ้™ๅˆถ๏ผŒ็›ฎๅ‰่‹ฑๆ–‡ๆŽฅๅฃๆš‚ๆœชไธŠ็บฟใ€‚ๅฆ‚ๆžœไฝ ๆœ‰็›ธๅบ”้œ€ๆฑ‚๏ผŒๆฌข่ฟŽๅ‰ๅพ€่ฎบๅ›ๅ‘่ตท่ฏทๆ„ฟใ€‚" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "absum_restful.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/plugins/hanlp_demo/hanlp_demo/zh/amr_restful.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/amr_restful.ipynb new file mode 100644 index 000000000..5fd648d5d --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/amr_restful.ipynb @@ -0,0 +1,432 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": 
"WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IYwV-UkNNzFp" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1Uf_u7ddMhUt" + }, + "outputs": [], + "source": [ + "!pip install hanlp_restful -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pp-1KqEOOJ4t" + }, + "source": [ + "## ๅˆ›ๅปบๅฎขๆˆท็ซฏ" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4M7ka0K5OMWU", + "outputId": "d74f0749-0587-454a-d7c9-7418d45ce534" + }, + "outputs": [], + "source": [ + "from hanlp_restful import HanLPClient\n", + "HanLP = HanLPClient('https://www.hanlp.com/api', auth=None, language='zh') # authไธๅกซๅˆ™ๅŒฟๅ๏ผŒzhไธญๆ–‡๏ผŒmulๅคš่ฏญ็ง" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BMW528wGNulM" + }, + "source": [ + "#### ็”ณ่ฏท็ง˜้’ฅ\n", + "็”ฑไบŽๆœๅŠกๅ™จ็ฎ—ๅŠ›ๆœ‰้™๏ผŒๅŒฟๅ็”จๆˆทๆฏๅˆ†้’Ÿ้™2ๆฌก่ฐƒ็”จใ€‚ๅฆ‚ๆžœไฝ ้œ€่ฆๆ›ดๅคš่ฐƒ็”จๆฌกๆ•ฐ๏ผŒ[ๅปบ่ฎฎ็”ณ่ฏทๅ…่ดนๅ…ฌ็›ŠAPI็ง˜้’ฅauth](https://bbs.hanlp.com/t/hanlp2-1-restful-api/53)ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ๆŠฝ่ฑกๆ„ไน‰่กจ็คบ\n", + "### ไธญๆ–‡\n", + "ๆŠฝ่ฑกๆ„ไน‰่กจ็คบไปปๅŠก็š„่พ“ๅ…ฅไธบไธ€ๆฎตๆ–‡ๆœฌๆˆ–ๅทฒๅˆ†่ฏๅฎŒๆฏ•็š„ๅฅๅญ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "BqEmDMGGOtk3", + "outputId": "936d439a-e1ff-4308-d2aa-775955558594" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "graphs = HanLP.abstract_meaning_representation('็”ทๅญฉๅธŒๆœ›ๅฅณๅญฉ็›ธไฟกไป–ใ€‚')\n", + "len(graphs)" + ] + }, + { + "cell_type": "markdown", + 
"metadata": { + "id": "jj1Jk-2sPHYx" + }, + "source": [ + "่ฟ”ๅ›žๅ€ผไธบๆฏไธชๅฅๅญ็›ธๅบ”็š„AMRๅ›พ็š„Meaning Representationๆ ผๅผ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'id': '0',\n", + " 'input': '็”ทๅญฉ ๅธŒๆœ› ๅฅณๅญฉ ็›ธไฟก ไป– ใ€‚',\n", + " 'nodes': [{'id': 0,\n", + " 'label': '็”ทๅญฉ',\n", + " 'anchors': [{'from': 0, 'to': 2}, {'from': 12, 'to': 13}]},\n", + " {'id': 1, 'label': 'ๅธŒๆœ›-01', 'anchors': [{'from': 3, 'to': 5}]},\n", + " {'id': 2, 'label': 'ๅฅณๅญฉ', 'anchors': [{'from': 6, 'to': 8}]},\n", + " {'id': 3, 'label': '็›ธไฟก-01', 'anchors': [{'from': 9, 'to': 11}]}],\n", + " 'edges': [{'source': 1, 'target': 3, 'label': 'arg1'},\n", + " {'source': 1, 'target': 0, 'label': 'arg0'},\n", + " {'source': 3, 'target': 2, 'label': 'arg0'},\n", + " {'source': 3, 'target': 0, 'label': 'arg1'}],\n", + " 'tops': [1],\n", + " 'framework': 'amr'}" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "graph = graphs[0]\n", + "graph" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ๆณจๆ„ไธŠ้ขโ€œ็”ทๅญฉโ€ๆœ‰2ไธชanchor๏ผŒๅˆ†ๅˆซๅฏนๅบ”โ€œ็”ทๅญฉโ€ๅ’Œโ€œไป–โ€ใ€‚ไนŸๅฐฑๆ˜ฏ่ฏด๏ผŒMRๆ ผๅผๅ…ถๅฎžๅŒ…ๅซไบ†ๆŒ‡ไปฃๆถˆ่งฃ็š„็ป“ๆžœใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ๅฏ่ง†ๅŒ–\n", + "ๆŒ‡ๅฎš`visualization='svg'`ๅณๅฏๅพ—ๅˆฐ็Ÿข้‡ๅ›พๅฏ่ง†ๅŒ–ใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "\n", + "1\n", + "\n", + "ๅธŒๆœ›-01\n", + "\n", + "\n", + "\n", + "top->1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "็”ทๅญฉ\n", + "\n", + "\n", + "\n", + "1->0\n", + "\n", + "\n", + "arg0\n", + "\n", + "\n", + "\n", + "3\n", + "\n", + "็›ธไฟก-01\n", + "\n", + "\n", + "\n", + "1->3\n", + "\n", + "\n", + "arg1\n", + "\n", + 
"\n", + "\n", + "3->0\n", + "\n", + "\n", + "arg1\n", + "\n", + "\n", + "\n", + "2\n", + "\n", + "ๅฅณๅญฉ\n", + "\n", + "\n", + "\n", + "3->2\n", + "\n", + "\n", + "arg0\n", + "\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from IPython.display import SVG, display\n", + "\n", + "def show_svg(g):\n", + " display(SVG(data=g['svg']))\n", + " \n", + "graph = HanLP.abstract_meaning_representation('็”ทๅญฉๅธŒๆœ›ๅฅณๅญฉ็›ธไฟกไป–ใ€‚', visualization='svg')[0]\n", + "show_svg(graph)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ๅคš่ฏญ็งๆ”ฏๆŒ\n", + "้™คไบ†ไธญๆ–‡ๅค–๏ผŒๆ”ฏๆŒ็š„่ฏญ่จ€ๅˆ—่กจ๏ผš" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### ่‹ฑๆ–‡\n", + "็›ฎๅ‰๏ผŒHanLPๆœๅŠกๅ™จ่ฟ˜ๆ”ฏๆŒ่‹ฑๆ–‡AMR๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "0\n", + "\n", + "\n", + "\n", + "\n", + "1\n", + "\n", + "want-01\n", + "\n", + "\n", + "\n", + "top->1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "boy\n", + "\n", + "\n", + "\n", + "1->0\n", + "\n", + "\n", + "arg0\n", + "\n", + "\n", + "\n", + "3\n", + "\n", + "believe-01\n", + "\n", + "\n", + "\n", + "1->3\n", + "\n", + "\n", + "arg1\n", + "\n", + "\n", + "\n", + "3->0\n", + "\n", + "\n", + "arg1\n", + "\n", + "\n", + "\n", + "2\n", + "\n", + "girl\n", + "\n", + "\n", + "\n", + "3->2\n", + "\n", + "\n", + "arg0\n", + "\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "graph = HanLP.abstract_meaning_representation('The boy wants the girl to believe him.',\n", + " language='en', visualization='svg')[0]\n", + "show_svg(graph)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "็”จๆˆทๅฏไปฅ้€š่ฟ‡ๆŒ‡ๅฎš`language`ๅ‚ๆ•ฐๆฅๅฎž็Žฐ่‹ฑๆ–‡ๆŠฝ่ฑกๆ„ไน‰่กจ็คบ็š„ๅˆ†ๆž๏ผš" + ] + 
}, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'id': '0',\n", + " 'input': 'The boy wants the girl to believe him .',\n", + " 'nodes': [{'id': 0, 'label': 'boy'},\n", + " {'id': 1, 'label': 'wants-01'},\n", + " {'id': 2, 'label': 'girl'},\n", + " {'id': 3, 'label': 'believe-01'}],\n", + " 'edges': [{'source': 3, 'target': 0, 'label': 'arg1'},\n", + " {'source': 1, 'target': 3, 'label': 'arg1'},\n", + " {'source': 3, 'target': 2, 'label': 'arg0'},\n", + " {'source': 1, 'target': 0, 'label': 'arg0'}],\n", + " 'tops': [1],\n", + " 'framework': 'amr'}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "HanLP.abstract_meaning_representation(tokens=[['The', 'boy', 'wants', 'the', 'girl', 'to', 'believe', 'him', '.']], \n", + " language='en')[0]" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "amr_stl.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/plugins/hanlp_demo/hanlp_demo/zh/amr_stl.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/amr_stl.ipynb new file mode 100644 index 000000000..4b599e473 --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/amr_stl.ipynb @@ -0,0 +1,361 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IYwV-UkNNzFp" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1Uf_u7ddMhUt" + }, + "outputs": [], + "source": [ + "!pip install hanlp[amr] -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pp-1KqEOOJ4t" + }, + "source": [ + "## ๅŠ ่ฝฝๆจกๅž‹\n", + "HanLP็š„ๅทฅไฝœๆต็จ‹ๆ˜ฏๅ…ˆๅŠ ่ฝฝๆจกๅž‹๏ผŒๆจกๅž‹็š„ๆ ‡็คบ็ฌฆๅญ˜ๅ‚จๅœจ`hanlp.pretrained`่ฟ™ไธชๅŒ…ไธญ๏ผŒๆŒ‰็…งNLPไปปๅŠกๅฝ’็ฑปใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4M7ka0K5OMWU", + "outputId": "d74f0749-0587-454a-d7c9-7418d45ce534" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'AMR3_SEQ2SEQ_BART_LARGE': 'https://file.hankcs.com/hanlp/amr/amr3_seq2seq_bart_large_83.30_20220125_114450.zip',\n", + " 'MRP2020_AMR_ENG_ZHO_XLM_BASE': 'http://download.hanlp.com/amr/extra/amr-eng-zho-xlm-roberta-base_20220412_223756.zip',\n", + " 'MRP2020_AMR_ZHO_MENGZI_BASE': 'http://download.hanlp.com/amr/extra/amr-zho-mengzi-base_20220415_101941.zip'}" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import hanlp\n", + "hanlp.pretrained.amr.ALL # ่ฏญ็ง่งๅ็งฐๆœ€ๅŽไธ€ไธชๅญ—ๆฎตๆˆ–็›ธๅบ”่ฏญๆ–™ๅบ“" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BMW528wGNulM" + }, + "source": [ + "่ฐƒ็”จ`hanlp.load`่ฟ›่กŒๅŠ ่ฝฝ๏ผŒๆจกๅž‹ไผš่‡ชๅŠจไธ‹่ฝฝๅˆฐๆœฌๅœฐ็ผ“ๅญ˜ใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0tmKBu7sNAXX", + "outputId": "df2de87b-27f5-4c72-8eb2-25ceefdd8270" + }, + "outputs": [], + "source": [ + "amr = hanlp.load('MRP2020_AMR_ENG_ZHO_XLM_BASE')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + 
"id": "elA_UyssOut_" + }, + "source": [ + "## ๆŠฝ่ฑกๆ„ไน‰่กจ็คบ\n", + "ๆŠฝ่ฑกๆ„ไน‰่กจ็คบไปปๅŠก็š„่พ“ๅ…ฅไธบไธ€ไธชๆˆ–ๅคšไธชๅฅๅญ๏ผŒ`MRP2020_AMR_ENG_ZHO_XLM_BASE`่ฆๆฑ‚ๆไพ›ๅˆ†่ฏๅฎŒๆฏ•็š„ๅฅๅญ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "BqEmDMGGOtk3", + "outputId": "936d439a-e1ff-4308-d2aa-775955558594" + }, + "outputs": [], + "source": [ + "graph = amr([\"็”ทๅญฉ\", \"ๅธŒๆœ›\", \"ๅฅณๅญฉ\", \"็›ธไฟก\", \"ไป–\", \"ใ€‚\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jj1Jk-2sPHYx" + }, + "source": [ + "่ฟ”ๅ›žๅฏน่ฑกไธบ[penman.Graph](https://penman.readthedocs.io/en/latest/api/penman.graph.html)็ฑปๅž‹๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "graph" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ๆ‰“ๅฐๆ—ถไธบๅ‹ๅฅฝๆ ผๅผ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(x2 / ๅธŒๆœ›-01\n", + " :arg1 (x4 / ็›ธไฟก-01\n", + " :arg0 (x3 / ๅฅณๅญฉ)\n", + " :arg1 x1)\n", + " :arg0 (x1 / ็”ทๅญฉ))\n" + ] + } + ], + "source": [ + "print(graph)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ฏฅAMR็š„ๅฏ่ง†ๅŒ–็ป“ๆžœไธบ๏ผš\n", + "\n", + "![amr-zh](https://hanlp.hankcs.com/backend/v2/amr_svg?tokens=%E7%94%B7%E5%AD%A9%20%E5%B8%8C%E6%9C%9B%20%E5%A5%B3%E5%AD%A9%20%E7%9B%B8%E4%BF%A1%20%E4%BB%96%20%E3%80%82&language=zh&scale=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`MRP2020_AMR_ENG_ZHO_XLM_BASE`ๅ…ถๅฎžๆ˜ฏไธ€ไธชMeaning Representation Parsingๆจกๅž‹๏ผŒๆ”ฏๆŒ่พ“ๅ‡บMeaning Representation๏ผˆMR๏ผ‰ๆ ผๅผ๏ผŒ่ฏฅๆ ผๅผๆฏ”AMR็š„่กจ่พพๅŠ›ๆ›ดๅผบ๏ผš" + ] + }, + { + 
"cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'id': '0',\n", + " 'input': '็”ทๅญฉ ๅธŒๆœ› ๅฅณๅญฉ ็›ธไฟก ไป– ใ€‚',\n", + " 'nodes': [{'id': 0,\n", + " 'label': '็”ทๅญฉ',\n", + " 'anchors': [{'from': 0, 'to': 2}, {'from': 12, 'to': 13}]},\n", + " {'id': 1, 'label': 'ๅธŒๆœ›-01', 'anchors': [{'from': 3, 'to': 5}]},\n", + " {'id': 2, 'label': 'ๅฅณๅญฉ', 'anchors': [{'from': 6, 'to': 8}]},\n", + " {'id': 3, 'label': '็›ธไฟก-01', 'anchors': [{'from': 9, 'to': 11}]}],\n", + " 'edges': [{'source': 1, 'target': 3, 'label': 'arg1'},\n", + " {'source': 1, 'target': 0, 'label': 'arg0'},\n", + " {'source': 3, 'target': 2, 'label': 'arg0'},\n", + " {'source': 3, 'target': 0, 'label': 'arg1'}],\n", + " 'tops': [1],\n", + " 'framework': 'amr'}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "amr([\"็”ทๅญฉ\", \"ๅธŒๆœ›\", \"ๅฅณๅญฉ\", \"็›ธไฟก\", \"ไป–\", \"ใ€‚\"], output_amr=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ๆณจๆ„ไธŠ้ขโ€œ็”ทๅญฉโ€ๆœ‰2ไธชanchor๏ผŒๅˆ†ๅˆซๅฏนๅบ”โ€œ็”ทๅญฉโ€ๅ’Œโ€œไป–โ€ใ€‚ไนŸๅฐฑๆ˜ฏ่ฏด๏ผŒMRๆ ผๅผๅ…ถๅฎžๅŒ…ๅซไบ†ๆŒ‡ไปฃๆถˆ่งฃ็š„็ป“ๆžœใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ๅคš่ฏญ็งๆ”ฏๆŒ\n", + "`MRP2020_AMR_ENG_ZHO_XLM_BASE`ๅŒๆ—ถ่ฟ˜ๆ˜ฏไธ€ไธชCross-Lingualๆจกๅž‹๏ผŒๆ”ฏๆŒ็š„่ฏญ่จ€ๅˆ—่กจ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[['amr', 'eng'], ['amr', 'zho']]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "amr.config.frameworks" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "็”จๆˆทๅฏไปฅ้€š่ฟ‡ๆŒ‡ๅฎšlanguageๅ‚ๆ•ฐๆฅๅฎž็Žฐ่‹ฑๆ–‡ๆŠฝ่ฑกๆ„ไน‰่กจ็คบ็š„ๅˆ†ๆž๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "(w1 / wants-01\n", + " :arg1 (b2 / believe-01\n", + " :arg0 (g1 / girl)\n", + " :arg1 b1)\n", + " :arg0 (b1 / boy))\n" + ] + } + ], + "source": [ + "print(amr(['The', 'boy', 'wants', 'the', 'girl', 'to', 'believe', 'him', '.'], language='eng'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ไธบไบ†่พพๅˆฐๆœ€ไฝณๆ•ˆๆžœ๏ผŒๅปบ่ฎฎๅŒๆ—ถๆไพ›ๆฏไธช่ฏ็š„่ฏๅนฒ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(w1 / want-01\n", + " :arg1 (b2 / believe-01\n", + " :arg0 (g1 / girl)\n", + " :arg1 b1)\n", + " :arg0 (b1 / boy))\n" + ] + } + ], + "source": [ + "print(amr([('The', 'the'), ('boy', 'boy'), ('wants', 'want'), ('the', 'the'), ('girl', 'girl'), ('to', 'to'),\n", + " ('believe', 'believe'), ('him', 'he'), ('.', '.')], language='eng'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ฏฅAMR็š„ๅฏ่ง†ๅŒ–็ป“ๆžœไธบ๏ผš\n", + "\n", + "![amr-en](https://hanlp.hankcs.com/backend/v2/amr_svg?tokens=The%20boy%20wants%20the%20girl%20to%20believe%20him%20.&language=en&scale=1)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "amr_stl.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/plugins/hanlp_demo/hanlp_demo/zh/classification_restful.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/classification_restful.ipynb new file mode 100644 index 000000000..d9f00c6eb --- /dev/null +++ 
b/plugins/hanlp_demo/hanlp_demo/zh/classification_restful.ipynb @@ -0,0 +1,259 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nf9TgeCTC0OT" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jaW4eu6kC0OU", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "!pip install hanlp_restful -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_xI_bLAaC0OU" + }, + "source": [ + "## ๅˆ›ๅปบๅฎขๆˆท็ซฏ" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IYwV-UkNNzFp", + "outputId": "54065443-9b0a-444c-f6c0-c701bc86400b", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "from hanlp_restful import HanLPClient\n", + "HanLP = HanLPClient('https://www.hanlp.com/api', auth=None, language='zh') # authไธๅกซๅˆ™ๅŒฟๅ๏ผŒzhไธญๆ–‡๏ผŒmulๅคš่ฏญ็ง" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1Uf_u7ddMhUt", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "#### ็”ณ่ฏท็ง˜้’ฅ\n", + "็”ฑไบŽๆœๅŠกๅ™จ็ฎ—ๅŠ›ๆœ‰้™๏ผŒๅŒฟๅ็”จๆˆทๆฏๅˆ†้’Ÿ้™2ๆฌก่ฐƒ็”จใ€‚ๅฆ‚ๆžœไฝ ้œ€่ฆๆ›ดๅคš่ฐƒ็”จๆฌกๆ•ฐ๏ผŒ[ๅปบ่ฎฎ็”ณ่ฏทๅ…่ดนๅ…ฌ็›ŠAPI็ง˜้’ฅauth](https://bbs.hanlp.com/t/hanlp2-1-restful-api/53)ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ๆ–‡ๆœฌๅˆ†็ฑป\n", + "ๆ–‡ๆœฌๅˆ†็ฑปไปปๅŠก็š„่พ“ๅ…ฅไธบๆ–‡ๆกฃไปฅๅŠๅˆ†็ฑปๆจกๅž‹๏ผŒไปฅๆ–ฐ้—ป้ข†ๅŸŸ็š„`news_zh`ไธบไพ‹๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "BqEmDMGGOtk3" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'็ง‘ๆŠ€'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "HanLP.text_classification('2021ๅนด HanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚', model='news_zh')" + ] + }, + { + 
"cell_type": "markdown", + "metadata": { + "id": "SwaPn1hjC0OW" + }, + "source": [ + "่ฟ”ๅ›žๅ€ผไธบๆ–‡ๆกฃๆœ€ๅฏ่ƒฝ็š„็ฑป็›ฎใ€‚HanLPๆ”ฏๆŒ่ฟ”ๅ›ž็ฑป็›ฎๅฏนๅบ”็š„ๆฆ‚็Ž‡๏ผˆ็ฝฎไฟกๅบฆ๏ผ‰๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "egpWwHKxC0OX", + "outputId": "f7c77687-dd75-4fa2-dbd2-be6bda8a3fff" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['็ง‘ๆŠ€', 0.999642014503479]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "HanLP.text_classification('2021ๅนด HanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚', model='news_zh', prob=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kq_j5TLFC0OX" + }, + "source": [ + "HanLPไนŸๆ”ฏๆŒ่ฟ”ๅ›žๆฆ‚็Ž‡ๆœ€้ซ˜็š„`topk`ไธช็ฑป็›ฎ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "isJhzYyIC0OX", + "outputId": "683c8489-dffc-426e-f95b-e91dfb373260" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['็ง‘ๆŠ€', 'ๅฎถๅฑ…']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "HanLP.text_classification('2021ๅนด HanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚', model='news_zh', topk=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ฏฅๅŠŸ่ƒฝๅฏนไบŽๆททๅˆไบ†ๅคšไธชไธป้ข˜็š„ๆ–‡ๆกฃ่€Œ่จ€็‰นๅˆซๅฎž็”จ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ๆ—ถๅฐš': 0.6342714428901672,\n", + " 'ๅฎถๅฑ…': 0.359315425157547,\n", + " '็ง‘ๆŠ€': 0.0013340614968910813,\n", + " 'ไฝ“่‚ฒ': 0.001275017624720931,\n", + " 'ๆˆฟไบง': 0.0010209722677245736,\n", + " 'ๅจฑไน': 0.0006360886618494987,\n", + " '่ดข็ป': 0.0005668793455697596,\n", + 
" 'ๆธธๆˆ': 0.00037119409535080194,\n", + " 'ๆ•™่‚ฒ': 0.00029694309341721237,\n", + " '่‚ก็ฅจ': 0.0002858955995179713,\n", + " 'ๆ˜Ÿๅบง': 0.0002288677787873894,\n", + " 'ๅฝฉ็ฅจ': 0.00022682634880766273,\n", + " 'ๆ—ถๆ”ฟ': 0.0001005345256999135,\n", + " '็คพไผš': 6.985480285948142e-05}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "text = '''\n", + "ๆ”นไบ†ๅฅฝๅ‡ ๆฌก๏ผŒๆ„Ÿ่ง‰็ปˆไบŽๅฏไปฅ็กฎๅฎšไบ†ใ€‚\n", + "่ฟ™ๆฌก็š„็œŸไธๆ˜ฏๅšไบ†ๅค่‘ฃๆ„Ÿ็š„็ฑณ้‡‘่‰ฒๆŸ“่‰ฒ๏ผŒๆณ•่•พไนŸๅšไบ†ๅŒๆ ท็š„้ขœ่‰ฒใ€‚\n", + "็œŸไธ่ฝฏ็ณฏ็š„ๆ‰‹ๆ„Ÿๅ’ŒๆธฉๆŸ”็š„ๅ…‰ๆณฝๆ„Ÿ๏ผŒๅœจๅณๅฐ†็ป“ๆŸ็š„ๅ†ฌๅคฉ๏ผŒๆ˜พๅพ—ๆ ผๅค–็š„็พŽๅฅฝใ€‚\n", + "'''\n", + "\n", + "HanLP.text_classification(text, model='news_zh', topk=True, prob=True)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "classification_restful.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/plugins/hanlp_demo/hanlp_demo/zh/con_mtl.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/con_mtl.ipynb new file mode 100644 index 000000000..796bf7bf2 --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/con_mtl.ipynb @@ -0,0 +1,355 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IYwV-UkNNzFp" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1Uf_u7ddMhUt" + }, + "outputs": [], + "source": [ + "!pip install hanlp -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pp-1KqEOOJ4t" + }, + "source": [ + "## ๅŠ ่ฝฝๆจกๅž‹\n", + "HanLP็š„ๅทฅไฝœๆต็จ‹ๆ˜ฏๅ…ˆๅŠ ่ฝฝๆจกๅž‹๏ผŒๆจกๅž‹็š„ๆ ‡็คบ็ฌฆๅญ˜ๅ‚จๅœจ`hanlp.pretrained`่ฟ™ไธชๅŒ…ไธญ๏ผŒๆŒ‰็…งNLPไปปๅŠกๅฝ’็ฑปใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "0tmKBu7sNAXX" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'OPEN_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_SMALL_ZH': 'https://file.hankcs.com/hanlp/mtl/open_tok_pos_ner_srl_dep_sdp_con_electra_small_20201223_035557.zip',\n", + " 'OPEN_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH': 'https://file.hankcs.com/hanlp/mtl/open_tok_pos_ner_srl_dep_sdp_con_electra_base_20201223_201906.zip',\n", + " 'CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_SMALL_ZH': 'https://file.hankcs.com/hanlp/mtl/close_tok_pos_ner_srl_dep_sdp_con_electra_small_20210111_124159.zip',\n", + " 'CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH': 'https://file.hankcs.com/hanlp/mtl/close_tok_pos_ner_srl_dep_sdp_con_electra_base_20210111_124519.zip',\n", + " 'CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ERNIE_GRAM_ZH': 'https://file.hankcs.com/hanlp/mtl/close_tok_pos_ner_srl_dep_sdp_con_ernie_gram_base_aug_20210904_145403.zip',\n", + " 'UD_ONTONOTES_TOK_POS_LEM_FEA_NER_SRL_DEP_SDP_CON_MT5_SMALL': 'https://file.hankcs.com/hanlp/mtl/ud_ontonotes_tok_pos_lem_fea_ner_srl_dep_sdp_con_mt5_small_20210228_123458.zip',\n", + " 'UD_ONTONOTES_TOK_POS_LEM_FEA_NER_SRL_DEP_SDP_CON_XLMR_BASE': 'https://file.hankcs.com/hanlp/mtl/ud_ontonotes_tok_pos_lem_fea_ner_srl_dep_sdp_con_xlm_base_20210602_211620.zip',\n", + " 
'NPCMJ_UD_KYOTO_TOK_POS_CON_BERT_BASE_CHAR_JA': 'https://file.hankcs.com/hanlp/mtl/npcmj_ud_kyoto_tok_pos_ner_dep_con_srl_bert_base_char_ja_20210914_133742.zip'}" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import hanlp\n", + "hanlp.pretrained.mtl.ALL # MTLๅคšไปปๅŠก๏ผŒๅ…ทไฝ“ไปปๅŠก่งๆจกๅž‹ๅ็งฐ๏ผŒ่ฏญ็ง่งๅ็งฐๆœ€ๅŽไธ€ไธชๅญ—ๆฎตๆˆ–็›ธๅบ”่ฏญๆ–™ๅบ“" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EmZDmLn9aGxG" + }, + "source": [ + "่ฐƒ็”จ`hanlp.load`่ฟ›่กŒๅŠ ่ฝฝ๏ผŒๆจกๅž‹ไผš่‡ชๅŠจไธ‹่ฝฝๅˆฐๆœฌๅœฐ็ผ“ๅญ˜ใ€‚่‡ช็„ถ่ฏญ่จ€ๅค„็†ๅˆ†ไธบ่ฎธๅคšไปปๅŠก๏ผŒๅˆ†่ฏๅชๆ˜ฏๆœ€ๅˆ็บง็š„ไธ€ไธชใ€‚ไธŽๅ…ถๆฏไธชไปปๅŠกๅ•็‹ฌๅˆ›ๅปบไธ€ไธชๆจกๅž‹๏ผŒไธๅฆ‚ๅˆฉ็”จHanLP็š„่”ๅˆๆจกๅž‹ไธ€ๆฌกๆ€งๅฎŒๆˆๅคšไธชไปปๅŠก๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "HanLP = hanlp.load(hanlp.pretrained.mtl.CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ็Ÿญ่ฏญๅฅๆณ•ๅˆ†ๆž\n", + "ไปปๅŠก่ถŠๅฐ‘๏ผŒ้€Ÿๅบฆ่ถŠๅฟซใ€‚ๅฆ‚ๆŒ‡ๅฎšไป…ๆ‰ง่กŒ็Ÿญ่ฏญๅฅๆณ•ๅˆ†ๆž๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "BqEmDMGGOtk3", + "outputId": "2a0d392f-b99a-4a18-fc7f-754e2abe2e34" + }, + "outputs": [], + "source": [ + "doc = HanLP(['2021ๅนดHanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚', '้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธใ€‚'], tasks='con')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ฟ”ๅ›žๅ€ผไธบไธ€ไธช[Document](https://hanlp.hankcs.com/docs/api/common/document.html):" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " 
\"tok/fine\": [\n", + " [\"2021ๅนด\", \"HanLPv2.1\", \"ไธบ\", \"็”Ÿไบง\", \"็Žฏๅขƒ\", \"ๅธฆๆฅ\", \"ๆฌก\", \"ไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš\", \"่ฏญ็ง\", \"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"],\n", + " [\"้˜ฟๅฉ†ไธป\", \"ๆฅๅˆฐ\", \"ๅŒ—ไบฌ\", \"็ซ‹ๆ–นๅบญ\", \"ๅ‚่ง‚\", \"่‡ช็„ถ\", \"่ฏญไน‰\", \"็ง‘ๆŠ€\", \"ๅ…ฌๅธ\", \"ใ€‚\"]\n", + " ],\n", + " \"con\": [\n", + " [\"TOP\", [[\"IP\", [[\"NP\", [[\"_\", [\"2021ๅนด\"]]]], [\"NP\", [[\"_\", [\"HanLPv2.1\"]]]], [\"VP\", [[\"PP\", [[\"_\", [\"ไธบ\"]], [\"NP\", [[\"_\", [\"็”Ÿไบง\"]], [\"_\", [\"็Žฏๅขƒ\"]]]]]], [\"VP\", [[\"_\", [\"ๅธฆๆฅ\"]], [\"NP\", [[\"ADJP\", [[\"NP\", [[\"ADJP\", [[\"_\", [\"ๆฌก\"]]]], [\"NP\", [[\"_\", [\"ไธ–ไปฃ\"]]]]]], [\"ADVP\", [[\"_\", [\"ๆœ€\"]]]], [\"VP\", [[\"_\", [\"ๅ…ˆ่ฟ›\"]]]]]], [\"_\", [\"็š„\"]], [\"NP\", [[\"QP\", [[\"_\", [\"ๅคš\"]]]], [\"NP\", [[\"_\", [\"่ฏญ็ง\"]]]]]], [\"NP\", [[\"_\", [\"NLP\"]], [\"_\", [\"ๆŠ€ๆœฏ\"]]]]]]]]]], [\"_\", [\"ใ€‚\"]]]]]],\n", + " [\"TOP\", [[\"IP\", [[\"NP\", [[\"_\", [\"้˜ฟๅฉ†ไธป\"]]]], [\"VP\", [[\"VP\", [[\"_\", [\"ๆฅๅˆฐ\"]], [\"NP\", [[\"_\", [\"ๅŒ—ไบฌ\"]], [\"_\", [\"็ซ‹ๆ–นๅบญ\"]]]]]], [\"VP\", [[\"_\", [\"ๅ‚่ง‚\"]], [\"NP\", [[\"_\", [\"่‡ช็„ถ\"]], [\"_\", [\"่ฏญไน‰\"]], [\"_\", [\"็ง‘ๆŠ€\"]], [\"_\", [\"ๅ…ฌๅธ\"]]]]]]]], [\"_\", [\"ใ€‚\"]]]]]]\n", + " ]\n", + "}\n" + ] + } + ], + "source": [ + "print(doc)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`doc['con']`ไธบTree็ฑปๅž‹๏ผŒๆ˜ฏlist็š„ๅญ็ฑปใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wxctCigrTKu-" + }, + "source": [ + "ๅฏ่ง†ๅŒ–็Ÿญ่ฏญๅฅๆณ•ๆ ‘๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zo08uquCTFSk", + "outputId": "c6077f2d-7084-4f4b-a3bc-9aa9951704ea" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
Token     
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021ๅนด     
HanLPv2.1 
ไธบ         
็”Ÿไบง        
็Žฏๅขƒ        
ๅธฆๆฅ        
ๆฌก         
ไธ–ไปฃ        
ๆœ€         
ๅ…ˆ่ฟ›        
็š„         
ๅคš         
่ฏญ็ง        
NLP       
ๆŠ€ๆœฏ        
ใ€‚         
P    3       4       5       6       7       8       9 
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNPโ”€โ”€โ”€โ”€โ”ค   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”                                       โ”‚   
_โ”€โ”€โ”       โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บPP โ”€โ”€โ”€โ”       โ”‚   
_โ”€โ”€โ”ดโ–บNP โ”€โ”€โ”€โ”˜                               โ”‚       โ”‚   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”       โ”‚       โ”‚   
_โ”€โ”€โ”€โ–บADJPโ”€โ”€โ”                       โ”‚       โ”œโ–บVPโ”€โ”€โ”€โ”€โ”ค   
_โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”ดโ–บNP โ”€โ”€โ”€โ”               โ”‚       โ”‚       โ”‚   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บADVPโ”€โ”€โ”ผโ–บADJPโ”€โ”€โ”       โ”œโ–บVP โ”€โ”€โ”€โ”˜       โ”œโ–บIP
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บVP โ”€โ”€โ”€โ”˜       โ”‚       โ”‚               โ”‚   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค       โ”‚               โ”‚   
_โ”€โ”€โ”€โ–บQP โ”€โ”€โ”€โ”               โ”œโ–บNP โ”€โ”€โ”€โ”˜               โ”‚   
_โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNPโ”€โ”€โ”€โ”€โ”ค                       โ”‚   
_โ”€โ”€โ”                       โ”‚                       โ”‚   
_โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”˜                       โ”‚   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜   

Tok 
โ”€โ”€โ”€ 
้˜ฟๅฉ†ไธป 
ๆฅๅˆฐ  
ๅŒ—ไบฌ  
็ซ‹ๆ–นๅบญ 
ๅ‚่ง‚  
่‡ช็„ถ  
่ฏญไน‰  
็ง‘ๆŠ€  
ๅ…ฌๅธ  
ใ€‚   
P    3       4       5       6 
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”               โ”‚   
_โ”€โ”€โ”       โ”œโ–บVP โ”€โ”€โ”€โ”       โ”‚   
_โ”€โ”€โ”ดโ–บNP โ”€โ”€โ”€โ”˜       โ”‚       โ”‚   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”       โ”œโ–บVPโ”€โ”€โ”€โ”€โ”ค   
_โ”€โ”€โ”       โ”‚       โ”‚       โ”œโ–บIP
_  โ”‚       โ”œโ–บVP โ”€โ”€โ”€โ”˜       โ”‚   
_  โ”œโ–บNP โ”€โ”€โ”€โ”˜               โ”‚   
_โ”€โ”€โ”˜                       โ”‚   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜   
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "doc.pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ๅฐ†็ฌฌไธ€ไธช็Ÿญ่ฏญๆ ‘่ฝฌๆขไธบbracketedๆ ผๅผ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(TOP\n", + " (IP\n", + " (NP (_ 2021ๅนด))\n", + " (NP (_ HanLPv2.1))\n", + " (VP\n", + " (PP (_ ไธบ) (NP (_ ็”Ÿไบง) (_ ็Žฏๅขƒ)))\n", + " (VP\n", + " (_ ๅธฆๆฅ)\n", + " (NP\n", + " (ADJP\n", + " (NP (ADJP (_ ๆฌก)) (NP (_ ไธ–ไปฃ)))\n", + " (ADVP (_ ๆœ€))\n", + " (VP (_ ๅ…ˆ่ฟ›)))\n", + " (_ ็š„)\n", + " (NP (QP (_ ๅคš)) (NP (_ ่ฏญ็ง)))\n", + " (NP (_ NLP) (_ ๆŠ€ๆœฏ)))))\n", + " (_ ใ€‚)))\n" + ] + } + ], + "source": [ + "print(doc['con'][0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ๅฐ†็ฌฌไธ€ไธช็Ÿญ่ฏญๆ ‘่ฝฌๆขไธบlistๆ ผๅผ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['TOP',\n", + " [['IP',\n", + " [['NP', [['_', ['2021ๅนด']]]],\n", + " ['NP', [['_', ['HanLPv2.1']]]],\n", + " ['VP',\n", + " [['PP', [['_', ['ไธบ']], ['NP', [['_', ['็”Ÿไบง']], ['_', ['็Žฏๅขƒ']]]]]],\n", + " ['VP',\n", + " [['_', ['ๅธฆๆฅ']],\n", + " ['NP',\n", + " [['ADJP',\n", + " [['NP', [['ADJP', [['_', ['ๆฌก']]]], ['NP', [['_', ['ไธ–ไปฃ']]]]]],\n", + " ['ADVP', [['_', ['ๆœ€']]]],\n", + " ['VP', [['_', ['ๅ…ˆ่ฟ›']]]]]],\n", + " ['_', ['็š„']],\n", + " ['NP', [['QP', [['_', ['ๅคš']]]], ['NP', [['_', ['่ฏญ็ง']]]]]],\n", + " ['NP', [['_', ['NLP']], ['_', ['ๆŠ€ๆœฏ']]]]]]]]]],\n", + " ['_', ['ใ€‚']]]]]]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "doc['con'][0].to_list()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XOsWkOqQfzlr" + }, + "source": [ + 
"ไธบๅทฒๅˆ†่ฏ็š„ๅฅๅญๆ‰ง่กŒ็Ÿญ่ฏญๅฅๆณ•ๅˆ†ๆž๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "bLZSTbv_f3OA", + "outputId": "111c0be9-bac6-4eee-d5bd-a972ffc34844" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
Token 
โ”€โ”€โ”€โ”€โ”€ 
hanlp 
ไธบ     
็”Ÿไบง    
็Žฏๅขƒ    
ๅธฆๆฅ    
ๆฌกไธ–ไปฃ   
ๆœ€     
ๅ…ˆ่ฟ›    
็š„     
ๅคš่ฏญ็ง   
nlp   
ๆŠ€ๆœฏ    
ใ€‚     
P    3       4       5       6       7       8       9 
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”                                       โ”‚   
_โ”€โ”€โ”       โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บPP โ”€โ”€โ”€โ”       โ”‚   
_โ”€โ”€โ”ดโ–บNP โ”€โ”€โ”€โ”˜                               โ”‚       โ”‚   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”       โ”‚       โ”‚   
_โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”                       โ”‚       โ”œโ–บVPโ”€โ”€โ”€โ”€โ”ค   
_โ”€โ”€โ”€โ–บADVPโ”€โ”€โ”ผโ–บVP โ”€โ”€โ”€โ”€โ–บIP โ”€โ”€โ”€โ”       โ”‚       โ”‚       โ”œโ–บIP
_โ”€โ”€โ”€โ–บVP โ”€โ”€โ”€โ”˜               โ”‚       โ”œโ–บVP โ”€โ”€โ”€โ”˜       โ”‚   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค       โ”‚               โ”‚   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNPโ”€โ”€โ”€โ”€โ”ผโ–บNP โ”€โ”€โ”€โ”˜               โ”‚   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNPโ”€โ”€โ”€โ”€โ”ค                       โ”‚   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”˜                       โ”‚   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜   

Tok 
โ”€โ”€โ”€ 
ๆˆ‘   
็š„   
ๅธŒๆœ›  
ๆ˜ฏ   
ๅธŒๆœ›  
ๅผ ๆ™š้œž 
็š„   
่ƒŒๅฝฑ  
่ขซ   
ๆ™š้œž  
ๆ˜ ็บข  
ใ€‚   
P    3       4       5       6       7       8       9       10      11
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
_โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”                                                           
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ–บDNP โ”€โ”€โ”                                                   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”       โ”‚   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”               โ”‚       โ”‚   
_โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”                               โ”‚               โ”œโ–บVPโ”€โ”€โ”€โ”€โ”ค   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ–บDNP โ”€โ”€โ”                       โ”œโ–บVP โ”€โ”€โ”€โ”€โ–บIP โ”€โ”€โ”€โ”˜       โ”‚   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”       โ”‚                       โ”œโ–บIP
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”       โ”œโ–บIP โ”€โ”€โ”€โ”˜                       โ”‚   
_โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”               โ”œโ–บVP โ”€โ”€โ”€โ”˜                               โ”‚   
_โ”€โ”€โ”€โ–บVP โ”€โ”€โ”€โ”ดโ–บIP โ”€โ”€โ”€โ”€โ–บCP โ”€โ”€โ”€โ”˜                                       โ”‚   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜   
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "HanLP([\n", + " [\"HanLP\", \"ไธบ\", \"็”Ÿไบง\", \"็Žฏๅขƒ\", \"ๅธฆๆฅ\", \"ๆฌกไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš่ฏญ็ง\", \"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"],\n", + " [\"ๆˆ‘\", \"็š„\", \"ๅธŒๆœ›\", \"ๆ˜ฏ\", \"ๅธŒๆœ›\", \"ๅผ ๆ™š้œž\", \"็š„\", \"่ƒŒๅฝฑ\", \"่ขซ\", \"ๆ™š้œž\", \"ๆ˜ ็บข\", \"ใ€‚\"]\n", + " ], tasks='con', skip_tasks='tok*').pretty_print()" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "con_mtl.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/plugins/hanlp_demo/hanlp_demo/zh/con_restful.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/con_restful.ipynb new file mode 100644 index 000000000..9a594b00c --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/con_restful.ipynb @@ -0,0 +1,280 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IYwV-UkNNzFp" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1Uf_u7ddMhUt" + }, + "outputs": [], + "source": [ + "!pip install hanlp_restful -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pp-1KqEOOJ4t" + }, + "source": [ + "## ๅˆ›ๅปบๅฎขๆˆท็ซฏ" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "0tmKBu7sNAXX" + }, + "outputs": [], + "source": [ + "from hanlp_restful import HanLPClient\n", + "HanLP = HanLPClient('https://www.hanlp.com/api', auth=None, language='zh') # authไธๅกซๅˆ™ๅŒฟๅ๏ผŒzhไธญๆ–‡๏ผŒmulๅคš่ฏญ็ง" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EmZDmLn9aGxG" + }, + "source": [ + "#### ็”ณ่ฏท็ง˜้’ฅ\n", + "็”ฑไบŽๆœๅŠกๅ™จ็ฎ—ๅŠ›ๆœ‰้™๏ผŒๅŒฟๅ็”จๆˆทๆฏๅˆ†้’Ÿ้™2ๆฌก่ฐƒ็”จใ€‚ๅฆ‚ๆžœไฝ ้œ€่ฆๆ›ดๅคš่ฐƒ็”จๆฌกๆ•ฐ๏ผŒ[ๅปบ่ฎฎ็”ณ่ฏทๅ…่ดนๅ…ฌ็›ŠAPI็ง˜้’ฅauth](https://bbs.hanlp.com/t/hanlp2-1-restful-api/53)ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ็Ÿญ่ฏญๅฅๆณ•ๅˆ†ๆž\n", + "ไปปๅŠก่ถŠๅฐ‘๏ผŒ้€Ÿๅบฆ่ถŠๅฟซใ€‚ๅฆ‚ๆŒ‡ๅฎšไป…ๆ‰ง่กŒ็Ÿญ่ฏญๅฅๆณ•ๅˆ†ๆž๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "BqEmDMGGOtk3", + "outputId": "2a0d392f-b99a-4a18-fc7f-754e2abe2e34" + }, + "outputs": [], + "source": [ + "doc = HanLP('2021ๅนดHanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚', tasks='con')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ฟ”ๅ›žๅ€ผไธบไธ€ไธช[Document](https://hanlp.hankcs.com/docs/api/common/document.html):" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "{\n", + " \"tok/fine\": [\n", + " [\"2021ๅนด\", \"HanLPv2.1\", \"ไธบ\", \"็”Ÿไบง\", \"็Žฏๅขƒ\", \"ๅธฆๆฅ\", \"ๆฌก\", \"ไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš\", \"่ฏญ็ง\", \"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"]\n", + " ],\n", + " \"con\": [\n", + " [\"TOP\", [[\"IP\", [[\"NP\", [[\"_\", [\"2021ๅนด\"]]]], [\"NP\", [[\"_\", [\"HanLPv2.1\"]]]], [\"VP\", [[\"PP\", [[\"_\", [\"ไธบ\"]], [\"NP\", [[\"_\", [\"็”Ÿไบง\"]], [\"_\", [\"็Žฏๅขƒ\"]]]]]], [\"VP\", [[\"_\", [\"ๅธฆๆฅ\"]], [\"NP\", [[\"IP\", [[\"VP\", [[\"NP\", [[\"QP\", [[\"CLP\", [[\"_\", [\"ๆฌก\"]]]]]], [\"NP\", [[\"_\", [\"ไธ–ไปฃ\"]]]]]], [\"ADVP\", [[\"_\", [\"ๆœ€\"]]]], [\"VP\", [[\"_\", [\"ๅ…ˆ่ฟ›\"]]]]]]]], [\"_\", [\"็š„\"]], [\"NP\", [[\"QP\", [[\"_\", [\"ๅคš\"]]]], [\"NP\", [[\"_\", [\"่ฏญ็ง\"]]]]]], [\"NP\", [[\"_\", [\"NLP\"]], [\"_\", [\"ๆŠ€ๆœฏ\"]]]]]]]]]], [\"_\", [\"ใ€‚\"]]]]]]\n", + " ]\n", + "}\n" + ] + } + ], + "source": [ + "print(doc)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`doc['con']`ไธบTree็ฑปๅž‹๏ผŒๆ˜ฏlist็š„ๅญ็ฑปใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wxctCigrTKu-" + }, + "source": [ + "ๅฏ่ง†ๅŒ–็Ÿญ่ฏญๅฅๆณ•ๆ ‘๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zo08uquCTFSk", + "outputId": "c6077f2d-7084-4f4b-a3bc-9aa9951704ea" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
Token     
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021ๅนด     
HanLPv2.1 
ไธบ         
็”Ÿไบง        
็Žฏๅขƒ        
ๅธฆๆฅ        
ๆฌก         
ไธ–ไปฃ        
ๆœ€         
ๅ…ˆ่ฟ›        
็š„         
ๅคš         
่ฏญ็ง        
NLP       
ๆŠ€ๆœฏ        
ใ€‚         
P    3       4       5       6       7       8       9       10      11
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNPโ”€โ”€โ”€โ”€โ”ค   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”                                                       โ”‚   
_โ”€โ”€โ”       โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บPP โ”€โ”€โ”€โ”       โ”‚   
_โ”€โ”€โ”ดโ–บNP โ”€โ”€โ”€โ”˜                                               โ”‚       โ”‚   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”       โ”‚       โ”‚   
_โ”€โ”€โ”€โ–บCLP โ”€โ”€โ”€โ–บQP โ”€โ”€โ”€โ”                               โ”‚       โ”œโ–บVPโ”€โ”€โ”€โ”€โ”ค   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”ดโ–บNP โ”€โ”€โ”€โ”                       โ”‚       โ”‚       โ”‚   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บADVPโ”€โ”€โ”ผโ–บVP โ”€โ”€โ”€โ”€โ–บIP โ”€โ”€โ”€โ”       โ”œโ–บVP โ”€โ”€โ”€โ”˜       โ”œโ–บIP
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บVP โ”€โ”€โ”€โ”˜               โ”‚       โ”‚               โ”‚   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค       โ”‚               โ”‚   
_โ”€โ”€โ”€โ–บQP โ”€โ”€โ”€โ”                               โ”œโ–บNP โ”€โ”€โ”€โ”˜               โ”‚   
_โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNPโ”€โ”€โ”€โ”€โ”ค                       โ”‚   
_โ”€โ”€โ”                                       โ”‚                       โ”‚   
_โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”˜                       โ”‚   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜   
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "doc.pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ฝฌๆขไธบbracketedๆ ผๅผ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(TOP\n", + " (IP\n", + " (NP (_ 2021ๅนด))\n", + " (NP (_ HanLPv2.1))\n", + " (VP\n", + " (PP (_ ไธบ) (NP (_ ็”Ÿไบง) (_ ็Žฏๅขƒ)))\n", + " (VP\n", + " (_ ๅธฆๆฅ)\n", + " (NP\n", + " (IP\n", + " (VP\n", + " (NP (QP (CLP (_ ๆฌก))) (NP (_ ไธ–ไปฃ)))\n", + " (ADVP (_ ๆœ€))\n", + " (VP (_ ๅ…ˆ่ฟ›))))\n", + " (_ ็š„)\n", + " (NP (QP (_ ๅคš)) (NP (_ ่ฏญ็ง)))\n", + " (NP (_ NLP) (_ ๆŠ€ๆœฏ)))))\n", + " (_ ใ€‚)))\n" + ] + } + ], + "source": [ + "print(doc['con'][0])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XOsWkOqQfzlr" + }, + "source": [ + "ไธบๅทฒๅˆ†่ฏ็š„ๅฅๅญๆ‰ง่กŒ็Ÿญ่ฏญๅฅๆณ•ๅˆ†ๆž๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "bLZSTbv_f3OA", + "outputId": "111c0be9-bac6-4eee-d5bd-a972ffc34844" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
Token 
โ”€โ”€โ”€โ”€โ”€ 
hanlp 
ไธบ     
็”Ÿไบง    
็Žฏๅขƒ    
ๅธฆๆฅ    
ๆฌกไธ–ไปฃ   
ๆœ€     
ๅ…ˆ่ฟ›    
็š„     
ๅคš่ฏญ็ง   
nlp   
ๆŠ€ๆœฏ    
ใ€‚     
P    3       4       5       6       7       8       9       10      11      12
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”                                                               โ”‚   
_โ”€โ”€โ”       โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บPP โ”€โ”€โ”€โ”       โ”‚   
_โ”€โ”€โ”ดโ–บNP โ”€โ”€โ”€โ”˜                                                       โ”‚       โ”‚   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”       โ”‚       โ”‚   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”                                       โ”‚       โ”œโ–บVPโ”€โ”€โ”€โ”€โ”ค   
_โ”€โ”€โ”€โ–บADVPโ”€โ”€โ”       โ”œโ–บVP โ”€โ”€โ”€โ”€โ–บIP โ”€โ”€โ”€โ”                       โ”‚       โ”‚       โ”œโ–บIP
_โ”€โ”€โ”€โ–บVP โ”€โ”€โ”€โ”ดโ–บVP โ”€โ”€โ”€โ”˜               โ”œโ–บCP โ”€โ”€โ”€โ”€โ–บCP โ”€โ”€โ”€โ”       โ”œโ–บVP โ”€โ”€โ”€โ”˜       โ”‚   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜               โ”‚       โ”‚               โ”‚   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ–บNP โ”€โ”€โ”€โ”˜               โ”‚   
_โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”                                       โ”‚                       โ”‚   
_โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”˜                       โ”‚   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜   

Tok 
โ”€โ”€โ”€ 
ๆˆ‘   
็š„   
ๅธŒๆœ›  
ๆ˜ฏ   
ๅธŒๆœ›  
ๅผ ๆ™š้œž 
็š„   
่ƒŒๅฝฑ  
่ขซ   
ๆ™š้œž  
ๆ˜ ็บข  
ใ€‚   
P    3       4       5       6       7       8       9       10      11
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
_โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”                                                           
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ–บDNP โ”€โ”€โ”                                                   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”       โ”‚   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”               โ”‚       โ”‚   
_โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”                               โ”‚               โ”œโ–บVPโ”€โ”€โ”€โ”€โ”ค   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ–บDNP โ”€โ”€โ”                       โ”œโ–บVP โ”€โ”€โ”€โ”€โ–บIP โ”€โ”€โ”€โ”˜       โ”‚   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”       โ”‚                       โ”œโ–บIP
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”       โ”œโ–บIP โ”€โ”€โ”€โ”˜                       โ”‚   
_โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”               โ”œโ–บVP โ”€โ”€โ”€โ”˜                               โ”‚   
_โ”€โ”€โ”€โ–บVP โ”€โ”€โ”€โ”ดโ–บIP โ”€โ”€โ”€โ”€โ–บCP โ”€โ”€โ”€โ”˜                                       โ”‚   
_โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜   
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "HanLP(tokens=[\n", + " [\"HanLP\", \"ไธบ\", \"็”Ÿไบง\", \"็Žฏๅขƒ\", \"ๅธฆๆฅ\", \"ๆฌกไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš่ฏญ็ง\", \"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"],\n", + " [\"ๆˆ‘\", \"็š„\", \"ๅธŒๆœ›\", \"ๆ˜ฏ\", \"ๅธŒๆœ›\", \"ๅผ ๆ™š้œž\", \"็š„\", \"่ƒŒๅฝฑ\", \"่ขซ\", \"ๆ™š้œž\", \"ๆ˜ ็บข\", \"ใ€‚\"]\n", + " ], tasks='con').pretty_print()" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "con_restful.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/plugins/hanlp_demo/hanlp_demo/zh/con_stl.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/con_stl.ipynb new file mode 100644 index 000000000..5fbb611e4 --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/con_stl.ipynb @@ -0,0 +1,607 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IYwV-UkNNzFp" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1Uf_u7ddMhUt" + }, + "outputs": [], + "source": [ + "!pip install hanlp -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pp-1KqEOOJ4t" + }, + "source": [ + "## ๅŠ ่ฝฝๆจกๅž‹\n", + "HanLP็š„ๅทฅไฝœๆต็จ‹ๆ˜ฏๅ…ˆๅŠ ่ฝฝๆจกๅž‹๏ผŒๆจกๅž‹็š„ๆ ‡็คบ็ฌฆๅญ˜ๅ‚จๅœจ`hanlp.pretrained`่ฟ™ไธชๅŒ…ไธญ๏ผŒๆŒ‰็…งNLPไปปๅŠกๅฝ’็ฑปใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "0tmKBu7sNAXX" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'CTB9_CON_ELECTRA_SMALL': 'https://file.hankcs.com/hanlp/constituency/ctb9_con_electra_small_20220215_230116.zip',\n", + " 'CTB9_CON_FULL_TAG_ELECTRA_SMALL': 'https://file.hankcs.com/hanlp/constituency/ctb9_full_tag_con_electra_small_20220118_103119.zip'}" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import hanlp\n", + "hanlp.pretrained.constituency.ALL # ่ฏญ็ง่งๅ็งฐๆœ€ๅŽไธ€ไธชๅญ—ๆฎตๆˆ–็›ธๅบ”่ฏญๆ–™ๅบ“" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EmZDmLn9aGxG" + }, + "source": [ + "่ฐƒ็”จ`hanlp.load`่ฟ›่กŒๅŠ ่ฝฝ๏ผŒๆจกๅž‹ไผš่‡ชๅŠจไธ‹่ฝฝๅˆฐๆœฌๅœฐ็ผ“ๅญ˜ใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "con = hanlp.load('CTB9_CON_FULL_TAG_ELECTRA_SMALL')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ็Ÿญ่ฏญๅฅๆณ•ๅˆ†ๆž\n", + "่พ“ๅ…ฅไธบๅทฒๅˆ†่ฏ็š„ไธ€ไธชๆˆ–ๅคšไธชๅฅๅญ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "BqEmDMGGOtk3", + "outputId": 
"2a0d392f-b99a-4a18-fc7f-754e2abe2e34" + }, + "outputs": [], + "source": [ + "trees = con([[\"2021ๅนด\", \"HanLPv2.1\", \"ไธบ\", \"็”Ÿไบง\", \"็Žฏๅขƒ\", \"ๅธฆๆฅ\", \"ๆฌก\", \"ไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš\", \"่ฏญ็ง\", \"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"], [\"้˜ฟๅฉ†ไธป\", \"ๆฅๅˆฐ\", \"ๅŒ—ไบฌ\", \"็ซ‹ๆ–นๅบญ\", \"ๅ‚่ง‚\", \"่‡ช็„ถ\", \"่ฏญไน‰\", \"็ง‘ๆŠ€\", \"ๅ…ฌๅธ\", \"ใ€‚\"]], tasks='con')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ฟ”ๅ›žๅ€ผไธบไธ€ไธช`Tree`็š„ๆ•ฐ็ป„:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[['TOP', [['IP', [['NP-TMP', [['_', ['2021ๅนด']]]], ['NP-PN-SBJ', [['_', ['HanLPv2.1']]]], ['VP', [['PP-BNF', [['_', ['ไธบ']], ['NP', [['_', ['็”Ÿไบง']], ['_', ['็Žฏๅขƒ']]]]]], ['VP', [['_', ['ๅธฆๆฅ']], ['NP-OBJ', [['CP', [['CP', [['IP', [['VP', [['NP', [['DP', [['_', ['ๆฌก']]]], ['NP', [['_', ['ไธ–ไปฃ']]]]]], ['ADVP', [['_', ['ๆœ€']]]], ['VP', [['_', ['ๅ…ˆ่ฟ›']]]]]]]], ['_', ['็š„']]]]]], ['NP', [['QP', [['_', ['ๅคš']]]], ['NP', [['_', ['่ฏญ็ง']]]]]], ['NP', [['_', ['NLP']], ['_', ['ๆŠ€ๆœฏ']]]]]]]]]], ['_', ['ใ€‚']]]]]], ['TOP', [['IP', [['NP-SBJ', [['_', ['้˜ฟๅฉ†ไธป']]]], ['VP', [['VP', [['_', ['ๆฅๅˆฐ']], ['NP-OBJ', [['_', ['ๅŒ—ไบฌ']], ['NP-PN', [['_', ['็ซ‹ๆ–นๅบญ']]]]]]]], ['VP', [['_', ['ๅ‚่ง‚']], ['NP-OBJ', [['_', ['่‡ช็„ถ']], ['_', ['่ฏญไน‰']], ['_', ['็ง‘ๆŠ€']], ['_', ['ๅ…ฌๅธ']]]]]]]], ['_', ['ใ€‚']]]]]]]\n" + ] + } + ], + "source": [ + "print(trees)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ฝฌๆขไธบbracketedๆ ผๅผ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(TOP\n", + " (IP\n", + " (NP-TMP (_ 2021ๅนด))\n", + " (NP-PN-SBJ (_ HanLPv2.1))\n", + " (VP\n", + " (PP-BNF (_ ไธบ) (NP (_ ็”Ÿไบง) (_ ็Žฏๅขƒ)))\n", + " (VP\n", + " (_ ๅธฆๆฅ)\n", + " 
(NP-OBJ\n", + " (CP\n", + " (CP\n", + " (IP\n", + " (VP\n", + " (NP (DP (_ ๆฌก)) (NP (_ ไธ–ไปฃ)))\n", + " (ADVP (_ ๆœ€))\n", + " (VP (_ ๅ…ˆ่ฟ›))))\n", + " (_ ็š„)))\n", + " (NP (QP (_ ๅคš)) (NP (_ ่ฏญ็ง)))\n", + " (NP (_ NLP) (_ ๆŠ€ๆœฏ)))))\n", + " (_ ใ€‚)))\n" + ] + } + ], + "source": [ + "print(trees[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ็ป„่ฃ…ๆตๆฐด็บฟ" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "็Ÿญ่ฏญๆˆๅˆ†ๆ ‘็š„็ฌฌไธ€ๅฑ‚non-terminalไธ€่ˆฌๆ˜ฏ่ฏๆ€งๆ ‡็ญพ๏ผŒๆ‰€ไปฅ็ปๅธธไธŽ่ฏๆ€งๆ ‡ๆณจไธ€่ตทไฝฟ็”จใ€‚ไธบๆญค๏ผŒๅ…ˆๅŠ ่ฝฝไธ€ไธช่ฏๆ€งๆ ‡ๆณจๅ™จ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "pos = hanlp.load(hanlp.pretrained.pos.CTB9_POS_ELECTRA_SMALL)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "็„ถๅŽๅˆ›ๅปบไธ€ไธชๅ‡ฝๆ•ฐๅฐ†่ฏๆ€งๆ ‡็ญพๅ’Œๅฅๆณ•ๆ ‘็ป„่ฃ…่ตทๆฅ:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "from hanlp_common.document import Document\n", + "def merge_pos_into_con(doc:Document):\n", + " flat = isinstance(doc['pos'][0], str)\n", + " if flat:\n", + " doc = Document((k, [v]) for k, v in doc.items())\n", + " for tree, tags in zip(doc['con'], doc['pos']):\n", + " offset = 0\n", + " for subtree in tree.subtrees(lambda t: t.height() == 2):\n", + " tag = subtree.label()\n", + " if tag == '_':\n", + " subtree.set_label(tags[offset])\n", + " offset += 1\n", + " if flat:\n", + " doc = doc.squeeze()\n", + " return doc" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ไน‹ๅŽๅฐฑๅฏไปฅ็”จไธ€ไธชๆตๆฐด็บฟๅฐ†ไธ‰่€…็ป„่ฃ…่ตทๆฅไบ†๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "nlp = hanlp.pipeline() \\\n", + " .append(pos, input_key='tok', output_key='pos') \\\n", + " .append(con, input_key='tok', output_key='con') \\\n", + " 
.append(merge_pos_into_con, input_key='*')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ฏฅๆตๆฐด็บฟ็š„็ป“ๆž„ๅฆ‚ไธ‹๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[tok->TransformerTagger->pos, tok->CRFConstituencyParser->con, None->merge_pos_into_con->None]\n" + ] + } + ], + "source": [ + "print(nlp)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ไผ ๅ…ฅไธ€ไธชๅทฒๅˆ†่ฏ็š„ๅฅๅญ่ฏ•่ฏ•๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"tok\": [\n", + " \"2021ๅนด\",\n", + " \"HanLPv2.1\",\n", + " \"ๅธฆๆฅ\",\n", + " \"ๆœ€\",\n", + " \"ๅ…ˆ่ฟ›\",\n", + " \"็š„\",\n", + " \"ๅคš\",\n", + " \"่ฏญ็ง\",\n", + " \"NLP\",\n", + " \"ๆŠ€ๆœฏ\",\n", + " \"ใ€‚\"\n", + " ],\n", + " \"pos\": [\n", + " \"NT\",\n", + " \"NR\",\n", + " \"VV\",\n", + " \"AD\",\n", + " \"VA\",\n", + " \"DEC\",\n", + " \"CD\",\n", + " \"NN\",\n", + " \"NR\",\n", + " \"NN\",\n", + " \"PU\"\n", + " ],\n", + " \"con\": [\n", + " \"TOP\",\n", + " [[\"IP\", [[\"NP-TMP\", [[\"NT\", [\"2021ๅนด\"]]]], [\"NP-PN-SBJ\", [[\"NR\", [\"HanLPv2.1\"]]]], [\"VP\", [[\"VV\", [\"ๅธฆๆฅ\"]], [\"NP-OBJ\", [[\"CP\", [[\"CP\", [[\"IP\", [[\"VP\", [[\"ADVP\", [[\"AD\", [\"ๆœ€\"]]]], [\"VP\", [[\"VA\", [\"ๅ…ˆ่ฟ›\"]]]]]]]], [\"DEC\", [\"็š„\"]]]]]], [\"NP\", [[\"QP\", [[\"CD\", [\"ๅคš\"]]]], [\"NP\", [[\"NN\", [\"่ฏญ็ง\"]]]]]], [\"NP\", [[\"NR\", [\"NLP\"]], [\"NN\", [\"ๆŠ€ๆœฏ\"]]]]]]]], [\"PU\", [\"ใ€‚\"]]]]]\n", + " ]\n", + "}\n" + ] + } + ], + "source": [ + "doc = nlp(tok=[\"2021ๅนด\", \"HanLPv2.1\", \"ๅธฆๆฅ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš\", \"่ฏญ็ง\", \"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"])\n", + "print(doc)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + 
"ๆตๆฐด็บฟ็š„่พ“ๅ‡บไนŸๆ˜ฏไธ€ไธชDocument๏ผŒๆ‰€ไปฅๆ”ฏๆŒๅฏ่ง†ๅŒ–๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Token     
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021ๅนด     
HanLPv2.1 
ๅธฆๆฅ        
ๆœ€         
ๅ…ˆ่ฟ›        
็š„         
ๅคš         
่ฏญ็ง        
NLP       
ๆŠ€ๆœฏ        
ใ€‚         
PoS    3       4       5       6       7       8         9            10
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
NT โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP-TMP โ”€โ”€โ”€โ”€โ”   
NR โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP-PN-SBJโ”€โ”€โ”ค   
VV โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”            โ”‚   
AD โ”€โ”€โ”€โ–บADVPโ”€โ”€โ”                                         โ”‚            โ”‚   
VA โ”€โ”€โ”€โ–บVP โ”€โ”€โ”€โ”ดโ–บVP โ”€โ”€โ”€โ”€โ–บIP โ”€โ”€โ”€โ”                         โ”‚            โ”‚   
DECโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ–บCP โ”€โ”€โ”€โ”€โ–บCP โ”€โ”€โ”€โ”         โ”œโ–บVPโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ–บIP
CD โ”€โ”€โ”€โ–บQP โ”€โ”€โ”€โ”                               โ”‚         โ”‚            โ”‚   
NN โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNPโ”€โ”€โ”€โ”€โ”ผโ–บNP-OBJโ”€โ”€โ”˜            โ”‚   
NR โ”€โ”€โ”                                       โ”‚                      โ”‚   
NN โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”˜                      โ”‚   
PU โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜   
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "doc.pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ๅฆ‚ๆžœ่ฆๅˆ†ๆžๅŽŸๅง‹ๆ–‡ๆœฌ็š„่ฏ๏ผŒๅˆ†่ฏๆ˜ฏ็ฌฌไธ€ๆญฅ๏ผŒๆ‰€ไปฅๅ…ˆๅŠ ่ฝฝไธ€ไธชๅˆ†่ฏๅ™จ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "tok = hanlp.load(hanlp.pretrained.tok.COARSE_ELECTRA_SMALL_ZH)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "็„ถๅŽๅฐ†ๅˆ†่ฏๅ™จๆ’ๅ…ฅๅˆฐๆตๆฐด็บฟ็š„็ฌฌไธ€็บง๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[None->TransformerTaggingTokenizer->tok,\n", + " tok->TransformerTagger->pos,\n", + " tok->CRFConstituencyParser->con,\n", + " None->merge_pos_into_con->None]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nlp.insert(0, tok, output_key='tok')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "็„ถๅŽๅฐฑๅฏไปฅ็›ดๆŽฅๅˆ†ๆžๅŽŸๅง‹ๆ–‡ๆœฌไบ†๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(TOP\n", + " (IP\n", + " (NT 2021)\n", + " (M ๅนด)\n", + " (NP-PN-SBJ (NR HanLPv2.1))\n", + " (VP\n", + " (VV ๅธฆๆฅ)\n", + " (NP-OBJ\n", + " (CP (CP (IP (VP (ADVP (AD ๆœ€)) (VP (VA ๅ…ˆ่ฟ›)))) (DEC ็š„)))\n", + " (NP (QP (CD ๅคš)) (NP (NN ่ฏญ็ง)))\n", + " (NP (NR NLP) (NN ๆŠ€ๆœฏ))))\n", + " (PU ใ€‚)))\n" + ] + } + ], + "source": [ + "print(nlp('2021ๅนดHanLPv2.1ๅธฆๆฅๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚')['con'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ไฝ ๆ˜Ž็™ฝๅ—๏ผŸHanLPๆ˜ฏไธบ่ชๆ˜Žไบบ่ฎพ่ฎก็š„๏ผŒๅช่ฆไฝ ่ถณๅคŸ่ชๆ˜Ž๏ผŒไฝ ๅฐฑๅฏไปฅไผ˜้›…ๅœฐๅฎž็Žฐๅ„็งๅŠŸ่ƒฝใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "## ๆ“ไฝœ็Ÿญ่ฏญๆ ‘็š„ๆŠ€ๅทง" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "็Ÿญ่ฏญ็ป“ๆž„ๆ ‘็š„็ฑปๅž‹ไธบ`phrasetree.tree.Tree`๏ผŒๆไพ›ไบ†่ฎธๅคšๆŽฅๅฃ๏ผŒๆญคๅค„ๅˆ—ไธพๅ…ถไธญไธ€ไบ›ๅธธ็”จ็š„ๆŽฅๅฃใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(TOP\n", + " (IP\n", + " (NP-TMP (NT 2021ๅนด))\n", + " (NP-PN-SBJ (NR HanLPv2.1))\n", + " (VP\n", + " (VV ๅธฆๆฅ)\n", + " (NP-OBJ\n", + " (CP (CP (IP (VP (ADVP (AD ๆœ€)) (VP (VA ๅ…ˆ่ฟ›)))) (DEC ็š„)))\n", + " (NP (QP (CD ๅคš)) (NP (NN ่ฏญ็ง)))\n", + " (NP (NR NLP) (NN ๆŠ€ๆœฏ))))\n", + " (PU ใ€‚)))\n" + ] + } + ], + "source": [ + "tree = doc['con'] # treeๆ•ฐ็ป„็š„่ฏๅˆ™้œ€่ฆdoc['con'][0]\n", + "print(tree)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ๆŒ‰้ซ˜ๅบฆๆžšไธพๅญๆ ‘" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ๅญๆ ‘๏ผš(VP (ADVP (AD ๆœ€)) (VP (VA ๅ…ˆ่ฟ›)))\tๆ ‡็ญพ๏ผšVP\t็Ÿญ่ฏญ๏ผš['ๆœ€', 'ๅ…ˆ่ฟ›']\n", + "ๅญๆ ‘๏ผš(NP (QP (CD ๅคš)) (NP (NN ่ฏญ็ง)))\tๆ ‡็ญพ๏ผšNP\t็Ÿญ่ฏญ๏ผš['ๅคš', '่ฏญ็ง']\n" + ] + } + ], + "source": [ + "for subtree in tree.subtrees(lambda t: t.height() == 4):\n", + " print(f'ๅญๆ ‘๏ผš{subtree}\\tๆ ‡็ญพ๏ผš{subtree.label()}\\t็Ÿญ่ฏญ๏ผš{subtree.leaves()}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ๆŒ‰ๆ ‡็ญพๆžšไธพๅญๆ ‘" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(NP (QP (CD ๅคš)) (NP (NN ่ฏญ็ง)))\n", + "(NP (NN ่ฏญ็ง))\n", + "(NP (NR NLP) (NN ๆŠ€ๆœฏ))\n" + ] + } + ], + "source": [ + "for subtree in tree.subtrees(lambda t: t.label() == 'NP'):\n", + " print(subtree)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 
้ๅŽ†ๅญ่Š‚็‚น" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "็ˆถ่Š‚็‚น(NP (NR NLP) (NN ๆŠ€ๆœฏ))็š„ๅญ่Š‚็‚นๆœ‰๏ผš\n", + "(NR NLP)\n", + "(NN ๆŠ€ๆœฏ)\n" + ] + } + ], + "source": [ + "print(f'็ˆถ่Š‚็‚น{subtree}็š„ๅญ่Š‚็‚นๆœ‰๏ผš')\n", + "for child in subtree:\n", + " print(child)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "con_stl.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/plugins/hanlp_demo/hanlp_demo/zh/cor_restful.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/cor_restful.ipynb new file mode 100644 index 000000000..c81faef57 --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/cor_restful.ipynb @@ -0,0 +1,221 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IYwV-UkNNzFp" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1Uf_u7ddMhUt" + }, + "outputs": [], + "source": [ + "!pip install hanlp_restful -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pp-1KqEOOJ4t" + }, + "source": [ + "## ๅˆ›ๅปบๅฎขๆˆท็ซฏ" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "0tmKBu7sNAXX" + }, + "outputs": [], + "source": [ + "from hanlp_restful import HanLPClient\n", + "HanLP = HanLPClient('https://www.hanlp.com/api', auth=None, language='zh') # authไธๅกซๅˆ™ๅŒฟๅ๏ผŒzhไธญๆ–‡๏ผŒmulๅคš่ฏญ็ง" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EmZDmLn9aGxG" + }, + "source": [ + "#### ็”ณ่ฏท็ง˜้’ฅ\n", + "็”ฑไบŽๆœๅŠกๅ™จ็ฎ—ๅŠ›ๆœ‰้™๏ผŒๅŒฟๅ็”จๆˆทๆฏๅˆ†้’Ÿ้™2ๆฌก่ฐƒ็”จใ€‚ๅฆ‚ๆžœไฝ ้œ€่ฆๆ›ดๅคš่ฐƒ็”จๆฌกๆ•ฐ๏ผŒ[ๅปบ่ฎฎ็”ณ่ฏทๅ…่ดนๅ…ฌ็›ŠAPI็ง˜้’ฅauth](https://bbs.hanlp.com/t/hanlp2-1-restful-api/53)ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ๆŒ‡ไปฃๆถˆ่งฃ\n", + "ไปปๅŠก่ถŠๅฐ‘๏ผŒ้€Ÿๅบฆ่ถŠๅฟซใ€‚ๅฆ‚ๆŒ‡ๅฎšไป…ๆ‰ง่กŒๆŒ‡ไปฃๆถˆ่งฃ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "BqEmDMGGOtk3", + "outputId": "2a0d392f-b99a-4a18-fc7f-754e2abe2e34" + }, + "outputs": [], + "source": [ + "ret = HanLP.coreference_resolution('ๆˆ‘ๅง้€ๆˆ‘ๅฅน็š„็Œซใ€‚ๆˆ‘ๅพˆๅ–œๆฌขๅฎƒใ€‚')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ฟ”ๅ›žๅ€ผไธบไธ€ไธชๅŒ…ๅซๅˆ†่ฏ็ป“ๆžœไธŽ็ฐ‡็š„dict:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "ret == {'clusters': [\n", + " [['ๆˆ‘', 0, 1], ['ๆˆ‘', 3, 4], ['ๆˆ‘', 8, 9]], # ๆŒ‡ไปฃ่ฏด่ฏไบบ\n", + " [['ๆˆ‘ๅง', 0, 2], ['ๅฅน', 4, 5]], # ๆŒ‡ไปฃ่ฏด่ฏไบบ็š„ๅงๅง\n", + " [['ๅฅน็š„็Œซ', 4, 7], ['ๅฎƒ', 11, 12]]], # ๆŒ‡ไปฃ่ฏด่ฏไบบ็š„ๅงๅง็š„็Œซ\n", + " 'tokens': ['ๆˆ‘', 'ๅง', '้€', 'ๆˆ‘', 'ๅฅน', '็š„', '็Œซ', 'ใ€‚', 'ๆˆ‘', 'ๅพˆ', 'ๅ–œๆฌข', 'ๅฎƒ', 'ใ€‚']}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wxctCigrTKu-" + }, + "source": [ + "ๅฏนๅบ”ๅฆ‚ไธ‹็ป“ๆž„๏ผš\n", + "![cor](https://file.hankcs.com/img/coref_demo_small.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XOsWkOqQfzlr" + }, + "source": [ + "ไธบๅทฒๅˆ†่ฏ็š„ๅฅๅญๆ‰ง่กŒๆŒ‡ไปฃๆถˆ่งฃ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "bLZSTbv_f3OA", + "outputId": "111c0be9-bac6-4eee-d5bd-a972ffc34844" + }, + "outputs": [], + "source": [ + "clusters = HanLP.coreference_resolution(tokens=[['ๆˆ‘', 'ๅง', '้€', 'ๆˆ‘', 'ๅฅน', '็š„', '็Œซ', 'ใ€‚'],\n", + " ['ๆˆ‘', 'ๅพˆ', 'ๅ–œๆฌข', 'ๅฎƒ', 'ใ€‚']])\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ฟ”ๅ›žๅ€ผไธบ็ฐ‡็š„list๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "clusters == [\n", + " [['ๆˆ‘', 0, 1], ['ๆˆ‘', 3, 4], ['ๆˆ‘', 8, 9]], # ๆŒ‡ไปฃ่ฏด่ฏไบบ\n", + " [['ๆˆ‘ๅง', 0, 2], ['ๅฅน', 4, 5]], # ๆŒ‡ไปฃ่ฏด่ฏไบบ็š„ๅงๅง\n", + " [['ๅฅน็š„็Œซ', 4, 7], ['ๅฎƒ', 11, 12]]] # ๆŒ‡ไปฃ่ฏด่ฏไบบ็š„ๅงๅง็š„็Œซ" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "cor_restful.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": 
"python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/plugins/hanlp_demo/hanlp_demo/zh/dep_mtl.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/dep_mtl.ipynb new file mode 100644 index 000000000..2bd89c486 --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/dep_mtl.ipynb @@ -0,0 +1,379 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IYwV-UkNNzFp" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1Uf_u7ddMhUt" + }, + "outputs": [], + "source": [ + "!pip install hanlp -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pp-1KqEOOJ4t" + }, + "source": [ + "## ๅŠ ่ฝฝๆจกๅž‹\n", + "HanLP็š„ๅทฅไฝœๆต็จ‹ๆ˜ฏๅ…ˆๅŠ ่ฝฝๆจกๅž‹๏ผŒๆจกๅž‹็š„ๆ ‡็คบ็ฌฆๅญ˜ๅ‚จๅœจ`hanlp.pretrained`่ฟ™ไธชๅŒ…ไธญ๏ผŒๆŒ‰็…งNLPไปปๅŠกๅฝ’็ฑปใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "0tmKBu7sNAXX" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'OPEN_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_SMALL_ZH': 'https://file.hankcs.com/hanlp/mtl/open_tok_pos_ner_srl_dep_sdp_con_electra_small_20201223_035557.zip',\n", + " 'OPEN_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH': 'https://file.hankcs.com/hanlp/mtl/open_tok_pos_ner_srl_dep_sdp_con_electra_base_20201223_201906.zip',\n", + " 'CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_SMALL_ZH': 'https://file.hankcs.com/hanlp/mtl/close_tok_pos_ner_srl_dep_sdp_con_electra_small_20210111_124159.zip',\n", + " 'CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH': 'https://file.hankcs.com/hanlp/mtl/close_tok_pos_ner_srl_dep_sdp_con_electra_base_20210111_124519.zip',\n", + " 'CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ERNIE_GRAM_ZH': 'https://file.hankcs.com/hanlp/mtl/close_tok_pos_ner_srl_dep_sdp_con_ernie_gram_base_aug_20210904_145403.zip',\n", + " 'UD_ONTONOTES_TOK_POS_LEM_FEA_NER_SRL_DEP_SDP_CON_MT5_SMALL': 'https://file.hankcs.com/hanlp/mtl/ud_ontonotes_tok_pos_lem_fea_ner_srl_dep_sdp_con_mt5_small_20210228_123458.zip',\n", + " 'UD_ONTONOTES_TOK_POS_LEM_FEA_NER_SRL_DEP_SDP_CON_XLMR_BASE': 'https://file.hankcs.com/hanlp/mtl/ud_ontonotes_tok_pos_lem_fea_ner_srl_dep_sdp_con_xlm_base_20210602_211620.zip',\n", + " 
'NPCMJ_UD_KYOTO_TOK_POS_CON_BERT_BASE_CHAR_JA': 'https://file.hankcs.com/hanlp/mtl/npcmj_ud_kyoto_tok_pos_ner_dep_con_srl_bert_base_char_ja_20210914_133742.zip'}" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import hanlp\n", + "hanlp.pretrained.mtl.ALL # MTLๅคšไปปๅŠก๏ผŒๅ…ทไฝ“ไปปๅŠก่งๆจกๅž‹ๅ็งฐ๏ผŒ่ฏญ็ง่งๅ็งฐๆœ€ๅŽไธ€ไธชๅญ—ๆฎตๆˆ–็›ธๅบ”่ฏญๆ–™ๅบ“" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EmZDmLn9aGxG" + }, + "source": [ + "่ฐƒ็”จ`hanlp.load`่ฟ›่กŒๅŠ ่ฝฝ๏ผŒๆจกๅž‹ไผš่‡ชๅŠจไธ‹่ฝฝๅˆฐๆœฌๅœฐ็ผ“ๅญ˜ใ€‚่‡ช็„ถ่ฏญ่จ€ๅค„็†ๅˆ†ไธบ่ฎธๅคšไปปๅŠก๏ผŒๅˆ†่ฏๅชๆ˜ฏๆœ€ๅˆ็บง็š„ไธ€ไธชใ€‚ไธŽๅ…ถๆฏไธชไปปๅŠกๅ•็‹ฌๅˆ›ๅปบไธ€ไธชๆจกๅž‹๏ผŒไธๅฆ‚ๅˆฉ็”จHanLP็š„่”ๅˆๆจกๅž‹ไธ€ๆฌกๆ€งๅฎŒๆˆๅคšไธชไปปๅŠก๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "HanLP = hanlp.load(hanlp.pretrained.mtl.CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ไพๅญ˜ๅฅๆณ•ๅˆ†ๆž\n", + "ไปปๅŠก่ถŠๅฐ‘๏ผŒ้€Ÿๅบฆ่ถŠๅฟซใ€‚ๅฆ‚ๆŒ‡ๅฎšไป…ๆ‰ง่กŒไพๅญ˜ๅฅๆณ•ๅˆ†ๆž๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "BqEmDMGGOtk3", + "outputId": "2a0d392f-b99a-4a18-fc7f-754e2abe2e34" + }, + "outputs": [], + "source": [ + "doc = HanLP(['2021ๅนดHanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚', '้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธใ€‚'], tasks='dep')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ฟ”ๅ›žๅ€ผไธบไธ€ไธช[Document](https://hanlp.hankcs.com/docs/api/common/document.html):" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " 
\"tok/fine\": [\n", + " [\"2021ๅนด\", \"HanLPv2.1\", \"ไธบ\", \"็”Ÿไบง\", \"็Žฏๅขƒ\", \"ๅธฆๆฅ\", \"ๆฌก\", \"ไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš\", \"่ฏญ็ง\", \"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"],\n", + " [\"้˜ฟๅฉ†ไธป\", \"ๆฅๅˆฐ\", \"ๅŒ—ไบฌ\", \"็ซ‹ๆ–นๅบญ\", \"ๅ‚่ง‚\", \"่‡ช็„ถ\", \"่ฏญไน‰\", \"็ง‘ๆŠ€\", \"ๅ…ฌๅธ\", \"ใ€‚\"]\n", + " ],\n", + " \"dep\": [\n", + " [[6, \"tmod\"], [6, \"nsubj\"], [6, \"prep\"], [5, \"nn\"], [3, \"pobj\"], [0, \"root\"], [8, \"amod\"], [15, \"nn\"], [10, \"advmod\"], [15, \"rcmod\"], [10, \"assm\"], [13, \"nummod\"], [15, \"nn\"], [15, \"nn\"], [6, \"dobj\"], [6, \"punct\"]],\n", + " [[2, \"nsubj\"], [0, \"root\"], [4, \"nn\"], [2, \"dobj\"], [2, \"conj\"], [9, \"nn\"], [9, \"nn\"], [9, \"nn\"], [5, \"dobj\"], [2, \"punct\"]]\n", + " ]\n", + "}\n" + ] + } + ], + "source": [ + "print(doc)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`doc['dep']`ไธบๅฅๅญไปฌ็š„ไพๅญ˜ๅฅๆณ•ๆ ‘ๅˆ—่กจ๏ผŒ็ฌฌ`i`ไธชไบŒๅ…ƒ็ป„่กจ็คบ็ฌฌ`i`ไธชๅ•่ฏ็š„`[ไธญๅฟƒ่ฏ็š„ไธ‹ๆ ‡, ไธŽไธญๅฟƒ่ฏ็š„ไพๅญ˜ๅ…ณ็ณป]`ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wxctCigrTKu-" + }, + "source": [ + "ๅฏ่ง†ๅŒ–ไพๅญ˜ๅฅๆณ•ๆ ‘๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zo08uquCTFSk", + "outputId": "c6077f2d-7084-4f4b-a3bc-9aa9951704ea" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dep Tree \tToken \tRelati\n", + "โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€\n", + " โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ\t2021ๅนด \ttmod \n", + " โ”‚โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ\tHanLPv2.1\tnsubj \n", + " โ”‚โ”‚โ”Œโ”€โ–บโ”Œโ”€โ”€โ”€โ”€โ”€\tไธบ \tprep \n", + " โ”‚โ”‚โ”‚ โ”‚ โ”Œโ”€โ–บ\t็”Ÿไบง \tnn \n", + " โ”‚โ”‚โ”‚ โ””โ”€โ–บโ””โ”€โ”€\t็Žฏๅขƒ \tpobj \n", + "โ”Œโ”ผโ”ดโ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tๅธฆๆฅ \troot \n", + "โ”‚โ”‚ โ”Œโ”€โ–บ\tๆฌก \tamod \n", + "โ”‚โ”‚ 
โ”Œโ”€โ”€โ”€โ–บโ””โ”€โ”€\tไธ–ไปฃ \tnn \n", + "โ”‚โ”‚ โ”‚ โ”Œโ”€โ–บ\tๆœ€ \tadvmod\n", + "โ”‚โ”‚ โ”‚โ”Œโ”€โ”€โ–บโ”œโ”€โ”€\tๅ…ˆ่ฟ› \trcmod \n", + "โ”‚โ”‚ โ”‚โ”‚ โ””โ”€โ–บ\t็š„ \tassm \n", + "โ”‚โ”‚ โ”‚โ”‚ โ”Œโ”€โ–บ\tๅคš \tnummod\n", + "โ”‚โ”‚ โ”‚โ”‚โ”Œโ”€โ–บโ””โ”€โ”€\t่ฏญ็ง \tnn \n", + "โ”‚โ”‚ โ”‚โ”‚โ”‚ โ”Œโ”€โ–บ\tNLP \tnn \n", + "โ”‚โ””โ”€โ–บโ””โ”ดโ”ดโ”€โ”€โ”ดโ”€โ”€\tๆŠ€ๆœฏ \tdobj \n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ\tใ€‚ \tpunct \n", + "\n", + "Dep Tree \tTok\tRelat\n", + "โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€\n", + " โ”Œโ”€โ–บ\t้˜ฟๅฉ†ไธป\tnsubj\n", + "โ”Œโ”ฌโ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”ดโ”€โ”€\tๆฅๅˆฐ \troot \n", + "โ”‚โ”‚ โ”‚ โ”Œโ”€โ–บ\tๅŒ—ไบฌ \tnn \n", + "โ”‚โ”‚ โ””โ”€โ–บโ””โ”€โ”€\t็ซ‹ๆ–นๅบญ\tdobj \n", + "โ”‚โ””โ”€โ–บโ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€\tๅ‚่ง‚ \tconj \n", + "โ”‚ โ”‚ โ”Œโ”€โ”€โ”€โ–บ\t่‡ช็„ถ \tnn \n", + "โ”‚ โ”‚ โ”‚โ”Œโ”€โ”€โ–บ\t่ฏญไน‰ \tnn \n", + "โ”‚ โ”‚ โ”‚โ”‚โ”Œโ”€โ–บ\t็ง‘ๆŠ€ \tnn \n", + "โ”‚ โ””โ”€โ–บโ””โ”ดโ”ดโ”€โ”€\tๅ…ฌๅธ \tdobj \n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ\tใ€‚ \tpunct\n" + ] + } + ], + "source": [ + "doc.pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ฝฌๆขไธบCoNLLๆ ผๅผ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\t2021ๅนด\t_\t_\t_\t_\t6\ttmod\t_\t_\n", + "2\tHanLPv2.1\t_\t_\t_\t_\t6\tnsubj\t_\t_\n", + "3\tไธบ\t_\t_\t_\t_\t6\tprep\t_\t_\n", + "4\t็”Ÿไบง\t_\t_\t_\t_\t5\tnn\t_\t_\n", + "5\t็Žฏๅขƒ\t_\t_\t_\t_\t3\tpobj\t_\t_\n", + "6\tๅธฆๆฅ\t_\t_\t_\t_\t0\troot\t_\t_\n", + "7\tๆฌก\t_\t_\t_\t_\t8\tamod\t_\t_\n", + "8\tไธ–ไปฃ\t_\t_\t_\t_\t15\tnn\t_\t_\n", + "9\tๆœ€\t_\t_\t_\t_\t10\tadvmod\t_\t_\n", + "10\tๅ…ˆ่ฟ›\t_\t_\t_\t_\t15\trcmod\t_\t_\n", + "11\t็š„\t_\t_\t_\t_\t10\tassm\t_\t_\n", + "12\tๅคš\t_\t_\t_\t_\t13\tnummod\t_\t_\n", + "13\t่ฏญ็ง\t_\t_\t_\t_\t15\tnn\t_\t_\n", + "14\tNLP\t_\t_\t_\t_\t15\tnn\t_\t_\n", + 
"15\tๆŠ€ๆœฏ\t_\t_\t_\t_\t6\tdobj\t_\t_\n", + "16\tใ€‚\t_\t_\t_\t_\t6\tpunct\t_\t_\n", + "\n", + "1\t้˜ฟๅฉ†ไธป\t_\t_\t_\t_\t2\tnsubj\t_\t_\n", + "2\tๆฅๅˆฐ\t_\t_\t_\t_\t0\troot\t_\t_\n", + "3\tๅŒ—ไบฌ\t_\t_\t_\t_\t4\tnn\t_\t_\n", + "4\t็ซ‹ๆ–นๅบญ\t_\t_\t_\t_\t2\tdobj\t_\t_\n", + "5\tๅ‚่ง‚\t_\t_\t_\t_\t2\tconj\t_\t_\n", + "6\t่‡ช็„ถ\t_\t_\t_\t_\t9\tnn\t_\t_\n", + "7\t่ฏญไน‰\t_\t_\t_\t_\t9\tnn\t_\t_\n", + "8\t็ง‘ๆŠ€\t_\t_\t_\t_\t9\tnn\t_\t_\n", + "9\tๅ…ฌๅธ\t_\t_\t_\t_\t5\tdobj\t_\t_\n", + "10\tใ€‚\t_\t_\t_\t_\t2\tpunct\t_\t_\n" + ] + } + ], + "source": [ + "print(doc.to_conll())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XOsWkOqQfzlr" + }, + "source": [ + "ไธบๅทฒๅˆ†่ฏ็š„ๅฅๅญๆ‰ง่กŒไพๅญ˜ๅฅๆณ•ๅˆ†ๆž๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "bLZSTbv_f3OA", + "outputId": "111c0be9-bac6-4eee-d5bd-a972ffc34844" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dep Tree \tToken\tRelati\n", + "โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€\n", + " โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ\tHanLP\tnsubj \n", + " โ”‚โ”Œโ”€โ–บโ”Œโ”€โ”€โ”€โ”€โ”€\tไธบ \tprep \n", + " โ”‚โ”‚ โ”‚ โ”Œโ”€โ–บ\t็”Ÿไบง \tnn \n", + " โ”‚โ”‚ โ””โ”€โ–บโ””โ”€โ”€\t็Žฏๅขƒ \tpobj \n", + "โ”Œโ”ผโ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tๅธฆๆฅ \troot \n", + "โ”‚โ”‚ โ”Œโ”€โ”€โ”€โ”€โ”€โ–บ\tๆฌกไธ–ไปฃ \tnn \n", + "โ”‚โ”‚ โ”‚ โ”Œโ”€โ–บ\tๆœ€ \tadvmod\n", + "โ”‚โ”‚ โ”‚โ”Œโ”€โ–บโ”œโ”€โ”€\tๅ…ˆ่ฟ› \trcmod \n", + "โ”‚โ”‚ โ”‚โ”‚ โ””โ”€โ–บ\t็š„ \tassm \n", + "โ”‚โ”‚ โ”‚โ”‚ โ”Œโ”€โ”€โ–บ\tๅคš่ฏญ็ง \tnn \n", + "โ”‚โ”‚ โ”‚โ”‚ โ”‚โ”Œโ”€โ–บ\tNLP \tnn \n", + "โ”‚โ””โ”€โ–บโ””โ”ดโ”€โ”ดโ”ดโ”€โ”€\tๆŠ€ๆœฏ \tdobj \n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ\tใ€‚ \tpunct \n", + "\n", + "Dep Tree \tTok\tRelation \n", + "โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\n", + " โ”Œโ”€โ–บโ”Œโ”€โ”€\tๆˆ‘ \tassmod \n", + " โ”‚ 
โ””โ”€โ–บ\t็š„ \tassm \n", + " โ”Œโ”€โ–บโ””โ”€โ”€โ”€โ”€โ”€\tๅธŒๆœ› \ttop \n", + "โ”Œโ”ฌโ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tๆ˜ฏ \troot \n", + "โ”‚โ””โ”€โ–บโ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tๅธŒๆœ› \tccomp \n", + "โ”‚ โ”‚ โ”Œโ”€โ–บโ”Œโ”€โ”€\tๅผ ๆ™š้œž\tassmod \n", + "โ”‚ โ”‚ โ”‚ โ””โ”€โ–บ\t็š„ \tassm \n", + "โ”‚ โ”‚ โ”Œโ”€โ–บโ””โ”€โ”€โ”€โ”€โ”€\t่ƒŒๅฝฑ \tnsubjpass\n", + "โ”‚ โ””โ”€โ–บโ””โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€\t่ขซ \tccomp \n", + "โ”‚ โ”‚ โ”Œโ”€โ–บ\tๆ™š้œž \tnsubj \n", + "โ”‚ โ””โ”€โ–บโ””โ”€โ”€\tๆ˜ ็บข \tdep \n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ\tใ€‚ \tpunct \n" + ] + } + ], + "source": [ + "HanLP([\n", + " [\"HanLP\", \"ไธบ\", \"็”Ÿไบง\", \"็Žฏๅขƒ\", \"ๅธฆๆฅ\", \"ๆฌกไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš่ฏญ็ง\", \"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"],\n", + " [\"ๆˆ‘\", \"็š„\", \"ๅธŒๆœ›\", \"ๆ˜ฏ\", \"ๅธŒๆœ›\", \"ๅผ ๆ™š้œž\", \"็š„\", \"่ƒŒๅฝฑ\", \"่ขซ\", \"ๆ™š้œž\", \"ๆ˜ ็บข\", \"ใ€‚\"]\n", + " ], tasks='dep', skip_tasks='tok*').pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### ๆณจๆ„\n", + "Native API็š„่พ“ๅ…ฅๅ•ไฝ้™ๅฎšไธบๅฅๅญ๏ผŒ้œ€ไฝฟ็”จ[ๅคš่ฏญ็งๅˆ†ๅฅๆจกๅž‹](https://github.com/hankcs/HanLP/blob/master/plugins/hanlp_demo/hanlp_demo/sent_split.py)ๆˆ–[ๅŸบไบŽ่ง„ๅˆ™็š„ๅˆ†ๅฅๅ‡ฝๆ•ฐ](https://github.com/hankcs/HanLP/blob/master/hanlp/utils/rules.py#L19)ๅ…ˆ่กŒๅˆ†ๅฅใ€‚RESTfulๅŒๆ—ถๆ”ฏๆŒๅ…จๆ–‡ใ€ๅฅๅญใ€ๅทฒๅˆ†่ฏ็š„ๅฅๅญใ€‚้™คๆญคไน‹ๅค–๏ผŒRESTfulๅ’Œnativeไธค็งAPI็š„่ฏญไน‰่ฎพ่ฎกๅฎŒๅ…จไธ€่‡ด๏ผŒ็”จๆˆทๅฏไปฅๆ— ็ผไบ’ๆขใ€‚" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "dep_mtl.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": 
"ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/plugins/hanlp_demo/hanlp_demo/zh/dep_restful.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/dep_restful.ipynb new file mode 100644 index 000000000..119d57926 --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/dep_restful.ipynb @@ -0,0 +1,314 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IYwV-UkNNzFp" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1Uf_u7ddMhUt" + }, + "outputs": [], + "source": [ + "!pip install hanlp_restful -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pp-1KqEOOJ4t" + }, + "source": [ + "## ๅˆ›ๅปบๅฎขๆˆท็ซฏ" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "0tmKBu7sNAXX" + }, + "outputs": [], + "source": [ + "from hanlp_restful import HanLPClient\n", + "HanLP = HanLPClient('https://www.hanlp.com/api', auth=None, language='zh') # authไธๅกซๅˆ™ๅŒฟๅ๏ผŒzhไธญๆ–‡๏ผŒmulๅคš่ฏญ็ง" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EmZDmLn9aGxG" + }, + "source": [ + "#### ็”ณ่ฏท็ง˜้’ฅ\n", + "็”ฑไบŽๆœๅŠกๅ™จ็ฎ—ๅŠ›ๆœ‰้™๏ผŒๅŒฟๅ็”จๆˆทๆฏๅˆ†้’Ÿ้™2ๆฌก่ฐƒ็”จใ€‚ๅฆ‚ๆžœไฝ ้œ€่ฆๆ›ดๅคš่ฐƒ็”จๆฌกๆ•ฐ๏ผŒ[ๅปบ่ฎฎ็”ณ่ฏทๅ…่ดนๅ…ฌ็›ŠAPI็ง˜้’ฅauth](https://bbs.hanlp.com/t/hanlp2-1-restful-api/53)ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ไพๅญ˜ๅฅๆณ•ๅˆ†ๆž\n", + "ไปปๅŠก่ถŠๅฐ‘๏ผŒ้€Ÿๅบฆ่ถŠๅฟซใ€‚ๅฆ‚ๆŒ‡ๅฎšไป…ๆ‰ง่กŒไพๅญ˜ๅฅๆณ•ๅˆ†ๆž๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "BqEmDMGGOtk3", + "outputId": "2a0d392f-b99a-4a18-fc7f-754e2abe2e34" + }, + "outputs": [], + "source": [ + "doc = HanLP('2021ๅนดHanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚', tasks='dep')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ฟ”ๅ›žๅ€ผไธบไธ€ไธช[Document](https://hanlp.hankcs.com/docs/api/common/document.html):" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "{\n", + " \"tok/fine\": [\n", + " [\"2021ๅนด\", \"HanLPv2.1\", \"ไธบ\", \"็”Ÿไบง\", \"็Žฏๅขƒ\", \"ๅธฆๆฅ\", \"ๆฌก\", \"ไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš\", \"่ฏญ็ง\", \"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"]\n", + " ],\n", + " \"dep\": [\n", + " [[6, \"tmod\"], [6, \"nsubj\"], [6, \"prep\"], [5, \"nn\"], [3, \"pobj\"], [0, \"root\"], [8, \"clf\"], [10, \"dep\"], [10, \"advmod\"], [15, \"rcmod\"], [10, \"cpm\"], [13, \"nummod\"], [15, \"nn\"], [15, \"nn\"], [6, \"dobj\"], [6, \"punct\"]]\n", + " ]\n", + "}\n" + ] + } + ], + "source": [ + "print(doc)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`doc['dep']`ไธบๅฅๅญไปฌ็š„ไพๅญ˜ๅฅๆณ•ๆ ‘ๅˆ—่กจ๏ผŒ็ฌฌ`i`ไธชไบŒๅ…ƒ็ป„่กจ็คบ็ฌฌ`i`ไธชๅ•่ฏ็š„`[ไธญๅฟƒ่ฏ็š„ไธ‹ๆ ‡, ไธŽไธญๅฟƒ่ฏ็š„ไพๅญ˜ๅ…ณ็ณป]`ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wxctCigrTKu-" + }, + "source": [ + "ๅฏ่ง†ๅŒ–ไพๅญ˜ๅฅๆณ•ๆ ‘๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zo08uquCTFSk", + "outputId": "c6077f2d-7084-4f4b-a3bc-9aa9951704ea" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dep Tree \tToken \tRelati\n", + "โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€\n", + " โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ\t2021ๅนด \ttmod \n", + " โ”‚โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ\tHanLPv2.1\tnsubj \n", + " โ”‚โ”‚โ”Œโ”€โ–บโ”Œโ”€โ”€โ”€โ”€โ”€\tไธบ \tprep \n", + " โ”‚โ”‚โ”‚ โ”‚ โ”Œโ”€โ–บ\t็”Ÿไบง \tnn \n", + " โ”‚โ”‚โ”‚ โ””โ”€โ–บโ””โ”€โ”€\t็Žฏๅขƒ \tpobj \n", + "โ”Œโ”ฌโ”ดโ”ดโ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tๅธฆๆฅ \troot \n", + "โ”‚โ”‚ โ”Œโ”€โ–บ\tๆฌก \tclf \n", + "โ”‚โ”‚ โ”Œโ”€โ–บโ””โ”€โ”€\tไธ–ไปฃ \tdep \n", + "โ”‚โ”‚ โ”‚ โ”Œโ”€โ–บ\tๆœ€ \tadvmod\n", + "โ”‚โ”‚ โ”Œโ”€โ–บโ””โ”€โ”€โ”ผโ”€โ”€\tๅ…ˆ่ฟ› \trcmod \n", + "โ”‚โ”‚ โ”‚ โ””โ”€โ–บ\t็š„ \tcpm \n", + "โ”‚โ”‚ โ”‚ โ”Œโ”€โ–บ\tๅคš \tnummod\n", + "โ”‚โ”‚ โ”‚ 
โ”Œโ”€โ–บโ””โ”€โ”€\t่ฏญ็ง \tnn \n", + "โ”‚โ”‚ โ”‚ โ”‚ โ”Œโ”€โ–บ\tNLP \tnn \n", + "โ”‚โ””โ”€โ–บโ””โ”€โ”€โ”ดโ”€โ”€โ”ดโ”€โ”€\tๆŠ€ๆœฏ \tdobj \n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ\tใ€‚ \tpunct \n" + ] + } + ], + "source": [ + "doc.pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ฝฌๆขไธบCoNLLๆ ผๅผ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\t2021ๅนด\t_\t_\t_\t_\t6\ttmod\t_\t_\n", + "2\tHanLPv2.1\t_\t_\t_\t_\t6\tnsubj\t_\t_\n", + "3\tไธบ\t_\t_\t_\t_\t6\tprep\t_\t_\n", + "4\t็”Ÿไบง\t_\t_\t_\t_\t5\tnn\t_\t_\n", + "5\t็Žฏๅขƒ\t_\t_\t_\t_\t3\tpobj\t_\t_\n", + "6\tๅธฆๆฅ\t_\t_\t_\t_\t0\troot\t_\t_\n", + "7\tๆฌก\t_\t_\t_\t_\t8\tclf\t_\t_\n", + "8\tไธ–ไปฃ\t_\t_\t_\t_\t10\tdep\t_\t_\n", + "9\tๆœ€\t_\t_\t_\t_\t10\tadvmod\t_\t_\n", + "10\tๅ…ˆ่ฟ›\t_\t_\t_\t_\t15\trcmod\t_\t_\n", + "11\t็š„\t_\t_\t_\t_\t10\tcpm\t_\t_\n", + "12\tๅคš\t_\t_\t_\t_\t13\tnummod\t_\t_\n", + "13\t่ฏญ็ง\t_\t_\t_\t_\t15\tnn\t_\t_\n", + "14\tNLP\t_\t_\t_\t_\t15\tnn\t_\t_\n", + "15\tๆŠ€ๆœฏ\t_\t_\t_\t_\t6\tdobj\t_\t_\n", + "16\tใ€‚\t_\t_\t_\t_\t6\tpunct\t_\t_\n" + ] + } + ], + "source": [ + "print(doc.to_conll())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XOsWkOqQfzlr" + }, + "source": [ + "ไธบๅทฒๅˆ†่ฏ็š„ๅฅๅญๆ‰ง่กŒไพๅญ˜ๅฅๆณ•ๅˆ†ๆž๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "bLZSTbv_f3OA", + "outputId": "111c0be9-bac6-4eee-d5bd-a972ffc34844" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dep Tree \tToken\tRelati\n", + "โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€\n", + " โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ\tHanLP\tnsubj \n", + " โ”‚โ”Œโ”€โ–บโ”Œโ”€โ”€โ”€โ”€โ”€\tไธบ \tprep \n", + " โ”‚โ”‚ โ”‚ โ”Œโ”€โ–บ\t็”Ÿไบง \tnn \n", + " โ”‚โ”‚ 
โ””โ”€โ–บโ””โ”€โ”€\t็Žฏๅขƒ \tpobj \n", + "โ”Œโ”ผโ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tๅธฆๆฅ \troot \n", + "โ”‚โ”‚ โ”Œโ”€โ”€โ–บ\tๆฌกไธ–ไปฃ \tdep \n", + "โ”‚โ”‚ โ”‚โ”Œโ”€โ–บ\tๆœ€ \tadvmod\n", + "โ”‚โ”‚ โ”Œโ”€โ–บโ””โ”ผโ”€โ”€\tๅ…ˆ่ฟ› \trcmod \n", + "โ”‚โ”‚ โ”‚ โ””โ”€โ–บ\t็š„ \tcpm \n", + "โ”‚โ”‚ โ”‚ โ”Œโ”€โ”€โ–บ\tๅคš่ฏญ็ง \tnn \n", + "โ”‚โ”‚ โ”‚ โ”‚โ”Œโ”€โ–บ\tNLP \tnn \n", + "โ”‚โ””โ”€โ–บโ””โ”€โ”€โ”ดโ”ดโ”€โ”€\tๆŠ€ๆœฏ \tdobj \n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ\tใ€‚ \tpunct \n", + "\n", + "Dep Tree \tTok\tRelation \n", + "โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\n", + " โ”Œโ”€โ–บโ”Œโ”€โ”€\tๆˆ‘ \tassmod \n", + " โ”‚ โ””โ”€โ–บ\t็š„ \tassm \n", + " โ”Œโ”€โ–บโ””โ”€โ”€โ”€โ”€โ”€\tๅธŒๆœ› \ttop \n", + "โ”Œโ”ฌโ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tๆ˜ฏ \troot \n", + "โ”‚โ””โ”€โ–บโ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tๅธŒๆœ› \tccomp \n", + "โ”‚ โ”‚ โ”Œโ”€โ–บโ”Œโ”€โ”€\tๅผ ๆ™š้œž\tassmod \n", + "โ”‚ โ”‚ โ”‚ โ””โ”€โ–บ\t็š„ \tassm \n", + "โ”‚ โ”‚ โ”Œโ”€โ–บโ””โ”€โ”€โ”€โ”€โ”€\t่ƒŒๅฝฑ \tnsubjpass\n", + "โ”‚ โ””โ”€โ–บโ””โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€\t่ขซ \tccomp \n", + "โ”‚ โ”‚ โ”Œโ”€โ–บ\tๆ™š้œž \tnsubj \n", + "โ”‚ โ””โ”€โ–บโ””โ”€โ”€\tๆ˜ ็บข \tdep \n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ\tใ€‚ \tpunct \n" + ] + } + ], + "source": [ + "HanLP(tokens=[\n", + " [\"HanLP\", \"ไธบ\", \"็”Ÿไบง\", \"็Žฏๅขƒ\", \"ๅธฆๆฅ\", \"ๆฌกไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš่ฏญ็ง\", \"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"],\n", + " [\"ๆˆ‘\", \"็š„\", \"ๅธŒๆœ›\", \"ๆ˜ฏ\", \"ๅธŒๆœ›\", \"ๅผ ๆ™š้œž\", \"็š„\", \"่ƒŒๅฝฑ\", \"่ขซ\", \"ๆ™š้œž\", \"ๆ˜ ็บข\", \"ใ€‚\"]\n", + " ], tasks='dep').pretty_print()" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "dep_restful.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + 
"file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/plugins/hanlp_demo/hanlp_demo/zh/dep_stl.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/dep_stl.ipynb new file mode 100644 index 000000000..606b38489 --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/dep_stl.ipynb @@ -0,0 +1,430 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IYwV-UkNNzFp" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1Uf_u7ddMhUt" + }, + "outputs": [], + "source": [ + "!pip install hanlp -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pp-1KqEOOJ4t" + }, + "source": [ + "## ๅŠ ่ฝฝๆจกๅž‹\n", + "HanLP็š„ๅทฅไฝœๆต็จ‹ๆ˜ฏๅ…ˆๅŠ ่ฝฝๆจกๅž‹๏ผŒๆจกๅž‹็š„ๆ ‡็คบ็ฌฆๅญ˜ๅ‚จๅœจ`hanlp.pretrained`่ฟ™ไธชๅŒ…ไธญ๏ผŒๆŒ‰็…งNLPไปปๅŠกๅฝ’็ฑปใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4M7ka0K5OMWU", + "outputId": "69cdad22-d94d-41fb-9591-1c29515a3da9" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'CTB5_BIAFFINE_DEP_ZH': 'https://file.hankcs.com/hanlp/dep/biaffine_ctb5_20191229_025833.zip',\n", + " 'CTB7_BIAFFINE_DEP_ZH': 'https://file.hankcs.com/hanlp/dep/biaffine_ctb7_20200109_022431.zip',\n", + " 'CTB9_DEP_ELECTRA_SMALL': 'https://file.hankcs.com/hanlp/dep/ctb9_dep_electra_small_20220216_100306.zip',\n", + " 'PMT1_DEP_ELECTRA_SMALL': 'https://file.hankcs.com/hanlp/dep/pmt_dep_electra_small_20220218_134518.zip',\n", + " 'CTB9_UDC_ELECTRA_SMALL': 'https://file.hankcs.com/hanlp/dep/udc_dep_electra_small_20220218_095452.zip',\n", + " 'PTB_BIAFFINE_DEP_EN': 'https://file.hankcs.com/hanlp/dep/ptb_dep_biaffine_20200101_174624.zip'}" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import hanlp\n", + "hanlp.pretrained.dep.ALL # ่ฏญ็ง่งๅ็งฐๆœ€ๅŽไธ€ไธชๅญ—ๆฎตๆˆ–็›ธๅบ”่ฏญๆ–™ๅบ“" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BMW528wGNulM" + }, + "source": [ + "่ฐƒ็”จ`hanlp.load`่ฟ›่กŒๅŠ ่ฝฝ๏ผŒๆจกๅž‹ไผš่‡ชๅŠจไธ‹่ฝฝๅˆฐๆœฌๅœฐ็ผ“ๅญ˜๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + 
"id": "0tmKBu7sNAXX" + }, + "outputs": [], + "source": [ + "dep = hanlp.load(hanlp.pretrained.dep.CTB9_DEP_ELECTRA_SMALL)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ไพๅญ˜ๅฅๆณ•ๅˆ†ๆž\n", + "ไพๅญ˜ๅฅๆณ•ๅˆ†ๆžไปปๅŠก็š„่พ“ๅ…ฅไธบๅทฒๅˆ†่ฏ็š„ไธ€ไธชๆˆ–ๅคšไธชๅฅๅญ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "BqEmDMGGOtk3" + }, + "outputs": [], + "source": [ + "tree = dep([\"2021ๅนด\", \"HanLPv2.1\", \"ๅธฆๆฅ\", \"ๆฌก\", \"ไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš\", \"่ฏญ็ง\", \"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jj1Jk-2sPHYx" + }, + "source": [ + "่ฟ”ๅ›žๅฏน่ฑกไธบ[CoNLLSentence](https://hanlp.hankcs.com/docs/api/common/conll.html#hanlp_common.conll.CoNLLSentence)็ฑปๅž‹๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "U_PGm06m6K20", + "outputId": "a25c6452-5032-42b3-d501-99158380c487" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'id': 1,\n", + " 'form': '2021ๅนด',\n", + " 'cpos': None,\n", + " 'pos': None,\n", + " 'head': 3,\n", + " 'deprel': 'tmod',\n", + " 'lemma': None,\n", + " 'feats': None,\n", + " 'phead': None,\n", + " 'pdeprel': None},\n", + " {'id': 2,\n", + " 'form': 'HanLPv2.1',\n", + " 'cpos': None,\n", + " 'pos': None,\n", + " 'head': 3,\n", + " 'deprel': 'nsubj',\n", + " 'lemma': None,\n", + " 'feats': None,\n", + " 'phead': None,\n", + " 'pdeprel': None},\n", + " {'id': 3,\n", + " 'form': 'ๅธฆๆฅ',\n", + " 'cpos': None,\n", + " 'pos': None,\n", + " 'head': 0,\n", + " 'deprel': 'root',\n", + " 'lemma': None,\n", + " 'feats': None,\n", + " 'phead': None,\n", + " 'pdeprel': None},\n", + " {'id': 4,\n", + " 'form': 'ๆฌก',\n", + " 'cpos': None,\n", + " 'pos': None,\n", + " 'head': 5,\n", + " 'deprel': 'det',\n", + " 'lemma': None,\n", + " 'feats': None,\n", + " 'phead': None,\n", + 
" 'pdeprel': None},\n", + " {'id': 5,\n", + " 'form': 'ไธ–ไปฃ',\n", + " 'cpos': None,\n", + " 'pos': None,\n", + " 'head': 7,\n", + " 'deprel': 'dep',\n", + " 'lemma': None,\n", + " 'feats': None,\n", + " 'phead': None,\n", + " 'pdeprel': None},\n", + " {'id': 6,\n", + " 'form': 'ๆœ€',\n", + " 'cpos': None,\n", + " 'pos': None,\n", + " 'head': 7,\n", + " 'deprel': 'advmod',\n", + " 'lemma': None,\n", + " 'feats': None,\n", + " 'phead': None,\n", + " 'pdeprel': None},\n", + " {'id': 7,\n", + " 'form': 'ๅ…ˆ่ฟ›',\n", + " 'cpos': None,\n", + " 'pos': None,\n", + " 'head': 12,\n", + " 'deprel': 'rcmod',\n", + " 'lemma': None,\n", + " 'feats': None,\n", + " 'phead': None,\n", + " 'pdeprel': None},\n", + " {'id': 8,\n", + " 'form': '็š„',\n", + " 'cpos': None,\n", + " 'pos': None,\n", + " 'head': 7,\n", + " 'deprel': 'cpm',\n", + " 'lemma': None,\n", + " 'feats': None,\n", + " 'phead': None,\n", + " 'pdeprel': None},\n", + " {'id': 9,\n", + " 'form': 'ๅคš',\n", + " 'cpos': None,\n", + " 'pos': None,\n", + " 'head': 10,\n", + " 'deprel': 'nummod',\n", + " 'lemma': None,\n", + " 'feats': None,\n", + " 'phead': None,\n", + " 'pdeprel': None},\n", + " {'id': 10,\n", + " 'form': '่ฏญ็ง',\n", + " 'cpos': None,\n", + " 'pos': None,\n", + " 'head': 12,\n", + " 'deprel': 'nn',\n", + " 'lemma': None,\n", + " 'feats': None,\n", + " 'phead': None,\n", + " 'pdeprel': None},\n", + " {'id': 11,\n", + " 'form': 'NLP',\n", + " 'cpos': None,\n", + " 'pos': None,\n", + " 'head': 12,\n", + " 'deprel': 'nn',\n", + " 'lemma': None,\n", + " 'feats': None,\n", + " 'phead': None,\n", + " 'pdeprel': None},\n", + " {'id': 12,\n", + " 'form': 'ๆŠ€ๆœฏ',\n", + " 'cpos': None,\n", + " 'pos': None,\n", + " 'head': 3,\n", + " 'deprel': 'dobj',\n", + " 'lemma': None,\n", + " 'feats': None,\n", + " 'phead': None,\n", + " 'pdeprel': None},\n", + " {'id': 13,\n", + " 'form': 'ใ€‚',\n", + " 'cpos': None,\n", + " 'pos': None,\n", + " 'head': 3,\n", + " 'deprel': 'punct',\n", + " 'lemma': None,\n", + " 
'feats': None,\n", + " 'phead': None,\n", + " 'pdeprel': None}]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tree" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Gn_RQa_Z6K20" + }, + "source": [ + "ๆ‰“ๅฐๆ—ถไธบCoNLLๆ ผๅผ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "26P1LGzv6K20", + "outputId": "c78ffdb0-3cd7-492d-f55e-0d50120faffb" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\t2021ๅนด\t_\t_\t_\t_\t3\ttmod\t_\t_\n", + "2\tHanLPv2.1\t_\t_\t_\t_\t3\tnsubj\t_\t_\n", + "3\tๅธฆๆฅ\t_\t_\t_\t_\t0\troot\t_\t_\n", + "4\tๆฌก\t_\t_\t_\t_\t5\tdet\t_\t_\n", + "5\tไธ–ไปฃ\t_\t_\t_\t_\t7\tdep\t_\t_\n", + "6\tๆœ€\t_\t_\t_\t_\t7\tadvmod\t_\t_\n", + "7\tๅ…ˆ่ฟ›\t_\t_\t_\t_\t12\trcmod\t_\t_\n", + "8\t็š„\t_\t_\t_\t_\t7\tcpm\t_\t_\n", + "9\tๅคš\t_\t_\t_\t_\t10\tnummod\t_\t_\n", + "10\t่ฏญ็ง\t_\t_\t_\t_\t12\tnn\t_\t_\n", + "11\tNLP\t_\t_\t_\t_\t12\tnn\t_\t_\n", + "12\tๆŠ€ๆœฏ\t_\t_\t_\t_\t3\tdobj\t_\t_\n", + "13\tใ€‚\t_\t_\t_\t_\t3\tpunct\t_\t_\n" + ] + } + ], + "source": [ + "print(tree)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ๅฆ‚ๆžœไธ้œ€่ฆCoNLLๆ ผๅผ็š„่ฏ๏ผŒไนŸ่ฎธ`conll=False`ๆ—ถ็š„่พ“ๅ‡บๆ›ดๅŠ ็ฎ€ๆด๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(3, 'tmod'),\n", + " (3, 'nsubj'),\n", + " (0, 'root'),\n", + " (5, 'det'),\n", + " (7, 'dep'),\n", + " (7, 'advmod'),\n", + " (12, 'rcmod'),\n", + " (7, 'cpm'),\n", + " (10, 'nummod'),\n", + " (12, 'nn'),\n", + " (12, 'nn'),\n", + " (3, 'dobj'),\n", + " (3, 'punct')]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dep([\"2021ๅนด\", \"HanLPv2.1\", \"ๅธฆๆฅ\", \"ๆฌก\", \"ไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš\", \"่ฏญ็ง\", 
\"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"], conll=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ๅฏ่ง†ๅŒ–\n", + "ไฝ ๅฏไปฅๆž„้€ ไธ€ไธช`Document`ๅฎž็Žฐๆผ‚ไบฎ็š„ๅฏ่ง†ๅŒ–๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Dep Tree      
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
         โ”Œโ”€โ”€โ–บ 
         โ”‚โ”Œโ”€โ–บ 
โ”Œโ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”ดโ”€โ”€ 
โ”‚โ”‚        โ”Œโ”€โ–บ 
โ”‚โ”‚     โ”Œโ”€โ–บโ””โ”€โ”€ 
โ”‚โ”‚     โ”‚  โ”Œโ”€โ–บ 
โ”‚โ”‚  โ”Œโ”€โ–บโ””โ”€โ”€โ”ผโ”€โ”€ 
โ”‚โ”‚  โ”‚     โ””โ”€โ–บ 
โ”‚โ”‚  โ”‚     โ”Œโ”€โ–บ 
โ”‚โ”‚  โ”‚  โ”Œโ”€โ–บโ””โ”€โ”€ 
โ”‚โ”‚  โ”‚  โ”‚  โ”Œโ”€โ–บ 
โ”‚โ””โ”€โ–บโ””โ”€โ”€โ”ดโ”€โ”€โ”ดโ”€โ”€ 
โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ 
Token     
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021ๅนด     
HanLPv2.1 
ๅธฆๆฅ        
ๆฌก         
ไธ–ไปฃ        
ๆœ€         
ๅ…ˆ่ฟ›        
็š„         
ๅคš         
่ฏญ็ง        
NLP       
ๆŠ€ๆœฏ        
ใ€‚         
Relati
โ”€โ”€โ”€โ”€โ”€โ”€
tmod  
nsubj 
root  
det   
dep   
advmod
rcmod 
cpm   
nummod
nn    
nn    
dobj  
punct 
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from hanlp_common.document import Document\n", + "doc = Document(\n", + " tok=[\"2021ๅนด\", \"HanLPv2.1\", \"ๅธฆๆฅ\", \"ๆฌก\", \"ไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš\", \"่ฏญ็ง\", \"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"],\n", + " dep=[(3, 'tmod'), (3, 'nsubj'), (0, 'root'), (5, 'det'), (7, 'dep'), (7, 'advmod'), (12, 'rcmod'), (7, 'cpm'), (10, 'nummod'), (12, 'nn'), (12, 'nn'), (3, 'dobj'), (3, 'punct')]\n", + ")\n", + "doc.pretty_print()" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "dep_stl.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/plugins/hanlp_demo/hanlp_demo/zh/extractive_summarization_restful.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/extractive_summarization_restful.ipynb new file mode 100755 index 000000000..2bfd7b111 --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/extractive_summarization_restful.ipynb @@ -0,0 +1,277 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IYwV-UkNNzFp" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1Uf_u7ddMhUt" + }, + "outputs": [], + "source": [ + "!pip install hanlp_restful -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pp-1KqEOOJ4t" + }, + "source": [ + "## ๅˆ›ๅปบๅฎขๆˆท็ซฏ" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4M7ka0K5OMWU", + "outputId": "d74f0749-0587-454a-d7c9-7418d45ce534" + }, + "outputs": [], + "source": [ + "from hanlp_restful import HanLPClient\n", + "HanLP = HanLPClient('https://www.hanlp.com/api', auth=None, language='zh') # authไธๅกซๅˆ™ๅŒฟๅ๏ผŒzhไธญๆ–‡๏ผŒmulๅคš่ฏญ็ง" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BMW528wGNulM" + }, + "source": [ + "#### ็”ณ่ฏท็ง˜้’ฅ\n", + "็”ฑไบŽๆœๅŠกๅ™จ็ฎ—ๅŠ›ๆœ‰้™๏ผŒๅŒฟๅ็”จๆˆทๆฏๅˆ†้’Ÿ้™2ๆฌก่ฐƒ็”จใ€‚ๅฆ‚ๆžœไฝ ้œ€่ฆๆ›ดๅคš่ฐƒ็”จๆฌกๆ•ฐ๏ผŒ[ๅปบ่ฎฎ็”ณ่ฏทๅ…่ดนๅ…ฌ็›ŠAPI็ง˜้’ฅauth](https://bbs.hanlp.com/t/hanlp2-1-restful-api/53)ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ๆŠฝๅ–ๅผ่‡ชๅŠจๆ‘˜่ฆ\n", + "ๆŠฝๅ–ๅผ่‡ชๅŠจๆ‘˜่ฆ็š„็›ฎๆ ‡ๆ˜ฏไปŽๆ–‡็ซ ไธญ็ญ›้€‰ๅ‡บไธ€ไบ›ไฝœไธบๆ‘˜่ฆ็š„ไธญๅฟƒๅฅๅญ๏ผšๆ—ข่ฆ็ดงๆ‰ฃ่ฆ็‚น๏ผŒๅˆ่ฆ้ฟๅ…่ต˜่ฏญใ€‚\n", + "### ไธญๆ–‡\n", + "ๆŠฝๅ–ๅผ่‡ชๅŠจๆ‘˜่ฆไปปๅŠก็š„่พ“ๅ…ฅไธบไธ€ๆฎตๆ–‡ๆœฌๅ’Œๆ‰€้œ€็š„ๆ‘˜่ฆๅฅๅญๆ•ฐ้‡็š„ๆœ€ๅคงๅ€ผ`topk`๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "BqEmDMGGOtk3", + "outputId": "936d439a-e1ff-4308-d2aa-775955558594" + }, + "outputs": [ + { + "data": { + "text/plain": [ + 
"{'ๆฎDigiTimesๆŠฅ้“๏ผŒๅœจไธŠๆตท็–ซๆƒ…่ถ‹็ผ“๏ผŒ้˜ฒ็–ซ็ฎกๆŽงๅผ€ๅง‹ๆ”พๆพๅŽ๏ผŒ่‹นๆžœไพ›ๅบ”ๅ•†ๅนฟ่พพๆญฃๅœจ้€ๆญฅๆขๅคๅ…ถไธญๅ›ฝๅทฅๅŽ‚็š„MacBookไบงๅ“็”Ÿไบงใ€‚': 0.9999685883522034,\n", + " 'ไปๆœ‰่ฎธๅคš่‹นๆžœ็ฌ”่ฎฐๆœฌ็”จๆˆทๅœจ็ญ‰ๅพ…3ๆœˆๅ’Œ4ๆœˆ่ฎข่ดญ็š„MacBook Proๆœบๅž‹ๅˆฐ่ดง๏ผŒ็”ฑไบŽ่‹นๆžœ็š„ไพ›ๅบ”้—ฎ้ข˜๏ผŒไป–ไปฌ็š„ๅ‘่ดงๆ—ถ้—ด่ขซๅคงๅคงๆŽจ่ฟŸไบ†ใ€‚': 0.5798477530479431,\n", + " 'ๅฐฝ็ฎกMacBook Pro็š„็”Ÿไบง้€ๆธๆขๅค๏ผŒไฝ†ไพ›ๅบ”้—ฎ้ข˜้ข„่ฎกไพ็„ถๅฝฑๅ“2022ๅนด็ฌฌไธ‰ๅญฃๅบฆ็š„ไบงๅ“้”€ๅ”ฎใ€‚': 0.5435440540313721}" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "text = '''\n", + "ๆฎDigiTimesๆŠฅ้“๏ผŒๅœจไธŠๆตท็–ซๆƒ…่ถ‹็ผ“๏ผŒ้˜ฒ็–ซ็ฎกๆŽงๅผ€ๅง‹ๆ”พๆพๅŽ๏ผŒ่‹นๆžœไพ›ๅบ”ๅ•†ๅนฟ่พพๆญฃๅœจ้€ๆญฅๆขๅคๅ…ถไธญๅ›ฝๅทฅๅŽ‚็š„MacBookไบงๅ“็”Ÿไบงใ€‚\n", + "ๆฎไพ›ๅบ”้“พๆถˆๆฏไบบๅฃซ็งฐ๏ผŒ็”ŸไบงๅŽ‚็š„่ฎขๅ•ๆ‹‰ๅŠจๆƒ…ๅ†ตๆญฃๅœจๆ…ขๆ…ข่ฝฌๅผบ๏ผŒ่ฟ™ไผšๆ้ซ˜MacBook Proๆœบๅž‹็š„ไพ›ๅบ”้‡๏ผŒๅนถ็ผฉ็Ÿญ่‹นๆžœๅฎขๆˆทๅœจ่ฟ‡ๅŽปๅ‡ ๅ‘จๆ‰€็ปๅŽ†็š„ๅปถ้•ฟไบค่ดงๆ—ถ้—ดใ€‚\n", + "ไปๆœ‰่ฎธๅคš่‹นๆžœ็ฌ”่ฎฐๆœฌ็”จๆˆทๅœจ็ญ‰ๅพ…3ๆœˆๅ’Œ4ๆœˆ่ฎข่ดญ็š„MacBook Proๆœบๅž‹ๅˆฐ่ดง๏ผŒ็”ฑไบŽ่‹นๆžœ็š„ไพ›ๅบ”้—ฎ้ข˜๏ผŒไป–ไปฌ็š„ๅ‘่ดงๆ—ถ้—ด่ขซๅคงๅคงๆŽจ่ฟŸไบ†ใ€‚\n", + "ๆฎๅˆ†ๆžๅธˆ้ƒญๆ˜Ž้Œค่กจ็คบ๏ผŒๅนฟ่พพๆ˜ฏ้ซ˜็ซฏMacBook Pro็š„ๅ”ฏไธ€ไพ›ๅบ”ๅ•†๏ผŒ่‡ช้˜ฒ็–ซๅฐๆŽงไพ่ต–๏ผŒMacBook Proๅคง้ƒจๅˆ†ๅž‹ๅทไบค่ดงๆ—ถ้—ดๅขžๅŠ ไบ†ไธ‰ๅˆฐไบ”ๅ‘จ๏ผŒ\n", + "ไธ€ไบ›้ซ˜็ซฏๅฎšๅˆถๅž‹ๅท็š„MacBook Pro้…็ฝฎ่ฆๅˆฐ6ๆœˆๅบ•ๅˆฐ7ๆœˆๅˆๆ‰่ƒฝไบค่ดงใ€‚\n", + "ๅฐฝ็ฎกMacBook Pro็š„็”Ÿไบง้€ๆธๆขๅค๏ผŒไฝ†ไพ›ๅบ”้—ฎ้ข˜้ข„่ฎกไพ็„ถๅฝฑๅ“2022ๅนด็ฌฌไธ‰ๅญฃๅบฆ็š„ไบงๅ“้”€ๅ”ฎใ€‚\n", + "่‹นๆžœไธŠๅ‘จ่กจ็คบ๏ผŒ้˜ฒ็–ซๆŽชๆ–ฝๅ’Œๅ…ƒ้ƒจไปถ็Ÿญ็ผบๅฐ†็ปง็ปญไฝฟๅ…ถ้šพไปฅ็”Ÿไบง่ถณๅคŸ็š„ไบงๅ“ๆฅๆปก่ถณๆถˆ่ดน่€…็š„ๅผบๅŠฒ้œ€ๆฑ‚๏ผŒ่ฟ™ๆœ€็ปˆๅฐ†ๅฝฑๅ“่‹นๆžœ6ๆœˆไปฝ็š„ๆ”ถๅ…ฅใ€‚\n", + "'''\n", + "HanLP.extractive_summarization(text, topk=3)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jj1Jk-2sPHYx" + }, + "source": [ + "่ฟ”ๅ›žๅ€ผไธบๆœ€ๅคš`topk`ไธชๆ‘˜่ฆๅฅๅญไปฅๅŠ็›ธๅบ”็š„ๆƒ้‡๏ผŒๆƒ้‡ๅ–ๅ€ผๅŒบ้—ดไธบ$[0, 
1]$ใ€‚็”ฑไบŽTrigram BlockingๆŠ€ๅทง๏ผŒๅฎž้™…่ฟ”ๅ›ž็š„ๆ‘˜่ฆๅฅๆ•ฐ้‡ๅฏ่ƒฝๅฐไบŽ`topk`ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### ๅฏ่ง†ๅŒ–" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "ๆฎDigiTimesๆŠฅ้“๏ผŒๅœจไธŠๆตท็–ซๆƒ…่ถ‹็ผ“๏ผŒ้˜ฒ็–ซ็ฎกๆŽงๅผ€ๅง‹ๆ”พๆพๅŽ๏ผŒ่‹นๆžœไพ›ๅบ”ๅ•†ๅนฟ่พพๆญฃๅœจ้€ๆญฅๆขๅคๅ…ถไธญๅ›ฝๅทฅๅŽ‚็š„MacBookไบงๅ“็”Ÿไบงใ€‚\n", + "ๆฎไพ›ๅบ”้“พๆถˆๆฏไบบๅฃซ็งฐ๏ผŒ็”ŸไบงๅŽ‚็š„่ฎขๅ•ๆ‹‰ๅŠจๆƒ…ๅ†ตๆญฃๅœจๆ…ขๆ…ข่ฝฌๅผบ๏ผŒ่ฟ™ไผšๆ้ซ˜MacBook Proๆœบๅž‹็š„ไพ›ๅบ”้‡๏ผŒๅนถ็ผฉ็Ÿญ่‹นๆžœๅฎขๆˆทๅœจ่ฟ‡ๅŽปๅ‡ ๅ‘จๆ‰€็ปๅŽ†็š„ๅปถ้•ฟไบค่ดงๆ—ถ้—ดใ€‚\n", + "ไปๆœ‰่ฎธๅคš่‹นๆžœ็ฌ”่ฎฐๆœฌ็”จๆˆทๅœจ็ญ‰ๅพ…3ๆœˆๅ’Œ4ๆœˆ่ฎข่ดญ็š„MacBook Proๆœบๅž‹ๅˆฐ่ดง๏ผŒ็”ฑไบŽ่‹นๆžœ็š„ไพ›ๅบ”้—ฎ้ข˜๏ผŒไป–ไปฌ็š„ๅ‘่ดงๆ—ถ้—ด่ขซๅคงๅคงๆŽจ่ฟŸไบ†ใ€‚\n", + "ๆฎๅˆ†ๆžๅธˆ้ƒญๆ˜Ž้Œค่กจ็คบ๏ผŒๅนฟ่พพๆ˜ฏ้ซ˜็ซฏMacBook Pro็š„ๅ”ฏไธ€ไพ›ๅบ”ๅ•†๏ผŒ่‡ช้˜ฒ็–ซๅฐๆŽงไพ่ต–๏ผŒMacBook Proๅคง้ƒจๅˆ†ๅž‹ๅทไบค่ดงๆ—ถ้—ดๅขžๅŠ ไบ†ไธ‰ๅˆฐไบ”ๅ‘จ๏ผŒ\n", + "ไธ€ไบ›้ซ˜็ซฏๅฎšๅˆถๅž‹ๅท็š„MacBook Pro้…็ฝฎ่ฆๅˆฐ6ๆœˆๅบ•ๅˆฐ7ๆœˆๅˆๆ‰่ƒฝไบค่ดงใ€‚\n", + "ๅฐฝ็ฎกMacBook Pro็š„็”Ÿไบง้€ๆธๆขๅค๏ผŒไฝ†ไพ›ๅบ”้—ฎ้ข˜้ข„่ฎกไพ็„ถๅฝฑๅ“2022ๅนด็ฌฌไธ‰ๅญฃๅบฆ็š„ไบงๅ“้”€ๅ”ฎใ€‚\n", + "่‹นๆžœไธŠๅ‘จ่กจ็คบ๏ผŒ้˜ฒ็–ซๆŽชๆ–ฝๅ’Œๅ…ƒ้ƒจไปถ็Ÿญ็ผบๅฐ†็ปง็ปญไฝฟๅ…ถ้šพไปฅ็”Ÿไบง่ถณๅคŸ็š„ไบงๅ“ๆฅๆปก่ถณๆถˆ่ดน่€…็š„ๅผบๅŠฒ้œ€ๆฑ‚๏ผŒ่ฟ™ๆœ€็ปˆๅฐ†ๅฝฑๅ“่‹นๆžœ6ๆœˆไปฝ็š„ๆ”ถๅ…ฅใ€‚\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def highlight(text, scores):\n", + " for k, v in scores.items():\n", + " text = text.replace(k, f'{k}')\n", + " from IPython.display import display, HTML\n", + " display(HTML(text))\n", + "\n", + "scores = HanLP.extractive_summarization(text, topk=100)\n", + "highlight(text, scores)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### ็นไฝ“ไธญๆ–‡\n", + "HanLP็š„ๆŠฝๅ–ๅผ่‡ชๅŠจๆ‘˜่ฆๆŽฅๅฃๆ”ฏๆŒ็นไฝ“ไธญๆ–‡๏ผš" + ] + }, + { + "cell_type": "code", + 
"execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'่ฏ็ˆพ่ก—ๆ—ฅๅ ฑๅ‘จไบŒ๏ผˆ3ๆ—ฅ๏ผ‰ๅ ฑๅฐŽ๏ผŒๆ นๆ“š็Ÿฅๆƒ…ไบบ้€้œฒ๏ผŒๆ—ฅๅ‰ๅทฒๅฎฃๅธƒๅฐ‡ไปฅ440ๅ„„็พŽๅ…ƒ่ฒทไธ‹ๆŽจ็‰น๏ผˆTwitter๏ผ‰ไธฆไธ‹ๅธ‚็š„้ฆฌๆ–ฏๅ…‹๏ผŒๆ›พ็ถ“่ทŸไธ€ไบ›ๆฝ›ๅœจๆŠ•่ณ‡ไบบ่ชช๏ผŒไป–ๅฏไปฅๅœจ็Ÿญ็ŸญๅนพๅนดๅพŒ๏ผŒๅ†ๅฐ‡้€™ๅฎถ็คพ็พคๅช’้ซ”ๅ…ฌๅธ้‡ๆ–ฐไธŠๅธ‚ใ€‚': 0.9999818205833435,\n", + " 'ๆถˆๆฏไพ†ๆบ่ชช๏ผŒ็‰นๆ–ฏๆ‹‰ๅ‰ต่พฆไบบๅ…ผๅŸท่กŒ้•ท้ฆฌๆ–ฏๅ…‹่กจ็คบ๏ผŒไป–่จˆๅŠƒๅœจ่ฒทไธ‹ๆŽจ็‰นๅพŒๆœ€็Ÿญไธ‰ๅนดๅ…ง๏ผŒๅฐฑๅฑ•้–‹ๆŽจ็‰น็š„้ฆ–ๆฌกๅ…ฌ้–‹็™ผ่กŒ่‚ก็ฅจใ€‚': 0.503434419631958,\n", + " 'ๆ นๆ“šไน‹ๅ‰่ฏ็ˆพ่ก—ๆ—ฅๅ ฑ็š„ๅ ฑๅฐŽ๏ผŒ้ฆฌๆ–ฏๅ…‹็‚บ่ณผ่ฒทๆŽจ็‰น็ฑŒ็พ้‡‘ๆ™‚๏ผŒ่ˆ‡็งๅ‹Ÿ่‚กๆฌŠๅ…ฌๅธ็ญ‰ๆŠ•่ณ‡ไบบ่จŽ่ซ–ๅ‡บ่ณ‡ไบ‹ๅฎœ๏ผŒApollo Global Managementๆœ‰่ˆˆ่ถฃๅƒ่ˆ‡ใ€‚': 0.2688594460487366}" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "text = '''\n", + "่ฏ็ˆพ่ก—ๆ—ฅๅ ฑๅ‘จไบŒ๏ผˆ3ๆ—ฅ๏ผ‰ๅ ฑๅฐŽ๏ผŒๆ นๆ“š็Ÿฅๆƒ…ไบบ้€้œฒ๏ผŒๆ—ฅๅ‰ๅทฒๅฎฃๅธƒๅฐ‡ไปฅ440ๅ„„็พŽๅ…ƒ่ฒทไธ‹ๆŽจ็‰น๏ผˆTwitter๏ผ‰ไธฆไธ‹ๅธ‚็š„้ฆฌๆ–ฏๅ…‹๏ผŒๆ›พ็ถ“่ทŸไธ€ไบ›ๆฝ›ๅœจๆŠ•่ณ‡ไบบ่ชช๏ผŒไป–ๅฏไปฅๅœจ็Ÿญ็ŸญๅนพๅนดๅพŒ๏ผŒๅ†ๅฐ‡้€™ๅฎถ็คพ็พคๅช’้ซ”ๅ…ฌๅธ้‡ๆ–ฐไธŠๅธ‚ใ€‚\n", + "ๆถˆๆฏไพ†ๆบ่ชช๏ผŒ็‰นๆ–ฏๆ‹‰ๅ‰ต่พฆไบบๅ…ผๅŸท่กŒ้•ท้ฆฌๆ–ฏๅ…‹่กจ็คบ๏ผŒไป–่จˆๅŠƒๅœจ่ฒทไธ‹ๆŽจ็‰นๅพŒๆœ€็Ÿญไธ‰ๅนดๅ…ง๏ผŒๅฐฑๅฑ•้–‹ๆŽจ็‰น็š„้ฆ–ๆฌกๅ…ฌ้–‹็™ผ่กŒ่‚ก็ฅจใ€‚\n", + "้ฆฌๆ–ฏๅ…‹่ฒทๆŽจ็‰น็š„ไบคๆ˜“ๆกˆ้ ๆœŸๅœจไปŠๅนด็จๅพŒ่ตฐๅฎŒ็จ‹ๅบ๏ผŒๅŒ…ๆ‹ฌ็ฒๅพ—่‚กๆฑๅŒๆ„ไปฅๅŠ็›ฃ็ฎกๆฉŸ้—œๆ ธๅ‡†็ญ‰ๆญฅ้ฉŸใ€‚\n", + "ๆ นๆ“šไน‹ๅ‰่ฏ็ˆพ่ก—ๆ—ฅๅ ฑ็š„ๅ ฑๅฐŽ๏ผŒ้ฆฌๆ–ฏๅ…‹็‚บ่ณผ่ฒทๆŽจ็‰น็ฑŒ็พ้‡‘ๆ™‚๏ผŒ่ˆ‡็งๅ‹Ÿ่‚กๆฌŠๅ…ฌๅธ็ญ‰ๆŠ•่ณ‡ไบบ่จŽ่ซ–ๅ‡บ่ณ‡ไบ‹ๅฎœ๏ผŒApollo Global Managementๆœ‰่ˆˆ่ถฃๅƒ่ˆ‡ใ€‚\n", + "็งๅ‹Ÿ่‚กๆฌŠๅ…ฌๅธ้€šๅธธ้ƒฝๅ…ˆ่ฒทไธ‹ๅ…ฌๅธๅฐ‡ไน‹็งๆœ‰ๅŒ–๏ผŒๆŠŠๅ…ฌๅธ็งปๅ‡บ็œพไบบๆณจ็›ฎ็š„็„ฆ้ปžไน‹ๅค–ไปฅๅพŒ๏ผŒๆ•ด้ “ๅ…ฌๅธ๏ผŒๆŽฅ่‘—ๅ†ๆŠŠๅ…ฌๅธไธŠๅธ‚๏ผŒๆ™‚้–“ๅธธๆ˜ฏไบ”ๅนดๅทฆๅณใ€‚\n", + "่ฏ็ˆพ่ก—ๆ—ฅๅ ฑๆŒ‡ๅ‡บ๏ผŒ้ฆฌๆ–ฏๅ…‹ๆš—็คบไป–ๅฐๆŽจ็‰นๆœ‰้กžไผผ็š„่ฆๅŠƒ็š„่ฉฑ๏ผŒๆœ‰ๅŠฉ่ชชๆœๆฝ›ๅœจๆŠ•่ณ‡ไบบ๏ผŒไป–ๆœƒๅพˆๅฟซ่กŒๅ‹•๏ผŒๆ”นๅ–„ๆŽจ็‰น็š„็‡Ÿ้‹ๅ’Œ็ฒๅˆฉใ€‚\n", + 
"'''\n", + "scores = HanLP.extractive_summarization(text)\n", + "scores" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "่ฏ็ˆพ่ก—ๆ—ฅๅ ฑๅ‘จไบŒ๏ผˆ3ๆ—ฅ๏ผ‰ๅ ฑๅฐŽ๏ผŒๆ นๆ“š็Ÿฅๆƒ…ไบบ้€้œฒ๏ผŒๆ—ฅๅ‰ๅทฒๅฎฃๅธƒๅฐ‡ไปฅ440ๅ„„็พŽๅ…ƒ่ฒทไธ‹ๆŽจ็‰น๏ผˆTwitter๏ผ‰ไธฆไธ‹ๅธ‚็š„้ฆฌๆ–ฏๅ…‹๏ผŒๆ›พ็ถ“่ทŸไธ€ไบ›ๆฝ›ๅœจๆŠ•่ณ‡ไบบ่ชช๏ผŒไป–ๅฏไปฅๅœจ็Ÿญ็ŸญๅนพๅนดๅพŒ๏ผŒๅ†ๅฐ‡้€™ๅฎถ็คพ็พคๅช’้ซ”ๅ…ฌๅธ้‡ๆ–ฐไธŠๅธ‚ใ€‚\n", + "ๆถˆๆฏไพ†ๆบ่ชช๏ผŒ็‰นๆ–ฏๆ‹‰ๅ‰ต่พฆไบบๅ…ผๅŸท่กŒ้•ท้ฆฌๆ–ฏๅ…‹่กจ็คบ๏ผŒไป–่จˆๅŠƒๅœจ่ฒทไธ‹ๆŽจ็‰นๅพŒๆœ€็Ÿญไธ‰ๅนดๅ…ง๏ผŒๅฐฑๅฑ•้–‹ๆŽจ็‰น็š„้ฆ–ๆฌกๅ…ฌ้–‹็™ผ่กŒ่‚ก็ฅจใ€‚\n", + "้ฆฌๆ–ฏๅ…‹่ฒทๆŽจ็‰น็š„ไบคๆ˜“ๆกˆ้ ๆœŸๅœจไปŠๅนด็จๅพŒ่ตฐๅฎŒ็จ‹ๅบ๏ผŒๅŒ…ๆ‹ฌ็ฒๅพ—่‚กๆฑๅŒๆ„ไปฅๅŠ็›ฃ็ฎกๆฉŸ้—œๆ ธๅ‡†็ญ‰ๆญฅ้ฉŸใ€‚\n", + "ๆ นๆ“šไน‹ๅ‰่ฏ็ˆพ่ก—ๆ—ฅๅ ฑ็š„ๅ ฑๅฐŽ๏ผŒ้ฆฌๆ–ฏๅ…‹็‚บ่ณผ่ฒทๆŽจ็‰น็ฑŒ็พ้‡‘ๆ™‚๏ผŒ่ˆ‡็งๅ‹Ÿ่‚กๆฌŠๅ…ฌๅธ็ญ‰ๆŠ•่ณ‡ไบบ่จŽ่ซ–ๅ‡บ่ณ‡ไบ‹ๅฎœ๏ผŒApollo Global Managementๆœ‰่ˆˆ่ถฃๅƒ่ˆ‡ใ€‚\n", + "็งๅ‹Ÿ่‚กๆฌŠๅ…ฌๅธ้€šๅธธ้ƒฝๅ…ˆ่ฒทไธ‹ๅ…ฌๅธๅฐ‡ไน‹็งๆœ‰ๅŒ–๏ผŒๆŠŠๅ…ฌๅธ็งปๅ‡บ็œพไบบๆณจ็›ฎ็š„็„ฆ้ปžไน‹ๅค–ไปฅๅพŒ๏ผŒๆ•ด้ “ๅ…ฌๅธ๏ผŒๆŽฅ่‘—ๅ†ๆŠŠๅ…ฌๅธไธŠๅธ‚๏ผŒๆ™‚้–“ๅธธๆ˜ฏไบ”ๅนดๅทฆๅณใ€‚\n", + "่ฏ็ˆพ่ก—ๆ—ฅๅ ฑๆŒ‡ๅ‡บ๏ผŒ้ฆฌๆ–ฏๅ…‹ๆš—็คบไป–ๅฐๆŽจ็‰นๆœ‰้กžไผผ็š„่ฆๅŠƒ็š„่ฉฑ๏ผŒๆœ‰ๅŠฉ่ชชๆœๆฝ›ๅœจๆŠ•่ณ‡ไบบ๏ผŒไป–ๆœƒๅพˆๅฟซ่กŒๅ‹•๏ผŒๆ”นๅ–„ๆŽจ็‰น็š„็‡Ÿ้‹ๅ’Œ็ฒๅˆฉใ€‚\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "highlight(text, scores)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ่‹ฑๆ–‡\n", + "ๆŒ‰็…งHanLPไธ€่ดฏ็š„ๅคš่ฏญ็ง่ฎพ่ฎก๏ผŒไปปไฝ•่ฏญ่จ€้ƒฝๆ”ฏๆŒใ€‚็”ฑไบŽๆœๅŠกๅ™จGPU่ต„ๆบ้™ๅˆถ๏ผŒ็›ฎๅ‰่‹ฑๆ–‡ๆŽฅๅฃๆš‚ๆœชไธŠ็บฟใ€‚ๅฆ‚ๆžœไฝ ๆœ‰็›ธๅบ”้œ€ๆฑ‚๏ผŒๆฌข่ฟŽๅ‰ๅพ€่ฎบๅ›ๅ‘่ตท่ฏทๆ„ฟใ€‚" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "extractive_summarization_restful.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": 
"python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/plugins/hanlp_demo/hanlp_demo/zh/gec_restful.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/gec_restful.ipynb new file mode 100644 index 000000000..6988ddf58 --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/gec_restful.ipynb @@ -0,0 +1,149 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IYwV-UkNNzFp" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1Uf_u7ddMhUt" + }, + "outputs": [], + "source": [ + "!pip install hanlp_restful -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pp-1KqEOOJ4t" + }, + "source": [ + "## ๅˆ›ๅปบๅฎขๆˆท็ซฏ" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "0tmKBu7sNAXX" + }, + "outputs": [], + "source": [ + "from hanlp_restful import HanLPClient\n", + "HanLP = HanLPClient('https://www.hanlp.com/api', auth=None, language='zh') # authไธๅกซๅˆ™ๅŒฟๅ๏ผŒzhไธญๆ–‡๏ผŒmulๅคš่ฏญ็ง" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EmZDmLn9aGxG" + }, + "source": [ + "#### ็”ณ่ฏท็ง˜้’ฅ\n", + "็”ฑไบŽๆœๅŠกๅ™จ็ฎ—ๅŠ›ๆœ‰้™๏ผŒๅŒฟๅ็”จๆˆทๆฏๅˆ†้’Ÿ้™2ๆฌก่ฐƒ็”จใ€‚ๅฆ‚ๆžœไฝ ้œ€่ฆๆ›ดๅคš่ฐƒ็”จๆฌกๆ•ฐ๏ผŒ[ๅปบ่ฎฎ็”ณ่ฏทๅ…่ดนๅ…ฌ็›ŠAPI็ง˜้’ฅauth](https://bbs.hanlp.com/t/hanlp2-1-restful-api/53)ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ่ฏญๆณ•็บ ้”™\n", + "่พ“ๅ…ฅ็Ÿญๆ–‡ๆœฌ๏ผŒๆ‰ง่กŒ่ฏญๆณ•็บ ้”™๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "BqEmDMGGOtk3", + "outputId": "2a0d392f-b99a-4a18-fc7f-754e2abe2e34" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['ๆฏไธช้’ๅนด้ƒฝๅบ”ๅฝ“ๆœ‰่ฟœๅคง็š„ๆŠฑ่ดŸใ€‚', 'ๆœ‰็š„ๅŒๅญฆๅฏน่ฏญ่จ€ๅพˆๆœ‰ๅ…ด่ถฃใ€‚', 'ๆˆ‘ๅธ‚ๆœฌๅœฐๅฑ…ๆฐ‘็บฆๅ ๅ…จๅธ‚ไบบๅฃ็š„70%ใ€‚']" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "HanLP.grammatical_error_correction(['ๆฏไธช้’ๅนด้ƒฝๅบ”ๅฝ“ๆœ‰่ฟœๅคง็š„ๆŠฅๅคใ€‚', 'ๆœ‰็š„ๅŒๅญฆๅฏน่ฏญ่จ€ๅพˆๅ…ด่ถฃใ€‚', 'ๆˆ‘ๅธ‚ๆœฌๅœฐๅฑ…ๆฐ‘็บฆๅ ๅ…จๅธ‚ไบบๅฃ็š„70%ๅคšใ€‚'])" + 
] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ฟ”ๅ›žๅ€ผๆ˜ฏๆฏๆฎต็Ÿญๆ–‡ๆœฌ็š„ไฟฎๆ”น็ป“ๆžœๅˆ—่กจใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ๆต‹่ฏ•็‰ˆ\n", + "ๅฝ“ๅ‰็‰ˆๆœฌไธบๆต‹่ฏ•็‰ˆ๏ผŒๆš‚ๆ—ถไป…ๆ”ฏๆŒๆ‹ผๅ†™ใ€ๆ ‡็‚นๅ’Œ็ฎ€ๅ•็š„่ฏญๆณ•้”™่ฏฏ๏ผŒHanLP็š„็บฟไธŠๆจกๅž‹ๅ’Œ่ฏญๆ–™ๅบ“ไป็„ถๅœจ่ฟญไปฃๅ‘ๅฑ•ไธญใ€‚ๆฌข่ฟŽๅนฟๅคง็”จๆˆทๅฐ†ๆต‹่ฏ•็‰ˆ็š„้—ฎ้ข˜ๅ้ฆˆๅˆฐ[่ฎบๅ›](https://bbs.hankcs.com/c/text-generation/gec/30)๏ผŒๆˆ‘ไปฌๅฐ†ๅœจไธ‹ไธ€ไธช็‰ˆๆœฌไธญ๏ผŒๅฐ†HanLP็š„ๆ–‡ๆœฌ็บ ้”™่ƒฝๅŠ›ๆๅ‡ๅˆฐ้ซ˜่€ƒ่ฏญๆ–‡ๆฐดๅนณใ€‚" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "gec_restful.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/plugins/hanlp_demo/hanlp_demo/zh/keyphrase_restful.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/keyphrase_restful.ipynb new file mode 100644 index 000000000..d4290bcdc --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/keyphrase_restful.ipynb @@ -0,0 +1,243 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IYwV-UkNNzFp" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1Uf_u7ddMhUt" + }, + "outputs": [], + "source": [ + "!pip install hanlp_restful -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pp-1KqEOOJ4t" + }, + "source": [ + "## ๅˆ›ๅปบๅฎขๆˆท็ซฏ" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4M7ka0K5OMWU", + "outputId": "d74f0749-0587-454a-d7c9-7418d45ce534" + }, + "outputs": [], + "source": [ + "from hanlp_restful import HanLPClient\n", + "HanLP = HanLPClient('https://www.hanlp.com/api', auth=None, language='zh') # authไธๅกซๅˆ™ๅŒฟๅ๏ผŒzhไธญๆ–‡๏ผŒmulๅคš่ฏญ็ง" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BMW528wGNulM" + }, + "source": [ + "#### ็”ณ่ฏท็ง˜้’ฅ\n", + "็”ฑไบŽๆœๅŠกๅ™จ็ฎ—ๅŠ›ๆœ‰้™๏ผŒๅŒฟๅ็”จๆˆทๆฏๅˆ†้’Ÿ้™2ๆฌก่ฐƒ็”จใ€‚ๅฆ‚ๆžœไฝ ้œ€่ฆๆ›ดๅคš่ฐƒ็”จๆฌกๆ•ฐ๏ผŒ[ๅปบ่ฎฎ็”ณ่ฏทๅ…่ดนๅ…ฌ็›ŠAPI็ง˜้’ฅauth](https://bbs.hanlp.com/t/hanlp2-1-restful-api/53)ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ๅ…ณ้”ฎ่ฏๆๅ–\n", + "ๅ…ณ้”ฎ่ฏ๏ผˆ็Ÿญ่ฏญ๏ผ‰ๆๅ–็š„็›ฎๆ ‡ๆ˜ฏๆ–‡ๆœฌไธญๆœ€ๅ…ทๆœ‰ไปฃ่กจๆ€ง็š„ๅ…ณ้”ฎ่ฏไปฅๅŠ็Ÿญ่ฏญใ€‚\n", + "### ไธญๆ–‡\n", + "ๅ…ณ้”ฎ่ฏๆๅ–ไปปๅŠก็š„่พ“ๅ…ฅไธบไธ€ๆฎตๆ–‡ๆœฌๅ’Œๆ‰€้œ€็š„ๅ…ณ้”ฎ่ฏๆ•ฐ้‡`topk`๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "BqEmDMGGOtk3", + "outputId": "936d439a-e1ff-4308-d2aa-775955558594" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'่‡ช็„ถ่ฏญ่จ€ๅค„็†': 0.800000011920929,\n", + " 'HanLP็š„ๅ…จ้ƒจๆ€ง่ƒฝ': 0.5256577134132385,\n", + " 'ไธ€้—จๅšๅคง็ฒพๆทฑ็š„ๅญฆ็ง‘': 0.42154020071029663}" + ] + 
}, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "HanLP.keyphrase_extraction('่‡ช็„ถ่ฏญ่จ€ๅค„็†ๆ˜ฏไธ€้—จๅšๅคง็ฒพๆทฑ็š„ๅญฆ็ง‘๏ผŒๆŽŒๆก็†่ฎบๆ‰่ƒฝๅ‘ๆŒฅๅ‡บHanLP็š„ๅ…จ้ƒจๆ€ง่ƒฝใ€‚ '\n", + " 'ใ€Š่‡ช็„ถ่ฏญ่จ€ๅค„็†ๅ…ฅ้—จใ€‹ๆ˜ฏไธ€ๆœฌ้…ๅฅ—HanLP็š„NLPๅ…ฅ้—จไนฆ๏ผŒๅŠฉไฝ ้›ถ่ตท็‚นไธŠๆ‰‹่‡ช็„ถ่ฏญ่จ€ๅค„็†ใ€‚', topk=3)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jj1Jk-2sPHYx" + }, + "source": [ + "่ฟ”ๅ›žๅ€ผไธบ`topk`ไธชๅ…ณ้”ฎ่ฏไปฅๅŠ็›ธๅบ”็š„ๆƒ้‡๏ผŒๆƒ้‡ๅ–ๅ€ผๅŒบ้—ดไธบ$[0, 1]$ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ๅ…ณ้”ฎ่ฏๆๅ–ๅนถไธไป…้™ไบŽ็Ÿญๆ–‡ๆœฌ๏ผŒ้•ฟๆ–‡็ซ ไนŸไธ€ๆ ทๆ”ฏๆŒ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ๆ–ฐๅ† ็—…ๆฏ’ๆ ธ้…ธ้˜ณๆ€งๆ„ŸๆŸ“': 0.888239324092865,\n", + " '็กฎ่ฏŠ็—…ไพ‹': 0.8868124485015869,\n", + " 'ๆœฌๅœŸๆ— ็—‡็Šถๆ„ŸๆŸ“่€…': 0.8557102680206299,\n", + " 'ๅฑžๅœฐ็คพๅŒบ๏ผˆๆ‘ๅฑฏ๏ผ‰': 0.8164600133895874,\n", + " '็–ซๆƒ…้˜ฒๆŽงๅทฅไฝœ': 0.7749382853507996,\n", + " 'ๆˆ‘ๅธ‚็–ซๆƒ…้˜ฒๆŽง่ฆๆฑ‚': 0.7502512335777283,\n", + " '็—‡็Šถ': 0.669366180896759,\n", + " 'ๆˆ‘ๅธ‚็–ซๆƒ…ๅฝขๅŠฟ': 0.6673010587692261,\n", + " 'ๆ„ŸๆŸ“': 0.6663177013397217,\n", + " 'ๆœฌๅœŸ็กฎ่ฏŠ็—…ไพ‹': 0.6464788317680359}" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "doc = '''\n", + "4ๆœˆ15ๆ—ฅ0-24ๆ—ถ๏ผŒ้•ฟๆ˜ฅๅธ‚ๆ–ฐๅขžๆœฌๅœŸ็กฎ่ฏŠ็—…ไพ‹157ไพ‹๏ผˆๅซ57ไพ‹ๆ— ็—‡็Šถๆ„ŸๆŸ“่€…่ฝฌไธบ็กฎ่ฏŠ็—…ไพ‹๏ผ‰๏ผŒๆ–ฐๅขžๆœฌๅœŸๆ— ็—‡็Šถๆ„ŸๆŸ“่€…407ไพ‹ใ€‚\n", + "ไปฅไธŠไบบๅ‘˜ๅ‡ไธบ้š”็ฆป็ฎกๆŽงๆœŸ้—ด็ญ›ๆŸฅๆ–ฐๅ† ็—…ๆฏ’ๆ ธ้…ธ้˜ณๆ€งๆ„ŸๆŸ“่€…ใ€‚\n", + "ๅฝ“ๅ‰ๆˆ‘ๅธ‚็–ซๆƒ…ๅฝขๅŠฟไธฅๅณป๏ผŒไธบๅšๅฅฝๅ…จๅธ‚็–ซๆƒ…้˜ฒๆŽงๅทฅไฝœ๏ผŒๅฐฝๅฟซๆขๅคๆญฃๅธธ็คพไผš็งฉๅบๅ’Œ็ปๆตŽ็คพไผšๅ‘ๅฑ•๏ผŒ้•ฟๆ˜ฅๅธ‚ๆ–ฐๅ† ่‚บ็‚Ž็–ซๆƒ…้˜ฒๆŽงๅทฅไฝœ้ข†ๅฏผๅฐ็ป„ๅŠžๅ…ฌๅฎคๆ้†’ๅนฟๅคงๅธ‚ๆฐ‘๏ผŒ\n", + "่ฏทไธฅๆ 
ผ้ตๅฎˆๆˆ‘ๅธ‚็–ซๆƒ…้˜ฒๆŽง่ฆๆฑ‚๏ผŒ้…ๅˆๅ„้ƒจ้—จ่ฝๅฎžๅฅฝ้˜ฒๆŽงๆŽชๆ–ฝ๏ผŒ่ฟ›ไธ€ๆญฅๆ้ซ˜้˜ฒ่Œƒๆ„่ฏ†๏ผŒๅšๆŒ่ง„่Œƒๆˆดๅฃ็ฝฉใ€ๅ‹คๆด—ๆ‰‹ใ€ๅธธ้€š้ฃŽใ€ไฟๆŒ็คพไบค่ท็ฆปใ€ไธ่š้คใ€ไธ่š้›†๏ผŒ\n", + "ๅ‡ๅฐ‘็–พ็—…ๆ„ŸๆŸ“้ฃŽ้™ฉใ€‚ไธ€ๆ—ฆๅ‡บ็Žฐๅ‘็ƒญใ€ๅนฒๅ’ณใ€ไนๅŠ›ใ€ๅ’ฝ็—›ใ€ๅ—…ๅ‘ณ่ง‰ๅ‡้€€ๆˆ–ไธงๅคฑ็ญ‰ไธ้€‚็—‡็Šถ๏ผŒๅบ”ๅŠๆ—ถๅ‘ๅฑžๅœฐ็คพๅŒบ๏ผˆๆ‘ๅฑฏ๏ผ‰ๆˆ–็–พๆŽงๆœบๆž„ๆŠฅๅ‘Šใ€‚\n", + "'''\n", + "HanLP.keyphrase_extraction(doc)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### ๅฏ่ง†ๅŒ–" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "4ๆœˆ15ๆ—ฅ0-24ๆ—ถ๏ผŒ้•ฟๆ˜ฅๅธ‚ๆ–ฐๅขžๆœฌๅœŸ็กฎ่ฏŠ็—…ไพ‹157ไพ‹๏ผˆๅซ57ไพ‹ๆ— ็—‡็Šถๆ„ŸๆŸ“่€…่ฝฌไธบ็กฎ่ฏŠ็—…ไพ‹๏ผ‰๏ผŒๆ–ฐๅขžๆœฌๅœŸๆ— ็—‡็Šถๆ„ŸๆŸ“่€…407ไพ‹ใ€‚\n", + "ไปฅไธŠไบบๅ‘˜ๅ‡ไธบ้š”็ฆป็ฎกๆŽงๆœŸ้—ด็ญ›ๆŸฅๆ–ฐๅ† ็—…ๆฏ’ๆ ธ้…ธ้˜ณๆ€งๆ„ŸๆŸ“่€…ใ€‚\n", + "ๅฝ“ๅ‰ๆˆ‘ๅธ‚็–ซๆƒ…ๅฝขๅŠฟไธฅๅณป๏ผŒไธบๅšๅฅฝๅ…จๅธ‚็–ซๆƒ…้˜ฒๆŽงๅทฅไฝœ๏ผŒๅฐฝๅฟซๆขๅคๆญฃๅธธ็คพไผš็งฉๅบๅ’Œ็ปๆตŽ็คพไผšๅ‘ๅฑ•๏ผŒ้•ฟๆ˜ฅๅธ‚ๆ–ฐๅ† ่‚บ็‚Ž็–ซๆƒ…้˜ฒๆŽงๅทฅไฝœ้ข†ๅฏผๅฐ็ป„ๅŠžๅ…ฌๅฎคๆ้†’ๅนฟๅคงๅธ‚ๆฐ‘๏ผŒ\n", + "่ฏทไธฅๆ ผ้ตๅฎˆๆˆ‘ๅธ‚็–ซๆƒ…้˜ฒๆŽง่ฆๆฑ‚๏ผŒ้…ๅˆๅ„้ƒจ้—จ่ฝๅฎžๅฅฝ้˜ฒๆŽงๆŽชๆ–ฝ๏ผŒ่ฟ›ไธ€ๆญฅๆ้ซ˜้˜ฒ่Œƒๆ„่ฏ†๏ผŒๅšๆŒ่ง„่Œƒๆˆดๅฃ็ฝฉใ€ๅ‹คๆด—ๆ‰‹ใ€ๅธธ้€š้ฃŽใ€ไฟๆŒ็คพไบค่ท็ฆปใ€ไธ่š้คใ€ไธ่š้›†๏ผŒ\n", + "ๅ‡ๅฐ‘็–พ็—…ๆ„ŸๆŸ“้ฃŽ้™ฉใ€‚ไธ€ๆ—ฆๅ‡บ็Žฐๅ‘็ƒญใ€ๅนฒๅ’ณใ€ไนๅŠ›ใ€ๅ’ฝ็—›ใ€ๅ—…ๅ‘ณ่ง‰ๅ‡้€€ๆˆ–ไธงๅคฑ็ญ‰ไธ้€‚็—‡็Šถ๏ผŒๅบ”ๅŠๆ—ถๅ‘ๅฑžๅœฐ็คพๅŒบ๏ผˆๆ‘ๅฑฏ๏ผ‰ๆˆ–็–พๆŽงๆœบๆž„ๆŠฅๅ‘Šใ€‚\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def highlight(text, scores):\n", + " for k, v in scores.items():\n", + " text = text.replace(k, f'{k}')\n", + " from IPython.display import display, HTML\n", + " display(HTML(text))\n", + "\n", + "scores = HanLP.keyphrase_extraction(doc)\n", + "highlight(doc, scores)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ่‹ฑๆ–‡\n", + 
"ๆŒ‰็…งHanLPไธ€่ดฏ็š„ๅคš่ฏญ็ง่ฎพ่ฎก๏ผŒไปปไฝ•่ฏญ่จ€้ƒฝๆ”ฏๆŒใ€‚็”ฑไบŽๆœๅŠกๅ™จGPU่ต„ๆบ้™ๅˆถ๏ผŒ็›ฎๅ‰่‹ฑๆ–‡ๆŽฅๅฃๆš‚ๆœชไธŠ็บฟใ€‚ๅฆ‚ๆžœไฝ ๆœ‰็›ธๅบ”้œ€ๆฑ‚๏ผŒๆฌข่ฟŽๅ‰ๅพ€่ฎบๅ›ๅ‘่ตท่ฏทๆ„ฟใ€‚" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "keyphrase_restful.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/plugins/hanlp_demo/hanlp_demo/zh/lid_restful.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/lid_restful.ipynb new file mode 100644 index 000000000..5c24acaea --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/lid_restful.ipynb @@ -0,0 +1,245 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nf9TgeCTC0OT" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jaW4eu6kC0OU", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "!pip install hanlp_restful -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_xI_bLAaC0OU" + }, + "source": [ + "## ๅˆ›ๅปบๅฎขๆˆท็ซฏ" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IYwV-UkNNzFp", + "outputId": "54065443-9b0a-444c-f6c0-c701bc86400b", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "from hanlp_restful import HanLPClient\n", + "HanLP = HanLPClient('https://www.hanlp.com/api', auth=None, language='zh') # authไธๅกซๅˆ™ๅŒฟๅ๏ผŒzhไธญๆ–‡๏ผŒmulๅคš่ฏญ็ง" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1Uf_u7ddMhUt", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "#### ็”ณ่ฏท็ง˜้’ฅ\n", + "็”ฑไบŽๆœๅŠกๅ™จ็ฎ—ๅŠ›ๆœ‰้™๏ผŒๅŒฟๅ็”จๆˆทๆฏๅˆ†้’Ÿ้™2ๆฌก่ฐƒ็”จใ€‚ๅฆ‚ๆžœไฝ ้œ€่ฆๆ›ดๅคš่ฐƒ็”จๆฌกๆ•ฐ๏ผŒ[ๅปบ่ฎฎ็”ณ่ฏทๅ…่ดนๅ…ฌ็›ŠAPI็ง˜้’ฅauth](https://bbs.hanlp.com/t/hanlp2-1-restful-api/53)ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ่ฏญ็ง่ฏ†ๅˆซ\n", + "่ฏญ็ง่ฏ†ๅˆซไปปๅŠก็š„่พ“ๅ…ฅไธบไธ€ไธชๆˆ–ๅคšไธชๆ–‡ๆกฃ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "BqEmDMGGOtk3" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'en'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "HanLP.language_identification('In 2021, HanLPv2.1 delivers state-of-the-art multilingual NLP techniques to production environments.')" + ] + }, + { + "cell_type": "markdown", + "metadata": 
{ + "id": "SwaPn1hjC0OW" + }, + "source": [ + "่ฟ”ๅ›žๅฏน่ฑกไธบ[ISO 639-1็ผ–็ ](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes)ใ€‚HanLPๆ”ฏๆŒ่ฟ”ๅ›ž่ฏญ็งๅฏนๅบ”็š„ๆฆ‚็Ž‡๏ผˆ็ฝฎไฟกๅบฆ๏ผ‰๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "egpWwHKxC0OX", + "outputId": "f7c77687-dd75-4fa2-dbd2-be6bda8a3fff" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['ja', 0.9976244568824768]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "HanLP.language_identification('2021ๅนดใ€HanLPv2.1ใฏๆฌกไธ–ไปฃใฎๆœ€ๅ…ˆ็ซฏๅคš่จ€่ชžNLPๆŠ€่ก“ใ‚’ๆœฌ็•ช็’ฐๅขƒใซๅฐŽๅ…ฅใ—ใพใ™ใ€‚', prob=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kq_j5TLFC0OX" + }, + "source": [ + "HanLPไนŸๆ”ฏๆŒ่ฟ”ๅ›žๆฆ‚็Ž‡ๆœ€้ซ˜็š„`topk`ไธช่ฏญ็ง๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "isJhzYyIC0OX", + "outputId": "683c8489-dffc-426e-f95b-e91dfb373260" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['zh', 'ja']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "HanLP.language_identification('2021ๅนด HanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚', topk=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ฏฅๅŠŸ่ƒฝๅฏนไบŽๆททๅˆไบ†ๅคšไธช่ฏญ็ง็š„ๆ–‡ๆกฃ่€Œ่จ€็‰นๅˆซๅฎž็”จ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'zh': 0.3952908217906952,\n", + " 'en': 0.37189167737960815,\n", + " 'ja': 0.056213412433862686}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "text = '''\n", + "2021ๅนด 
HanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚\n", + "In 2021, HanLPv2.1 delivers state-of-the-art multilingual NLP techniques to production environments.\n", + "'''\n", + "\n", + "HanLP.language_identification(text, topk=3, prob=True)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "lid_restful.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/plugins/hanlp_demo/hanlp_demo/zh/lid_stl.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/lid_stl.ipynb new file mode 100644 index 000000000..10ef9444f --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/lid_stl.ipynb @@ -0,0 +1,281 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nf9TgeCTC0OT" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jaW4eu6kC0OU", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "!pip install hanlp[fasttext] -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_xI_bLAaC0OU" + }, + "source": [ + "## ๅŠ ่ฝฝๆจกๅž‹\n", + "HanLP็š„ๅทฅไฝœๆต็จ‹ๆ˜ฏๅ…ˆๅŠ ่ฝฝๆจกๅž‹๏ผŒๆจกๅž‹็š„ๆ ‡็คบ็ฌฆๅญ˜ๅ‚จๅœจ`hanlp.pretrained`่ฟ™ไธชๅŒ…ไธญ๏ผŒๆŒ‰็…งNLPไปปๅŠกๅฝ’็ฑปใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IYwV-UkNNzFp", + "outputId": "54065443-9b0a-444c-f6c0-c701bc86400b", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'CHNSENTICORP_BERT_BASE_ZH': 'https://file.hankcs.com/hanlp/classification/chnsenticorp_bert_base_20211228_163210.zip',\n", + " 'SST2_ALBERT_BASE_EN': 'https://file.hankcs.com/hanlp/classification/sst2_albert_base_20211228_164917.zip',\n", + " 'LID_176_FASTTEXT_BASE': 'https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin',\n", + " 'LID_176_FASTTEXT_SMALL': 'https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.ftz'}" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import hanlp\n", + "hanlp.pretrained.classifiers.ALL # ไปปๅŠก่ง็ฌฌไธ€ไธชๅญ—ๆฎต" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1Uf_u7ddMhUt", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "่ฐƒ็”จ`hanlp.load`่ฟ›่กŒๅŠ ่ฝฝ๏ผŒๆจกๅž‹ไผš่‡ชๅŠจไธ‹่ฝฝๅˆฐๆœฌๅœฐ็ผ“ๅญ˜ใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "pp-1KqEOOJ4t", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + 
"name": "stderr", + "output_type": "stream", + "text": [ + "Warning : `load_model` does not return WordVectorModel or SupervisedModel any more, but a `FastText` object which is very similar.\n" + ] + } + ], + "source": [ + "lid = hanlp.load('LID_176_FASTTEXT_BASE')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ่ฏญ็ง่ฏ†ๅˆซ\n", + "่ฏญ็ง่ฏ†ๅˆซไปปๅŠก็š„่พ“ๅ…ฅไธบไธ€ไธชๆˆ–ๅคšไธชๆ–‡ๆกฃ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "BqEmDMGGOtk3" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'en'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lid('In 2021, HanLPv2.1 delivers state-of-the-art multilingual NLP techniques to production environments.')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SwaPn1hjC0OW" + }, + "source": [ + "่ฟ”ๅ›žๅฏน่ฑกไธบ[ISO 639-1็ผ–็ ](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes)ใ€‚HanLPๆ”ฏๆŒ่ฟ”ๅ›ž่ฏญ็งๅฏนๅบ”็š„ๆฆ‚็Ž‡๏ผˆ็ฝฎไฟกๅบฆ๏ผ‰๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "egpWwHKxC0OX", + "outputId": "f7c77687-dd75-4fa2-dbd2-be6bda8a3fff" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "('ja', 0.9976244568824768)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lid('2021ๅนดใ€HanLPv2.1ใฏๆฌกไธ–ไปฃใฎๆœ€ๅ…ˆ็ซฏๅคš่จ€่ชžNLPๆŠ€่ก“ใ‚’ๆœฌ็•ช็’ฐๅขƒใซๅฐŽๅ…ฅใ—ใพใ™ใ€‚', prob=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kq_j5TLFC0OX" + }, + "source": [ + "HanLPไนŸๆ”ฏๆŒ่ฟ”ๅ›žๆฆ‚็Ž‡ๆœ€้ซ˜็š„`topk`ไธช่ฏญ็ง๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "isJhzYyIC0OX", + "outputId": "683c8489-dffc-426e-f95b-e91dfb373260" + }, + "outputs": [ + { + 
"data": { + "text/plain": [ + "['zh', 'ja']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lid('2021ๅนด HanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚', topk=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ฏฅๅŠŸ่ƒฝๅฏนไบŽๆททๅˆไบ†ๅคšไธช่ฏญ็ง็š„ๆ–‡ๆกฃ่€Œ่จ€็‰นๅˆซๅฎž็”จ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'zh': 0.3952908217906952,\n", + " 'en': 0.37189167737960815,\n", + " 'ja': 0.056213412433862686}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "text = '''\n", + "2021ๅนด HanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚\n", + "In 2021, HanLPv2.1 delivers state-of-the-art multilingual NLP techniques to production environments.\n", + "'''\n", + "\n", + "lid(text, topk=3, prob=True)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "lid_stl.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/plugins/hanlp_demo/hanlp_demo/zh/ner_mtl.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/ner_mtl.ipynb new file mode 100644 index 000000000..027042ce5 --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/ner_mtl.ipynb @@ -0,0 +1,523 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IYwV-UkNNzFp" + }, + "source": [ + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1Uf_u7ddMhUt", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pp-1KqEOOJ4t", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "!pip install hanlp -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0tmKBu7sNAXX", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## ๅŠ ่ฝฝๆจกๅž‹\n", + "HanLP็š„ๅทฅไฝœๆต็จ‹ๆ˜ฏๅ…ˆๅŠ ่ฝฝๆจกๅž‹๏ผŒๆจกๅž‹็š„ๆ ‡็คบ็ฌฆๅญ˜ๅ‚จๅœจ`hanlp.pretrained`่ฟ™ไธชๅŒ…ไธญ๏ผŒๆŒ‰็…งNLPไปปๅŠกๅฝ’็ฑปใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "EmZDmLn9aGxG", + "outputId": "38469cbe-d56c-4648-b103-b67e6d22aeff", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'OPEN_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_SMALL_ZH': 'https://file.hankcs.com/hanlp/mtl/open_tok_pos_ner_srl_dep_sdp_con_electra_small_20201223_035557.zip',\n", + " 'OPEN_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH': 'https://file.hankcs.com/hanlp/mtl/open_tok_pos_ner_srl_dep_sdp_con_electra_base_20201223_201906.zip',\n", + " 'CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_SMALL_ZH': 'https://file.hankcs.com/hanlp/mtl/close_tok_pos_ner_srl_dep_sdp_con_electra_small_20210111_124159.zip',\n", + " 'CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH': 'https://file.hankcs.com/hanlp/mtl/close_tok_pos_ner_srl_dep_sdp_con_electra_base_20210111_124519.zip',\n", + " 'CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ERNIE_GRAM_ZH': 'https://file.hankcs.com/hanlp/mtl/close_tok_pos_ner_srl_dep_sdp_con_ernie_gram_base_aug_20210904_145403.zip',\n", + " 
'UD_ONTONOTES_TOK_POS_LEM_FEA_NER_SRL_DEP_SDP_CON_MT5_SMALL': 'https://file.hankcs.com/hanlp/mtl/ud_ontonotes_tok_pos_lem_fea_ner_srl_dep_sdp_con_mt5_small_20210228_123458.zip',\n", + " 'UD_ONTONOTES_TOK_POS_LEM_FEA_NER_SRL_DEP_SDP_CON_XLMR_BASE': 'https://file.hankcs.com/hanlp/mtl/ud_ontonotes_tok_pos_lem_fea_ner_srl_dep_sdp_con_xlm_base_20210602_211620.zip',\n", + " 'NPCMJ_UD_KYOTO_TOK_POS_CON_BERT_BASE_CHAR_JA': 'https://file.hankcs.com/hanlp/mtl/npcmj_ud_kyoto_tok_pos_ner_dep_con_srl_bert_base_char_ja_20210914_133742.zip'}" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import hanlp\n", + "hanlp.pretrained.mtl.ALL # MTLๅคšไปปๅŠก๏ผŒๅ…ทไฝ“ไปปๅŠก่งๆจกๅž‹ๅ็งฐ๏ผŒ่ฏญ็ง่งๅ็งฐๆœ€ๅŽไธ€ไธชๅญ—ๆฎตๆˆ–็›ธๅบ”่ฏญๆ–™ๅบ“" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "w0lm87NUsMwW" + }, + "source": [ + "่ฐƒ็”จ`hanlp.load`่ฟ›่กŒๅŠ ่ฝฝ๏ผŒๆจกๅž‹ไผš่‡ชๅŠจไธ‹่ฝฝๅˆฐๆœฌๅœฐ็ผ“ๅญ˜ใ€‚่‡ช็„ถ่ฏญ่จ€ๅค„็†ๅˆ†ไธบ่ฎธๅคšไปปๅŠก๏ผŒๅˆ†่ฏๅชๆ˜ฏๆœ€ๅˆ็บง็š„ไธ€ไธชใ€‚ไธŽๅ…ถๆฏไธชไปปๅŠกๅ•็‹ฌๅˆ›ๅปบไธ€ไธชๆจกๅž‹๏ผŒไธๅฆ‚ๅˆฉ็”จHanLP็š„่”ๅˆๆจกๅž‹ไธ€ๆฌกๆ€งๅฎŒๆˆๅคšไธชไปปๅŠก๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "6Evnxsa0sMwW", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "HanLP = hanlp.load(hanlp.pretrained.mtl.CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bPUHdNJ-sMwW" + }, + "source": [ + "## ๅ‘ฝๅๅฎžไฝ“่ฏ†ๅˆซ" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wxctCigrTKu-" + }, + "source": [ + "ๅŒๆ—ถๆ‰ง่กŒๆ‰€ๆœ‰ๆ ‡ๅ‡†็š„ๅ‘ฝๅๅฎžไฝ“่ฏ†ๅˆซ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zo08uquCTFSk", + "outputId": "21be671b-ead0-43c9-cc3a-32c305d8be29" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " 
\"tok/fine\": [\n", + " [\"2021ๅนด\", \"HanLPv2.1\", \"ไธบ\", \"็”Ÿไบง\", \"็Žฏๅขƒ\", \"ๅธฆๆฅ\", \"ๆฌก\", \"ไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš\", \"่ฏญ็ง\", \"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"],\n", + " [\"้˜ฟๅฉ†ไธป\", \"ๆฅๅˆฐ\", \"ๅŒ—ไบฌ\", \"็ซ‹ๆ–นๅบญ\", \"ๅ‚่ง‚\", \"่‡ช็„ถ\", \"่ฏญไน‰\", \"็ง‘ๆŠ€\", \"ๅ…ฌๅธ\", \"ใ€‚\"]\n", + " ],\n", + " \"ner/msra\": [\n", + " [[\"2021ๅนด\", \"DATE\", 0, 1], [\"HanLPv2.1\", \"WWW\", 1, 2]],\n", + " [[\"ๅŒ—ไบฌ\", \"LOCATION\", 2, 3], [\"็ซ‹ๆ–นๅบญ\", \"LOCATION\", 3, 4], [\"่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ\", \"ORGANIZATION\", 5, 9]]\n", + " ],\n", + " \"ner/pku\": [\n", + " [],\n", + " [[\"ๅŒ—ไบฌ็ซ‹ๆ–นๅบญ\", \"ns\", 2, 4], [\"่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ\", \"nt\", 5, 9]]\n", + " ],\n", + " \"ner/ontonotes\": [\n", + " [[\"2021ๅนด\", \"DATE\", 0, 1], [\"HanLPv2.1\", \"ORG\", 1, 2]],\n", + " [[\"ๅŒ—ไบฌ็ซ‹ๆ–นๅบญ\", \"FAC\", 2, 4], [\"่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ\", \"ORG\", 5, 9]]\n", + " ]\n", + "}\n" + ] + } + ], + "source": [ + "print(HanLP(['2021ๅนดHanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚', '้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธใ€‚'], tasks='ner*'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ๆฏไธชๅ››ๅ…ƒ็ป„่กจ็คบ`[ๅ‘ฝๅๅฎžไฝ“, ็ฑปๅž‹ๆ ‡็ญพ, ่ตทๅง‹ไธ‹ๆ ‡, ็ปˆๆญขไธ‹ๆ ‡]`๏ผŒไธ‹ๆ ‡ๆŒ‡็š„ๆ˜ฏๅ‘ฝๅๅฎžไฝ“ๅœจๅ•่ฏๆ•ฐ็ป„ไธญ็š„ไธ‹ๆ ‡๏ผŒๅ•่ฏๆ•ฐ็ป„้ป˜่ฎคไธบ็ฌฌไธ€ไธชไปฅ`tok`ๅผ€ๅคด็š„ๆ•ฐ็ป„ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cqEWnj_7p2Lf" + }, + "source": [ + "ไปปๅŠก่ถŠๅฐ‘๏ผŒ้€Ÿๅบฆ่ถŠๅฟซใ€‚ๅฆ‚ๆŒ‡ๅฎšไป…ๆ‰ง่กŒๅ‘ฝๅๅฎžไฝ“่ฏ†ๅˆซ๏ผŒ้ป˜่ฎคMSRAๆ ‡ๅ‡†๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 572 + }, + "id": "BqEmDMGGOtk3", + "outputId": "33790ca9-7013-456f-c1cb-e5ddce90a457" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Token \tNER Type \n", + 
"โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\n", + "2021ๅนด \tโ”€โ”€โ”€โ–บDATE \n", + "HanLPv2.1\tโ”€โ”€โ”€โ–บWWW \n", + "ไธบ \t \n", + "็”Ÿไบง \t \n", + "็Žฏๅขƒ \t \n", + "ๅธฆๆฅ \t \n", + "ๆฌกไธ–ไปฃ \tโ”€โ”€โ”€โ–บDATE \n", + "ๆœ€ \t \n", + "ๅ…ˆ่ฟ› \t \n", + "็š„ \t \n", + "ๅคš \t \n", + "่ฏญ็ง \t \n", + "NLP \t \n", + "ๆŠ€ๆœฏ \t \n", + "ใ€‚ \t \n", + "้˜ฟๅฉ†ไธป \t \n", + "ๆฅๅˆฐ \t \n", + "ๅŒ—ไบฌ \tโ—„โ”€โ” \n", + "็ซ‹ๆ–นๅบญ \tโ—„โ”€โ”ดโ–บORGANIZATION\n", + "ๅ‚่ง‚ \t \n", + "่‡ช็„ถ \tโ—„โ”€โ” \n", + "่ฏญไน‰ \t โ”‚ \n", + "็ง‘ๆŠ€ \t โ”œโ–บORGANIZATION\n", + "ๅ…ฌๅธ \tโ—„โ”€โ”˜ \n", + "ใ€‚ \t \n" + ] + } + ], + "source": [ + "HanLP('2021ๅนดHanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธใ€‚', tasks='ner').pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jj1Jk-2sPHYx" + }, + "source": [ + "ๆ‰ง่กŒOntoNotesๅ‘ฝๅๅฎžไฝ“่ฏ†ๅˆซ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 572 + }, + "id": "1goEC7znPNkI", + "outputId": "2a97331c-a5fb-4d3c-ccf2-ce2186616c57", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Token \tNER Type\n", + "โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\n", + "2021ๅนด \tโ”€โ”€โ”€โ–บDATE\n", + "HanLPv2.1\tโ”€โ”€โ”€โ–บORG \n", + "ไธบ \t \n", + "็”Ÿไบง \t \n", + "็Žฏๅขƒ \t \n", + "ๅธฆๆฅ \t \n", + "ๆฌกไธ–ไปฃ \t \n", + "ๆœ€ \t \n", + "ๅ…ˆ่ฟ› \t \n", + "็š„ \t \n", + "ๅคš \t \n", + "่ฏญ็ง \t \n", + "NLP \t \n", + "ๆŠ€ๆœฏ \t \n", + "ใ€‚ \t \n", + "้˜ฟๅฉ†ไธป \t \n", + "ๆฅๅˆฐ \t \n", + "ๅŒ—ไบฌ \tโ—„โ”€โ” \n", + "็ซ‹ๆ–นๅบญ \tโ—„โ”€โ”ดโ–บORG \n", + "ๅ‚่ง‚ \t \n", + "่‡ช็„ถ \tโ—„โ”€โ” \n", + "่ฏญไน‰ \t โ”‚ \n", + "็ง‘ๆŠ€ \t โ”œโ–บORG \n", + "ๅ…ฌๅธ \tโ—„โ”€โ”˜ \n", + "ใ€‚ \t \n" + ] + } + ], + "source": [ + 
"HanLP('2021ๅนดHanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธใ€‚', tasks='ner/ontonotes').pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### ๆณจๆ„\n", + "Native API็š„่พ“ๅ…ฅๅ•ไฝ้™ๅฎšไธบๅฅๅญ๏ผŒ้œ€ไฝฟ็”จ[ๅคš่ฏญ็งๅˆ†ๅฅๆจกๅž‹](https://github.com/hankcs/HanLP/blob/master/plugins/hanlp_demo/hanlp_demo/sent_split.py)ๆˆ–[ๅŸบไบŽ่ง„ๅˆ™็š„ๅˆ†ๅฅๅ‡ฝๆ•ฐ](https://github.com/hankcs/HanLP/blob/master/hanlp/utils/rules.py#L19)ๅ…ˆ่กŒๅˆ†ๅฅใ€‚RESTfulๅŒๆ—ถๆ”ฏๆŒๅ…จๆ–‡ใ€ๅฅๅญใ€ๅทฒๅˆ†่ฏ็š„ๅฅๅญใ€‚้™คๆญคไน‹ๅค–๏ผŒRESTfulๅ’Œnativeไธค็งAPI็š„่ฏญไน‰่ฎพ่ฎกๅฎŒๅ…จไธ€่‡ด๏ผŒ็”จๆˆทๅฏไปฅๆ— ็ผไบ’ๆขใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P7CNTDBRsiYa" + }, + "source": [ + "## ่‡ชๅฎšไน‰่ฏๅ…ธ" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZXtRTXlBsmtw" + }, + "source": [ + "่‡ชๅฎšไน‰่ฏๅ…ธๆ˜ฏNERไปปๅŠก็š„ๆˆๅ‘˜ๅ˜้‡๏ผŒ่ฆๆ“ไฝœ่‡ชๅฎšไน‰่ฏๅ…ธ๏ผŒๅ…ˆ่Žทๅ–ไธ€ไธชNERไปปๅŠกใ€‚ไปฅMSRAไธบไพ‹๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "QgY22h0AszsA" + }, + "outputs": [], + "source": [ + "ner = HanLP['ner/msra']" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_6fPzuyps98H" + }, + "source": [ + "### ็™ฝๅๅ•่ฏๅ…ธ\n", + "็™ฝๅๅ•่ฏๅ…ธไธญ็š„่ฏ่ฏญไผšๅฐฝ้‡่ขซ่พ“ๅ‡บใ€‚ๅฝ“็„ถ๏ผŒHanLPไปฅ็ปŸ่ฎกไธบไธป๏ผŒ่ฏๅ…ธ็š„ไผ˜ๅ…ˆ็บงๅพˆไฝŽใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 321 + }, + "id": "plNDyWhws5qg", + "outputId": "7120d400-022c-42e9-fca9-febe3745d2c9" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Token\tNER Type \n", + "โ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\n", + "2021ๅนด\tโ”€โ”€โ”€โ–บDATE \n", + "ๆต‹่ฏ• \t \n", + "้ซ˜่ก€ๅŽ‹ \t \n", + "ๆ˜ฏ \t \n", + "138 \tโ”€โ”€โ”€โ–บINTEGER\n", + "๏ผŒ \t \n", + 
"ๆ—ถ้—ด \t \n", + "ๆ˜ฏ \t \n", + "ๅˆ้ฅญ \tโ—„โ”€โ” \n", + "ๅŽ \tโ—„โ”€โ”ดโ–บTIME \n", + "2็‚น45 \tโ”€โ”€โ”€โ–บTIME \n", + "๏ผŒ \t \n", + "ไฝŽ่ก€ๅŽ‹ \t \n", + "ๆ˜ฏ \t \n", + "44 \tโ”€โ”€โ”€โ–บINTEGER\n" + ] + } + ], + "source": [ + "ner.dict_whitelist = {'ๅˆ้ฅญๅŽ': 'TIME'}\n", + "doc = HanLP('2021ๅนดๆต‹่ฏ•้ซ˜่ก€ๅŽ‹ๆ˜ฏ138๏ผŒๆ—ถ้—ดๆ˜ฏๅˆ้ฅญๅŽ2็‚น45๏ผŒไฝŽ่ก€ๅŽ‹ๆ˜ฏ44', tasks='ner/msra')\n", + "doc.pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aR_8TICmtw_E" + }, + "source": [ + "### ๅผบๅˆถ่ฏๅ…ธ\n", + "ๅฆ‚ๆžœไฝ ่ฏป่ฟ‡[ใ€Š่‡ช็„ถ่ฏญ่จ€ๅค„็†ๅ…ฅ้—จใ€‹](http://nlp.hankcs.com/book.php)๏ผŒไฝ ๅฐฑไผš็†่งฃBMESOๆ ‡ๆณจ้›†๏ผŒไบŽๆ˜ฏไฝ ๅฏไปฅ็›ดๆŽฅๅนฒ้ข„็ปŸ่ฎกๆจกๅž‹้ข„ๆต‹็š„ๆ ‡็ญพ๏ผŒๆ‹ฟๅˆฐๆœ€้ซ˜ไผ˜ๅ…ˆ็บง็š„ๆƒ้™ใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 268 + }, + "id": "sWPljj3stsEA", + "outputId": "99c4c281-a5b6-46bb-dffd-c1722fee7aee" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "To\tNER Type \n", + "โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\n", + "ไป– \t \n", + "ๅœจ \t \n", + "ๆต™ๆฑŸ\tโ”€โ”€โ”€โ–บLOCATION\n", + "้‡‘ๅŽ\tโ”€โ”€โ”€โ–บLOCATION\n", + "ๅ‡บ็”Ÿ\t \n", + "๏ผŒ \t \n", + "ไป– \t \n", + "็š„ \t \n", + "ๅๅญ—\t \n", + "ๅซ \t \n", + "้‡‘ๅŽ\tโ”€โ”€โ”€โ–บPERSON \n", + "ใ€‚ \t \n" + ] + } + ], + "source": [ + "ner.dict_tags = {('ๅๅญ—', 'ๅซ', '้‡‘ๅŽ'): ('O', 'O', 'S-PERSON')}\n", + "HanLP('ไป–ๅœจๆต™ๆฑŸ้‡‘ๅŽๅ‡บ็”Ÿ๏ผŒไป–็š„ๅๅญ—ๅซ้‡‘ๅŽใ€‚', tasks='ner/msra').pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fkTC0GFxtinZ" + }, + "source": [ + "### ้ป‘ๅๅ•่ฏๅ…ธ\n", + "้ป‘ๅๅ•ไธญ็š„่ฏ่ฏญ็ปๅฏนไธไผš่ขซๅฝ“ๅšๅ‘ฝๅๅฎžไฝ“ใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 268 + }, + "id": "bIJpgdGauLJK", + "outputId": "e74ec7ba-00fd-4958-d772-a1d1c40d1033" + 
}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "To\tNER Type \n", + "โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\n", + "ไป– \t \n", + "ๅœจ \t \n", + "ๆต™ๆฑŸ\tโ”€โ”€โ”€โ–บLOCATION\n", + "้‡‘ๅŽ\t \n", + "ๅ‡บ็”Ÿ\t \n", + "๏ผŒ \t \n", + "ไป– \t \n", + "็š„ \t \n", + "ๅๅญ—\t \n", + "ๅซ \t \n", + "้‡‘ๅŽ\t \n", + "ใ€‚ \t \n" + ] + } + ], + "source": [ + "ner.dict_blacklist = {'้‡‘ๅŽ'}\n", + "HanLP('ไป–ๅœจๆต™ๆฑŸ้‡‘ๅŽๅ‡บ็”Ÿ๏ผŒไป–็š„ๅๅญ—ๅซ้‡‘ๅŽใ€‚', tasks='ner/msra').pretty_print()" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "ner_mtl.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/plugins/hanlp_demo/hanlp_demo/zh/ner_restful.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/ner_restful.ipynb new file mode 100644 index 000000000..695e75d3f --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/ner_restful.ipynb @@ -0,0 +1,335 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IYwV-UkNNzFp" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1Uf_u7ddMhUt" + }, + "outputs": [], + "source": [ + "pip install hanlp_restful -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pp-1KqEOOJ4t" + }, + "source": [ + "## ๅˆ›ๅปบๅฎขๆˆท็ซฏ" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "0tmKBu7sNAXX" + }, + "outputs": [], + "source": [ + "from hanlp_restful import HanLPClient\n", + "HanLP = HanLPClient('https://www.hanlp.com/api', auth=None, language='zh') # authไธๅกซๅˆ™ๅŒฟๅ๏ผŒzhไธญๆ–‡๏ผŒmulๅคš่ฏญ็ง" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EmZDmLn9aGxG" + }, + "source": [ + "#### ็”ณ่ฏท็ง˜้’ฅ\n", + "็”ฑไบŽๆœๅŠกๅ™จ็ฎ—ๅŠ›ๆœ‰้™๏ผŒๅŒฟๅ็”จๆˆทๆฏๅˆ†้’Ÿ้™2ๆฌก่ฐƒ็”จใ€‚ๅฆ‚ๆžœไฝ ้œ€่ฆๆ›ดๅคš่ฐƒ็”จๆฌกๆ•ฐ๏ผŒ[ๅปบ่ฎฎ็”ณ่ฏทๅ…่ดนๅ…ฌ็›ŠAPI็ง˜้’ฅauth](https://bbs.hanlp.com/t/hanlp2-1-restful-api/53)ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ๅ‘ฝๅๅฎžไฝ“่ฏ†ๅˆซ" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wxctCigrTKu-" + }, + "source": [ + "ๅŒๆ—ถๆ‰ง่กŒๆ‰€ๆœ‰ๆ ‡ๅ‡†็š„ๅ‘ฝๅๅฎžไฝ“่ฏ†ๅˆซ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zo08uquCTFSk", + "outputId": "21be671b-ead0-43c9-cc3a-32c305d8be29" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"tok/fine\": [\n", + " [\"2021ๅนด\", \"HanLPv2.1\", \"ไธบ\", \"็”Ÿไบง\", \"็Žฏๅขƒ\", \"ๅธฆๆฅ\", \"ๆฌก\", \"ไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš\", \"่ฏญ็ง\", \"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"],\n", + " [\"้˜ฟๅฉ†ไธป\", \"ๆฅๅˆฐ\", \"ๅŒ—ไบฌ\", \"็ซ‹ๆ–นๅบญ\", \"ๅ‚่ง‚\", \"่‡ช็„ถ\", 
\"่ฏญไน‰\", \"็ง‘ๆŠ€\", \"ๅ…ฌๅธ\", \"ใ€‚\"]\n", + " ],\n", + " \"ner/msra\": [\n", + " [[\"2021ๅนด\", \"DATE\", 0, 1], [\"HanLPv2.1\", \"ORGANIZATION\", 1, 2]],\n", + " [[\"ๅŒ—ไบฌ็ซ‹ๆ–นๅบญ\", \"LOCATION\", 2, 4], [\"่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ\", \"ORGANIZATION\", 5, 9]]\n", + " ],\n", + " \"ner/pku\": [\n", + " [],\n", + " [[\"ๅŒ—ไบฌ\", \"ns\", 2, 3], [\"็ซ‹ๆ–นๅบญ\", \"ns\", 3, 4], [\"่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ\", \"nt\", 5, 9]]\n", + " ],\n", + " \"ner/ontonotes\": [\n", + " [[\"2021ๅนด\", \"DATE\", 0, 1], [\"ๆฌกไธ–ไปฃ\", \"DATE\", 6, 8]],\n", + " [[\"ๅŒ—ไบฌ\", \"FAC\", 2, 3], [\"็ซ‹ๆ–นๅบญ\", \"LOC\", 3, 4], [\"่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ\", \"ORG\", 5, 9]]\n", + " ]\n", + "}\n" + ] + } + ], + "source": [ + "print(HanLP('2021ๅนดHanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธใ€‚', tasks='ner*'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ๆฏไธชๅ››ๅ…ƒ็ป„่กจ็คบ`[ๅ‘ฝๅๅฎžไฝ“, ็ฑปๅž‹ๆ ‡็ญพ, ่ตทๅง‹ไธ‹ๆ ‡, ็ปˆๆญขไธ‹ๆ ‡]`๏ผŒไธ‹ๆ ‡ๆŒ‡็š„ๆ˜ฏๅ‘ฝๅๅฎžไฝ“ๅœจๅ•่ฏๆ•ฐ็ป„ไธญ็š„ไธ‹ๆ ‡๏ผŒๅ•่ฏๆ•ฐ็ป„้ป˜่ฎคไธบ็ฌฌไธ€ไธชไปฅ`tok`ๅผ€ๅคด็š„ๆ•ฐ็ป„ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cqEWnj_7p2Lf" + }, + "source": [ + "ไปปๅŠก่ถŠๅฐ‘๏ผŒ้€Ÿๅบฆ่ถŠๅฟซใ€‚ๅฆ‚ๆŒ‡ๅฎšไป…ๆ‰ง่กŒๅ‘ฝๅๅฎžไฝ“่ฏ†ๅˆซ๏ผŒ้ป˜่ฎคMSRAๆ ‡ๅ‡†๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 572 + }, + "id": "BqEmDMGGOtk3", + "outputId": "33790ca9-7013-456f-c1cb-e5ddce90a457" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Token \tNER Type \n", + "โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\n", + "2021ๅนด \tโ”€โ”€โ”€โ–บDATE \n", + "HanLPv2.1\tโ”€โ”€โ”€โ–บORGANIZATION\n", + "ไธบ \t \n", + "็”Ÿไบง \t \n", + "็Žฏๅขƒ \t \n", + "ๅธฆๆฅ \t \n", + "ๆฌก \t \n", + "ไธ–ไปฃ \t \n", + "ๆœ€ \t \n", + "ๅ…ˆ่ฟ› \t \n", + "็š„ \t \n", 
+ "ๅคš \t \n", + "่ฏญ็ง \t \n", + "NLP \t \n", + "ๆŠ€ๆœฏ \t \n", + "ใ€‚ \t \n", + "\n", + "Tok\tNER Type \n", + "โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\n", + "้˜ฟๅฉ†ไธป\t \n", + "ๆฅๅˆฐ \t \n", + "ๅŒ—ไบฌ \tโ—„โ”€โ” \n", + "็ซ‹ๆ–นๅบญ\tโ—„โ”€โ”ดโ–บLOCATION \n", + "ๅ‚่ง‚ \t \n", + "่‡ช็„ถ \tโ—„โ”€โ” \n", + "่ฏญไน‰ \t โ”‚ \n", + "็ง‘ๆŠ€ \t โ”œโ–บORGANIZATION\n", + "ๅ…ฌๅธ \tโ—„โ”€โ”˜ \n", + "ใ€‚ \t \n" + ] + } + ], + "source": [ + "HanLP('2021ๅนดHanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธใ€‚', tasks='ner').pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jj1Jk-2sPHYx" + }, + "source": [ + "ๆ‰ง่กŒOntoNotesๅ‘ฝๅๅฎžไฝ“่ฏ†ๅˆซ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 572 + }, + "id": "1goEC7znPNkI", + "outputId": "2a97331c-a5fb-4d3c-ccf2-ce2186616c57" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Token \tNER Type\n", + "โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\n", + "2021ๅนด \tโ”€โ”€โ”€โ–บDATE\n", + "HanLPv2.1\t \n", + "ไธบ \t \n", + "็”Ÿไบง \t \n", + "็Žฏๅขƒ \t \n", + "ๅธฆๆฅ \t \n", + "ๆฌก \tโ—„โ”€โ” \n", + "ไธ–ไปฃ \tโ—„โ”€โ”ดโ–บDATE\n", + "ๆœ€ \t \n", + "ๅ…ˆ่ฟ› \t \n", + "็š„ \t \n", + "ๅคš \t \n", + "่ฏญ็ง \t \n", + "NLP \t \n", + "ๆŠ€ๆœฏ \t \n", + "ใ€‚ \t \n", + "\n", + "Tok\tNER Typ\n", + "โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€\n", + "้˜ฟๅฉ†ไธป\t \n", + "ๆฅๅˆฐ \t \n", + "ๅŒ—ไบฌ \tโ”€โ”€โ”€โ–บFAC\n", + "็ซ‹ๆ–นๅบญ\tโ”€โ”€โ”€โ–บLOC\n", + "ๅ‚่ง‚ \t \n", + "่‡ช็„ถ \tโ—„โ”€โ” \n", + "่ฏญไน‰ \t โ”‚ \n", + "็ง‘ๆŠ€ \t โ”œโ–บORG\n", + "ๅ…ฌๅธ \tโ—„โ”€โ”˜ \n", + "ใ€‚ \t \n" + ] + } + ], + "source": [ + "HanLP('2021ๅนดHanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธใ€‚', 
tasks='ner/ontonotes').pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XOsWkOqQfzlr" + }, + "source": [ + "ไธบๅทฒๅˆ†่ฏ็š„ๅฅๅญๆ‰ง่กŒๅ‘ฝๅๅฎžไฝ“่ฏ†ๅˆซ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 161 + }, + "id": "bLZSTbv_f3OA", + "outputId": "6a0e1e76-f581-4fd1-8a78-ef97d9429e87" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Token \tNER Type \n", + "โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\n", + "้˜ฟๅฉ†ไธป \t \n", + "ๆฅๅˆฐ \t \n", + "ๅŒ—ไบฌ็ซ‹ๆ–นๅบญ \tโ”€โ”€โ”€โ–บLOCATION \n", + "ๅ‚่ง‚ \t \n", + "่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ\tโ”€โ”€โ”€โ–บORGANIZATION\n", + "ใ€‚ \t \n" + ] + } + ], + "source": [ + "HanLP(tokens=[[\"้˜ฟๅฉ†ไธป\", \"ๆฅๅˆฐ\", \"ๅŒ—ไบฌ็ซ‹ๆ–นๅบญ\", \"ๅ‚่ง‚\", \"่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ\", \"ใ€‚\"]], tasks='ner').pretty_print()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "ner_restful.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/plugins/hanlp_demo/hanlp_demo/zh/ner_stl.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/ner_stl.ipynb new file mode 100644 index 000000000..cdb0bf996 --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/ner_stl.ipynb @@ -0,0 +1,325 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IYwV-UkNNzFp" + }, + "source": [ + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1Uf_u7ddMhUt", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pp-1KqEOOJ4t", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "!pip install hanlp -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0tmKBu7sNAXX", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## ๅŠ ่ฝฝๆจกๅž‹\n", + "HanLP็š„ๅทฅไฝœๆต็จ‹ๆ˜ฏๅ…ˆๅŠ ่ฝฝๆจกๅž‹๏ผŒๆจกๅž‹็š„ๆ ‡็คบ็ฌฆๅญ˜ๅ‚จๅœจ`hanlp.pretrained`่ฟ™ไธชๅŒ…ไธญ๏ผŒๆŒ‰็…งNLPไปปๅŠกๅฝ’็ฑปใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "EmZDmLn9aGxG", + "outputId": "0d55f7a1-3a4c-4170-e60f-da7473208e3f", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'MSRA_NER_BERT_BASE_ZH': 'https://file.hankcs.com/hanlp/ner/ner_bert_base_msra_20211227_114712.zip',\n", + " 'MSRA_NER_ALBERT_BASE_ZH': 'https://file.hankcs.com/hanlp/ner/msra_ner_albert_base_20211228_173323.zip',\n", + " 'MSRA_NER_ELECTRA_SMALL_ZH': 'https://file.hankcs.com/hanlp/ner/msra_ner_electra_small_20210807_154832.zip',\n", + " 'CONLL03_NER_BERT_BASE_CASED_EN': 'https://file.hankcs.com/hanlp/ner/ner_conll03_bert_base_cased_en_20211227_121443.zip'}" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import hanlp\n", + "hanlp.pretrained.ner.ALL # ่ฏญ็ง่งๅ็งฐๆœ€ๅŽไธ€ไธชๅญ—ๆฎตๆˆ–็›ธๅบ”่ฏญๆ–™ๅบ“" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VDT-qmLyvDST" + }, + "source": [ + "่ฐƒ็”จ`hanlp.load`่ฟ›่กŒๅŠ ่ฝฝ๏ผŒๆจกๅž‹ไผš่‡ชๅŠจไธ‹่ฝฝๅˆฐๆœฌๅœฐ็ผ“ๅญ˜ใ€‚" + ] + }, + { + 
"cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "Tzu5Qi-xvDST", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "ner = hanlp.load(hanlp.pretrained.ner.MSRA_NER_ELECTRA_SMALL_ZH)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ๅ‘ฝๅๅฎžไฝ“่ฏ†ๅˆซ" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wxctCigrTKu-" + }, + "source": [ + "ๅ‘ฝๅๅฎžไฝ“่ฏ†ๅˆซไปปๅŠก็š„่พ“ๅ…ฅไธบๅทฒๅˆ†่ฏ็š„ๅฅๅญ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zo08uquCTFSk", + "outputId": "864da076-7113-4685-e27a-1856e69bdd2a" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[('2021ๅนด', 'DATE', 0, 1)], [('ๅŒ—ไบฌ', 'LOCATION', 2, 3), ('็ซ‹ๆ–นๅบญ', 'LOCATION', 3, 4), ('่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ', 'ORGANIZATION', 5, 9)]]\n" + ] + } + ], + "source": [ + "print(ner([[\"2021ๅนด\", \"HanLPv2.1\", \"ไธบ\", \"็”Ÿไบง\", \"็Žฏๅขƒ\", \"ๅธฆๆฅ\", \"ๆฌก\", \"ไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš\", \"่ฏญ็ง\", \"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"], [\"้˜ฟๅฉ†ไธป\", \"ๆฅๅˆฐ\", \"ๅŒ—ไบฌ\", \"็ซ‹ๆ–นๅบญ\", \"ๅ‚่ง‚\", \"่‡ช็„ถ\", \"่ฏญไน‰\", \"็ง‘ๆŠ€\", \"ๅ…ฌๅธ\", \"ใ€‚\"]], tasks='ner*'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ๆฏไธชๅ››ๅ…ƒ็ป„่กจ็คบ`[ๅ‘ฝๅๅฎžไฝ“, ็ฑปๅž‹ๆ ‡็ญพ, ่ตทๅง‹ไธ‹ๆ ‡, ็ปˆๆญขไธ‹ๆ ‡]`๏ผŒไธ‹ๆ ‡ๆŒ‡็š„ๆ˜ฏๅ‘ฝๅๅฎžไฝ“ๅœจๅ•่ฏๆ•ฐ็ป„ไธญ็š„ไธ‹ๆ ‡ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ่‡ชๅฎšไน‰่ฏๅ…ธ" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่‡ชๅฎšไน‰่ฏๅ…ธๆ˜ฏNERไปปๅŠก็š„ๆˆๅ‘˜ๅ˜้‡๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "None\n" + ] + } + ], + "source": [ + "print(ner.dict_whitelist)" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "### ็™ฝๅๅ•่ฏๅ…ธ\n", + "็™ฝๅๅ•่ฏๅ…ธไธญ็š„่ฏ่ฏญไผšๅฐฝ้‡่ขซ่พ“ๅ‡บใ€‚ๅฝ“็„ถ๏ผŒHanLPไปฅ็ปŸ่ฎกไธบไธป๏ผŒ่ฏๅ…ธ็š„ไผ˜ๅ…ˆ็บงๅพˆไฝŽใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('2021ๅนด', 'DATE', 0, 1),\n", + " ('138', 'INTEGER', 4, 5),\n", + " ('ๅˆ้ฅญๅŽ', 'TIME', 8, 10),\n", + " ('2็‚น45', 'TIME', 10, 11),\n", + " ('44', 'INTEGER', 14, 15)]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ner.dict_whitelist = {'ๅˆ้ฅญๅŽ': 'TIME'}\n", + "ner(['2021ๅนด', 'ๆต‹่ฏ•', '้ซ˜่ก€ๅŽ‹', 'ๆ˜ฏ', '138', '๏ผŒ', 'ๆ—ถ้—ด', 'ๆ˜ฏ', 'ๅˆ้ฅญ', 'ๅŽ', '2็‚น45', '๏ผŒ', 'ไฝŽ่ก€ๅŽ‹', 'ๆ˜ฏ', '44'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ๅผบๅˆถ่ฏๅ…ธ\n", + "ๅฆ‚ๆžœไฝ ่ฏป่ฟ‡[ใ€Š่‡ช็„ถ่ฏญ่จ€ๅค„็†ๅ…ฅ้—จใ€‹](http://nlp.hankcs.com/book.php)๏ผŒไฝ ๅฐฑไผš็†่งฃBMESOๆ ‡ๆณจ้›†๏ผŒไบŽๆ˜ฏไฝ ๅฏไปฅ็›ดๆŽฅๅนฒ้ข„็ปŸ่ฎกๆจกๅž‹้ข„ๆต‹็š„ๆ ‡็ญพ๏ผŒๆ‹ฟๅˆฐๆœ€้ซ˜ไผ˜ๅ…ˆ็บง็š„ๆƒ้™ใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('ๆต™ๆฑŸ', 'LOCATION', 2, 3), ('้‡‘ๅŽ', 'LOCATION', 3, 4), ('้‡‘ๅŽ', 'PERSON', 10, 11)]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ner.dict_tags = {('ๅๅญ—', 'ๅซ', '้‡‘ๅŽ'): ('O', 'O', 'S-PERSON')}\n", + "ner(['ไป–', 'ๅœจ', 'ๆต™ๆฑŸ', '้‡‘ๅŽ', 'ๅ‡บ็”Ÿ', '๏ผŒ', 'ไป–', '็š„', 'ๅๅญ—', 'ๅซ', '้‡‘ๅŽ', 'ใ€‚'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ้ป‘ๅๅ•่ฏๅ…ธ\n", + "้ป‘ๅๅ•ไธญ็š„่ฏ่ฏญ็ปๅฏนไธไผš่ขซๅฝ“ๅšๅ‘ฝๅๅฎžไฝ“ใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('ๆต™ๆฑŸ', 'LOCATION', 2, 3)]" + ] + }, + "execution_count": 10, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "ner.dict_blacklist = {'้‡‘ๅŽ'}\n", + "ner(['ไป–', 'ๅœจ', 'ๆต™ๆฑŸ', '้‡‘ๅŽ', 'ๅ‡บ็”Ÿ', '๏ผŒ', 'ไป–', '็š„', 'ๅๅญ—', 'ๅซ', '้‡‘ๅŽ', 'ใ€‚'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "ner_stl.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/plugins/hanlp_demo/hanlp_demo/zh/pos_mtl.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/pos_mtl.ipynb new file mode 100644 index 000000000..8158a8c07 --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/pos_mtl.ipynb @@ -0,0 +1,403 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "view-in-github" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IYwV-UkNNzFp" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1Uf_u7ddMhUt" + }, + "outputs": [], + "source": [ + "!pip install hanlp -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pp-1KqEOOJ4t" + }, + "source": [ + "## ๅŠ ่ฝฝๆจกๅž‹\n", + "HanLP็š„ๅทฅไฝœๆต็จ‹ๆ˜ฏๅ…ˆๅŠ ่ฝฝๆจกๅž‹๏ผŒๆจกๅž‹็š„ๆ ‡็คบ็ฌฆๅญ˜ๅ‚จๅœจ`hanlp.pretrained`่ฟ™ไธชๅŒ…ไธญ๏ผŒๆŒ‰็…งNLPไปปๅŠกๅฝ’็ฑปใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4M7ka0K5OMWU", + "outputId": "50ad002e-4363-46cd-8f5d-b6d6aad3e957" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'OPEN_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_SMALL_ZH': 'https://file.hankcs.com/hanlp/mtl/open_tok_pos_ner_srl_dep_sdp_con_electra_small_20201223_035557.zip',\n", + " 'OPEN_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH': 'https://file.hankcs.com/hanlp/mtl/open_tok_pos_ner_srl_dep_sdp_con_electra_base_20201223_201906.zip',\n", + " 'CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_SMALL_ZH': 'https://file.hankcs.com/hanlp/mtl/close_tok_pos_ner_srl_dep_sdp_con_electra_small_20210111_124159.zip',\n", + " 'CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH': 'https://file.hankcs.com/hanlp/mtl/close_tok_pos_ner_srl_dep_sdp_con_electra_base_20210111_124519.zip',\n", + " 'CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ERNIE_GRAM_ZH': 'https://file.hankcs.com/hanlp/mtl/close_tok_pos_ner_srl_dep_sdp_con_ernie_gram_base_aug_20210904_145403.zip',\n", + " 'UD_ONTONOTES_TOK_POS_LEM_FEA_NER_SRL_DEP_SDP_CON_MT5_SMALL': 'https://file.hankcs.com/hanlp/mtl/ud_ontonotes_tok_pos_lem_fea_ner_srl_dep_sdp_con_mt5_small_20210228_123458.zip',\n", + " 
'UD_ONTONOTES_TOK_POS_LEM_FEA_NER_SRL_DEP_SDP_CON_XLMR_BASE': 'https://file.hankcs.com/hanlp/mtl/ud_ontonotes_tok_pos_lem_fea_ner_srl_dep_sdp_con_xlm_base_20210602_211620.zip',\n", + " 'NPCMJ_UD_KYOTO_TOK_POS_CON_BERT_BASE_CHAR_JA': 'https://file.hankcs.com/hanlp/mtl/npcmj_ud_kyoto_tok_pos_ner_dep_con_srl_bert_base_char_ja_20210914_133742.zip'}" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import hanlp\n", + "hanlp.pretrained.mtl.ALL # MTLๅคšไปปๅŠก๏ผŒๅ…ทไฝ“ไปปๅŠก่งๆจกๅž‹ๅ็งฐ๏ผŒ่ฏญ็ง่งๅ็งฐๆœ€ๅŽไธ€ไธชๅญ—ๆฎตๆˆ–็›ธๅบ”่ฏญๆ–™ๅบ“" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BMW528wGNulM" + }, + "source": [ + "่ฐƒ็”จ`hanlp.load`่ฟ›่กŒๅŠ ่ฝฝ๏ผŒๆจกๅž‹ไผš่‡ชๅŠจไธ‹่ฝฝๅˆฐๆœฌๅœฐ็ผ“ๅญ˜ใ€‚่‡ช็„ถ่ฏญ่จ€ๅค„็†ๅˆ†ไธบ่ฎธๅคšไปปๅŠก๏ผŒๅˆ†่ฏๅชๆ˜ฏๆœ€ๅˆ็บง็š„ไธ€ไธชใ€‚ไธŽๅ…ถๆฏไธชไปปๅŠกๅ•็‹ฌๅˆ›ๅปบไธ€ไธชๆจกๅž‹๏ผŒไธๅฆ‚ๅˆฉ็”จHanLP็š„่”ๅˆๆจกๅž‹ไธ€ๆฌกๆ€งๅฎŒๆˆๅคšไธชไปปๅŠก๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "0tmKBu7sNAXX" + }, + "outputs": [], + "source": [ + "HanLP = hanlp.load(hanlp.pretrained.mtl.CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ่ฏๆ€งๆ ‡ๆณจ\n", + "ไปปๅŠก่ถŠๅฐ‘๏ผŒ้€Ÿๅบฆ่ถŠๅฟซใ€‚ๅฆ‚ๆŒ‡ๅฎšไป…ๆ‰ง่กŒ่ฏๆ€งๆ ‡ๆณจ๏ผŒ้ป˜่ฎคCTBๆ ‡ๅ‡†๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "BqEmDMGGOtk3", + "outputId": "5ad7fd22-651a-4403-d897-a9492eb15854" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
HanLP/NR ไธบ/P ็”Ÿไบง/NN ็Žฏๅขƒ/NN ๅธฆๆฅ/VV ๆฌก/JJ ไธ–ไปฃ/NN ๆœ€/AD ๅ…ˆ่ฟ›/JJ ็š„/DEG ๅคš่ฏญ็ง/NN NLP/NR ๆŠ€ๆœฏ/NN ใ€‚/PU

ๆˆ‘/PN ็š„/DEG ๅธŒๆœ›/NN ๆ˜ฏ/VC ๅธŒๆœ›/VV ๅผ ๆ™š้œž/NR ็š„/DEG ่ƒŒๅฝฑ/NN ่ขซ/LB ๆ™š้œž/NN ๆ˜ ็บข/VV ใ€‚/PU
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "HanLP(['HanLPไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚', 'ๆˆ‘็š„ๅธŒๆœ›ๆ˜ฏๅธŒๆœ›ๅผ ๆ™š้œž็š„่ƒŒๅฝฑ่ขซๆ™š้œžๆ˜ ็บขใ€‚'], tasks='pos').pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jj1Jk-2sPHYx" + }, + "source": [ + "ๆณจๆ„ไธŠ้ขไธคไธชโ€œๅธŒๆœ›โ€็š„่ฏๆ€งๅ„ไธ็›ธๅŒ๏ผŒไธ€ไธชๆ˜ฏๅ่ฏๅฆไธ€ไธชๆ˜ฏๅŠจ่ฏใ€‚\n", + "ๆ‰ง่กŒPKU่ฏๆ€งๆ ‡ๆณจ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "1goEC7znPNkI", + "outputId": "586afd5d-db0d-41bd-f7de-411f37062a8c" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
HanLP/nx ไธบ/p ็”Ÿไบง/vn ็Žฏๅขƒ/n ๅธฆๆฅ/v ๆฌก/b ไธ–ไปฃ/n ๆœ€/d ๅ…ˆ่ฟ›/a ็š„/u ๅคš่ฏญ็ง/n NLP/nx ๆŠ€ๆœฏ/n ใ€‚/w

ๆˆ‘/r ็š„/u ๅธŒๆœ›/n ๆ˜ฏ/v ๅธŒๆœ›/v ๅผ ๆ™š้œž/nr ็š„/u ่ƒŒๅฝฑ/n ่ขซ/p ๆ™š้œž/n ๆ˜ ็บข/v ใ€‚/w
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "HanLP(['HanLPไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚', 'ๆˆ‘็š„ๅธŒๆœ›ๆ˜ฏๅธŒๆœ›ๅผ ๆ™š้œž็š„่ƒŒๅฝฑ่ขซๆ™š้œžๆ˜ ็บขใ€‚'], tasks='pos/pku').pretty_print()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wxctCigrTKu-" + }, + "source": [ + "ๅŒๆ—ถๆ‰ง่กŒๆ‰€ๆœ‰ๆ ‡ๅ‡†็š„่ฏๆ€งๆ ‡ๆณจ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zo08uquCTFSk", + "outputId": "d2b3eb65-06e6-47a6-d954-04cae27d6c51" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"tok/fine\": [\n", + " [\"HanLP\", \"ไธบ\", \"็”Ÿไบง\", \"็Žฏๅขƒ\", \"ๅธฆๆฅ\", \"ๆฌก\", \"ไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš่ฏญ็ง\", \"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"],\n", + " [\"ๆˆ‘\", \"็š„\", \"ๅธŒๆœ›\", \"ๆ˜ฏ\", \"ๅธŒๆœ›\", \"ๅผ ๆ™š้œž\", \"็š„\", \"่ƒŒๅฝฑ\", \"่ขซ\", \"ๆ™š้œž\", \"ๆ˜ ็บข\", \"ใ€‚\"]\n", + " ],\n", + " \"pos/ctb\": [\n", + " [\"NR\", \"P\", \"NN\", \"NN\", \"VV\", \"JJ\", \"NN\", \"AD\", \"JJ\", \"DEG\", \"NN\", \"NR\", \"NN\", \"PU\"],\n", + " [\"PN\", \"DEG\", \"NN\", \"VC\", \"VV\", \"NR\", \"DEG\", \"NN\", \"LB\", \"NN\", \"VV\", \"PU\"]\n", + " ],\n", + " \"pos/pku\": [\n", + " [\"nx\", \"p\", \"vn\", \"n\", \"v\", \"b\", \"n\", \"d\", \"a\", \"u\", \"n\", \"nx\", \"n\", \"w\"],\n", + " [\"r\", \"u\", \"n\", \"v\", \"v\", \"nr\", \"u\", \"n\", \"p\", \"n\", \"v\", \"w\"]\n", + " ],\n", + " \"pos/863\": [\n", + " [\"w\", \"p\", \"v\", \"n\", \"v\", \"a\", \"nt\", \"d\", \"a\", \"u\", \"n\", \"ws\", \"n\", \"w\"],\n", + " [\"r\", \"u\", \"n\", \"vl\", \"v\", \"nh\", \"u\", \"n\", \"p\", \"n\", \"v\", \"w\"]\n", + " ]\n", + "}\n" + ] + } + ], + "source": [ + "print(HanLP(['HanLPไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚', 'ๆˆ‘็š„ๅธŒๆœ›ๆ˜ฏๅธŒๆœ›ๅผ ๆ™š้œž็š„่ƒŒๅฝฑ่ขซๆ™š้œžๆ˜ ็บขใ€‚'], 
tasks='pos*'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ไปฅ`pos`ๅผ€ๅคด็š„ๅญ—ๆฎตไธบ่ฏๆ€ง๏ผŒไปฅ`tok`ๅผ€ๅคด็š„็ฌฌไธ€ไธชๆ•ฐ็ป„ไธบๅ•่ฏ๏ผŒไธค่€…ๆŒ‰ไธ‹ๆ ‡ไธ€ไธ€ๅฏนๅบ”ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### ๆณจๆ„\n", + "Native API็š„่พ“ๅ…ฅๅ•ไฝ้™ๅฎšไธบๅฅๅญ๏ผŒ้œ€ไฝฟ็”จ[ๅคš่ฏญ็งๅˆ†ๅฅๆจกๅž‹](https://github.com/hankcs/HanLP/blob/master/plugins/hanlp_demo/hanlp_demo/sent_split.py)ๆˆ–[ๅŸบไบŽ่ง„ๅˆ™็š„ๅˆ†ๅฅๅ‡ฝๆ•ฐ](https://github.com/hankcs/HanLP/blob/master/hanlp/utils/rules.py#L19)ๅ…ˆ่กŒๅˆ†ๅฅใ€‚RESTfulๅŒๆ—ถๆ”ฏๆŒๅ…จๆ–‡ใ€ๅฅๅญใ€ๅทฒๅˆ†่ฏ็š„ๅฅๅญใ€‚้™คๆญคไน‹ๅค–๏ผŒRESTfulๅ’Œnativeไธค็งAPI็š„่ฏญไน‰่ฎพ่ฎกๅฎŒๅ…จไธ€่‡ด๏ผŒ็”จๆˆทๅฏไปฅๆ— ็ผไบ’ๆขใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "suUL042zPpLj" + }, + "source": [ + "## ่‡ชๅฎšไน‰่ฏๅ…ธ\n", + "่‡ชๅฎšไน‰่ฏๅ…ธไธบ่ฏๆ€งๆ ‡ๆณจไปปๅŠก็š„ๆˆๅ‘˜ๅ˜้‡๏ผŒ่ฆๆ“ไฝœ่‡ชๅฎšไน‰่ฏๅ…ธ๏ผŒๅ…ˆ่Žทๅ–ไธ€ไธช่ฏๆ€งๆ ‡ๆณจไปปๅŠก๏ผŒไปฅCTBๆ ‡ๅ‡†ไธบไพ‹๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AzYShIssP6kq", + "outputId": "640cefa5-1d6d-464b-81d2-83c66e2081f2" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pos = HanLP['pos/ctb']\n", + "pos" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1q4MUpgVQNlu" + }, + "source": [ + "่‡ชๅฎšไน‰ๅ•ไธช่ฏๆ€ง๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "2zZkH9tRQOoi", + "outputId": "ed0bb8fe-2e68-4c58-e11e-ff6a0cc69ae4" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
HanLP/state-of-the-art-tool ไธบ/P ็”Ÿไบง/NN ็Žฏๅขƒ/NN ๅธฆๆฅ/VV ๆฌก/JJ ไธ–ไปฃ/NN ๆœ€/AD ๅ…ˆ่ฟ›/JJ ็š„/DEG ๅคš่ฏญ็ง/NN NLP/NR ๆŠ€ๆœฏ/NN ใ€‚/PU
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "pos.dict_tags = {'HanLP': 'state-of-the-art-tool'}\n", + "HanLP(\"HanLPไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚\", tasks='pos/ctb').pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F-9gAeIVQUFG" + }, + "source": [ + "ๆ นๆฎไธŠไธ‹ๆ–‡่‡ชๅฎšไน‰่ฏๆ€ง๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "F8M8cyBrQduw", + "outputId": "16ef7f82-50ff-478f-c3ea-8e768b0cea31" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
ๆˆ‘/PN ็š„/่กฅ่ฏญๆˆๅˆ† ๅธŒๆœ›/ๅ่ฏ ๆ˜ฏ/VC ๅธŒๆœ›/ๅŠจ่ฏ ๅผ ๆ™š้œž/NR ็š„/DEG ่ƒŒๅฝฑ/NN ่ขซ/LB ๆ™š้œž/NN ๆ˜ ็บข/VV ใ€‚/PU
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "pos.dict_tags = {('็š„', 'ๅธŒๆœ›'): ('่กฅ่ฏญๆˆๅˆ†', 'ๅ่ฏ'), 'ๅธŒๆœ›': 'ๅŠจ่ฏ'}\n", + "HanLP(\"ๆˆ‘็š„ๅธŒๆœ›ๆ˜ฏๅธŒๆœ›ๅผ ๆ™š้œž็š„่ƒŒๅฝฑ่ขซๆ™š้œžๆ˜ ็บขใ€‚\", tasks='pos/ctb').pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "้œ€่ฆ็ฎ—ๆณ•ๅŸบ็ก€ๆ‰่ƒฝ็†่งฃ๏ผŒๅˆๅญฆ่€…ๅฏๅ‚่€ƒ[ใ€Š่‡ช็„ถ่ฏญ่จ€ๅค„็†ๅ…ฅ้—จใ€‹](http://nlp.hankcs.com/book.php)ใ€‚" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "include_colab_link": true, + "name": "pos_mtl.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/plugins/hanlp_demo/hanlp_demo/zh/pos_restful.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/pos_restful.ipynb new file mode 100644 index 000000000..a24d1aa5e --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/pos_restful.ipynb @@ -0,0 +1,309 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IYwV-UkNNzFp" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1Uf_u7ddMhUt" + }, + "outputs": [], + "source": [ + "pip install hanlp_restful -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pp-1KqEOOJ4t" + }, + "source": [ + "## ๅˆ›ๅปบๅฎขๆˆท็ซฏ" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "0tmKBu7sNAXX" + }, + "outputs": [], + "source": [ + "from hanlp_restful import HanLPClient\n", + "HanLP = HanLPClient('https://www.hanlp.com/api', auth=None, language='zh') # authไธๅกซๅˆ™ๅŒฟๅ๏ผŒzhไธญๆ–‡๏ผŒmulๅคš่ฏญ็ง" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EmZDmLn9aGxG" + }, + "source": [ + "#### ็”ณ่ฏท็ง˜้’ฅ\n", + "็”ฑไบŽๆœๅŠกๅ™จ็ฎ—ๅŠ›ๆœ‰้™๏ผŒๅŒฟๅ็”จๆˆทๆฏๅˆ†้’Ÿ้™2ๆฌก่ฐƒ็”จใ€‚ๅฆ‚ๆžœไฝ ้œ€่ฆๆ›ดๅคš่ฐƒ็”จๆฌกๆ•ฐ๏ผŒ[ๅปบ่ฎฎ็”ณ่ฏทๅ…่ดนๅ…ฌ็›ŠAPI็ง˜้’ฅauth](https://bbs.hanlp.com/t/hanlp2-1-restful-api/53)ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ่ฏๆ€งๆ ‡ๆณจ\n", + "ไปปๅŠก่ถŠๅฐ‘๏ผŒ้€Ÿๅบฆ่ถŠๅฟซใ€‚ๅฆ‚ๆŒ‡ๅฎšไป…ๆ‰ง่กŒ่ฏๆ€งๆ ‡ๆณจ๏ผŒ้ป˜่ฎคCTBๆ ‡ๅ‡†๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "BqEmDMGGOtk3", + "outputId": "2a0d392f-b99a-4a18-fc7f-754e2abe2e34" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
HanLP/NR ไธบ/P ็”Ÿไบง/NN ็Žฏๅขƒ/NN ๅธฆๆฅ/VV ๆฌกไธ–ไปฃ/NN ๆœ€/AD ๅ…ˆ่ฟ›/JJ ็š„/DEG ๅคš/CD ่ฏญ็ง/NN NLP/NN ๆŠ€ๆœฏ/NN ใ€‚/PU

ๆˆ‘/PN ็š„/DEG ๅธŒๆœ›/NN ๆ˜ฏ/VC ๅธŒๆœ›/VV ๅผ ๆ™š้œž/NR ็š„/DEG ่ƒŒๅฝฑ/NN ่ขซ/LB ๆ™š้œž/NN ๆ˜ ็บข/VV ใ€‚/PU
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "HanLP('HanLPไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚ๆˆ‘็š„ๅธŒๆœ›ๆ˜ฏๅธŒๆœ›ๅผ ๆ™š้œž็š„่ƒŒๅฝฑ่ขซๆ™š้œžๆ˜ ็บขใ€‚', tasks='pos').pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jj1Jk-2sPHYx" + }, + "source": [ + "ๆณจๆ„ไธŠ้ขไธคไธชโ€œๅธŒๆœ›โ€็š„่ฏๆ€งๅ„ไธ็›ธๅŒ๏ผŒไธ€ไธชๆ˜ฏๅ่ฏๅฆไธ€ไธชๆ˜ฏๅŠจ่ฏใ€‚\n", + "\n", + "### ๆ‰ง่กŒPKU่ฏๆ€งๆ ‡ๆณจ" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "1goEC7znPNkI", + "outputId": "7a3fde55-7577-49eb-92c8-48146aaa89d3" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
HanLP/nx ไธบ/p ็”Ÿไบง/vn ็Žฏๅขƒ/n ๅธฆๆฅ/v ๆฌกไธ–ไปฃ/n ๆœ€/d ๅ…ˆ่ฟ›/a ็š„/u ๅคš/a ่ฏญ็ง/n NLP/nx ๆŠ€ๆœฏ/n ใ€‚/w

ๆˆ‘/r ็š„/u ๅธŒๆœ›/n ๆ˜ฏ/v ๅธŒๆœ›/v ๅผ ๆ™š้œž/nr ็š„/u ่ƒŒๅฝฑ/n ่ขซ/p ๆ™š้œž/n ๆ˜ ็บข/v ใ€‚/w
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "HanLP('HanLPไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚ๆˆ‘็š„ๅธŒๆœ›ๆ˜ฏๅธŒๆœ›ๅผ ๆ™š้œž็š„่ƒŒๅฝฑ่ขซๆ™š้œžๆ˜ ็บขใ€‚', tasks='pos/pku').pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ๆ‰ง่กŒ็ฒ—้ข—็ฒ’ๅบฆๅˆ†่ฏๅ’ŒPKU่ฏๆ€งๆ ‡ๆณจ" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
้˜ฟๅฉ†ไธป/n ๆฅๅˆฐ/v ๅŒ—ไบฌ็ซ‹ๆ–นๅบญ/ns ๅ‚่ง‚/v ่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ/n ใ€‚/w
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "HanLP('้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธใ€‚', tasks=['tok/coarse', 'pos/pku'], skip_tasks='tok/fine').pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ไธพไธ€ๅไธ‰๏ผŒไฝ ๅฏไปฅๆŒ‡ๅฎšๅ…ถไป–posๆ ‡ๆณจ้›†๏ผˆctbใ€863็ญ‰๏ผ‰ใ€‚็”จๆˆทๆœ‰ๅคš่ชๆ˜Ž๏ผŒHanLPๅฐฑๆœ‰ๅคšๅผบๅคงใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wxctCigrTKu-" + }, + "source": [ + "### ๅŒๆ—ถๆ‰ง่กŒๆ‰€ๆœ‰ๆ ‡ๅ‡†็š„่ฏๆ€งๆ ‡ๆณจ" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zo08uquCTFSk", + "outputId": "c6077f2d-7084-4f4b-a3bc-9aa9951704ea" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"tok/fine\": [\n", + " [\"HanLP\", \"ไธบ\", \"็”Ÿไบง\", \"็Žฏๅขƒ\", \"ๅธฆๆฅ\", \"ๆฌกไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš\", \"่ฏญ็ง\", \"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"],\n", + " [\"ๆˆ‘\", \"็š„\", \"ๅธŒๆœ›\", \"ๆ˜ฏ\", \"ๅธŒๆœ›\", \"ๅผ ๆ™š้œž\", \"็š„\", \"่ƒŒๅฝฑ\", \"่ขซ\", \"ๆ™š้œž\", \"ๆ˜ ็บข\", \"ใ€‚\"]\n", + " ],\n", + " \"pos/ctb\": [\n", + " [\"NR\", \"P\", \"NN\", \"NN\", \"VV\", \"NN\", \"AD\", \"JJ\", \"DEG\", \"CD\", \"NN\", \"NN\", \"NN\", \"PU\"],\n", + " [\"PN\", \"DEG\", \"NN\", \"VC\", \"VV\", \"NR\", \"DEG\", \"NN\", \"LB\", \"NN\", \"VV\", \"PU\"]\n", + " ],\n", + " \"pos/pku\": [\n", + " [\"nx\", \"p\", \"vn\", \"n\", \"v\", \"n\", \"d\", \"a\", \"u\", \"a\", \"n\", \"nx\", \"n\", \"w\"],\n", + " [\"r\", \"u\", \"n\", \"v\", \"v\", \"nr\", \"u\", \"n\", \"p\", \"n\", \"v\", \"w\"]\n", + " ],\n", + " \"pos/863\": [\n", + " [\"w\", \"p\", \"v\", \"n\", \"v\", \"n\", \"d\", \"a\", \"u\", \"a\", \"n\", \"w\", \"n\", \"w\"],\n", + " [\"r\", \"u\", \"v\", \"vl\", \"v\", \"nh\", \"u\", \"n\", \"p\", \"n\", \"v\", \"w\"]\n", + " ]\n", + "}\n" + ] + } + ], + 
"source": [ + "print(HanLP('HanLPไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚ๆˆ‘็š„ๅธŒๆœ›ๆ˜ฏๅธŒๆœ›ๅผ ๆ™š้œž็š„่ƒŒๅฝฑ่ขซๆ™š้œžๆ˜ ็บขใ€‚', tasks='pos*'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ไปฅ`pos`ๅผ€ๅคด็š„ๅญ—ๆฎตไธบ่ฏๆ€ง๏ผŒไปฅ`tok`ๅผ€ๅคด็š„็ฌฌไธ€ไธชๆ•ฐ็ป„ไธบๅ•่ฏ๏ผŒไธค่€…ๆŒ‰ไธ‹ๆ ‡ไธ€ไธ€ๅฏนๅบ”ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XOsWkOqQfzlr" + }, + "source": [ + "### ไธบๅทฒๅˆ†่ฏ็š„ๅฅๅญๆ‰ง่กŒ่ฏๆ€งๆ ‡ๆณจ" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "bLZSTbv_f3OA", + "outputId": "111c0be9-bac6-4eee-d5bd-a972ffc34844" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
HanLP/NR ไธบ/P ็”Ÿไบง็Žฏๅขƒ/NN ๅธฆๆฅ/VV ๆฌกไธ–ไปฃ/NN ๆœ€/AD ๅ…ˆ่ฟ›/JJ ็š„/DEG ๅคš่ฏญ็ง/NN NLP/NN ๆŠ€ๆœฏ/NN ใ€‚/PU

ๆˆ‘/PN ็š„/DEG ๅธŒๆœ›/NN ๆ˜ฏ/VC ๅธŒๆœ›/VV ๅผ ๆ™š้œž/NR ็š„/DEG ่ƒŒๅฝฑ/NN ่ขซ/LB ๆ™š้œž/NN ๆ˜ ็บข/VV ใ€‚/PU
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "HanLP(tokens=[\n", + " [\"HanLP\", \"ไธบ\", \"็”Ÿไบง็Žฏๅขƒ\", \"ๅธฆๆฅ\", \"ๆฌกไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš่ฏญ็ง\", \"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"],\n", + " [\"ๆˆ‘\", \"็š„\", \"ๅธŒๆœ›\", \"ๆ˜ฏ\", \"ๅธŒๆœ›\", \"ๅผ ๆ™š้œž\", \"็š„\", \"่ƒŒๅฝฑ\", \"่ขซ\", \"ๆ™š้œž\", \"ๆ˜ ็บข\", \"ใ€‚\"]\n", + " ], tasks='pos').pretty_print()" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "pos_restful.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/plugins/hanlp_demo/hanlp_demo/zh/pos_stl.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/pos_stl.ipynb new file mode 100644 index 000000000..af418bcb8 --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/pos_stl.ipynb @@ -0,0 +1,319 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IYwV-UkNNzFp" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1Uf_u7ddMhUt" + }, + "outputs": [], + "source": [ + "!pip install hanlp -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pp-1KqEOOJ4t" + }, + "source": [ + "## ๅŠ ่ฝฝๆจกๅž‹\n", + "HanLP็š„ๅทฅไฝœๆต็จ‹ๆ˜ฏๅ…ˆๅŠ ่ฝฝๆจกๅž‹๏ผŒๆจกๅž‹็š„ๆ ‡็คบ็ฌฆๅญ˜ๅ‚จๅœจ`hanlp.pretrained`่ฟ™ไธชๅŒ…ไธญ๏ผŒๆŒ‰็…งNLPไปปๅŠกๅฝ’็ฑปใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4M7ka0K5OMWU", + "outputId": "d74f0749-0587-454a-d7c9-7418d45ce534" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'CTB5_POS_RNN': 'https://file.hankcs.com/hanlp/pos/ctb5_pos_rnn_20200113_235925.zip',\n", + " 'CTB5_POS_RNN_FASTTEXT_ZH': 'https://file.hankcs.com/hanlp/pos/ctb5_pos_rnn_fasttext_20191230_202639.zip',\n", + " 'CTB9_POS_ALBERT_BASE': 'https://file.hankcs.com/hanlp/pos/ctb9_albert_base_20211228_163935.zip',\n", + " 'CTB9_POS_ELECTRA_SMALL_TF': 'https://file.hankcs.com/hanlp/pos/pos_ctb_electra_small_20211227_121341.zip',\n", + " 'CTB9_POS_ELECTRA_SMALL': 'https://file.hankcs.com/hanlp/pos/pos_ctb_electra_small_20220215_111944.zip',\n", + " 'CTB9_POS_RADICAL_ELECTRA_SMALL': 'https://file.hankcs.com/hanlp/pos/pos_ctb_radical_electra_small_20220215_111932.zip',\n", + " 'C863_POS_ELECTRA_SMALL': 'https://file.hankcs.com/hanlp/pos/pos_863_electra_small_20220217_101958.zip',\n", + " 'PKU_POS_ELECTRA_SMALL': 'https://file.hankcs.com/hanlp/pos/pos_pku_electra_small_20220217_142436.zip',\n", + " 'PKU98_POS_ELECTRA_SMALL': 'https://file.hankcs.com/hanlp/pos/pos_pku_electra_small_20210808_125158.zip',\n", + " 'PTB_POS_RNN_FASTTEXT_EN': 
'https://file.hankcs.com/hanlp/pos/ptb_pos_rnn_fasttext_20200103_145337.zip'}" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import hanlp\n", + "hanlp.pretrained.pos.ALL # ่ฏญ็ง่งๅ็งฐๆœ€ๅŽไธ€ไธชๅญ—ๆฎตๆˆ–็›ธๅบ”่ฏญๆ–™ๅบ“" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BMW528wGNulM" + }, + "source": [ + "่ฐƒ็”จ`hanlp.load`่ฟ›่กŒๅŠ ่ฝฝ๏ผŒๆจกๅž‹ไผš่‡ชๅŠจไธ‹่ฝฝๅˆฐๆœฌๅœฐ็ผ“ๅญ˜๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0tmKBu7sNAXX", + "outputId": "df2de87b-27f5-4c72-8eb2-25ceefdd8270" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Downloading https://file.hankcs.com/hanlp/pos/ctb9_pos_electra_small_20220118_164341.zip to /root/.hanlp/pos/ctb9_pos_electra_small_20220118_164341.zip\n", + "100% 43.6 MiB 21.2 MiB/s ETA: 0 s [=========================================]\n", + "Decompressing /root/.hanlp/pos/ctb9_pos_electra_small_20220118_164341.zip to /root/.hanlp/pos\n", + "Downloading https://file.hankcs.com/hanlp/transformers/electra_zh_small_20210706_125427.zip to /root/.hanlp/transformers/electra_zh_small_20210706_125427.zip\n", + "100% 41.2 KiB 41.2 KiB/s ETA: 0 s [=========================================]\n", + "Decompressing /root/.hanlp/transformers/electra_zh_small_20210706_125427.zip to /root/.hanlp/transformers\n" + ] + } + ], + "source": [ + "pos = hanlp.load(hanlp.pretrained.pos.CTB9_POS_ELECTRA_SMALL)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ่ฏๆ€งๆ ‡ๆณจ\n", + "่ฏๆ€งๆ ‡ๆณจไปปๅŠก็š„่พ“ๅ…ฅไธบๅทฒๅˆ†่ฏ็š„ไธ€ไธชๆˆ–ๅคšไธชๅฅๅญ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "BqEmDMGGOtk3", + "outputId": "936d439a-e1ff-4308-d2aa-775955558594" + }, + "outputs": [ + { + "data": { + 
"text/plain": [ + "['PN', 'DEG', 'NN', 'VC', 'VV', 'NR', 'DEG', 'NN', 'LB', 'NR', 'VV', 'PU']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pos([\"ๆˆ‘\", \"็š„\", \"ๅธŒๆœ›\", \"ๆ˜ฏ\", \"ๅธŒๆœ›\", \"ๅผ ๆ™š้œž\", \"็š„\", \"่ƒŒๅฝฑ\", \"่ขซ\", \"ๆ™š้œž\", \"ๆ˜ ็บข\", \"ใ€‚\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jj1Jk-2sPHYx" + }, + "source": [ + "ๆณจๆ„ไธŠ้ขไธคไธชโ€œๅธŒๆœ›โ€็š„่ฏๆ€งๅ„ไธ็›ธๅŒ๏ผŒไธ€ไธชๆ˜ฏๅ่ฏๅฆไธ€ไธชๆ˜ฏๅŠจ่ฏใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "suUL042zPpLj" + }, + "source": [ + "## ่‡ชๅฎšไน‰่ฏๅ…ธ\n", + "่‡ชๅฎšไน‰่ฏๅ…ธไธบ่ฏๆ€งๆ ‡ๆณจไปปๅŠก็š„ๆˆๅ‘˜ๅ˜้‡๏ผŒไปฅCTBๆ ‡ๅ‡†ไธบไพ‹๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AzYShIssP6kq", + "outputId": "99b2607b-b618-4876-bbea-9f8c24859a85" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "None\n" + ] + } + ], + "source": [ + "print(pos.dict_tags)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1q4MUpgVQNlu" + }, + "source": [ + "่‡ชๅฎšไน‰ๅ•ไธช่ฏๆ€ง๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "2zZkH9tRQOoi", + "outputId": "4f92a907-10c3-4798-e7b9-914b8f577b2c" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['state-of-the-art-tool',\n", + " 'P',\n", + " 'NN',\n", + " 'NN',\n", + " 'VV',\n", + " 'JJ',\n", + " 'NN',\n", + " 'AD',\n", + " 'VA',\n", + " 'DEC',\n", + " 'NN',\n", + " 'NN',\n", + " 'NN',\n", + " 'PU']" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pos.dict_tags = {'HanLP': 'state-of-the-art-tool'}\n", + "pos([\"HanLP\", \"ไธบ\", \"็”Ÿไบง\", \"็Žฏๅขƒ\", \"ๅธฆๆฅ\", \"ๆฌก\", \"ไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš่ฏญ็ง\", 
\"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F-9gAeIVQUFG" + }, + "source": [ + "ๆ นๆฎไธŠไธ‹ๆ–‡่‡ชๅฎšไน‰่ฏๆ€ง๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "F8M8cyBrQduw", + "outputId": "24fa7ff0-305d-4d71-925e-f369b1c50e96" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['PN', '่กฅ่ฏญๆˆๅˆ†', 'ๅ่ฏ', 'VC', 'ๅŠจ่ฏ', 'NR', 'DEG', 'NN', 'LB', 'NR', 'VV', 'PU']" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pos.dict_tags = {('็š„', 'ๅธŒๆœ›'): ('่กฅ่ฏญๆˆๅˆ†', 'ๅ่ฏ'), 'ๅธŒๆœ›': 'ๅŠจ่ฏ'}\n", + "pos([\"ๆˆ‘\", \"็š„\", \"ๅธŒๆœ›\", \"ๆ˜ฏ\", \"ๅธŒๆœ›\", \"ๅผ ๆ™š้œž\", \"็š„\", \"่ƒŒๅฝฑ\", \"่ขซ\", \"ๆ™š้œž\", \"ๆ˜ ็บข\", \"ใ€‚\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "้œ€่ฆ็ฎ—ๆณ•ๅŸบ็ก€ๆ‰่ƒฝ็†่งฃ๏ผŒๅˆๅญฆ่€…ๅฏๅ‚่€ƒ[ใ€Š่‡ช็„ถ่ฏญ่จ€ๅค„็†ๅ…ฅ้—จใ€‹](http://nlp.hankcs.com/book.php)ใ€‚" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "pos_stl.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/plugins/hanlp_demo/hanlp_demo/zh/sdp_mtl.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/sdp_mtl.ipynb new file mode 100644 index 000000000..2ac58e0cd --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/sdp_mtl.ipynb @@ -0,0 +1,335 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "!pip install hanlp -U" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ๅŠ ่ฝฝๆจกๅž‹\n", + "HanLP็š„ๅทฅไฝœๆต็จ‹ๆ˜ฏๅ…ˆๅŠ ่ฝฝๆจกๅž‹๏ผŒๆจกๅž‹็š„ๆ ‡็คบ็ฌฆๅญ˜ๅ‚จๅœจ`hanlp.pretrained`่ฟ™ไธชๅŒ…ไธญ๏ผŒๆŒ‰็…งNLPไปปๅŠกๅฝ’็ฑปใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "IYwV-UkNNzFp", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'OPEN_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_SMALL_ZH': 'https://file.hankcs.com/hanlp/mtl/open_tok_pos_ner_srl_dep_sdp_con_electra_small_20201223_035557.zip',\n", + " 'OPEN_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH': 'https://file.hankcs.com/hanlp/mtl/open_tok_pos_ner_srl_dep_sdp_con_electra_base_20201223_201906.zip',\n", + " 'CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_SMALL_ZH': 'https://file.hankcs.com/hanlp/mtl/close_tok_pos_ner_srl_dep_sdp_con_electra_small_20210111_124159.zip',\n", + " 'CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH': 'https://file.hankcs.com/hanlp/mtl/close_tok_pos_ner_srl_dep_sdp_con_electra_base_20210111_124519.zip',\n", + " 'CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ERNIE_GRAM_ZH': 'https://file.hankcs.com/hanlp/mtl/close_tok_pos_ner_srl_dep_sdp_con_ernie_gram_base_aug_20210904_145403.zip',\n", + " 'UD_ONTONOTES_TOK_POS_LEM_FEA_NER_SRL_DEP_SDP_CON_MT5_SMALL': 'https://file.hankcs.com/hanlp/mtl/ud_ontonotes_tok_pos_lem_fea_ner_srl_dep_sdp_con_mt5_small_20210228_123458.zip',\n", + " 'UD_ONTONOTES_TOK_POS_LEM_FEA_NER_SRL_DEP_SDP_CON_XLMR_BASE': 'https://file.hankcs.com/hanlp/mtl/ud_ontonotes_tok_pos_lem_fea_ner_srl_dep_sdp_con_xlm_base_20210602_211620.zip',\n", + " 
'NPCMJ_UD_KYOTO_TOK_POS_CON_BERT_BASE_CHAR_JA': 'https://file.hankcs.com/hanlp/mtl/npcmj_ud_kyoto_tok_pos_ner_dep_con_srl_bert_base_char_ja_20210914_133742.zip'}" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import hanlp\n", + "hanlp.pretrained.mtl.ALL # MTLๅคšไปปๅŠก๏ผŒๅ…ทไฝ“ไปปๅŠก่งๆจกๅž‹ๅ็งฐ๏ผŒ่ฏญ็ง่งๅ็งฐๆœ€ๅŽไธ€ไธชๅญ—ๆฎตๆˆ–็›ธๅบ”่ฏญๆ–™ๅบ“" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1Uf_u7ddMhUt", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "่ฐƒ็”จ`hanlp.load`่ฟ›่กŒๅŠ ่ฝฝ๏ผŒๆจกๅž‹ไผš่‡ชๅŠจไธ‹่ฝฝๅˆฐๆœฌๅœฐ็ผ“ๅญ˜ใ€‚่‡ช็„ถ่ฏญ่จ€ๅค„็†ๅˆ†ไธบ่ฎธๅคšไปปๅŠก๏ผŒๅˆ†่ฏๅชๆ˜ฏๆœ€ๅˆ็บง็š„ไธ€ไธชใ€‚ไธŽๅ…ถๆฏไธชไปปๅŠกๅ•็‹ฌๅˆ›ๅปบไธ€ไธชๆจกๅž‹๏ผŒไธๅฆ‚ๅˆฉ็”จHanLP็š„่”ๅˆๆจกๅž‹ไธ€ๆฌกๆ€งๅฎŒๆˆๅคšไธชไปปๅŠก๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "pp-1KqEOOJ4t", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "HanLP = hanlp.load(hanlp.pretrained.mtl.CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ่ฏญไน‰ไพๅญ˜ๅˆ†ๆž\n", + "ไปปๅŠก่ถŠๅฐ‘๏ผŒ้€Ÿๅบฆ่ถŠๅฟซใ€‚ๅฆ‚ๆŒ‡ๅฎšไป…ๆ‰ง่กŒ่ฏญไน‰ไพๅญ˜ๅˆ†ๆž๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "BqEmDMGGOtk3", + "outputId": "2a0d392f-b99a-4a18-fc7f-754e2abe2e34" + }, + "outputs": [], + "source": [ + "doc = HanLP('2021ๅนดHanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚', tasks='sdp')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ฟ”ๅ›žๅ€ผไธบไธ€ไธช[Document](https://hanlp.hankcs.com/docs/api/common/document.html):" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " 
\"tok/fine\": [\n", + " \"2021ๅนด\",\n", + " \"HanLPv2.1\",\n", + " \"ไธบ\",\n", + " \"็”Ÿไบง\",\n", + " \"็Žฏๅขƒ\",\n", + " \"ๅธฆๆฅ\",\n", + " \"ๆฌก\",\n", + " \"ไธ–ไปฃ\",\n", + " \"ๆœ€\",\n", + " \"ๅ…ˆ่ฟ›\",\n", + " \"็š„\",\n", + " \"ๅคš\",\n", + " \"่ฏญ็ง\",\n", + " \"NLP\",\n", + " \"ๆŠ€ๆœฏ\",\n", + " \"ใ€‚\"\n", + " ],\n", + " \"sdp\": [\n", + " [[6, \"Time\"]],\n", + " [[6, \"Exp\"]],\n", + " [[5, \"mPrep\"]],\n", + " [[5, \"Desc\"]],\n", + " [[6, \"Datv\"]],\n", + " [[13, \"dDesc\"]],\n", + " [[0, \"Root\"], [8, \"Desc\"], [13, \"Desc\"]],\n", + " [[15, \"Time\"]],\n", + " [[10, \"mDegr\"]],\n", + " [[15, \"Desc\"]],\n", + " [[10, \"mAux\"]],\n", + " [[8, \"Quan\"], [13, \"Quan\"]],\n", + " [[15, \"Desc\"]],\n", + " [[15, \"Nmod\"]],\n", + " [[6, \"Pat\"]],\n", + " [[6, \"mPunc\"]]\n", + " ]\n", + "}\n" + ] + } + ], + "source": [ + "print(doc)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`doc['sdp']`ๅญ—ๆฎตไปฃ่กจ่ฏญไน‰ไพๅญ˜ๅ›พ็š„ๆ•ฐ็ป„ๆ ผๅผ๏ผŒๆ•ฐ็ป„ไธญ็ฌฌ`i`ไธชๅญๆ•ฐ็ป„ไปฃ่กจ็ฌฌ`i`ไธชๅ•่ฏ็š„่ฏญไน‰ไพๅญ˜ๅ…ณ็ณป๏ผŒๅญๆ•ฐ็ป„ไธญๆฏไธชไบŒๅ…ƒ็ป„็š„ๆ ผๅผไธบ`[ไธญๅฟƒ่ฏ็š„ไธ‹ๆ ‡, ไธŽไธญๅฟƒ่ฏ็š„่ฏญไน‰ไพๅญ˜ๅ…ณ็ณป]`ใ€‚ๆฏไธชๅ•่ฏ็š„่ฏญไน‰ไพๅญ˜ๅ…ณ็ณปๅฏ่ƒฝๆœ‰้›ถไธชใ€ไธ€ไธชๆˆ–ๅคšไธช๏ผˆไปปๆ„ๆ•ฐ้‡๏ผ‰ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ฝฌๆขไธบ[CoNLLSentence](https://hanlp.hankcs.com/docs/api/common/conll.html#hanlp_common.conll.CoNLLSentence)ๆ ผๅผๆ›ดๅฎนๆ˜“่ง‚ๅฏŸ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\t2021ๅนด\t_\t_\t_\t_\t_\t_\t6:Time\t_\n", + "2\tHanLPv2.1\t_\t_\t_\t_\t_\t_\t6:Exp\t_\n", + "3\tไธบ\t_\t_\t_\t_\t_\t_\t5:mPrep\t_\n", + "4\t็”Ÿไบง\t_\t_\t_\t_\t_\t_\t5:Desc\t_\n", + "5\t็Žฏๅขƒ\t_\t_\t_\t_\t_\t_\t6:Datv\t_\n", + "6\tๅธฆๆฅ\t_\t_\t_\t_\t_\t_\t13:dDesc\t_\n", + "7\tๆฌก\t_\t_\t_\t_\t_\t_\t0:Root|8:Desc|13:Desc\t_\n", + 
"8\tไธ–ไปฃ\t_\t_\t_\t_\t_\t_\t15:Time\t_\n", + "9\tๆœ€\t_\t_\t_\t_\t_\t_\t10:mDegr\t_\n", + "10\tๅ…ˆ่ฟ›\t_\t_\t_\t_\t_\t_\t15:Desc\t_\n", + "11\t็š„\t_\t_\t_\t_\t_\t_\t10:mAux\t_\n", + "12\tๅคš\t_\t_\t_\t_\t_\t_\t8:Quan|13:Quan\t_\n", + "13\t่ฏญ็ง\t_\t_\t_\t_\t_\t_\t15:Desc\t_\n", + "14\tNLP\t_\t_\t_\t_\t_\t_\t15:Nmod\t_\n", + "15\tๆŠ€ๆœฏ\t_\t_\t_\t_\t_\t_\t6:Pat\t_\n", + "16\tใ€‚\t_\t_\t_\t_\t_\t_\t6:mPunc\t_\n" + ] + } + ], + "source": [ + "print(doc.to_conll())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XOsWkOqQfzlr" + }, + "source": [ + "ไธบๅทฒๅˆ†่ฏ็š„ๅฅๅญๆ‰ง่กŒ่ฏญไน‰ไพๅญ˜ๅˆ†ๆž๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "bLZSTbv_f3OA", + "outputId": "111c0be9-bac6-4eee-d5bd-a972ffc34844" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\tHanLP\t_\t_\t_\t_\t_\t_\t5:Exp\t_\n", + "2\tไธบ\t_\t_\t_\t_\t_\t_\t4:mPrep\t_\n", + "3\t็”Ÿไบง\t_\t_\t_\t_\t_\t_\t4:Desc\t_\n", + "4\t็Žฏๅขƒ\t_\t_\t_\t_\t_\t_\t5:Datv\t_\n", + "5\tๅธฆๆฅ\t_\t_\t_\t_\t_\t_\t0:Root\t_\n", + "6\tๆฌกไธ–ไปฃ\t_\t_\t_\t_\t_\t_\t12:Time\t_\n", + "7\tๆœ€\t_\t_\t_\t_\t_\t_\t8:mDegr\t_\n", + "8\tๅ…ˆ่ฟ›\t_\t_\t_\t_\t_\t_\t12:Desc\t_\n", + "9\t็š„\t_\t_\t_\t_\t_\t_\t8:mAux\t_\n", + "10\tๅคš่ฏญ็ง\t_\t_\t_\t_\t_\t_\t12:Desc\t_\n", + "11\tNLP\t_\t_\t_\t_\t_\t_\t12:Nmod\t_\n", + "12\tๆŠ€ๆœฏ\t_\t_\t_\t_\t_\t_\t5:Pat\t_\n", + "13\tใ€‚\t_\t_\t_\t_\t_\t_\t5:mPunc\t_\n", + "\n", + "1\tๆˆ‘\t_\t_\t_\t_\t_\t_\t3:Poss\t_\n", + "2\t็š„\t_\t_\t_\t_\t_\t_\t1:mAux\t_\n", + "3\tๅธŒๆœ›\t_\t_\t_\t_\t_\t_\t4:Exp\t_\n", + "4\tๆ˜ฏ\t_\t_\t_\t_\t_\t_\t11:mMod\t_\n", + "5\tๅธŒๆœ›\t_\t_\t_\t_\t_\t_\t4:dClas\t_\n", + "6\tๅผ ๆ™š้œž\t_\t_\t_\t_\t_\t_\t8:Poss\t_\n", + "7\t็š„\t_\t_\t_\t_\t_\t_\t6:mAux\t_\n", + "8\t่ƒŒๅฝฑ\t_\t_\t_\t_\t_\t_\t11:Pat\t_\n", + "9\t่ขซ\t_\t_\t_\t_\t_\t_\t10:mPrep\t_\n", + "10\tๆ™š้œž\t_\t_\t_\t_\t_\t_\t11:Exp\t_\n", + "11\tๆ˜ 
็บข\t_\t_\t_\t_\t_\t_\t5:dCont\t_\n", + "12\tใ€‚\t_\t_\t_\t_\t_\t_\t4:mPunc\t_\n" + ] + } + ], + "source": [ + "print(HanLP([\n", + " [\"HanLP\", \"ไธบ\", \"็”Ÿไบง\", \"็Žฏๅขƒ\", \"ๅธฆๆฅ\", \"ๆฌกไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš่ฏญ็ง\", \"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"],\n", + " [\"ๆˆ‘\", \"็š„\", \"ๅธŒๆœ›\", \"ๆ˜ฏ\", \"ๅธŒๆœ›\", \"ๅผ ๆ™š้œž\", \"็š„\", \"่ƒŒๅฝฑ\", \"่ขซ\", \"ๆ™š้œž\", \"ๆ˜ ็บข\", \"ใ€‚\"]\n", + " ], tasks='sdp', skip_tasks='tok*').to_conll())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### ๆณจๆ„\n", + "Native API็š„่พ“ๅ…ฅๅ•ไฝ้™ๅฎšไธบๅฅๅญ๏ผŒ้œ€ไฝฟ็”จ[ๅคš่ฏญ็งๅˆ†ๅฅๆจกๅž‹](https://github.com/hankcs/HanLP/blob/master/plugins/hanlp_demo/hanlp_demo/sent_split.py)ๆˆ–[ๅŸบไบŽ่ง„ๅˆ™็š„ๅˆ†ๅฅๅ‡ฝๆ•ฐ](https://github.com/hankcs/HanLP/blob/master/hanlp/utils/rules.py#L19)ๅ…ˆ่กŒๅˆ†ๅฅใ€‚RESTfulๅŒๆ—ถๆ”ฏๆŒๅ…จๆ–‡ใ€ๅฅๅญใ€ๅทฒๅˆ†่ฏ็š„ๅฅๅญใ€‚้™คๆญคไน‹ๅค–๏ผŒRESTfulๅ’Œnativeไธค็งAPI็š„่ฏญไน‰่ฎพ่ฎกๅฎŒๅ…จไธ€่‡ด๏ผŒ็”จๆˆทๅฏไปฅๆ— ็ผไบ’ๆขใ€‚" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "sdp_mtl.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/plugins/hanlp_demo/hanlp_demo/zh/sdp_restful.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/sdp_restful.ipynb new file mode 100644 index 000000000..53a5afd7d --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/sdp_restful.ipynb @@ -0,0 +1,261 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IYwV-UkNNzFp" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1Uf_u7ddMhUt" + }, + "outputs": [], + "source": [ + "!pip install hanlp_restful -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pp-1KqEOOJ4t" + }, + "source": [ + "## ๅˆ›ๅปบๅฎขๆˆท็ซฏ" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "0tmKBu7sNAXX" + }, + "outputs": [], + "source": [ + "from hanlp_restful import HanLPClient\n", + "HanLP = HanLPClient('https://www.hanlp.com/api', auth=None, language='zh') # authไธๅกซๅˆ™ๅŒฟๅ๏ผŒzhไธญๆ–‡๏ผŒmulๅคš่ฏญ็ง" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EmZDmLn9aGxG" + }, + "source": [ + "#### ็”ณ่ฏท็ง˜้’ฅ\n", + "็”ฑไบŽๆœๅŠกๅ™จ็ฎ—ๅŠ›ๆœ‰้™๏ผŒๅŒฟๅ็”จๆˆทๆฏๅˆ†้’Ÿ้™2ๆฌก่ฐƒ็”จใ€‚ๅฆ‚ๆžœไฝ ้œ€่ฆๆ›ดๅคš่ฐƒ็”จๆฌกๆ•ฐ๏ผŒ[ๅปบ่ฎฎ็”ณ่ฏทๅ…่ดนๅ…ฌ็›ŠAPI็ง˜้’ฅauth](https://bbs.hanlp.com/t/hanlp2-1-restful-api/53)ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ่ฏญไน‰ไพๅญ˜ๅˆ†ๆž\n", + "ไปปๅŠก่ถŠๅฐ‘๏ผŒ้€Ÿๅบฆ่ถŠๅฟซใ€‚ๅฆ‚ๆŒ‡ๅฎšไป…ๆ‰ง่กŒ่ฏญไน‰ไพๅญ˜ๅˆ†ๆž๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "BqEmDMGGOtk3", + "outputId": "2a0d392f-b99a-4a18-fc7f-754e2abe2e34" + }, + "outputs": [], + "source": [ + "doc = HanLP('2021ๅนดHanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚', tasks='sdp')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ฟ”ๅ›žๅ€ผไธบไธ€ไธช[Document](https://hanlp.hankcs.com/docs/api/common/document.html):" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "{\n", + " \"tok/fine\": [\n", + " [\"2021ๅนด\", \"HanLPv2.1\", \"ไธบ\", \"็”Ÿไบง\", \"็Žฏๅขƒ\", \"ๅธฆๆฅ\", \"ๆฌก\", \"ไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš\", \"่ฏญ็ง\", \"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"]\n", + " ],\n", + " \"sdp\": [\n", + " [[[6, \"Time\"]], [[6, \"Agt\"]], [[5, \"mPrep\"]], [[5, \"Desc\"]], [[6, \"Datv\"]], [[0, \"Root\"]], [[8, \"Qp\"]], [[15, \"TDur\"]], [[10, \"mDegr\"]], [[15, \"Desc\"]], [[10, \"mAux\"]], [[13, \"Quan\"]], [[15, \"Desc\"]], [[15, \"Nmod\"]], [[6, \"Cont\"]], [[6, \"mPunc\"]]]\n", + " ]\n", + "}\n" + ] + } + ], + "source": [ + "print(doc)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`doc['sdp']`ๅญ—ๆฎตไปฃ่กจ่ฏญไน‰ไพๅญ˜ๅ›พ็š„ๆ•ฐ็ป„ๆ ผๅผ๏ผŒๆ•ฐ็ป„ไธญ็ฌฌ`i`ไธชๅญๆ•ฐ็ป„ไปฃ่กจ็ฌฌ`i`ไธชๅ•่ฏ็š„่ฏญไน‰ไพๅญ˜ๅ…ณ็ณป๏ผŒๅญๆ•ฐ็ป„ไธญๆฏไธชไบŒๅ…ƒ็ป„็š„ๆ ผๅผไธบ`[ไธญๅฟƒ่ฏ็š„ไธ‹ๆ ‡, ไธŽไธญๅฟƒ่ฏ็š„่ฏญไน‰ไพๅญ˜ๅ…ณ็ณป]`ใ€‚ๆฏไธชๅ•่ฏ็š„่ฏญไน‰ไพๅญ˜ๅ…ณ็ณปๅฏ่ƒฝๆœ‰้›ถไธชใ€ไธ€ไธชๆˆ–ๅคšไธช๏ผˆไปปๆ„ๆ•ฐ้‡๏ผ‰ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ฝฌๆขไธบ[CoNLLSentence](https://hanlp.hankcs.com/docs/api/common/conll.html#hanlp_common.conll.CoNLLSentence)ๆ ผๅผๆ›ดๅฎนๆ˜“่ง‚ๅฏŸ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\t2021ๅนด\t_\t_\t_\t_\t_\t_\t6:Time\t_\n", + "2\tHanLPv2.1\t_\t_\t_\t_\t_\t_\t6:Agt\t_\n", + "3\tไธบ\t_\t_\t_\t_\t_\t_\t5:mPrep\t_\n", + "4\t็”Ÿไบง\t_\t_\t_\t_\t_\t_\t5:Desc\t_\n", + "5\t็Žฏๅขƒ\t_\t_\t_\t_\t_\t_\t6:Datv\t_\n", + "6\tๅธฆๆฅ\t_\t_\t_\t_\t_\t_\t0:Root\t_\n", + "7\tๆฌก\t_\t_\t_\t_\t_\t_\t8:Qp\t_\n", + "8\tไธ–ไปฃ\t_\t_\t_\t_\t_\t_\t15:TDur\t_\n", + "9\tๆœ€\t_\t_\t_\t_\t_\t_\t10:mDegr\t_\n", + "10\tๅ…ˆ่ฟ›\t_\t_\t_\t_\t_\t_\t15:Desc\t_\n", + "11\t็š„\t_\t_\t_\t_\t_\t_\t10:mAux\t_\n", + "12\tๅคš\t_\t_\t_\t_\t_\t_\t13:Quan\t_\n", + "13\t่ฏญ็ง\t_\t_\t_\t_\t_\t_\t15:Desc\t_\n", + 
"14\tNLP\t_\t_\t_\t_\t_\t_\t15:Nmod\t_\n", + "15\tๆŠ€ๆœฏ\t_\t_\t_\t_\t_\t_\t6:Cont\t_\n", + "16\tใ€‚\t_\t_\t_\t_\t_\t_\t6:mPunc\t_\n" + ] + } + ], + "source": [ + "print(doc.to_conll())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XOsWkOqQfzlr" + }, + "source": [ + "ไธบๅทฒๅˆ†่ฏ็š„ๅฅๅญๆ‰ง่กŒ่ฏญไน‰ไพๅญ˜ๅˆ†ๆž๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "bLZSTbv_f3OA", + "outputId": "111c0be9-bac6-4eee-d5bd-a972ffc34844" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\tHanLP\t_\t_\t_\t_\t_\t_\t5:Agt\t_\n", + "2\tไธบ\t_\t_\t_\t_\t_\t_\t4:mPrep\t_\n", + "3\t็”Ÿไบง\t_\t_\t_\t_\t_\t_\t4:Desc\t_\n", + "4\t็Žฏๅขƒ\t_\t_\t_\t_\t_\t_\t5:Datv\t_\n", + "5\tๅธฆๆฅ\t_\t_\t_\t_\t_\t_\t0:Root\t_\n", + "6\tๆฌกไธ–ไปฃ\t_\t_\t_\t_\t_\t_\t12:Time\t_\n", + "7\tๆœ€\t_\t_\t_\t_\t_\t_\t8:mDegr\t_\n", + "8\tๅ…ˆ่ฟ›\t_\t_\t_\t_\t_\t_\t12:Desc\t_\n", + "9\t็š„\t_\t_\t_\t_\t_\t_\t8:mAux\t_\n", + "10\tๅคš่ฏญ็ง\t_\t_\t_\t_\t_\t_\t12:Desc\t_\n", + "11\tNLP\t_\t_\t_\t_\t_\t_\t12:Nmod\t_\n", + "12\tๆŠ€ๆœฏ\t_\t_\t_\t_\t_\t_\t5:Cont\t_\n", + "13\tใ€‚\t_\t_\t_\t_\t_\t_\t5:mPunc\t_\n", + "\n", + "1\tๆˆ‘\t_\t_\t_\t_\t_\t_\t3:Poss\t_\n", + "2\t็š„\t_\t_\t_\t_\t_\t_\t1:mAux\t_\n", + "3\tๅธŒๆœ›\t_\t_\t_\t_\t_\t_\t0:Root|4:Exp\t_\n", + "4\tๆ˜ฏ\t_\t_\t_\t_\t_\t_\t5:mMod\t_\n", + "5\tๅธŒๆœ›\t_\t_\t_\t_\t_\t_\t4:dClas\t_\n", + "6\tๅผ ๆ™š้œž\t_\t_\t_\t_\t_\t_\t8:Poss\t_\n", + "7\t็š„\t_\t_\t_\t_\t_\t_\t6:mAux\t_\n", + "8\t่ƒŒๅฝฑ\t_\t_\t_\t_\t_\t_\t11:Pat\t_\n", + "9\t่ขซ\t_\t_\t_\t_\t_\t_\t10:mPrep\t_\n", + "10\tๆ™š้œž\t_\t_\t_\t_\t_\t_\t11:Exp\t_\n", + "11\tๆ˜ ็บข\t_\t_\t_\t_\t_\t_\t5:dCont\t_\n", + "12\tใ€‚\t_\t_\t_\t_\t_\t_\t5:mPunc\t_\n" + ] + } + ], + "source": [ + "print(HanLP(tokens=[\n", + " [\"HanLP\", \"ไธบ\", \"็”Ÿไบง\", \"็Žฏๅขƒ\", \"ๅธฆๆฅ\", \"ๆฌกไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš่ฏญ็ง\", \"NLP\", 
\"ๆŠ€ๆœฏ\", \"ใ€‚\"],\n", + " [\"ๆˆ‘\", \"็š„\", \"ๅธŒๆœ›\", \"ๆ˜ฏ\", \"ๅธŒๆœ›\", \"ๅผ ๆ™š้œž\", \"็š„\", \"่ƒŒๅฝฑ\", \"่ขซ\", \"ๆ™š้œž\", \"ๆ˜ ็บข\", \"ใ€‚\"]\n", + " ], tasks='sdp').to_conll())" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "sdp_restful.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/plugins/hanlp_demo/hanlp_demo/zh/sdp_stl.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/sdp_stl.ipynb new file mode 100644 index 000000000..c42e93b76 --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/sdp_stl.ipynb @@ -0,0 +1,401 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nf9TgeCTC0OT" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jaW4eu6kC0OU", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "!pip install hanlp -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_xI_bLAaC0OU" + }, + "source": [ + "## ๅŠ ่ฝฝๆจกๅž‹\n", + "HanLP็š„ๅทฅไฝœๆต็จ‹ๆ˜ฏๅ…ˆๅŠ ่ฝฝๆจกๅž‹๏ผŒๆจกๅž‹็š„ๆ ‡็คบ็ฌฆๅญ˜ๅ‚จๅœจ`hanlp.pretrained`่ฟ™ไธชๅŒ…ไธญ๏ผŒๆŒ‰็…งNLPไปปๅŠกๅฝ’็ฑปใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IYwV-UkNNzFp", + "outputId": "54065443-9b0a-444c-f6c0-c701bc86400b", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'SEMEVAL16_NEWS_BIAFFINE_ZH': 'https://file.hankcs.com/hanlp/sdp/semeval16-news-biaffine_20191231_235407.zip',\n", + " 'SEMEVAL16_TEXT_BIAFFINE_ZH': 'https://file.hankcs.com/hanlp/sdp/semeval16-text-biaffine_20200101_002257.zip',\n", + " 'SEMEVAL16_ALL_ELECTRA_SMALL_ZH': 'https://file.hankcs.com/hanlp/sdp/semeval16_sdp_electra_small_20220208_122026.zip',\n", + " 'SEMEVAL15_PAS_BIAFFINE_EN': 'https://file.hankcs.com/hanlp/sdp/semeval15_biaffine_pas_20200103_152405.zip',\n", + " 'SEMEVAL15_PSD_BIAFFINE_EN': 'https://file.hankcs.com/hanlp/sdp/semeval15_biaffine_psd_20200106_123009.zip',\n", + " 'SEMEVAL15_DM_BIAFFINE_EN': 'https://file.hankcs.com/hanlp/sdp/semeval15_biaffine_dm_20200106_122808.zip'}" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import hanlp\n", + "hanlp.pretrained.sdp.ALL # ่ฏญ็ง่งๅ็งฐๆœ€ๅŽไธ€ไธชๅญ—ๆฎตๆˆ–็›ธๅบ”่ฏญๆ–™ๅบ“" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1Uf_u7ddMhUt", + "pycharm": { + "name": "#%% md\n" 
+ } + }, + "source": [ + "่ฐƒ็”จ`hanlp.load`่ฟ›่กŒๅŠ ่ฝฝ๏ผŒๆจกๅž‹ไผš่‡ชๅŠจไธ‹่ฝฝๅˆฐๆœฌๅœฐ็ผ“ๅญ˜ใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "pp-1KqEOOJ4t", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "sdp = hanlp.load('SEMEVAL16_ALL_ELECTRA_SMALL_ZH')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ่ฏญไน‰ไพๅญ˜ๅˆ†ๆž\n", + "่ฏญไน‰ไพๅญ˜ๅˆ†ๆž็š„่พ“ๅ…ฅไธบๅทฒๅˆ†่ฏ็š„ไธ€ไธชๆˆ–ๅคšไธชๅฅๅญ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "BqEmDMGGOtk3" + }, + "outputs": [], + "source": [ + "graph = sdp([\"2021ๅนด\", \"HanLPv2.1\", \"ไธบ\", \"็”Ÿไบง\", \"็Žฏๅขƒ\", \"ๅธฆๆฅ\", \"ๆฌก\", \"ไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš\", \"่ฏญ็ง\", \"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SwaPn1hjC0OW" + }, + "source": [ + "่ฟ”ๅ›žๅฏน่ฑกไธบ[CoNLLSentence](https://hanlp.hankcs.com/docs/api/common/conll.html#hanlp_common.conll.CoNLLSentence)็ฑปๅž‹๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "egpWwHKxC0OX", + "outputId": "f7c77687-dd75-4fa2-dbd2-be6bda8a3fff" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'id': 1,\n", + " 'form': '2021ๅนด',\n", + " 'upos': None,\n", + " 'xpos': None,\n", + " 'head': None,\n", + " 'deprel': None,\n", + " 'lemma': None,\n", + " 'feats': None,\n", + " 'deps': [(6, 'Time')],\n", + " 'misc': None},\n", + " {'id': 2,\n", + " 'form': 'HanLPv2.1',\n", + " 'upos': None,\n", + " 'xpos': None,\n", + " 'head': None,\n", + " 'deprel': None,\n", + " 'lemma': None,\n", + " 'feats': None,\n", + " 'deps': [(6, 'Exp')],\n", + " 'misc': None},\n", + " {'id': 3,\n", + " 'form': 'ไธบ',\n", + " 'upos': None,\n", + " 'xpos': None,\n", + " 'head': None,\n", + " 'deprel': None,\n", + " 'lemma': None,\n", + " 'feats': None,\n", + " 
'deps': [(5, 'mPrep')],\n", + " 'misc': None},\n", + " {'id': 4,\n", + " 'form': '็”Ÿไบง',\n", + " 'upos': None,\n", + " 'xpos': None,\n", + " 'head': None,\n", + " 'deprel': None,\n", + " 'lemma': None,\n", + " 'feats': None,\n", + " 'deps': [(5, 'Desc')],\n", + " 'misc': None},\n", + " {'id': 5,\n", + " 'form': '็Žฏๅขƒ',\n", + " 'upos': None,\n", + " 'xpos': None,\n", + " 'head': None,\n", + " 'deprel': None,\n", + " 'lemma': None,\n", + " 'feats': None,\n", + " 'deps': [(6, 'Datv')],\n", + " 'misc': None},\n", + " {'id': 6,\n", + " 'form': 'ๅธฆๆฅ',\n", + " 'upos': None,\n", + " 'xpos': None,\n", + " 'head': None,\n", + " 'deprel': None,\n", + " 'lemma': None,\n", + " 'feats': None,\n", + " 'deps': [(2, 'eSucc')],\n", + " 'misc': None},\n", + " {'id': 7,\n", + " 'form': 'ๆฌก',\n", + " 'upos': None,\n", + " 'xpos': None,\n", + " 'head': None,\n", + " 'deprel': None,\n", + " 'lemma': None,\n", + " 'feats': None,\n", + " 'deps': [(8, 'Desc'), (13, 'Desc')],\n", + " 'misc': None},\n", + " {'id': 8,\n", + " 'form': 'ไธ–ไปฃ',\n", + " 'upos': None,\n", + " 'xpos': None,\n", + " 'head': None,\n", + " 'deprel': None,\n", + " 'lemma': None,\n", + " 'feats': None,\n", + " 'deps': [(0, 'Root'), (15, 'Time')],\n", + " 'misc': None},\n", + " {'id': 9,\n", + " 'form': 'ๆœ€',\n", + " 'upos': None,\n", + " 'xpos': None,\n", + " 'head': None,\n", + " 'deprel': None,\n", + " 'lemma': None,\n", + " 'feats': None,\n", + " 'deps': [(10, 'mDegr')],\n", + " 'misc': None},\n", + " {'id': 10,\n", + " 'form': 'ๅ…ˆ่ฟ›',\n", + " 'upos': None,\n", + " 'xpos': None,\n", + " 'head': None,\n", + " 'deprel': None,\n", + " 'lemma': None,\n", + " 'feats': None,\n", + " 'deps': [(15, 'Desc')],\n", + " 'misc': None},\n", + " {'id': 11,\n", + " 'form': '็š„',\n", + " 'upos': None,\n", + " 'xpos': None,\n", + " 'head': None,\n", + " 'deprel': None,\n", + " 'lemma': None,\n", + " 'feats': None,\n", + " 'deps': [(10, 'mAux')],\n", + " 'misc': None},\n", + " {'id': 12,\n", + " 'form': 'ๅคš',\n", + " 
'upos': None,\n", + " 'xpos': None,\n", + " 'head': None,\n", + " 'deprel': None,\n", + " 'lemma': None,\n", + " 'feats': None,\n", + " 'deps': [(10, 'mDegr'), (13, 'Quan')],\n", + " 'misc': None},\n", + " {'id': 13,\n", + " 'form': '่ฏญ็ง',\n", + " 'upos': None,\n", + " 'xpos': None,\n", + " 'head': None,\n", + " 'deprel': None,\n", + " 'lemma': None,\n", + " 'feats': None,\n", + " 'deps': [(15, 'Desc')],\n", + " 'misc': None},\n", + " {'id': 14,\n", + " 'form': 'NLP',\n", + " 'upos': None,\n", + " 'xpos': None,\n", + " 'head': None,\n", + " 'deprel': None,\n", + " 'lemma': None,\n", + " 'feats': None,\n", + " 'deps': [(15, 'Desc')],\n", + " 'misc': None},\n", + " {'id': 15,\n", + " 'form': 'ๆŠ€ๆœฏ',\n", + " 'upos': None,\n", + " 'xpos': None,\n", + " 'head': None,\n", + " 'deprel': None,\n", + " 'lemma': None,\n", + " 'feats': None,\n", + " 'deps': [(6, 'Pat')],\n", + " 'misc': None},\n", + " {'id': 16,\n", + " 'form': 'ใ€‚',\n", + " 'upos': None,\n", + " 'xpos': None,\n", + " 'head': None,\n", + " 'deprel': None,\n", + " 'lemma': None,\n", + " 'feats': None,\n", + " 'deps': [(6, 'mPunc')],\n", + " 'misc': None}]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "graph" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kq_j5TLFC0OX" + }, + "source": [ + "ๆ‰“ๅฐไธบไธบCoNLLๆ ผๅผ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "isJhzYyIC0OX", + "outputId": "683c8489-dffc-426e-f95b-e91dfb373260" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\t2021ๅนด\t_\t_\t_\t_\t_\t_\t6:Time\t_\n", + "2\tHanLPv2.1\t_\t_\t_\t_\t_\t_\t6:Exp\t_\n", + "3\tไธบ\t_\t_\t_\t_\t_\t_\t5:mPrep\t_\n", + "4\t็”Ÿไบง\t_\t_\t_\t_\t_\t_\t5:Desc\t_\n", + "5\t็Žฏๅขƒ\t_\t_\t_\t_\t_\t_\t6:Datv\t_\n", + "6\tๅธฆๆฅ\t_\t_\t_\t_\t_\t_\t2:eSucc\t_\n", + 
"7\tๆฌก\t_\t_\t_\t_\t_\t_\t8:Desc|13:Desc\t_\n", + "8\tไธ–ไปฃ\t_\t_\t_\t_\t_\t_\t0:Root|15:Time\t_\n", + "9\tๆœ€\t_\t_\t_\t_\t_\t_\t10:mDegr\t_\n", + "10\tๅ…ˆ่ฟ›\t_\t_\t_\t_\t_\t_\t15:Desc\t_\n", + "11\t็š„\t_\t_\t_\t_\t_\t_\t10:mAux\t_\n", + "12\tๅคš\t_\t_\t_\t_\t_\t_\t10:mDegr|13:Quan\t_\n", + "13\t่ฏญ็ง\t_\t_\t_\t_\t_\t_\t15:Desc\t_\n", + "14\tNLP\t_\t_\t_\t_\t_\t_\t15:Desc\t_\n", + "15\tๆŠ€ๆœฏ\t_\t_\t_\t_\t_\t_\t6:Pat\t_\n", + "16\tใ€‚\t_\t_\t_\t_\t_\t_\t6:mPunc\t_\n" + ] + } + ], + "source": [ + "print(graph)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "sdp_stl.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/plugins/hanlp_demo/hanlp_demo/zh/sentiment_restful.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/sentiment_restful.ipynb new file mode 100644 index 000000000..24e65e3d0 --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/sentiment_restful.ipynb @@ -0,0 +1,272 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nf9TgeCTC0OT" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jaW4eu6kC0OU", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "!pip install hanlp_restful -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_xI_bLAaC0OU" + }, + "source": [ + "## ๅˆ›ๅปบๅฎขๆˆท็ซฏ" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IYwV-UkNNzFp", + "outputId": "54065443-9b0a-444c-f6c0-c701bc86400b", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "from hanlp_restful import HanLPClient\n", + "HanLP = HanLPClient('https://www.hanlp.com/api', auth=None, language='zh') # authไธๅกซๅˆ™ๅŒฟๅ๏ผŒzhไธญๆ–‡๏ผŒmulๅคš่ฏญ็ง" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1Uf_u7ddMhUt", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "#### ็”ณ่ฏท็ง˜้’ฅ\n", + "็”ฑไบŽๆœๅŠกๅ™จ็ฎ—ๅŠ›ๆœ‰้™๏ผŒๅŒฟๅ็”จๆˆทๆฏๅˆ†้’Ÿ้™2ๆฌก่ฐƒ็”จใ€‚ๅฆ‚ๆžœไฝ ้œ€่ฆๆ›ดๅคš่ฐƒ็”จๆฌกๆ•ฐ๏ผŒ[ๅปบ่ฎฎ็”ณ่ฏทๅ…่ดนๅ…ฌ็›ŠAPI็ง˜้’ฅauth](https://bbs.hanlp.com/t/hanlp2-1-restful-api/53)ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ๆƒ…ๆ„Ÿๅˆ†ๆž\n", + "ๆƒ…ๆ„Ÿๅˆ†ๆžไปปๅŠก็š„่พ“ๅ…ฅไธบๆ–‡ๆกฃ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "BqEmDMGGOtk3" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.8418035507202148" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "HanLP.sentiment_analysis('2021ๅนด HanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SwaPn1hjC0OW" + 
}, + "source": [ + "่ฟ”ๅ›žๅ€ผไธบๆ–‡ๆกฃ็š„ๆƒ…ๆ„Ÿๆžๆ€ง๏ผŒ่กจ็คบไธบ$[-1, +1]$ไน‹้—ด็š„ๆ•ฐๅ€ผ๏ผŒๆ•ฐๅ€ผ็š„ๆญฃ่ดŸไปฃ่กจๆญฃ่ดŸ้ขๆƒ…็ปช๏ผŒๆ•ฐๅ€ผ็š„็ปๅฏนๅ€ผไปฃ่กจๆƒ…ๆ„Ÿ็š„ๅผบ็ƒˆ็จ‹ๅบฆใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "egpWwHKxC0OX", + "outputId": "f7c77687-dd75-4fa2-dbd2-be6bda8a3fff" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.8327275514602661" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "HanLP.sentiment_analysis('็œ‹ๅ“ญไบ†ใ€‚ๆ„Ÿไบบ่‚บ่…‘ใ€‚')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kq_j5TLFC0OX" + }, + "source": [ + "ๆณจๆ„่ฟ”ๅ›žๅ€ผ็š„็ฌฆๅทไปฃ่กจๆญฃ่ดŸๆƒ…ๆ„Ÿ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "isJhzYyIC0OX", + "outputId": "683c8489-dffc-426e-f95b-e91dfb373260" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "-0.8850911855697632" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "HanLP.sentiment_analysis('็œ‹ๅ“ญไบ†ใ€‚้šพ็œ‹ๅ“ญไบ†ใ€‚')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "็ปๅฏนๅ€ผ็š„ๅคงๅฐไปฃ่กจๆƒ…ๆ„Ÿ็š„ๅผบ็ƒˆ็จ‹ๅบฆ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "-0.9190718531608582" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "HanLP.sentiment_analysis('็œ‹ๅ“ญไบ†ใ€‚้šพ็œ‹ๅ“ญไบ†๏ผ๏ผ๏ผ')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "้•ฟๆ–‡ๆกฃไธ€ๆ ทๆ”ฏๆŒ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9505730271339417" + ] + }, + "execution_count": 6, 
+ "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "text = '''โ€œ่ฟ™ๆ˜ฏไธ€้ƒจ็”ทไบบๅฟ…็œ‹็š„็”ตๅฝฑใ€‚โ€ไบบไบบ้ƒฝ่ฟ™ไนˆ่ฏดใ€‚ไฝ†ๅ•็บฏไปŽๆ€งๅˆซๅŒบๅˆ†๏ผŒๅฐฑไผš่ฎฉ่ฟ™็”ตๅฝฑๅ˜็‹ญ้š˜ใ€‚\n", + "ใ€Š่‚–็”ณๅ…‹็š„ๆ•‘่ตŽใ€‹็ช็ ดไบ†็”ทไบบ็”ตๅฝฑ็š„ๅฑ€้™๏ผŒ้€š็ฏ‡ๅ‡ ไนŽๅ……ๆปกไปคไบบ้šพไปฅ็ฝฎไฟก็š„ๆธฉ้ฆจๅŸบ่ฐƒ๏ผŒ่€Œ็”ตๅฝฑ้‡Œๆœ€ไผŸๅคง็š„ไธป้ข˜ๆ˜ฏโ€œๅธŒๆœ›โ€ใ€‚\n", + "ๅฝ“ๆˆ‘ไปฌๆ— ๅฅˆๅœฐ้‡ๅˆฐไบ†ๅฆ‚ๅŒ่‚–็”ณๅ…‹ไธ€่ˆฌๅ›š็ฆไบ†ๅฟƒ็ต่‡ช็”ฑ็š„้‚ฃ็งๅ›นๅœ„๏ผŒๆˆ‘ไปฌๆ˜ฏๆ— ๅฅˆ็š„่€ๅธƒ้ฒๅ…‹๏ผŒ็ฐๅฟƒ็š„็‘žๅพท๏ผŒ่ฟ˜ๆ˜ฏๆ™บๆ…ง็š„ๅฎ‰่ฟช๏ผŸ\n", + "่ฟ็”จๆ™บๆ…ง๏ผŒไฟกไปปๅธŒๆœ›๏ผŒๅนถไธ”ๅ‹‡ๆ•ข้ขๅฏนๆๆƒงๅฟƒ็†๏ผŒๅŽปๆ‰“่ดฅๅฎƒ๏ผŸ\n", + "็ปๅ…ธ็š„็”ตๅฝฑไน‹ๆ‰€ไปฅ็ปๅ…ธ๏ผŒๅ› ไธบไป–ไปฌ้ƒฝๅœจๅšๅŒไธ€ไปถไบ‹โ€”โ€”่ฎฉไฝ ไปŽไธๅŒ็š„่ง’ๅบฆๆฅๆฌฃ่ตๅธŒๆœ›็š„็พŽๅฅฝใ€‚'''\n", + "HanLP.sentiment_analysis(text)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "sentiment_restful.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/plugins/hanlp_demo/hanlp_demo/zh/srl_mtl.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/srl_mtl.ipynb new file mode 100644 index 000000000..e245b84cc --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/srl_mtl.ipynb @@ -0,0 +1,366 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IYwV-UkNNzFp" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1Uf_u7ddMhUt" + }, + "outputs": [], + "source": [ + "!pip install hanlp -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pp-1KqEOOJ4t" + }, + "source": [ + "## ๅŠ ่ฝฝๆจกๅž‹\n", + "HanLP็š„ๅทฅไฝœๆต็จ‹ๆ˜ฏๅ…ˆๅŠ ่ฝฝๆจกๅž‹๏ผŒๆจกๅž‹็š„ๆ ‡็คบ็ฌฆๅญ˜ๅ‚จๅœจ`hanlp.pretrained`่ฟ™ไธชๅŒ…ไธญ๏ผŒๆŒ‰็…งNLPไปปๅŠกๅฝ’็ฑปใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "0tmKBu7sNAXX" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'OPEN_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_SMALL_ZH': 'https://file.hankcs.com/hanlp/mtl/open_tok_pos_ner_srl_dep_sdp_con_electra_small_20201223_035557.zip',\n", + " 'OPEN_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH': 'https://file.hankcs.com/hanlp/mtl/open_tok_pos_ner_srl_dep_sdp_con_electra_base_20201223_201906.zip',\n", + " 'CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_SMALL_ZH': 'https://file.hankcs.com/hanlp/mtl/close_tok_pos_ner_srl_dep_sdp_con_electra_small_20210111_124159.zip',\n", + " 'CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH': 'https://file.hankcs.com/hanlp/mtl/close_tok_pos_ner_srl_dep_sdp_con_electra_base_20210111_124519.zip',\n", + " 'CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ERNIE_GRAM_ZH': 'https://file.hankcs.com/hanlp/mtl/close_tok_pos_ner_srl_dep_sdp_con_ernie_gram_base_aug_20210904_145403.zip',\n", + " 'UD_ONTONOTES_TOK_POS_LEM_FEA_NER_SRL_DEP_SDP_CON_MT5_SMALL': 'https://file.hankcs.com/hanlp/mtl/ud_ontonotes_tok_pos_lem_fea_ner_srl_dep_sdp_con_mt5_small_20210228_123458.zip',\n", + " 'UD_ONTONOTES_TOK_POS_LEM_FEA_NER_SRL_DEP_SDP_CON_XLMR_BASE': 'https://file.hankcs.com/hanlp/mtl/ud_ontonotes_tok_pos_lem_fea_ner_srl_dep_sdp_con_xlm_base_20210602_211620.zip',\n", + " 
'NPCMJ_UD_KYOTO_TOK_POS_CON_BERT_BASE_CHAR_JA': 'https://file.hankcs.com/hanlp/mtl/npcmj_ud_kyoto_tok_pos_ner_dep_con_srl_bert_base_char_ja_20210914_133742.zip'}" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import hanlp\n", + "hanlp.pretrained.mtl.ALL # MTLๅคšไปปๅŠก๏ผŒๅ…ทไฝ“ไปปๅŠก่งๆจกๅž‹ๅ็งฐ๏ผŒ่ฏญ็ง่งๅ็งฐๆœ€ๅŽไธ€ไธชๅญ—ๆฎตๆˆ–็›ธๅบ”่ฏญๆ–™ๅบ“" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EmZDmLn9aGxG" + }, + "source": [ + "่ฐƒ็”จ`hanlp.load`่ฟ›่กŒๅŠ ่ฝฝ๏ผŒๆจกๅž‹ไผš่‡ชๅŠจไธ‹่ฝฝๅˆฐๆœฌๅœฐ็ผ“ๅญ˜ใ€‚่‡ช็„ถ่ฏญ่จ€ๅค„็†ๅˆ†ไธบ่ฎธๅคšไปปๅŠก๏ผŒๅˆ†่ฏๅชๆ˜ฏๆœ€ๅˆ็บง็š„ไธ€ไธชใ€‚ไธŽๅ…ถๆฏไธชไปปๅŠกๅ•็‹ฌๅˆ›ๅปบไธ€ไธชๆจกๅž‹๏ผŒไธๅฆ‚ๅˆฉ็”จHanLP็š„่”ๅˆๆจกๅž‹ไธ€ๆฌกๆ€งๅฎŒๆˆๅคšไธชไปปๅŠก๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "HanLP = hanlp.load(hanlp.pretrained.mtl.CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ่ฏญไน‰่ง’่‰ฒๅˆ†ๆž\n", + "ไปปๅŠก่ถŠๅฐ‘๏ผŒ้€Ÿๅบฆ่ถŠๅฟซใ€‚ๅฆ‚ๆŒ‡ๅฎšไป…ๆ‰ง่กŒ่ฏญไน‰่ง’่‰ฒๅˆ†ๆž๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "BqEmDMGGOtk3", + "outputId": "2a0d392f-b99a-4a18-fc7f-754e2abe2e34" + }, + "outputs": [], + "source": [ + "doc = HanLP('2021ๅนดHanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚', tasks='srl')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ฟ”ๅ›žๅ€ผไธบไธ€ไธช[Document](https://hanlp.hankcs.com/docs/api/common/document.html):" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"tok/fine\": [\n", + " \"2021ๅนด\",\n", + " \"HanLPv2.1\",\n", + " 
\"ไธบ\",\n", + " \"็”Ÿไบง\",\n", + " \"็Žฏๅขƒ\",\n", + " \"ๅธฆๆฅ\",\n", + " \"ๆฌก\",\n", + " \"ไธ–ไปฃ\",\n", + " \"ๆœ€\",\n", + " \"ๅ…ˆ่ฟ›\",\n", + " \"็š„\",\n", + " \"ๅคš\",\n", + " \"่ฏญ็ง\",\n", + " \"NLP\",\n", + " \"ๆŠ€ๆœฏ\",\n", + " \"ใ€‚\"\n", + " ],\n", + " \"srl\": [\n", + " [[\"2021ๅนด\", \"ARGM-TMP\", 0, 1], [\"HanLPv2.1\", \"ARG0\", 1, 2], [\"ไธบ็”Ÿไบง็Žฏๅขƒ\", \"ARG2\", 2, 5], [\"ๅธฆๆฅ\", \"PRED\", 5, 6], [\"ๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏ\", \"ARG1\", 6, 15]],\n", + " [[\"ๆœ€\", \"ARGM-ADV\", 8, 9], [\"ๅ…ˆ่ฟ›\", \"PRED\", 9, 10], [\"ๆŠ€ๆœฏ\", \"ARG0\", 14, 15]]\n", + " ]\n", + "}\n" + ] + } + ], + "source": [ + "print(doc)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`doc['srl']`ๅญ—ๆฎตไธบ่ฏญไน‰่ง’่‰ฒๆ ‡ๆณจ็ป“ๆžœ๏ผŒๆฏไธชๅ››ๅ…ƒ็ป„็š„ๆ ผๅผไธบ`[่ฎบๅ…ƒๆˆ–่ฐ“่ฏ, ่ฏญไน‰่ง’่‰ฒๆ ‡็ญพ, ่ตทๅง‹ไธ‹ๆ ‡, ็ปˆๆญขไธ‹ๆ ‡]`ใ€‚ๅ…ถไธญ๏ผŒ่ฐ“่ฏ็š„่ฏญไน‰่ง’่‰ฒๆ ‡็ญพไธบ`PRED`๏ผŒ่ตทๆญขไธ‹ๆ ‡ๅฏนๅบ”ไปฅ`tok`ๅผ€ๅคด็š„็ฌฌไธ€ไธชๅ•่ฏๆ•ฐ็ป„ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wxctCigrTKu-" + }, + "source": [ + "ๅฏ่ง†ๅŒ–่ฐ“่ฏ่ฎบๅ…ƒ็ป“ๆž„๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zo08uquCTFSk", + "outputId": "c6077f2d-7084-4f4b-a3bc-9aa9951704ea" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Token \tSRL PA1 \tToken \tSRL PA2 \n", + "โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\n", + "2021ๅนด \tโ”€โ”€โ”€โ–บARGM-TMP\t2021ๅนด \t \n", + "HanLPv2.1\tโ”€โ”€โ”€โ–บARG0 \tHanLPv2.1\t \n", + "ไธบ \tโ—„โ”€โ” \tไธบ \t \n", + "็”Ÿไบง \t โ”œโ–บARG2 \t็”Ÿไบง \t \n", + "็Žฏๅขƒ \tโ—„โ”€โ”˜ \t็Žฏๅขƒ \t \n", + "ๅธฆๆฅ \tโ•Ÿโ”€โ”€โ–บPRED \tๅธฆๆฅ \t \n", + "ๆฌก \tโ—„โ”€โ” \tๆฌก \t \n", + "ไธ–ไปฃ \t โ”‚ \tไธ–ไปฃ \t \n", + "ๆœ€ \t โ”‚ \tๆœ€ \tโ”€โ”€โ”€โ–บARGM-ADV\n", + "ๅ…ˆ่ฟ› \t โ”‚ \tๅ…ˆ่ฟ› 
\tโ•Ÿโ”€โ”€โ–บPRED \n", + "็š„ \t โ”œโ–บARG1 \t็š„ \t \n", + "ๅคš \t โ”‚ \tๅคš \t \n", + "่ฏญ็ง \t โ”‚ \t่ฏญ็ง \t \n", + "NLP \t โ”‚ \tNLP \t \n", + "ๆŠ€ๆœฏ \tโ—„โ”€โ”˜ \tๆŠ€ๆœฏ \tโ”€โ”€โ”€โ–บARG0 \n", + "ใ€‚ \t \tใ€‚ \t \n" + ] + } + ], + "source": [ + "doc.pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "้ๅŽ†่ฐ“่ฏ่ฎบๅ…ƒ็ป“ๆž„๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "็ฌฌ1ไธช่ฐ“่ฏ่ฎบๅ…ƒ็ป“ๆž„๏ผš\n", + "2021ๅนด = ARGM-TMP at [0, 1]\n", + "HanLPv2.1 = ARG0 at [1, 2]\n", + "ไธบ็”Ÿไบง็Žฏๅขƒ = ARG2 at [2, 5]\n", + "ๅธฆๆฅ = PRED at [5, 6]\n", + "ๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏ = ARG1 at [6, 15]\n", + "็ฌฌ2ไธช่ฐ“่ฏ่ฎบๅ…ƒ็ป“ๆž„๏ผš\n", + "ๆœ€ = ARGM-ADV at [8, 9]\n", + "ๅ…ˆ่ฟ› = PRED at [9, 10]\n", + "ๆŠ€ๆœฏ = ARG0 at [14, 15]\n" + ] + } + ], + "source": [ + "for i, pas in enumerate(doc['srl']):\n", + " print(f'็ฌฌ{i+1}ไธช่ฐ“่ฏ่ฎบๅ…ƒ็ป“ๆž„๏ผš')\n", + " for form, role, begin, end in pas:\n", + " print(f'{form} = {role} at [{begin}, {end}]')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XOsWkOqQfzlr" + }, + "source": [ + "ไธบๅทฒๅˆ†่ฏ็š„ๅฅๅญๆ‰ง่กŒ่ฏญไน‰่ง’่‰ฒๅˆ†ๆž๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "bLZSTbv_f3OA", + "outputId": "111c0be9-bac6-4eee-d5bd-a972ffc34844" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Token\tSRL PA1 \tToken\tSRL PA2 \n", + "โ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\n", + "HanLP\tโ”€โ”€โ”€โ–บARG0\tHanLP\t \n", + "ไธบ \tโ—„โ”€โ” \tไธบ \t \n", + "็”Ÿไบง \t โ”œโ–บARG2\t็”Ÿไบง \t \n", + "็Žฏๅขƒ \tโ—„โ”€โ”˜ \t็Žฏๅขƒ \t \n", + "ๅธฆๆฅ \tโ•Ÿโ”€โ”€โ–บPRED\tๅธฆๆฅ \t \n", + "ๆฌกไธ–ไปฃ \tโ—„โ”€โ” \tๆฌกไธ–ไปฃ \t \n", + "ๆœ€ \t โ”‚ \tๆœ€ 
\tโ”€โ”€โ”€โ–บARGM-ADV\n", + "ๅ…ˆ่ฟ› \t โ”‚ \tๅ…ˆ่ฟ› \tโ•Ÿโ”€โ”€โ–บPRED \n", + "็š„ \t โ”œโ–บARG1\t็š„ \t \n", + "ๅคš่ฏญ็ง \t โ”‚ \tๅคš่ฏญ็ง \t \n", + "NLP \t โ”‚ \tNLP \t \n", + "ๆŠ€ๆœฏ \tโ—„โ”€โ”˜ \tๆŠ€ๆœฏ \tโ”€โ”€โ”€โ–บARG0 \n", + "ใ€‚ \t \tใ€‚ \t \n", + "\n", + "Tok\tSRL PA1 \tTok\tSRL PA2 \tTok\tSRL PA3 \n", + "โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\n", + "ๆˆ‘ \tโ—„โ”€โ” \tๆˆ‘ \t \tๆˆ‘ \t \n", + "็š„ \t โ”œโ–บARG0\t็š„ \t \t็š„ \t \n", + "ๅธŒๆœ› \tโ—„โ”€โ”˜ \tๅธŒๆœ› \t \tๅธŒๆœ› \t \n", + "ๆ˜ฏ \tโ•Ÿโ”€โ”€โ–บPRED\tๆ˜ฏ \t \tๆ˜ฏ \t \n", + "ๅธŒๆœ› \tโ—„โ”€โ” \tๅธŒๆœ› \tโ•Ÿโ”€โ”€โ–บPRED\tๅธŒๆœ› \t \n", + "ๅผ ๆ™š้œž\t โ”‚ \tๅผ ๆ™š้œž\tโ—„โ”€โ” \tๅผ ๆ™š้œž\t \n", + "็š„ \t โ”‚ \t็š„ \t โ”‚ \t็š„ \t \n", + "่ƒŒๅฝฑ \t โ”œโ–บARG1\t่ƒŒๅฝฑ \t โ”‚ \t่ƒŒๅฝฑ \t \n", + "่ขซ \t โ”‚ \t่ขซ \t โ”œโ–บARG1\t่ขซ \t \n", + "ๆ™š้œž \t โ”‚ \tๆ™š้œž \t โ”‚ \tๆ™š้œž \tโ”€โ”€โ”€โ–บARG0\n", + "ๆ˜ ็บข \tโ—„โ”€โ”˜ \tๆ˜ ็บข \tโ—„โ”€โ”˜ \tๆ˜ ็บข \tโ•Ÿโ”€โ”€โ–บPRED\n", + "ใ€‚ \t \tใ€‚ \t \tใ€‚ \t \n" + ] + } + ], + "source": [ + "HanLP([\n", + " [\"HanLP\", \"ไธบ\", \"็”Ÿไบง\", \"็Žฏๅขƒ\", \"ๅธฆๆฅ\", \"ๆฌกไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš่ฏญ็ง\", \"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"],\n", + " [\"ๆˆ‘\", \"็š„\", \"ๅธŒๆœ›\", \"ๆ˜ฏ\", \"ๅธŒๆœ›\", \"ๅผ ๆ™š้œž\", \"็š„\", \"่ƒŒๅฝฑ\", \"่ขซ\", \"ๆ™š้œž\", \"ๆ˜ ็บข\", \"ใ€‚\"]\n", + " ], tasks='srl', skip_tasks='tok*').pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### ๆณจๆ„\n", + "Native API็š„่พ“ๅ…ฅๅ•ไฝ้™ๅฎšไธบๅฅๅญ๏ผŒ้œ€ไฝฟ็”จ[ๅคš่ฏญ็งๅˆ†ๅฅๆจกๅž‹](https://github.com/hankcs/HanLP/blob/master/plugins/hanlp_demo/hanlp_demo/sent_split.py)ๆˆ–[ๅŸบไบŽ่ง„ๅˆ™็š„ๅˆ†ๅฅๅ‡ฝๆ•ฐ](https://github.com/hankcs/HanLP/blob/master/hanlp/utils/rules.py#L19)ๅ…ˆ่กŒๅˆ†ๅฅใ€‚RESTfulๅŒๆ—ถๆ”ฏๆŒๅ…จๆ–‡ใ€ๅฅๅญใ€ๅทฒๅˆ†่ฏ็š„ๅฅๅญใ€‚้™คๆญคไน‹ๅค–๏ผŒRESTfulๅ’Œnativeไธค็งAPI็š„่ฏญไน‰่ฎพ่ฎกๅฎŒๅ…จไธ€่‡ด๏ผŒ็”จๆˆทๅฏไปฅๆ— ็ผไบ’ๆขใ€‚" + ] + } + ], + 
"metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "srl_mtl.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/plugins/hanlp_demo/hanlp_demo/zh/srl_restful.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/srl_restful.ipynb new file mode 100644 index 000000000..feaeca32b --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/srl_restful.ipynb @@ -0,0 +1,312 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IYwV-UkNNzFp" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1Uf_u7ddMhUt" + }, + "outputs": [], + "source": [ + "!pip install hanlp_restful -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pp-1KqEOOJ4t" + }, + "source": [ + "## ๅˆ›ๅปบๅฎขๆˆท็ซฏ" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "0tmKBu7sNAXX" + }, + "outputs": [], + "source": [ + "from hanlp_restful import HanLPClient\n", + "HanLP = HanLPClient('https://www.hanlp.com/api', auth=None, language='zh') # authไธๅกซๅˆ™ๅŒฟๅ๏ผŒzhไธญๆ–‡๏ผŒmulๅคš่ฏญ็ง" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EmZDmLn9aGxG" + }, + "source": [ + "#### ็”ณ่ฏท็ง˜้’ฅ\n", + "็”ฑไบŽๆœๅŠกๅ™จ็ฎ—ๅŠ›ๆœ‰้™๏ผŒๅŒฟๅ็”จๆˆทๆฏๅˆ†้’Ÿ้™2ๆฌก่ฐƒ็”จใ€‚ๅฆ‚ๆžœไฝ ้œ€่ฆๆ›ดๅคš่ฐƒ็”จๆฌกๆ•ฐ๏ผŒ[ๅปบ่ฎฎ็”ณ่ฏทๅ…่ดนๅ…ฌ็›ŠAPI็ง˜้’ฅauth](https://bbs.hanlp.com/t/hanlp2-1-restful-api/53)ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ่ฏญไน‰่ง’่‰ฒๅˆ†ๆž\n", + "ไปปๅŠก่ถŠๅฐ‘๏ผŒ้€Ÿๅบฆ่ถŠๅฟซใ€‚ๅฆ‚ๆŒ‡ๅฎšไป…ๆ‰ง่กŒ่ฏญไน‰่ง’่‰ฒๅˆ†ๆž๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "BqEmDMGGOtk3", + "outputId": "2a0d392f-b99a-4a18-fc7f-754e2abe2e34" + }, + "outputs": [], + "source": [ + "doc = HanLP('2021ๅนดHanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚', tasks='srl')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ฟ”ๅ›žๅ€ผไธบไธ€ไธช[Document](https://hanlp.hankcs.com/docs/api/common/document.html):" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "{\n", + " \"tok/fine\": [\n", + " [\"2021ๅนด\", \"HanLPv2.1\", \"ไธบ\", \"็”Ÿไบง\", \"็Žฏๅขƒ\", \"ๅธฆๆฅ\", \"ๆฌก\", \"ไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš\", \"่ฏญ็ง\", \"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"]\n", + " ],\n", + " \"srl\": [\n", + " [[[\"2021ๅนด\", \"ARGM-TMP\", 0, 1], [\"HanLPv2.1\", \"ARG0\", 1, 2], [\"ไธบ็”Ÿไบง็Žฏๅขƒ\", \"ARG2\", 2, 5], [\"ๅธฆๆฅ\", \"PRED\", 5, 6], [\"ๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏ\", \"ARG1\", 6, 15]], [[\"ๆฌกไธ–ไปฃ\", \"ARGM-TMP\", 6, 8], [\"ๆœ€\", \"ARGM-ADV\", 8, 9], [\"ๅ…ˆ่ฟ›\", \"PRED\", 9, 10], [\"NLPๆŠ€ๆœฏ\", \"ARG0\", 13, 15]]]\n", + " ]\n", + "}\n" + ] + } + ], + "source": [ + "print(doc)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`doc['srl']`ๅญ—ๆฎตไธบ่ฏญไน‰่ง’่‰ฒๆ ‡ๆณจ็ป“ๆžœ๏ผŒๆฏไธชๅ››ๅ…ƒ็ป„็š„ๆ ผๅผไธบ`[่ฎบๅ…ƒๆˆ–่ฐ“่ฏ, ่ฏญไน‰่ง’่‰ฒๆ ‡็ญพ, ่ตทๅง‹ไธ‹ๆ ‡, ็ปˆๆญขไธ‹ๆ ‡]`ใ€‚ๅ…ถไธญ๏ผŒ่ฐ“่ฏ็š„่ฏญไน‰่ง’่‰ฒๆ ‡็ญพไธบ`PRED`๏ผŒ่ตทๆญขไธ‹ๆ ‡ๅฏนๅบ”ไปฅ`tok`ๅผ€ๅคด็š„็ฌฌไธ€ไธชๅ•่ฏๆ•ฐ็ป„ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wxctCigrTKu-" + }, + "source": [ + "ๅฏ่ง†ๅŒ–่ฐ“่ฏ่ฎบๅ…ƒ็ป“ๆž„๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zo08uquCTFSk", + "outputId": "c6077f2d-7084-4f4b-a3bc-9aa9951704ea" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Token \tSRL PA1 \tToken \tSRL PA2 \n", + "โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\n", + "2021ๅนด \tโ”€โ”€โ”€โ–บARGM-TMP\t2021ๅนด \t \n", + "HanLPv2.1\tโ”€โ”€โ”€โ–บARG0 \tHanLPv2.1\t \n", + "ไธบ \tโ—„โ”€โ” \tไธบ \t \n", + "็”Ÿไบง \t โ”œโ–บARG2 \t็”Ÿไบง \t \n", + "็Žฏๅขƒ \tโ—„โ”€โ”˜ \t็Žฏๅขƒ \t \n", + "ๅธฆๆฅ \tโ•Ÿโ”€โ”€โ–บPRED \tๅธฆๆฅ \t \n", + "ๆฌก \tโ—„โ”€โ” \tๆฌก \tโ—„โ”€โ” \n", + "ไธ–ไปฃ \t โ”‚ \tไธ–ไปฃ \tโ—„โ”€โ”ดโ–บARGM-TMP\n", + "ๆœ€ \t โ”‚ \tๆœ€ 
\tโ”€โ”€โ”€โ–บARGM-ADV\n", + "ๅ…ˆ่ฟ› \t โ”‚ \tๅ…ˆ่ฟ› \tโ•Ÿโ”€โ”€โ–บPRED \n", + "็š„ \t โ”œโ–บARG1 \t็š„ \t \n", + "ๅคš \t โ”‚ \tๅคš \t \n", + "่ฏญ็ง \t โ”‚ \t่ฏญ็ง \t \n", + "NLP \t โ”‚ \tNLP \tโ—„โ”€โ” \n", + "ๆŠ€ๆœฏ \tโ—„โ”€โ”˜ \tๆŠ€ๆœฏ \tโ—„โ”€โ”ดโ–บARG0 \n", + "ใ€‚ \t \tใ€‚ \t \n" + ] + } + ], + "source": [ + "doc.pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "้ๅŽ†่ฐ“่ฏ่ฎบๅ…ƒ็ป“ๆž„๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "็ฌฌ1ไธช่ฐ“่ฏ่ฎบๅ…ƒ็ป“ๆž„๏ผš\n", + "2021ๅนด = ARGM-TMP at [0, 1]\n", + "HanLPv2.1 = ARG0 at [1, 2]\n", + "ไธบ็”Ÿไบง็Žฏๅขƒ = ARG2 at [2, 5]\n", + "ๅธฆๆฅ = PRED at [5, 6]\n", + "ๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏ = ARG1 at [6, 15]\n", + "็ฌฌ2ไธช่ฐ“่ฏ่ฎบๅ…ƒ็ป“ๆž„๏ผš\n", + "ๆฌกไธ–ไปฃ = ARGM-TMP at [6, 8]\n", + "ๆœ€ = ARGM-ADV at [8, 9]\n", + "ๅ…ˆ่ฟ› = PRED at [9, 10]\n", + "NLPๆŠ€ๆœฏ = ARG0 at [13, 15]\n" + ] + } + ], + "source": [ + "for i, pas in enumerate(doc['srl'][0]):\n", + " print(f'็ฌฌ{i+1}ไธช่ฐ“่ฏ่ฎบๅ…ƒ็ป“ๆž„๏ผš')\n", + " for form, role, begin, end in pas:\n", + " print(f'{form} = {role} at [{begin}, {end}]')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XOsWkOqQfzlr" + }, + "source": [ + "ไธบๅทฒๅˆ†่ฏ็š„ๅฅๅญๆ‰ง่กŒ่ฏญไน‰่ง’่‰ฒๅˆ†ๆž๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "bLZSTbv_f3OA", + "outputId": "111c0be9-bac6-4eee-d5bd-a972ffc34844" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Token\tSRL PA1 \tToken\tSRL PA2 \n", + "โ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\n", + "HanLP\tโ”€โ”€โ”€โ–บARG0\tHanLP\t \n", + "ไธบ \tโ—„โ”€โ” \tไธบ \t \n", + "็”Ÿไบง \t โ”œโ–บARG2\t็”Ÿไบง \t \n", + "็Žฏๅขƒ \tโ—„โ”€โ”˜ \t็Žฏๅขƒ \t \n", + 
"ๅธฆๆฅ \tโ•Ÿโ”€โ”€โ–บPRED\tๅธฆๆฅ \t \n", + "ๆฌกไธ–ไปฃ \tโ—„โ”€โ” \tๆฌกไธ–ไปฃ \tโ”€โ”€โ”€โ–บARGM-TMP\n", + "ๆœ€ \t โ”‚ \tๆœ€ \tโ”€โ”€โ”€โ–บARGM-ADV\n", + "ๅ…ˆ่ฟ› \t โ”‚ \tๅ…ˆ่ฟ› \tโ•Ÿโ”€โ”€โ–บPRED \n", + "็š„ \t โ”œโ–บARG1\t็š„ \t \n", + "ๅคš่ฏญ็ง \t โ”‚ \tๅคš่ฏญ็ง \t \n", + "NLP \t โ”‚ \tNLP \t \n", + "ๆŠ€ๆœฏ \tโ—„โ”€โ”˜ \tๆŠ€ๆœฏ \tโ”€โ”€โ”€โ–บARG0 \n", + "ใ€‚ \t \tใ€‚ \t \n", + "\n", + "Tok\tSRL PA1 \tTok\tSRL PA2 \tTok\tSRL PA3 \n", + "โ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\tโ”€โ”€โ”€\tโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\n", + "ๆˆ‘ \tโ—„โ”€โ” \tๆˆ‘ \t \tๆˆ‘ \t \n", + "็š„ \t โ”œโ–บARG0\t็š„ \t \t็š„ \t \n", + "ๅธŒๆœ› \tโ—„โ”€โ”˜ \tๅธŒๆœ› \t \tๅธŒๆœ› \t \n", + "ๆ˜ฏ \tโ•Ÿโ”€โ”€โ–บPRED\tๆ˜ฏ \t \tๆ˜ฏ \t \n", + "ๅธŒๆœ› \tโ—„โ”€โ” \tๅธŒๆœ› \tโ•Ÿโ”€โ”€โ–บPRED\tๅธŒๆœ› \t \n", + "ๅผ ๆ™š้œž\t โ”‚ \tๅผ ๆ™š้œž\tโ—„โ”€โ” \tๅผ ๆ™š้œž\tโ—„โ”€โ” \n", + "็š„ \t โ”‚ \t็š„ \t โ”‚ \t็š„ \t โ”œโ–บARG1\n", + "่ƒŒๅฝฑ \t โ”œโ–บARG1\t่ƒŒๅฝฑ \t โ”‚ \t่ƒŒๅฝฑ \tโ—„โ”€โ”˜ \n", + "่ขซ \t โ”‚ \t่ขซ \t โ”œโ–บARG1\t่ขซ \t \n", + "ๆ™š้œž \t โ”‚ \tๆ™š้œž \t โ”‚ \tๆ™š้œž \tโ”€โ”€โ”€โ–บARG0\n", + "ๆ˜ ็บข \tโ—„โ”€โ”˜ \tๆ˜ ็บข \tโ—„โ”€โ”˜ \tๆ˜ ็บข \tโ•Ÿโ”€โ”€โ–บPRED\n", + "ใ€‚ \t \tใ€‚ \t \tใ€‚ \t \n" + ] + } + ], + "source": [ + "HanLP(tokens=[\n", + " [\"HanLP\", \"ไธบ\", \"็”Ÿไบง\", \"็Žฏๅขƒ\", \"ๅธฆๆฅ\", \"ๆฌกไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš่ฏญ็ง\", \"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"],\n", + " [\"ๆˆ‘\", \"็š„\", \"ๅธŒๆœ›\", \"ๆ˜ฏ\", \"ๅธŒๆœ›\", \"ๅผ ๆ™š้œž\", \"็š„\", \"่ƒŒๅฝฑ\", \"่ขซ\", \"ๆ™š้œž\", \"ๆ˜ ็บข\", \"ใ€‚\"]\n", + " ], tasks='srl', skip_tasks='tok*').pretty_print()" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "srl_restful.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": 
"text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/plugins/hanlp_demo/hanlp_demo/zh/srl_stl.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/srl_stl.ipynb new file mode 100644 index 000000000..7ce29c326 --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/srl_stl.ipynb @@ -0,0 +1,218 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IYwV-UkNNzFp" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1Uf_u7ddMhUt" + }, + "outputs": [], + "source": [ + "!pip install hanlp -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pp-1KqEOOJ4t" + }, + "source": [ + "## ๅŠ ่ฝฝๆจกๅž‹\n", + "HanLP็š„ๅทฅไฝœๆต็จ‹ๆ˜ฏๅ…ˆๅŠ ่ฝฝๆจกๅž‹๏ผŒๆจกๅž‹็š„ๆ ‡็คบ็ฌฆๅญ˜ๅ‚จๅœจ`hanlp.pretrained`่ฟ™ไธชๅŒ…ไธญ๏ผŒๆŒ‰็…งNLPไปปๅŠกๅฝ’็ฑปใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "0tmKBu7sNAXX" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'CPB3_SRL_ELECTRA_SMALL': 'https://file.hankcs.com/hanlp/srl/cpb3_electra_small_crf_has_transform_20220218_135910.zip'}" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import hanlp\n", + "hanlp.pretrained.srl.ALL # ่ฏญ็ง่งๅ็งฐๆœ€ๅŽไธ€ไธชๅญ—ๆฎตๆˆ–็›ธๅบ”่ฏญๆ–™ๅบ“" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EmZDmLn9aGxG" + }, + "source": [ + "่ฐƒ็”จ`hanlp.load`่ฟ›่กŒๅŠ ่ฝฝ๏ผŒๆจกๅž‹ไผš่‡ชๅŠจไธ‹่ฝฝๅˆฐๆœฌๅœฐ็ผ“ๅญ˜๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "srl = hanlp.load('CPB3_SRL_ELECTRA_SMALL')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ่ฏญไน‰่ง’่‰ฒๅˆ†ๆž\n", + "ไธบๅทฒๅˆ†่ฏ็š„ๅฅๅญๆ‰ง่กŒ่ฏญไน‰่ง’่‰ฒๅˆ†ๆž๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "BqEmDMGGOtk3", + "outputId": "2a0d392f-b99a-4a18-fc7f-754e2abe2e34" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[[('2021ๅนด', 'ARGM-TMP', 0, 1),\n", + " ('HanLPv2.1', 
'ARG0', 1, 2),\n", + " ('ไธบ็”Ÿไบง็Žฏๅขƒ', 'ARG2', 2, 5),\n", + " ('ๅธฆๆฅ', 'PRED', 5, 6),\n", + " ('ๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏ', 'ARG1', 6, 15)],\n", + " [('ๆฌกไธ–ไปฃ', 'ARGM-TMP', 6, 8),\n", + " ('ๆœ€', 'ARGM-ADV', 8, 9),\n", + " ('ๅ…ˆ่ฟ›', 'PRED', 9, 10),\n", + " ('ๆŠ€ๆœฏ', 'ARG0', 14, 15)]]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "srl(['2021ๅนด', 'HanLPv2.1', 'ไธบ', '็”Ÿไบง', '็Žฏๅขƒ', 'ๅธฆๆฅ', 'ๆฌก', 'ไธ–ไปฃ', 'ๆœ€', 'ๅ…ˆ่ฟ›', '็š„', 'ๅคš', '่ฏญ็ง', 'NLP', 'ๆŠ€ๆœฏ', 'ใ€‚'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ฏญไน‰่ง’่‰ฒๆ ‡ๆณจ็ป“ๆžœไธญๆฏไธชๅ››ๅ…ƒ็ป„็š„ๆ ผๅผไธบ`[่ฎบๅ…ƒๆˆ–่ฐ“่ฏ, ่ฏญไน‰่ง’่‰ฒๆ ‡็ญพ, ่ตทๅง‹ไธ‹ๆ ‡, ็ปˆๆญขไธ‹ๆ ‡]`ใ€‚ๅ…ถไธญ๏ผŒ่ฐ“่ฏ็š„่ฏญไน‰่ง’่‰ฒๆ ‡็ญพไธบ`PRED`๏ผŒ่ตทๆญขไธ‹ๆ ‡ๅฏนๅบ”ๅ•่ฏๆ•ฐ็ป„ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "้ๅŽ†่ฐ“่ฏ่ฎบๅ…ƒ็ป“ๆž„๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "็ฌฌ1ไธช่ฐ“่ฏ่ฎบๅ…ƒ็ป“ๆž„๏ผš\n", + "2021ๅนด = ARGM-TMP at [0, 1]\n", + "HanLPv2.1 = ARG0 at [1, 2]\n", + "ไธบ็”Ÿไบง็Žฏๅขƒ = ARG2 at [2, 5]\n", + "ๅธฆๆฅ = PRED at [5, 6]\n", + "ๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏ = ARG1 at [6, 15]\n", + "็ฌฌ2ไธช่ฐ“่ฏ่ฎบๅ…ƒ็ป“ๆž„๏ผš\n", + "ๆฌกไธ–ไปฃ = ARGM-TMP at [6, 8]\n", + "ๆœ€ = ARGM-ADV at [8, 9]\n", + "ๅ…ˆ่ฟ› = PRED at [9, 10]\n", + "ๆŠ€ๆœฏ = ARG0 at [14, 15]\n" + ] + } + ], + "source": [ + "for i, pas in enumerate(srl(['2021ๅนด', 'HanLPv2.1', 'ไธบ', '็”Ÿไบง', '็Žฏๅขƒ', 'ๅธฆๆฅ', 'ๆฌก', 'ไธ–ไปฃ', 'ๆœ€', 'ๅ…ˆ่ฟ›', '็š„', 'ๅคš', '่ฏญ็ง', 'NLP', 'ๆŠ€ๆœฏ', 'ใ€‚'])):\n", + " print(f'็ฌฌ{i+1}ไธช่ฐ“่ฏ่ฎบๅ…ƒ็ป“ๆž„๏ผš')\n", + " for form, role, begin, end in pas:\n", + " print(f'{form} = {role} at [{begin}, {end}]')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### ๆณจๆ„\n", + "Native 
API็š„่พ“ๅ…ฅๅ•ไฝ้™ๅฎšไธบๅฅๅญ๏ผŒ้œ€ไฝฟ็”จ[ๅคš่ฏญ็งๅˆ†ๅฅๆจกๅž‹](https://github.com/hankcs/HanLP/blob/master/plugins/hanlp_demo/hanlp_demo/sent_split.py)ๆˆ–[ๅŸบไบŽ่ง„ๅˆ™็š„ๅˆ†ๅฅๅ‡ฝๆ•ฐ](https://github.com/hankcs/HanLP/blob/master/hanlp/utils/rules.py#L19)ๅ…ˆ่กŒๅˆ†ๅฅใ€‚RESTfulๅŒๆ—ถๆ”ฏๆŒๅ…จๆ–‡ใ€ๅฅๅญใ€ๅทฒๅˆ†่ฏ็š„ๅฅๅญใ€‚้™คๆญคไน‹ๅค–๏ผŒRESTfulๅ’Œnativeไธค็งAPI็š„่ฏญไน‰่ฎพ่ฎกๅฎŒๅ…จไธ€่‡ด๏ผŒ็”จๆˆทๅฏไปฅๆ— ็ผไบ’ๆขใ€‚" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "srl_mtl.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/plugins/hanlp_demo/hanlp_demo/zh/sts_restful.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/sts_restful.ipynb new file mode 100644 index 000000000..9a29cbc21 --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/sts_restful.ipynb @@ -0,0 +1,138 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IYwV-UkNNzFp" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1Uf_u7ddMhUt" + }, + "outputs": [], + "source": [ + "!pip install hanlp_restful -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pp-1KqEOOJ4t" + }, + "source": [ + "## ๅˆ›ๅปบๅฎขๆˆท็ซฏ" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "0tmKBu7sNAXX" + }, + "outputs": [], + "source": [ + "from hanlp_restful import HanLPClient\n", + "HanLP = HanLPClient('https://www.hanlp.com/api', auth=None, language='zh') # authไธๅกซๅˆ™ๅŒฟๅ๏ผŒzhไธญๆ–‡๏ผŒmulๅคš่ฏญ็ง" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EmZDmLn9aGxG" + }, + "source": [ + "#### ็”ณ่ฏท็ง˜้’ฅ\n", + "็”ฑไบŽๆœๅŠกๅ™จ็ฎ—ๅŠ›ๆœ‰้™๏ผŒๅŒฟๅ็”จๆˆทๆฏๅˆ†้’Ÿ้™2ๆฌก่ฐƒ็”จใ€‚ๅฆ‚ๆžœไฝ ้œ€่ฆๆ›ดๅคš่ฐƒ็”จๆฌกๆ•ฐ๏ผŒ[ๅปบ่ฎฎ็”ณ่ฏทๅ…่ดนๅ…ฌ็›ŠAPI็ง˜้’ฅauth](https://bbs.hanlp.com/t/hanlp2-1-restful-api/53)ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ่ฏญไน‰ๆ–‡ๆœฌ็›ธไผผๅบฆ\n", + "่พ“ๅ…ฅไธคๆฎต็Ÿญๆ–‡ๆœฌ็ป„ๆˆ็š„ไบŒๅ…ƒ็ป„ๅˆ—่กจ๏ผŒๆ‰ง่กŒ่ฏญไน‰ๆ–‡ๆœฌ็›ธไผผๅบฆ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "BqEmDMGGOtk3", + "outputId": "2a0d392f-b99a-4a18-fc7f-754e2abe2e34" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[0.9764469861984253, 0.0, 0.003458738327026367]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "HanLP.semantic_textual_similarity([\n", + " ('็œ‹ๅ›พ็Œœไธ€็”ตๅฝฑๅ', '็œ‹ๅ›พ็Œœ็”ตๅฝฑ'),\n", + " ('ๆ— ็บฟ่ทฏ็”ฑๅ™จๆ€Žไนˆๆ— ็บฟไธŠ็ฝ‘', 'ๆ— ็บฟไธŠ็ฝ‘ๅกๅ’Œๆ— ็บฟ่ทฏ็”ฑๅ™จๆ€Žไนˆ็”จ'),\n", + " 
('ๅŒ—ไบฌๅˆฐไธŠๆตท็š„ๅŠจ่ฝฆ็ฅจ', 'ไธŠๆตทๅˆฐๅŒ—ไบฌ็š„ๅŠจ่ฝฆ็ฅจ'),\n", + "])" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "sts_restful.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/plugins/hanlp_demo/hanlp_demo/zh/sts_stl.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/sts_stl.ipynb new file mode 100644 index 000000000..7a74e7e79 --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/sts_stl.ipynb @@ -0,0 +1,151 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IYwV-UkNNzFp" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1Uf_u7ddMhUt" + }, + "outputs": [], + "source": [ + "!pip install hanlp -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pp-1KqEOOJ4t" + }, + "source": [ + "## ๅŠ ่ฝฝๆจกๅž‹\n", + "HanLP็š„ๅทฅไฝœๆต็จ‹ๆ˜ฏๅ…ˆๅŠ ่ฝฝๆจกๅž‹๏ผŒๆจกๅž‹็š„ๆ ‡็คบ็ฌฆๅญ˜ๅ‚จๅœจ`hanlp.pretrained`่ฟ™ไธชๅŒ…ไธญ๏ผŒๆŒ‰็…งNLPไปปๅŠกๅฝ’็ฑปใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "0tmKBu7sNAXX" + }, + "outputs": [], + "source": [ + "import hanlp\n", + "hanlp.pretrained.sts.ALL # ่ฏญ็ง่งๅ็งฐๆœ€ๅŽไธ€ไธชๅญ—ๆฎตๆˆ–็›ธๅบ”่ฏญๆ–™ๅบ“" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EmZDmLn9aGxG" + }, + "source": [ + "่ฐƒ็”จ`hanlp.load`่ฟ›่กŒๅŠ ่ฝฝ๏ผŒๆจกๅž‹ไผš่‡ชๅŠจไธ‹่ฝฝๅˆฐๆœฌๅœฐ็ผ“ๅญ˜๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "sts = hanlp.load(hanlp.pretrained.sts.STS_ELECTRA_BASE_ZH)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ่ฏญไน‰ๆ–‡ๆœฌ็›ธไผผๅบฆ\n", + "่พ“ๅ…ฅไธคๆฎต็Ÿญๆ–‡ๆœฌ็ป„ๆˆ็š„ไบŒๅ…ƒ็ป„ๅˆ—่กจ๏ผŒๆ‰ง่กŒ่ฏญไน‰ๆ–‡ๆœฌ็›ธไผผๅบฆ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "BqEmDMGGOtk3", + "outputId": "2a0d392f-b99a-4a18-fc7f-754e2abe2e34" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[0.9764469861984253, 0.0, 0.003458738327026367]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sts([\n", + " ('็œ‹ๅ›พ็Œœไธ€็”ตๅฝฑๅ', '็œ‹ๅ›พ็Œœ็”ตๅฝฑ'),\n", + " ('ๆ— ็บฟ่ทฏ็”ฑๅ™จๆ€Žไนˆๆ— 
็บฟไธŠ็ฝ‘', 'ๆ— ็บฟไธŠ็ฝ‘ๅกๅ’Œๆ— ็บฟ่ทฏ็”ฑๅ™จๆ€Žไนˆ็”จ'),\n", + " ('ๅŒ—ไบฌๅˆฐไธŠๆตท็š„ๅŠจ่ฝฆ็ฅจ', 'ไธŠๆตทๅˆฐๅŒ—ไบฌ็š„ๅŠจ่ฝฆ็ฅจ'),\n", + "])" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "sts_stl.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/plugins/hanlp_demo/hanlp_demo/zh/tok_mtl.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/tok_mtl.ipynb new file mode 100644 index 000000000..13818c8d7 --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/tok_mtl.ipynb @@ -0,0 +1,630 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "view-in-github" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IYwV-UkNNzFp" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1Uf_u7ddMhUt" + }, + "outputs": [], + "source": [ + "!pip install hanlp -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pp-1KqEOOJ4t" + }, + "source": [ + "## ๅŠ ่ฝฝๆจกๅž‹\n", + "HanLP็š„ๅทฅไฝœๆต็จ‹ๆ˜ฏๅ…ˆๅŠ ่ฝฝๆจกๅž‹๏ผŒๆจกๅž‹็š„ๆ ‡็คบ็ฌฆๅญ˜ๅ‚จๅœจ`hanlp.pretrained`่ฟ™ไธชๅŒ…ไธญ๏ผŒๆŒ‰็…งNLPไปปๅŠกๅฝ’็ฑปใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4M7ka0K5OMWU", + "outputId": "9a1dc26a-786a-4dce-c013-7ae5017a8805" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'OPEN_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_SMALL_ZH': 'https://file.hankcs.com/hanlp/mtl/open_tok_pos_ner_srl_dep_sdp_con_electra_small_20201223_035557.zip',\n", + " 'OPEN_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH': 'https://file.hankcs.com/hanlp/mtl/open_tok_pos_ner_srl_dep_sdp_con_electra_base_20201223_201906.zip',\n", + " 'CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_SMALL_ZH': 'https://file.hankcs.com/hanlp/mtl/close_tok_pos_ner_srl_dep_sdp_con_electra_small_20210111_124159.zip',\n", + " 'CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH': 'https://file.hankcs.com/hanlp/mtl/close_tok_pos_ner_srl_dep_sdp_con_electra_base_20210111_124519.zip',\n", + " 'CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ERNIE_GRAM_ZH': 'https://file.hankcs.com/hanlp/mtl/close_tok_pos_ner_srl_dep_sdp_con_ernie_gram_base_aug_20210904_145403.zip',\n", + " 'UD_ONTONOTES_TOK_POS_LEM_FEA_NER_SRL_DEP_SDP_CON_MT5_SMALL': 'https://file.hankcs.com/hanlp/mtl/ud_ontonotes_tok_pos_lem_fea_ner_srl_dep_sdp_con_mt5_small_20210228_123458.zip',\n", + " 
'UD_ONTONOTES_TOK_POS_LEM_FEA_NER_SRL_DEP_SDP_CON_XLMR_BASE': 'https://file.hankcs.com/hanlp/mtl/ud_ontonotes_tok_pos_lem_fea_ner_srl_dep_sdp_con_xlm_base_20210602_211620.zip',\n", + " 'NPCMJ_UD_KYOTO_TOK_POS_CON_BERT_BASE_CHAR_JA': 'https://file.hankcs.com/hanlp/mtl/npcmj_ud_kyoto_tok_pos_ner_dep_con_srl_bert_base_char_ja_20210914_133742.zip'}" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import hanlp\n", + "hanlp.pretrained.mtl.ALL # MTLๅคšไปปๅŠก๏ผŒๅ…ทไฝ“ไปปๅŠก่งๆจกๅž‹ๅ็งฐ๏ผŒ่ฏญ็ง่งๅ็งฐๆœ€ๅŽไธ€ไธชๅญ—ๆฎตๆˆ–็›ธๅบ”่ฏญๆ–™ๅบ“" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BMW528wGNulM" + }, + "source": [ + "่ฐƒ็”จ`hanlp.load`่ฟ›่กŒๅŠ ่ฝฝ๏ผŒๆจกๅž‹ไผš่‡ชๅŠจไธ‹่ฝฝๅˆฐๆœฌๅœฐ็ผ“ๅญ˜ใ€‚่‡ช็„ถ่ฏญ่จ€ๅค„็†ๅˆ†ไธบ่ฎธๅคšไปปๅŠก๏ผŒๅˆ†่ฏๅชๆ˜ฏๆœ€ๅˆ็บง็š„ไธ€ไธชใ€‚ไธŽๅ…ถๆฏไธชไปปๅŠกๅ•็‹ฌๅˆ›ๅปบไธ€ไธชๆจกๅž‹๏ผŒไธๅฆ‚ๅˆฉ็”จHanLP็š„่”ๅˆๆจกๅž‹ไธ€ๆฌกๆ€งๅฎŒๆˆๅคšไธชไปปๅŠก๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0tmKBu7sNAXX", + "outputId": "e0187328-c6d2-47fe-cf84-c5b44703940b" + }, + "outputs": [], + "source": [ + "HanLP = hanlp.load(hanlp.pretrained.mtl.CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ๅˆ†่ฏ\n", + "ไปปๅŠก่ถŠๅฐ‘๏ผŒ้€Ÿๅบฆ่ถŠๅฟซใ€‚ๅฆ‚ๆŒ‡ๅฎšไป…ๆ‰ง่กŒๅˆ†่ฏ๏ผŒ้ป˜่ฎค็ป†็ฒ’ๅบฆ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "BqEmDMGGOtk3", + "outputId": "387cbf30-4d70-44b1-d64b-b7a5c22ae31e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "้˜ฟๅฉ†ไธป ๆฅๅˆฐ ๅŒ—ไบฌ ็ซ‹ๆ–นๅบญ ๅ‚่ง‚ ่‡ช็„ถ ่ฏญไน‰ ็ง‘ๆŠ€ ๅ…ฌๅธ ใ€‚\n" + ] + } + ], + "source": [ + "HanLP('้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธใ€‚', 
tasks='tok').pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jj1Jk-2sPHYx" + }, + "source": [ + "ๆ‰ง่กŒ็ฒ—้ข—็ฒ’ๅบฆๅˆ†่ฏ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "1goEC7znPNkI", + "outputId": "ddf15a17-2f5d-4bc3-d145-908fb6176552" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "้˜ฟๅฉ†ไธป ๆฅๅˆฐ ๅŒ—ไบฌ็ซ‹ๆ–นๅบญ ๅ‚่ง‚ ่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ ใ€‚\n" + ] + } + ], + "source": [ + "HanLP('้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธใ€‚', tasks='tok/coarse').pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wxctCigrTKu-" + }, + "source": [ + "ๅŒๆ—ถๆ‰ง่กŒ็ป†็ฒ’ๅบฆๅ’Œ็ฒ—็ฒ’ๅบฆๅˆ†่ฏ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zo08uquCTFSk", + "outputId": "bf24a01a-a09b-4b78-fdec-2bb705b4becb" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'tok/fine': ['้˜ฟๅฉ†ไธป', 'ๆฅๅˆฐ', 'ๅŒ—ไบฌ', '็ซ‹ๆ–นๅบญ', 'ๅ‚่ง‚', '่‡ช็„ถ', '่ฏญไน‰', '็ง‘ๆŠ€', 'ๅ…ฌๅธ', 'ใ€‚'],\n", + " 'tok/coarse': ['้˜ฟๅฉ†ไธป', 'ๆฅๅˆฐ', 'ๅŒ—ไบฌ็ซ‹ๆ–นๅบญ', 'ๅ‚่ง‚', '่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ', 'ใ€‚']}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "HanLP('้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธใ€‚', tasks='tok*')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`coarse`ไธบ็ฒ—ๅˆ†๏ผŒ`fine`ไธบ็ป†ๅˆ†ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### ๆณจๆ„\n", + "Native 
API็š„่พ“ๅ…ฅๅ•ไฝ้™ๅฎšไธบๅฅๅญ๏ผŒ้œ€ไฝฟ็”จ[ๅคš่ฏญ็งๅˆ†ๅฅๆจกๅž‹](https://github.com/hankcs/HanLP/blob/master/plugins/hanlp_demo/hanlp_demo/sent_split.py)ๆˆ–[ๅŸบไบŽ่ง„ๅˆ™็š„ๅˆ†ๅฅๅ‡ฝๆ•ฐ](https://github.com/hankcs/HanLP/blob/master/hanlp/utils/rules.py#L19)ๅ…ˆ่กŒๅˆ†ๅฅใ€‚RESTfulๅŒๆ—ถๆ”ฏๆŒๅ…จๆ–‡ใ€ๅฅๅญใ€ๅทฒๅˆ†่ฏ็š„ๅฅๅญใ€‚้™คๆญคไน‹ๅค–๏ผŒRESTfulๅ’Œnativeไธค็งAPI็š„่ฏญไน‰่ฎพ่ฎกๅฎŒๅ…จไธ€่‡ด๏ผŒ็”จๆˆทๅฏไปฅๆ— ็ผไบ’ๆขใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "suUL042zPpLj" + }, + "source": [ + "## ่‡ชๅฎšไน‰่ฏๅ…ธ\n", + "่‡ชๅฎšไน‰่ฏๅ…ธไธบๅˆ†่ฏไปปๅŠก็š„ๆˆๅ‘˜ๅ˜้‡๏ผŒ่ฆๆ“ไฝœ่‡ชๅฎšไน‰่ฏๅ…ธ๏ผŒๅ…ˆ่Žทๅ–ๅˆ†่ฏไปปๅŠก๏ผŒไปฅ็ป†ๅˆ†ๆ ‡ๅ‡†ไธบไพ‹๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AzYShIssP6kq", + "outputId": "7f07897c-8a97-4193-855d-d9e296581d0c" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tok = HanLP['tok/fine']\n", + "tok" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่‡ชๅฎšไน‰่ฏๅ…ธไธบๅˆ†่ฏไปปๅŠก็š„ๆˆๅ‘˜ๅ˜้‡๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "1q4MUpgVQNlu", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(None, None)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tok.dict_combine, tok.dict_force" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "2zZkH9tRQOoi", + "outputId": "c231c35b-1a5f-4b54-e5c3-8680d2cc1515", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "HanLPๆ”ฏๆŒๅˆๅนถๅ’Œๅผบๅˆถไธค็งไผ˜ๅ…ˆ็บง็š„่‡ชๅฎšไน‰่ฏๅ…ธ๏ผŒไปฅๆปก่ถณไธๅŒๅœบๆ™ฏ็š„้œ€ๆฑ‚ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": 
"F-9gAeIVQUFG", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "ไธๆŒ‚่ฏๅ…ธ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "F8M8cyBrQduw", + "outputId": "c3bf7ec5-b1d4-4207-a979-2c85754c7cd7", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ๅ•†ๅ“ ๅ’Œ ๆœๅŠก ้กน็›ฎ\n" + ] + } + ], + "source": [ + "tok.dict_force = tok.dict_combine = None\n", + "HanLP(\"ๅ•†ๅ“ๅ’ŒๆœๅŠก้กน็›ฎ\", tasks='tok/fine').pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DDqQxqQaTayv", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "### ๅผบๅˆถๆจกๅผ\n", + "ๅผบๅˆถๆจกๅผไผ˜ๅ…ˆ่พ“ๅ‡บๆญฃๅ‘ๆœ€้•ฟๅŒน้…ๅˆฐ็š„่‡ชๅฎšไน‰่ฏๆก๏ผˆๆ…Ž็”จ๏ผŒ่ฏฆ่ง[ใ€Š่‡ช็„ถ่ฏญ่จ€ๅค„็†ๅ…ฅ้—จใ€‹](http://nlp.hankcs.com/book.php)็ฌฌไบŒ็ซ ๏ผ‰๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bjnEqDaATdVr", + "outputId": "3a282acc-5716-45e4-e1e2-96eefb8ee342", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ๅ•†ๅ“ ๅ’Œๆœ ๅŠก ้กน็›ฎ\n" + ] + } + ], + "source": [ + "tok.dict_force = {'ๅ’Œๆœ', 'ๆœๅŠก้กน็›ฎ'}\n", + "HanLP(\"ๅ•†ๅ“ๅ’ŒๆœๅŠก้กน็›ฎ\", tasks='tok/fine').pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ldKAnVoSTgxb", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "ไธŽๅคงไผ—็š„ๆœด็ด ่ฎค็ŸฅไธๅŒ๏ผŒ่ฏๅ…ธไผ˜ๅ…ˆ็บงๆœ€้ซ˜ๆœชๅฟ…ๆ˜ฏๅฅฝไบ‹๏ผŒๆžๆœ‰ๅฏ่ƒฝๅŒน้…ๅˆฐไธ่ฏฅๅˆ†ๅ‡บๆฅ็š„่‡ชๅฎšไน‰่ฏ่ฏญ๏ผŒๅฏผ่‡ดๆญงไน‰ใ€‚่‡ชๅฎšไน‰่ฏ่ฏญ่ถŠ้•ฟ๏ผŒ่ถŠไธๅฎนๆ˜“ๅ‘็”Ÿๆญงไน‰ใ€‚่ฟ™ๅฏๅ‘ๆˆ‘ไปฌๅฐ†ๅผบๅˆถๆจกๅผๆ‹“ๅฑ•ไธบๅผบๅˆถๆ กๆญฃๅŠŸ่ƒฝใ€‚\n", + "\n", + "ๅผบๅˆถๆ กๆญฃๅŽŸ็†็›ธไผผ๏ผŒไฝ†ไผšๅฐ†ๅŒน้…ๅˆฐ็š„่‡ชๅฎšไน‰่ฏๆกๆ›ฟๆขไธบ็›ธๅบ”็š„ๅˆ†่ฏ็ป“ๆžœ:" + ] + }, + { + "cell_type": "code", + 
"execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bwIu0f6wTgbF", + "outputId": "b941b079-5202-420a-e7f3-8f1617a2545c", + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ๅ•†ๅ“ ๅ’Œ ๆœๅŠก ้กน็›ฎ\n" + ] + } + ], + "source": [ + "tok.dict_force = {'ๅ’ŒๆœๅŠก': ['ๅ’Œ', 'ๆœๅŠก']}\n", + "HanLP(\"ๅ•†ๅ“ๅ’ŒๆœๅŠก้กน็›ฎ\", tasks='tok/fine').pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ๅˆๅนถๆจกๅผ\n", + "ๅˆๅนถๆจกๅผ็š„ไผ˜ๅ…ˆ็บงไฝŽไบŽ็ปŸ่ฎกๆจกๅž‹๏ผŒๅณ`dict_combine`ไผšๅœจ็ปŸ่ฎกๆจกๅž‹็š„ๅˆ†่ฏ็ป“ๆžœไธŠๆ‰ง่กŒๆœ€้•ฟๅŒน้…ๅนถๅˆๅนถๅŒน้…ๅˆฐ็š„่ฏๆกใ€‚ไธ€่ˆฌๆƒ…ๅ†ตไธ‹๏ผŒๆŽจ่ไฝฟ็”จ่ฏฅๆจกๅผใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ๅ•†ๅ“ ๅ’Œ ๆœๅŠก้กน็›ฎ\n" + ] + } + ], + "source": [ + "tok.dict_force = None\n", + "tok.dict_combine = {'ๅ’Œๆœ', 'ๆœๅŠก้กน็›ฎ'}\n", + "HanLP(\"ๅ•†ๅ“ๅ’ŒๆœๅŠก้กน็›ฎ\", tasks='tok/fine').pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9aRzEeRvTlRr" + }, + "source": [ + "้œ€่ฆ็ฎ—ๆณ•ๅŸบ็ก€ๆ‰่ƒฝ็†่งฃ๏ผŒๅˆๅญฆ่€…ๅฏๅ‚่€ƒ[ใ€Š่‡ช็„ถ่ฏญ่จ€ๅค„็†ๅ…ฅ้—จใ€‹](http://nlp.hankcs.com/book.php)ใ€‚\n", + "#### ็ฉบๆ ผๅ•่ฏ" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ๅซๆœ‰็ฉบๆ ผใ€ๅˆถ่กจ็ฌฆ็ญ‰๏ผˆTransformer tokenizerๅŽปๆŽ‰็š„ๅญ—็ฌฆ๏ผ‰็š„่ฏ่ฏญ้œ€่ฆ็”จ`tuple`็š„ๅฝขๅผๆไพ›๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['ๅฆ‚ไฝ•', '่ฏ„ไปท', 'iPad Pro', '๏ผŸ', 'iPad Pro', 'ๆœ‰', '2ไธช็ฉบๆ ผ']" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tok.dict_combine = {('iPad', 
'Pro'), '2ไธช็ฉบๆ ผ'}\n", + "HanLP(\"ๅฆ‚ไฝ•่ฏ„ไปทiPad Pro ๏ผŸiPad Proๆœ‰2ไธช็ฉบๆ ผ\", tasks='tok/fine')['tok/fine']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ชๆ˜Ž็š„็”จๆˆท่ฏท็ปง็ปญ้˜…่ฏป๏ผŒ`tuple`่ฏๅ…ธไธญ็š„ๅญ—็ฌฆไธฒๅ…ถๅฎž็ญ‰ไปทไบŽ่ฏฅๅญ—็ฌฆไธฒ็š„ๆ‰€ๆœ‰ๅฏ่ƒฝ็š„ๅˆ‡ๅˆ†ๆ–นๅผ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys([('2', 'ไธช', '็ฉบๆ ผ'), ('2', 'ไธช', '็ฉบ', 'ๆ ผ'), ('2', 'ไธช็ฉบ', 'ๆ ผ'), ('2', 'ไธช็ฉบๆ ผ'), ('2ไธช', '็ฉบ', 'ๆ ผ'), ('2ไธช', '็ฉบๆ ผ'), ('2ไธช็ฉบๆ ผ',), ('iPad', 'Pro'), ('2ไธช็ฉบ', 'ๆ ผ')])" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dict(tok.dict_combine.config[\"dictionary\"]).keys()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ๅ•่ฏไฝ็ฝฎ" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "HanLPๆ”ฏๆŒ่พ“ๅ‡บๆฏไธชๅ•่ฏๅœจๆ–‡ๆœฌไธญ็š„ๅŽŸๅง‹ไฝ็ฝฎ๏ผŒไปฅไพฟ็”จไบŽๆœ็ดขๅผ•ๆ“Ž็ญ‰ๅœบๆ™ฏใ€‚ๅœจ่ฏๆณ•ๅˆ†ๆžไธญ๏ผŒ้ž่ฏญ็ด ๅญ—็ฌฆ๏ผˆ็ฉบๆ ผใ€ๆข่กŒใ€ๅˆถ่กจ็ฌฆ็ญ‰๏ผ‰ไผš่ขซๅ‰”้™ค๏ผŒๆญคๆ—ถ้œ€่ฆ้ขๅค–็š„ไฝ็ฝฎไฟกๆฏๆ‰่ƒฝๅฎšไฝๆฏไธชๅ•่ฏ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[['2021 ๅนด', 0, 6], ['HanLPv2.1', 7, 16], ['ไธบ', 17, 18], ['็”Ÿไบง', 18, 20], ['็Žฏๅขƒ', 20, 22], ['ๅธฆๆฅ', 22, 24], ['ๆฌก', 24, 25], ['ไธ–ไปฃ', 25, 27], ['ๆœ€', 27, 28], ['ๅ…ˆ่ฟ›', 28, 30], ['็š„', 30, 31], ['ๅคš', 31, 32], ['่ฏญ็ง', 32, 34], ['NLP', 34, 37], ['ๆŠ€ๆœฏ', 37, 39], ['ใ€‚', 39, 40]]\n" + ] + } + ], + "source": [ + "tok.config.output_spans = True\n", + "sent = '2021 ๅนด\\nHanLPv2.1 ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚'\n", + "word_offsets = HanLP(sent, 
tasks='tok/fine')['tok/fine']\n", + "print(word_offsets)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ฟ”ๅ›žๆ ผๅผไธบไธ‰ๅ…ƒ็ป„๏ผˆๅ•่ฏ๏ผŒๅ•่ฏ็š„่ตทๅง‹ไธ‹ๆ ‡๏ผŒๅ•่ฏ็š„็ปˆๆญขไธ‹ๆ ‡๏ผ‰๏ผŒไธ‹ๆ ‡ไปฅๅญ—็ฌฆ็บงๅˆซ่ฎก้‡ใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "for word, begin, end in word_offsets:\n", + " assert word == sent[begin:end]" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "authorship_tag": "ABX9TyNRpO7rdchCK1UmB0nQmPrG", + "collapsed_sections": [], + "include_colab_link": true, + "name": "tok_mtl.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/plugins/hanlp_demo/hanlp_demo/zh/tok_restful.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/tok_restful.ipynb new file mode 100644 index 000000000..d10f38ced --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/tok_restful.ipynb @@ -0,0 +1,324 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IYwV-UkNNzFp" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1Uf_u7ddMhUt" + }, + "outputs": [], + "source": [ + "pip install hanlp_restful -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pp-1KqEOOJ4t" + }, + "source": [ + "## ๅˆ›ๅปบๅฎขๆˆท็ซฏ" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "0tmKBu7sNAXX" + }, + "outputs": [], + "source": [ + "from hanlp_restful import HanLPClient\n", + "HanLP = HanLPClient('https://www.hanlp.com/api', auth=None, language='zh') # authไธๅกซๅˆ™ๅŒฟๅ๏ผŒzhไธญๆ–‡๏ผŒmulๅคš่ฏญ็ง" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EmZDmLn9aGxG" + }, + "source": [ + "#### ็”ณ่ฏท็ง˜้’ฅ\n", + "็”ฑไบŽๆœๅŠกๅ™จ็ฎ—ๅŠ›ๆœ‰้™๏ผŒๅŒฟๅ็”จๆˆทๆฏๅˆ†้’Ÿ้™2ๆฌก่ฐƒ็”จใ€‚ๅฆ‚ๆžœไฝ ้œ€่ฆๆ›ดๅคš่ฐƒ็”จๆฌกๆ•ฐ๏ผŒ[ๅปบ่ฎฎ็”ณ่ฏทๅ…่ดนๅ…ฌ็›ŠAPI็ง˜้’ฅauth](https://bbs.hanlp.com/t/hanlp2-1-restful-api/53)ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ๅˆ†่ฏ\n", + "HanLP็บฟไธŠๆจกๅž‹่ฎญ็ปƒ่‡ช`9970`ไธ‡ๅญ—็š„ๅคงๅž‹็ปผๅˆ่ฏญๆ–™ๅบ“๏ผŒ่ฆ†็›–ๆ–ฐ้—ปใ€็คพไบคๅช’ไฝ“ใ€้‡‘่žใ€ๆณ•ๅพ‹็ญ‰ๅคšไธช้ข†ๅŸŸ๏ผŒๆ˜ฏๅทฒ็Ÿฅ่Œƒๅ›ดๅ†…**ๅ…จไธ–็•Œๆœ€ๅคง**็š„ไธญๆ–‡ๅˆ†่ฏ่ฏญๆ–™ๅบ“ใ€‚่ฏญๆ–™ๅบ“่ง„ๆจกๅ†ณๅฎšๅฎž้™…ๆ•ˆๆžœ๏ผŒ้ขๅ‘็”Ÿไบง็Žฏๅขƒ็š„่ฏญๆ–™ๅบ“ๅบ”ๅฝ“ๅœจๅƒไธ‡ๅญ—้‡็บงใ€‚่‡ช็„ถ่ฏญไน‰็š„่ฏญ่จ€ๅญฆไธ“ๅฎถไธ€็›ดๅœจๆŒ็ปญๆ ‡ๆณจ่ฏฅ่ฏญๆ–™ๅบ“๏ผŒไธŽๆ—ถไฟฑ่ฟ›ไฟๆŒๆœ€ๅ…ˆ่ฟ›็š„ๅˆ†่ฏ่ดจ้‡ใ€‚\n", + "ๅœจๅˆ†่ฏๆ ‡ๅ‡†ไธŠ๏ผŒHanLPๆไพ›็ป†็ฒ’ๅบฆๅ’Œ็ฒ—็ฒ’ๅบฆไธค็ง้ข—็ฒ’ๅบฆ๏ผŒ็ป†็ฒ’ๅบฆ้€‚ๅˆๆœ็ดขๅผ•ๆ“ŽไธšๅŠก๏ผŒ็ฒ—็ฒ’ๅบฆ้€‚ๅˆๆ–‡ๆœฌๆŒ–ๆŽ˜ไธšๅŠกใ€‚\n", + "### ็ป†็ฒ’ๅบฆๅˆ†่ฏ\n", + "้ป˜่ฎค็ป†็ฒ’ๅบฆ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[['ๅ•†ๅ“', 'ๅ’Œ', 
'ๆœๅŠก', 'ใ€‚'],\n", + " ['้˜ฟๅฉ†ไธป', 'ๆฅๅˆฐ', 'ๅŒ—ไบฌ', '็ซ‹ๆ–นๅบญ', 'ๅ‚่ง‚', '่‡ช็„ถ', '่ฏญไน‰', '็ง‘ๆŠ€', 'ๅ…ฌๅธ', 'ใ€‚']]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "HanLP.tokenize('ๅ•†ๅ“ๅ’ŒๆœๅŠกใ€‚้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธใ€‚')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "็”จๆˆทไนŸๅฏไปฅ็›ดๆŽฅๅฐ†`HanLP`ๅฝ“ไฝœๅ‡ฝๆ•ฐ่ฐƒ็”จ๏ผŒๅนถไธ”ๆ‰“ๅฐๆผ‚ไบฎ็š„ๅˆ†่ฏ็ป“ๆžœ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "BqEmDMGGOtk3", + "outputId": "6fbb3eac-df26-4a55-8ba9-975d6cede227" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
ๅ•†ๅ“ ๅ’Œ ๆœๅŠก ใ€‚

้˜ฟๅฉ†ไธป ๆฅๅˆฐ ๅŒ—ไบฌ ็ซ‹ๆ–นๅบญ ๅ‚่ง‚ ่‡ช็„ถ ่ฏญไน‰ ็ง‘ๆŠ€ ๅ…ฌๅธ ใ€‚
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "HanLP('ๅ•†ๅ“ๅ’ŒๆœๅŠกใ€‚้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธใ€‚', tasks='tok').pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ฟ”ๅ›ž็ฑปๅž‹ไธบ[Document](https://hanlp.hankcs.com/docs/api/common/document.html)๏ผŒๆ˜ฏ`dict`็š„ๅญ็ฑป๏ผŒๆ‹“ๅฑ•ไบ†ๅพˆๅคšๆ“ไฝœๅ„็ง่ฏญ่จ€ๅญฆ็ป“ๆž„็š„ๆ–นๆณ•ใ€‚\n", + "\n", + "ไธคไธชๆŽฅๅฃ้ƒฝไผšๅฏนๆ–‡ๆœฌ่ฟ›่กŒๅˆ†ๅฅ๏ผŒๆ‰€ไปฅ่ฟ”ๅ›ž็š„็ป“ๆžœไธ€ๅฎšๆ˜ฏๅฅๅญ็š„ๅˆ—่กจใ€‚ๆŽจ่ๅœจไธ่ถ…่ฟ‡ๆœๅŠกๅ™จๅ…่ฎธ็š„ๆœ€ๅคง้•ฟๅบฆ็š„ๅ‰ๆไธ‹๏ผŒๅฐฝ้‡ไผ ๅ…ฅๆ•ด็ฏ‡ๆ–‡็ซ ๏ผŒไปฅๆ้ซ˜ๅˆ†่ฏ้€Ÿๅบฆใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jj1Jk-2sPHYx" + }, + "source": [ + "### ็ฒ—็ฒ’ๅบฆๅˆ†่ฏ\n", + "ๆ‰ง่กŒ็ฒ—้ข—็ฒ’ๅบฆๅˆ†่ฏ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[['ๅ•†ๅ“', 'ๅ’Œ', 'ๆœๅŠก', 'ใ€‚'], ['้˜ฟๅฉ†ไธป', 'ๆฅๅˆฐ', 'ๅŒ—ไบฌ', '็ซ‹ๆ–นๅบญ', 'ๅ‚่ง‚', '่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ']]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "HanLP.tokenize('ๅ•†ๅ“ๅ’ŒๆœๅŠกใ€‚้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ', coarse=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ๆˆ–่€…็›ดๆŽฅๅฝ“ๅ‡ฝๆ•ฐ่ฐƒ็”จ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "1goEC7znPNkI", + "outputId": "ddf15a17-2f5d-4bc3-d145-908fb6176552" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
้˜ฟๅฉ†ไธป ๆฅๅˆฐ ๅŒ—ไบฌ ็ซ‹ๆ–นๅบญ ๅ‚่ง‚ ่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ ใ€‚
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "HanLP('้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธใ€‚', tasks='tok/coarse').pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wxctCigrTKu-" + }, + "source": [ + "### ๅŒๆ—ถๆ‰ง่กŒ็ป†็ฒ’ๅบฆๅ’Œ็ฒ—็ฒ’ๅบฆๅˆ†่ฏ" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zo08uquCTFSk", + "outputId": "bf24a01a-a09b-4b78-fdec-2bb705b4becb" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'tok/fine': [['้˜ฟๅฉ†ไธป', 'ๆฅๅˆฐ', 'ๅŒ—ไบฌ', '็ซ‹ๆ–นๅบญ', 'ๅ‚่ง‚', '่‡ช็„ถ', '่ฏญไน‰', '็ง‘ๆŠ€', 'ๅ…ฌๅธ', 'ใ€‚']],\n", + " 'tok/coarse': [['้˜ฟๅฉ†ไธป', 'ๆฅๅˆฐ', 'ๅŒ—ไบฌ', '็ซ‹ๆ–นๅบญ', 'ๅ‚่ง‚', '่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ', 'ใ€‚']]}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "HanLP('้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธใ€‚', tasks='tok*')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`fine`ไธบ็ป†ๅˆ†๏ผŒ`coarse`ไธบ็ฒ—ๅˆ†ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ๅคš่ฏญ็งๅˆ†่ฏ\n", + "ๅพ—็›ŠไบŽ่ฏญ่จ€ๆ— ๅ…ณ็š„่ฎพ่ฎก๏ผŒHanLPๆ”ฏๆŒๅŒ…ๆ‹ฌ็ฎ€็นไธญ่‹ฑๆ—ฅไฟ„ๆณ•ๅพทๅœจๅ†…็š„104็ง่ฏญ่จ€ไธŠ็š„ๅˆ†่ฏใ€‚่ฟ™ไธ€ๅˆ‡๏ผŒๅช้œ€ๆŒ‡ๅฎš`language='mul'`ๅณๅฏๅฎž็Žฐใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
In 2021 , HanLPv2.1 delivers state-of-the-art multilingual NLP techniques to production environments .

2021 ๅนด ใ€ HanLPv2.1 ใฏ ๆฌก ไธ–ไปฃ ใฎ ๆœ€ ๅ…ˆ็ซฏ ๅคš ่จ€่ชž NLP ๆŠ€่ก“ ใ‚’ ๆœฌ็•ช ็’ฐๅขƒ ใซ ๅฐŽๅ…ฅ ใ—ใพใ™ ใ€‚

2021 ๅนด HanLPv2.1 ไธบ ็”Ÿไบง ็Žฏๅขƒ ๅธฆๆฅ ๆฌกไธ–ไปฃ ๆœ€ ๅ…ˆ่ฟ›็š„ ๅคš ่ฏญ็ง NLP ๆŠ€ๆœฏ ใ€‚
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "HanLP(['In 2021, HanLPv2.1 delivers state-of-the-art multilingual NLP techniques to production environments.',\n", + " '2021ๅนดใ€HanLPv2.1ใฏๆฌกไธ–ไปฃใฎๆœ€ๅ…ˆ็ซฏๅคš่จ€่ชžNLPๆŠ€่ก“ใ‚’ๆœฌ็•ช็’ฐๅขƒใซๅฐŽๅ…ฅใ—ใพใ™ใ€‚',\n", + " '2021ๅนด HanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚'], tasks='tok', language='mul').pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่‡ช็„ถ่ฏญ่จ€ๅค„็†ๅˆ†ไธบ่ฎธๅคšไปปๅŠก๏ผŒๅˆ†่ฏๅชๆ˜ฏๆœ€ๅˆ็บง็š„ไธ€ไธชใ€‚ไนŸ่ฎธๅคงๅฎถๅชๅฌ่ฏด่ฟ‡ไธญๆ–‡ๅˆ†่ฏ๏ผŒไฝ†HanLPๅนถไธๅฑ€้™ไบŽๅˆ†่ฏใ€‚HanLP็š„ไฝฟๅ‘ฝๆ˜ฏๆ™ฎๅŠๆœ€ๅ‰ๆฒฟ็š„่‡ช็„ถ่ฏญ่จ€ๅค„็†ๆŠ€ๆœฏๅˆฐ็”Ÿไบง็Žฏๅขƒ๏ผŒๆ‰€ไปฅๅœจๅ…ถไป–ๆ•™็จ‹ไธญไฝ ไผš่งๅˆฐ่ฎธๅคšๆ›ด้ซ˜็บง็š„NLPไปปๅŠกไปฅๅŠ็›ธๅบ”็š„API็”จๆณ•ใ€‚" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "tok_restful.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/plugins/hanlp_demo/hanlp_demo/zh/tok_stl.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/tok_stl.ipynb new file mode 100644 index 000000000..0b92527df --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/tok_stl.ipynb @@ -0,0 +1,722 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IYwV-UkNNzFp" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1Uf_u7ddMhUt" + }, + "outputs": [], + "source": [ + "!pip install hanlp -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pp-1KqEOOJ4t" + }, + "source": [ + "## ๅŠ ่ฝฝๆจกๅž‹\n", + "HanLP็š„ๅทฅไฝœๆต็จ‹ๆ˜ฏๅ…ˆๅŠ ่ฝฝๆจกๅž‹๏ผŒๆจกๅž‹็š„ๆ ‡็คบ็ฌฆๅญ˜ๅ‚จๅœจ`hanlp.pretrained`่ฟ™ไธชๅŒ…ไธญ๏ผŒๆŒ‰็…งNLPไปปๅŠกๅฝ’็ฑปใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4M7ka0K5OMWU", + "outputId": "f931579a-f5a8-487a-a89e-33d5477584c3" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'SIGHAN2005_PKU_CONVSEG': 'https://file.hankcs.com/hanlp/tok/sighan2005-pku-convseg_20200110_153722.zip',\n", + " 'SIGHAN2005_MSR_CONVSEG': 'https://file.hankcs.com/hanlp/tok/convseg-msr-nocrf-noembed_20200110_153524.zip',\n", + " 'CTB6_CONVSEG': 'https://file.hankcs.com/hanlp/tok/ctb6_convseg_nowe_nocrf_20200110_004046.zip',\n", + " 'PKU_NAME_MERGED_SIX_MONTHS_CONVSEG': 'https://file.hankcs.com/hanlp/tok/pku98_6m_conv_ngram_20200110_134736.zip',\n", + " 'LARGE_ALBERT_BASE': 'https://file.hankcs.com/hanlp/tok/large_corpus_cws_albert_base_20211228_160926.zip',\n", + " 'SIGHAN2005_PKU_BERT_BASE_ZH': 'https://file.hankcs.com/hanlp/tok/sighan2005_pku_bert_base_zh_20201231_141130.zip',\n", + " 'COARSE_ELECTRA_SMALL_ZH': 'https://file.hankcs.com/hanlp/tok/coarse_electra_small_20220616_012050.zip',\n", + " 'FINE_ELECTRA_SMALL_ZH': 'https://file.hankcs.com/hanlp/tok/fine_electra_small_20220615_231803.zip',\n", + " 'CTB9_TOK_ELECTRA_SMALL': 'https://file.hankcs.com/hanlp/tok/ctb9_electra_small_20220215_205427.zip',\n", + " 'CTB9_TOK_ELECTRA_BASE': 
'http://download.hanlp.com/tok/extra/ctb9_tok_electra_base_20220426_111949.zip',\n", + " 'CTB9_TOK_ELECTRA_BASE_CRF': 'http://download.hanlp.com/tok/extra/ctb9_tok_electra_base_crf_20220426_161255.zip',\n", + " 'MSR_TOK_ELECTRA_BASE_CRF': 'http://download.hanlp.com/tok/extra/msra_crf_electra_base_20220507_113936.zip',\n", + " 'UD_TOK_MMINILMV2L6': 'https://file.hankcs.com/hanlp/tok/ud_tok_mMiniLMv2L6_no_space_mul_20220619_091824.zip',\n", + " 'UD_TOK_MMINILMV2L12': 'https://file.hankcs.com/hanlp/tok/ud_tok_mMiniLMv2L12_no_space_mul_20220619_091159.zip'}" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import hanlp\n", + "hanlp.pretrained.tok.ALL # ่ฏญ็ง่งๅ็งฐๆœ€ๅŽไธ€ไธชๅญ—ๆฎตๆˆ–็›ธๅบ”่ฏญๆ–™ๅบ“" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BMW528wGNulM" + }, + "source": [ + "่ฐƒ็”จ`hanlp.load`่ฟ›่กŒๅŠ ่ฝฝ๏ผŒๆจกๅž‹ไผš่‡ชๅŠจไธ‹่ฝฝๅˆฐๆœฌๅœฐ็ผ“ๅญ˜ใ€‚่‡ช็„ถ่ฏญ่จ€ๅค„็†ๅˆ†ไธบ่ฎธๅคšไปปๅŠก๏ผŒๅˆ†่ฏๅชๆ˜ฏๆœ€ๅˆ็บง็š„ไธ€ไธชใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0tmKBu7sNAXX", + "outputId": "8977891f-9e64-4e39-8ce6-264a791541a3" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tok = hanlp.load(hanlp.pretrained.tok.COARSE_ELECTRA_SMALL_ZH)\n", + "tok" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ่ฟ›้˜ถ็Ÿฅ่ฏ†\n", + "ไฝ ๅฏไปฅ้€š่ฟ‡ๅŠ ่ฝฝไธๅŒ็š„ๆจกๅž‹ๅฎž็Žฐๅ„็ง้ข—็ฒ’ๅบฆใ€ๅ„็งๅˆ†่ฏๆ 
‡ๅ‡†ใ€ๅ„็ง้ข†ๅŸŸ็š„ไธญๆ–‡ๅˆ†่ฏใ€‚ๅ…ถไธญ๏ผŒcoarseๅ’Œfineๆจกๅž‹่ฎญ็ปƒ่‡ช`9970`ไธ‡ๅญ—็š„ๅคงๅž‹็ปผๅˆ่ฏญๆ–™ๅบ“๏ผŒ่ฆ†็›–ๆ–ฐ้—ปใ€็คพไบคๅช’ไฝ“ใ€้‡‘่žใ€ๆณ•ๅพ‹็ญ‰ๅคšไธช้ข†ๅŸŸ๏ผŒๆ˜ฏๅทฒ็Ÿฅ่Œƒๅ›ดๅ†…**ๅ…จไธ–็•Œๆœ€ๅคง**็š„ไธญๆ–‡ๅˆ†่ฏ่ฏญๆ–™ๅบ“ใ€‚่ฏญๆ–™ๅบ“่ง„ๆจกๅ†ณๅฎšๅฎž้™…ๆ•ˆๆžœ๏ผŒ้ขๅ‘็”Ÿไบง็Žฏๅขƒ็š„่ฏญๆ–™ๅบ“ๅบ”ๅฝ“ๅœจๅƒไธ‡ๅญ—้‡็บงใ€‚ๆฌข่ฟŽ็”จๆˆทๅœจ่‡ชๅทฑ็š„่ฏญๆ–™ไธŠ[่ฎญ็ปƒๆˆ–ๅพฎ่ฐƒๆจกๅž‹](https://github.com/hankcs/HanLP/tree/master/plugins/hanlp_demo/hanlp_demo/zh/train)ไปฅ้€‚ๅบ”ๆ–ฐ้ข†ๅŸŸใ€‚่ฏญๆ–™ๅบ“ๆ ‡ๆณจๆ ‡ๅ‡†ๅ†ณๅฎšๆœ€็ปˆ็š„ๅˆ†่ฏๆ ‡ๅ‡†๏ผŒๆจกๅž‹็š„ๅ‡†็กฎ็Ž‡ๅ†ณๅฎšๅคšๅคง็จ‹ๅบฆไธŠๅ†็Žฐ่ฏฅๅˆ†่ฏๆ ‡ๅ‡†ใ€‚ๆ›ดๅคš่ƒŒๆ™ฏ็Ÿฅ่ฏ†่ฏทๅ‚่€ƒ[ใ€Š่‡ช็„ถ่ฏญ่จ€ๅค„็†ๅ…ฅ้—จใ€‹](http://nlp.hankcs.com/book.php)ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KYH1oEKkctuy" + }, + "source": [ + "## ๆ‰ง่กŒๅˆ†่ฏ" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uzex--zFcqKB", + "outputId": "a4db6808-1039-4803-84af-2687cce0fa7b" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[['ๅ•†ๅ“', 'ๅ’Œ', 'ๆœๅŠก', 'ใ€‚'], ['ๆ™“็พŽ็„ฐ', 'ๆฅๅˆฐ', 'ๅŒ—ไบฌ็ซ‹ๆ–นๅบญ', 'ๅ‚่ง‚', '่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ']]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tok(['ๅ•†ๅ“ๅ’ŒๆœๅŠกใ€‚', 'ๆ™“็พŽ็„ฐๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ็ป†ๅˆ†ๆ ‡ๅ‡†" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ไฝ ๅฏไปฅ้€š่ฟ‡ๅŠ ่ฝฝ`FINE_ELECTRA_SMALL_ZH`ๆจกๅž‹ๅฎž็Žฐ็ป†็ฒ’ๅบฆไธญๆ–‡ๅˆ†่ฏ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "tok_fine = hanlp.load(hanlp.pretrained.tok.FINE_ELECTRA_SMALL_ZH)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ๆ— ่ฎบๅ“ชไธชๆจกๅž‹๏ผŒๅˆ†่ฏๅ™จ็š„ๆŽฅๅฃๆ˜ฏๅฎŒๅ…จไธ€่‡ด็š„๏ผš" + ] + }, + { + 
"cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['ๆ™“็พŽ็„ฐ', 'ๆฅๅˆฐ', 'ๅŒ—ไบฌ', '็ซ‹ๆ–นๅบญ', 'ๅ‚่ง‚', '่‡ช็„ถ', '่ฏญไน‰', '็ง‘ๆŠ€', 'ๅ…ฌๅธ']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tok_fine('ๆ™“็พŽ็„ฐๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ๆ— ้™้•ฟๅบฆ\n", + "ไผ—ๆ‰€ๅ‘จ็Ÿฅ๏ผŒTransformer็š„่พ“ๅ…ฅๆœ‰้•ฟๅบฆ้™ๅˆถ๏ผˆ้€šๅธธๆ˜ฏ512๏ผ‰ใ€‚ๅนธ่ฟๅœฐๆ˜ฏ๏ผŒHanLP็š„ๆป‘ๅŠจ็ช—ๅฃๆŠ€ๅทงๅฎŒ็พŽๅœฐ็ช็ ดไบ†่ฏฅ้™ๅˆถใ€‚ๅช่ฆไฝ ็š„ๅ†…ๅญ˜๏ผˆๆ˜พๅญ˜๏ผ‰่ถณๅคŸ๏ผŒHanLPๅฐฑๅฏไปฅๅค„็†ๆ— ้™้•ฟ็š„ๅฅๅญใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ๅนถ่กŒๅˆ†่ฏ\n", + "ๆ— ่ฎบๆ˜ฏCPU่ฟ˜ๆ˜ฏGPU๏ผŒๅŒๆ—ถไผ ๅ…ฅๅคšไธชๅฅๅญ้ƒฝๅฐ†ๅนถ่กŒๅˆ†่ฏใ€‚ไนŸๅฐฑๆ˜ฏ่ฏด๏ผŒไป…่Šฑ่ดน1ไธชๅฅๅญ็š„ๆ—ถ้—ดๅฏไปฅๅค„็†ๅคšไธชๅฅๅญใ€‚็„ถ่€Œๅทฅไฝœ็ ”็ฉถไธญ็š„ๆ–‡ๆœฌ้€šๅธธๆ˜ฏไธ€็ฏ‡ๆ–‡ๆกฃ๏ผŒ่€Œไธๆ˜ฏ่ฎธๅคšๅฅๅญใ€‚ๆญคๆ—ถๅฏไปฅๅˆฉ็”จHanLPๆไพ›็š„ๅˆ†ๅฅๅŠŸ่ƒฝๅ’Œๆตๆฐด็บฟๆจกๅผไผ˜้›…ๅบ”ๅฏน๏ผŒๆ—ข่ƒฝๅค„็†้•ฟๆ–‡ๆœฌๅˆ่ƒฝๅนถ่กŒๅŒ–ใ€‚ๅช้œ€ๅˆ›ๅปบไธ€ไธชๆตๆฐด็บฟ`pipeline`๏ผŒ็ฌฌไธ€็บง็ฎก้“ๅˆ†ๅฅ๏ผŒ็ฌฌไบŒ็บง็ฎก้“ๅˆ†่ฏ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[['้‡ไฝ“่ฃ่กฃ', '๏ผŒ', 'HanLP', 'ๆไพ›', 'RESTful', 'ๅ’Œ', 'native', 'ไธค็ง', 'API', 'ใ€‚'],\n", + " ['ไธค่€…', 'ๅœจ', '่ฏญไน‰', 'ไธŠ', 'ไฟๆŒ', 'ไธ€่‡ด', '๏ผŒ', 'ๅœจ', 'ไปฃ็ ', 'ไธŠ', 'ๅšๆŒ', 'ๅผ€ๆบ', 'ใ€‚']]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "HanLP = hanlp.pipeline() \\\n", + " .append(hanlp.utils.rules.split_sentence) \\\n", + " .append(tok)\n", + "HanLP('้‡ไฝ“่ฃ่กฃ๏ผŒHanLPๆไพ›RESTfulๅ’Œnativeไธค็งAPIใ€‚ไธค่€…ๅœจ่ฏญไน‰ไธŠไฟๆŒไธ€่‡ด๏ผŒๅœจไปฃ็ ไธŠๅšๆŒๅผ€ๆบใ€‚')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "่ฟ”ๅ›ž็ป“ๆžœๆ˜ฏๆฏไธชๅฅๅญ็š„ๅˆ†่ฏ`list`๏ผŒๅฆ‚ๆžœ่ฆๅฐ†ๅฎƒไปฌๅˆๅนถๅˆฐไธ€ไธช`list`้‡Œ่ฏฅๆ€ŽไนˆๅŠžๅ‘ข๏ผŸ่ชๆ˜Ž็š„็”จๆˆทๅฏ่ƒฝๅทฒ็ปๆƒณๅˆฐไบ†๏ผŒๅ†ๅŠ ไธ€็บง`lambda`็ฎก้“๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['้‡ไฝ“่ฃ่กฃ', '๏ผŒ', 'HanLP', 'ๆไพ›', 'RESTful', 'ๅ’Œ', 'native', 'ไธค็ง', 'API', 'ใ€‚', 'ไธค่€…', 'ๅœจ', '่ฏญไน‰', 'ไธŠ', 'ไฟๆŒ', 'ไธ€่‡ด', '๏ผŒ', 'ๅœจ', 'ไปฃ็ ', 'ไธŠ', 'ๅšๆŒ', 'ๅผ€ๆบ', 'ใ€‚']\n" + ] + } + ], + "source": [ + "HanLP.append(lambda sents: sum(sents, []))\n", + "print(HanLP('้‡ไฝ“่ฃ่กฃ๏ผŒHanLPๆไพ›RESTfulๅ’Œnativeไธค็งAPIใ€‚ไธค่€…ๅœจ่ฏญไน‰ไธŠไฟๆŒไธ€่‡ด๏ผŒๅœจไปฃ็ ไธŠๅšๆŒๅผ€ๆบใ€‚'))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "suUL042zPpLj" + }, + "source": [ + "## ่‡ชๅฎšไน‰่ฏๅ…ธ" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1q4MUpgVQNlu" + }, + "source": [ + "ๆ™บ่€…ๅƒ่™‘๏ผŒๅฟ…ๆœ‰ไธ€ๅคฑใ€‚ๆจกๅž‹ๅถๅฐ”ไนŸไผš็Šฏ้”™่ฏฏ๏ผŒๆฏ”ๅฆ‚ๆŸไธชๆ—ง็‰ˆๆœฌๆจกๅž‹ๅœจไธๆŒ‚่ฏๅ…ธๆ—ถไผš็Šฏไปฅไธ‹้”™่ฏฏ๏ผˆๆœ€ๆ–ฐ็‰ˆๅทฒ็ปไฟฎๅค๏ผ‰๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "2zZkH9tRQOoi", + "outputId": "a74db6c6-0a71-411c-de78-60621a43eded", + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['้ฆ–็›ธ', 'ๅ’Œ', 'ๅท', 'ๆ™ฎ้€š', '็”ต่ฏ']" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tok = hanlp.load('https://file.hankcs.com/hanlp/tok/coarse_electra_small_20220220_013548.zip')\n", + "tok.dict_force = tok.dict_combine = None\n", + "tok(\"้ฆ–็›ธๅ’Œๅทๆ™ฎ้€š็”ต่ฏ\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ไธŠ้ขๅˆ†่ฏไปปๅŠกไธคไธชๆˆๅ‘˜ๅ˜้‡`dict_force`ๅ’Œ`dict_combine`ไธบ่‡ชๅฎšไน‰่ฏๅ…ธ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 
9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "AzYShIssP6kq", + "outputId": "ce3bb1aa-5042-47d7-8ac9-7ed0fd478c77" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(None, None)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tok.dict_combine, tok.dict_force" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "HanLPๆ”ฏๆŒๅˆๅนถๅ’Œๅผบๅˆถไธค็งไผ˜ๅ…ˆ็บง็š„่‡ชๅฎšไน‰่ฏๅ…ธ๏ผŒไปฅๆปก่ถณไธๅŒๅœบๆ™ฏ็š„้œ€ๆฑ‚ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F-9gAeIVQUFG" + }, + "source": [ + "### ๅผบๅˆถๆจกๅผ\n", + "ๅผบๅˆถๆจกๅผ`dict_force`ไผ˜ๅ…ˆ่พ“ๅ‡บๆญฃๅ‘ๆœ€้•ฟๅŒน้…ๅˆฐ็š„่‡ชๅฎšไน‰่ฏๆก๏ผŒๅœจ่ฟ™ไธชๆกˆไพ‹ไธญ๏ผŒ็”จๆˆท็š„็ฌฌไธ€ๅๅบ”ไนŸ่ฎธๆ˜ฏๅฐ†`ๅทๆ™ฎ`ๅŠ ๅ…ฅๅˆฐ`dict_force`ไธญ๏ผŒๅผบๅˆถๅˆ†่ฏๅ™จ่พ“ๅ‡บ`ๅทๆ™ฎ`๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "F8M8cyBrQduw", + "outputId": "c156513c-d13c-47f1-bc3a-c73a8649ddb1" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[['้ฆ–็›ธ', 'ๅ’Œ', 'ๅทๆ™ฎ', '้€š', '็”ต่ฏ'],\n", + " ['้“ถ', 'ๅทๆ™ฎ', '้€šไบบ', 'ไธŽ', 'ๅทๆ™ฎ', '้€š', '็”ต่ฏ', '่ฎฒ', 'ๅ››', 'ๅทๆ™ฎ', '้€š่ฏ']]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tok.dict_force = {'ๅทๆ™ฎ'}\n", + "tok([\"้ฆ–็›ธๅ’Œๅทๆ™ฎ้€š็”ต่ฏ\", \"้“ถๅทๆ™ฎ้€šไบบไธŽๅทๆ™ฎ้€š็”ต่ฏ่ฎฒๅ››ๅทๆ™ฎ้€š่ฏ\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DDqQxqQaTayv" + }, + "source": [ + "็„ถ่€ŒไธŽๅคงไผ—็š„ๆœด็ด ่ฎค็ŸฅไธๅŒ๏ผŒ่ฏๅ…ธไผ˜ๅ…ˆ็บงๆœ€้ซ˜ๆœชๅฟ…ๆ˜ฏๅฅฝไบ‹ใ€‚ๆžๆœ‰ๅฏ่ƒฝๅŒน้…ๅˆฐไธ่ฏฅๅˆ†ๅ‡บๆฅ็š„่‡ชๅฎšไน‰่ฏ่ฏญ๏ผŒๅฏผ่‡ดๆญงไน‰ใ€‚ๅณไพฟๆ˜ฏๅฐ†`ๆ™ฎ้€šไบบ`ๆˆ–`ๆ™ฎ้€š่ฏ`ๅŠ ๅ…ฅๅˆฐ่ฏๅ…ธไธญไนŸๆ— ๆตŽไบŽไบ‹๏ผŒๅ› 
ไธบๅœจๆญฃๅ‘ๆœ€้•ฟๅŒน้…็ฌฌไบŒไธชๅฅๅญ็š„่ฟ‡็จ‹ไธญ๏ผŒไผšๅŒน้…ๅˆฐ`ๅทๆ™ฎ`่€ŒไธไผšๅŒน้…ๅŽไธค่€…ใ€‚่ฟ™ไนŸ่งฃ้‡Šไบ†ไธบไป€ไนˆ่‡ชๅฎšไน‰่ฏๅ…ธไธญๅญ˜ๅœจ็š„่ฏๅฏ่ƒฝๅˆ†ไธๅ‡บๆฅ๏ผšๅฝ“ๆญงไน‰ๅ‘็”Ÿๆ—ถ๏ผŒไธคไธช่ฏ่ฏญๅ‘็”Ÿไบคๅ‰ๅ†ฒ็ช๏ผŒ่‡ช็„ถๆœ‰ๆ‰€ๅ–่ˆ๏ผŒๆ— ๆณ•ๅŒๆ—ถ่พ“ๅ‡บไธค่€…ใ€‚้‚ฃ็งๅŒๆ—ถ่พ“ๅ‡บๅฅๅญๆˆ–้•ฟๅ•่ฏไธญๆ‰€ๆœ‰ๅฏ่ƒฝ็š„ๅ•่ฏ๏ผŒๅนถไธ”ๅ…่ฎธๅ•่ฏไบคๅ‰็š„็ฎ—ๆณ•๏ผŒๅนถ้žๅˆ†่ฏ๏ผŒ่€Œๆ˜ฏๅคšๆจกๅผๅญ—็ฌฆไธฒๅŒน้…ใ€‚ไฝ ้œ€่ฆๅŸบๆœฌ็š„็ฎ—ๆณ•็Ÿฅ่ฏ†ๆ‰่ƒฝ็†่งฃ่ฟ™ไธ€็‚น๏ผŒๆ€ปไน‹ไธ€่ˆฌๆƒ…ๅ†ตไธ‹ๅบ”ๅฝ“ๆ…Ž็”จๅผบๅˆถๆจกๅผ๏ผŒ่ฏฆ่ง[ใ€Š่‡ช็„ถ่ฏญ่จ€ๅค„็†ๅ…ฅ้—จใ€‹](http://nlp.hankcs.com/book.php)็ฌฌไบŒ็ซ ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่‡ชๅฎšไน‰่ฏ่ฏญ่ถŠ้•ฟ๏ผŒ่ถŠไธๅฎนๆ˜“ๅ‘็”Ÿๆญงไน‰ใ€‚่ฟ™ๅฏๅ‘ๆˆ‘ไปฌๅฐ†ๅผบๅˆถๆจกๅผๆ‹“ๅฑ•ไธบๅผบๅˆถๆ กๆญฃๅŠŸ่ƒฝใ€‚ๅผบๅˆถๆ กๆญฃๅŽŸ็†็›ธไผผ๏ผŒไฝ†ไผšๅฐ†ๅŒน้…ๅˆฐ็š„่‡ชๅฎšไน‰่ฏๆกๆ›ฟๆขไธบ็›ธๅบ”็š„ๅˆ†่ฏ็ป“ๆžœ:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bjnEqDaATdVr", + "outputId": "2e694aed-a71f-4a28-d981-0767d9e263e9" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[['้ฆ–็›ธ', 'ๅ’Œ', 'ๅทๆ™ฎ', '้€š', '็”ต่ฏ'],\n", + " ['้“ถๅท', 'ๆ™ฎ้€šไบบ', 'ไธŽ', 'ๅทๆ™ฎ', '้€š', '็”ต่ฏ', '่ฎฒ', 'ๅ››ๅท', 'ๆ™ฎ้€š่ฏ']]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tok.dict_force = {'ๅทๆ™ฎ้€š็”ต่ฏ': ['ๅทๆ™ฎ', '้€š', '็”ต่ฏ']}\n", + "tok([\"้ฆ–็›ธๅ’Œๅทๆ™ฎ้€š็”ต่ฏ\", \"้“ถๅทๆ™ฎ้€šไบบไธŽๅทๆ™ฎ้€š็”ต่ฏ่ฎฒๅ››ๅทๆ™ฎ้€š่ฏ\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ๅผบๅˆถๆ กๆญฃๆ˜ฏไธ€็ง็Ÿญๅนณๅฟซ็š„่ง„ๅˆ™่กฅไธ๏ผŒ้œ€่ฆ้’ˆๅฏนๆฏ็งๅฏ่ƒฝไบง็”Ÿๆญงไน‰็š„่ฏญๅขƒ๏ผŒๆˆชๅ–ไธ€ไธช็‰‡ๆฎตๆ‰ง่กŒๆ กๆญฃใ€‚ๅฝ“ไฝ ็งฏ็ดฏไบ†ๅพˆๅคšๆญงไน‰็‰‡ๆฎตไธŽ็›ธๅบ”็š„ๆ กๆญฃ่กฅไธๅŽ๏ผŒๅ…ถๅฎžๅฐฑๅบ”่ฏฅ่€ƒ่™‘ๅพฎ่ฐƒๆจกๅž‹ใ€‚ๅพฎ่ฐƒๅฏไปฅ่ฎฉๆจกๅž‹ๅขž้‡ๅผๅญฆไน 
่ฟ™ไบ›ๆญงไน‰่ฏญๅขƒ๏ผŒๆ‘†่„ฑๅฏน่กฅไธ่ง„ๅˆ™็š„ไพ่ต–๏ผŒๅŒๆ—ถไธพไธ€ๅไธ‰ๅบ”ๅฏนๆ–ฐ็š„่ฏญๅขƒใ€‚ไปŽ้”™่ฏฏไธญ็งฏ็ดฏ็ป้ชŒ๏ผŒ็”จ็ป้ชŒ้ข„ๆต‹ๆœชๆฅ๏ผŒ่ฟ™ๅฐฑๆ˜ฏๆœบๅ™จๅญฆไน ไธŽไบบๅทฅๆ™บ่ƒฝ็š„้ญ…ๅŠ›ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ไบ‹ๅฎžไธŠ๏ผŒโ€œๅทๆ™ฎ้€š็”ต่ฏโ€่ฟ™็งไพ‹ๅญไธ้œ€่ฆ่ฏๅ…ธๅณๅฏๅˆ†ๅฏนใ€‚ๅช้œ€ๆไพ›็ป™็ฅž็ป็ฝ‘็ปœ่ถณๅคŸ็š„ไธŠไธ‹ๆ–‡็บฟ็ดข๏ผˆ่ฟ™ไนŸๆ˜ฏ็œŸๅฎžๆ–‡ๆœฌๆ‰€ๅ…ทๅค‡็š„๏ผ‰๏ผŒๅ‘Š่ฏ‰็ฅž็ป็ฝ‘็ปœโ€œๅทๆ™ฎๆ˜ฏ็พŽๅ›ฝๆ€ป็ปŸโ€๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[['้ฆ–็›ธ', 'ๅ’Œ', 'ๅทๆ™ฎ', '้€š', '็”ต่ฏ', '๏ผŒ', 'ๅทๆ™ฎ', 'ๆ˜ฏ', '็พŽๅ›ฝ', 'ๆ€ป็ปŸ', 'ใ€‚'], ['้“ถๅท', 'ๆ™ฎ้€šไบบ', 'ไธŽ', 'ๅทๆ™ฎ', '้€š', '็”ต่ฏ', '่ฎฒ', 'ๅ››ๅท', 'ๆ™ฎ้€š่ฏ', '๏ผŒ', 'ๅทๆ™ฎ', 'ๆ˜ฏ', '็พŽๅ›ฝ', 'ๆ€ป็ปŸ', 'ใ€‚']]\n" + ] + } + ], + "source": [ + "tok.dict_force = tok.dict_combine = None\n", + "print(tok([\"้ฆ–็›ธๅ’Œๅทๆ™ฎ้€š็”ต่ฏ๏ผŒๅทๆ™ฎๆ˜ฏ็พŽๅ›ฝๆ€ป็ปŸใ€‚\", \"้“ถๅทๆ™ฎ้€šไบบไธŽๅทๆ™ฎ้€š็”ต่ฏ่ฎฒๅ››ๅทๆ™ฎ้€š่ฏ๏ผŒๅทๆ™ฎๆ˜ฏ็พŽๅ›ฝๆ€ป็ปŸใ€‚\"]))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9aRzEeRvTlRr" + }, + "source": [ + "ๅœจไธŠ้ข็š„ไพ‹ๅญไธญ๏ผŒ่™ฝ็„ถ่ฏๅ…ธๅฏนโ€œๅทๆ™ฎโ€ๆฒกๆœ‰ๆ–ฝๅŠ ไปปไฝ•ๅฝฑๅ“๏ผŒไฝ†ๆ˜ฏๆ›ดไธฐๅฏŒ็š„ไธŠไธ‹ๆ–‡ไฟƒ่ฟ›ไบ†็ฅž็ป็ฝ‘็ปœๅฏน่ฏญๅขƒ็š„็†่งฃ๏ผŒไฝฟๅ…ถๅพ—ๅ‡บไบ†ๆญฃ็กฎ็š„็ป“ๆžœใ€‚ๆทฑๅบฆๅญฆไน ไธญ็š„็ฅž็ป็ฝ‘็ปœไผผไนŽๅฑ•็คบไบ†ไบ›่ฎธๆ™บ่ƒฝ๏ผŒๆ„Ÿๅ…ด่ถฃ็š„ๅˆๅญฆ่€…ๅฏๅ‚่€ƒ[ใ€Š่‡ช็„ถ่ฏญ่จ€ๅค„็†ๅ…ฅ้—จใ€‹](http://nlp.hankcs.com/book.php)ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ldKAnVoSTgxb" + }, + "source": [ + "### ๅˆๅนถๆจกๅผ\n", + "ๅˆๅนถๆจกๅผ็š„ไผ˜ๅ…ˆ็บงไฝŽไบŽ็ปŸ่ฎกๆจกๅž‹๏ผŒๅณ`dict_combine`ไผšๅœจ็ปŸ่ฎกๆจกๅž‹็š„ๅˆ†่ฏ็ป“ๆžœไธŠๆ‰ง่กŒๆœ€้•ฟๅŒน้…ๅนถๅˆๅนถๅŒน้…ๅˆฐ็š„่ฏๆกใ€‚ไธ€่ˆฌๆƒ…ๅ†ตไธ‹๏ผŒๆŽจ่ไฝฟ็”จ่ฏฅๆจกๅผใ€‚ๆฏ”ๅฆ‚๏ผŒๅฐ†โ€œ็พŽๅ›ฝๆ€ป็ปŸโ€ๅŠ ๅ…ฅ`dict_combine`ๅŽไผšๅˆๅนถ`['็พŽๅ›ฝ', 'ๆ€ป็ปŸ']`๏ผŒ่€Œไธไผšๅˆๅนถ`['็พŽๅ›ฝ', 'ๆ€ป', 
'็ปŸ็ญน้ƒจ']`ไธบ`['็พŽๅ›ฝๆ€ป็ปŸ', '็ญน้ƒจ']`๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bwIu0f6wTgbF", + "outputId": "22807b6a-3472-431b-d1e3-95f6b761c84c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[['้ฆ–็›ธ', 'ๅ’Œ', 'ๅทๆ™ฎ', '้€š', '็”ต่ฏ', '๏ผŒ', 'ๅทๆ™ฎ', 'ๆ˜ฏ', '็พŽๅ›ฝๆ€ป็ปŸ', 'ใ€‚'], ['้“ถๅท', 'ๆ™ฎ้€šไบบ', 'ไธŽ', 'ๅทๆ™ฎ', '้€š', '็”ต่ฏ', '่ฎฒ', 'ๅ››ๅท', 'ๆ™ฎ้€š่ฏ', '๏ผŒ', 'ๅทๆ™ฎ', 'ๆ˜ฏ', '็พŽๅ›ฝๆ€ป็ปŸ', 'ใ€‚'], ['็พŽๅ›ฝ', 'ๆ€ป็ปŸ็ญน้ƒจ', '้ƒจ้•ฟ', 'ๆ˜ฏ', '่ฐ', '๏ผŸ']]\n" + ] + } + ], + "source": [ + "tok.dict_force = None\n", + "tok.dict_combine = {'็พŽๅ›ฝๆ€ป็ปŸ'}\n", + "print(tok([\"้ฆ–็›ธๅ’Œๅทๆ™ฎ้€š็”ต่ฏ๏ผŒๅทๆ™ฎๆ˜ฏ็พŽๅ›ฝๆ€ป็ปŸใ€‚\", \"้“ถๅทๆ™ฎ้€šไบบไธŽๅทๆ™ฎ้€š็”ต่ฏ่ฎฒๅ››ๅทๆ™ฎ้€š่ฏ๏ผŒๅทๆ™ฎๆ˜ฏ็พŽๅ›ฝๆ€ป็ปŸใ€‚\", \"็พŽๅ›ฝๆ€ป็ปŸ็ญน้ƒจ้ƒจ้•ฟๆ˜ฏ่ฐ๏ผŸ\"]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### ็ฉบๆ ผๅ•่ฏ" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ๅซๆœ‰็ฉบๆ ผใ€ๅˆถ่กจ็ฌฆ็ญ‰๏ผˆTransformer tokenizerๅŽปๆŽ‰็š„ๅญ—็ฌฆ๏ผ‰็š„่ฏ่ฏญ้œ€่ฆ็”จ`tuple`็š„ๅฝขๅผๆไพ›๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['ๅฆ‚ไฝ•', '่ฏ„ไปท', 'iPad Pro', '๏ผŸ', 'iPad Pro', 'ๆœ‰', '2ไธช็ฉบๆ ผ']" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tok.dict_combine = {('iPad', 'Pro'), '2ไธช็ฉบๆ ผ'}\n", + "tok(\"ๅฆ‚ไฝ•่ฏ„ไปทiPad Pro ๏ผŸiPad Proๆœ‰2ไธช็ฉบๆ ผ\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ชๆ˜Ž็š„็”จๆˆท่ฏท็ปง็ปญ้˜…่ฏป๏ผŒ`tuple`่ฏๅ…ธไธญ็š„ๅญ—็ฌฆไธฒๅ…ถๅฎž็ญ‰ไปทไบŽ่ฏฅๅญ—็ฌฆไธฒ็š„ๆ‰€ๆœ‰ๅฏ่ƒฝ็š„ๅˆ‡ๅˆ†ๆ–นๅผ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys([('iPad', 
'Pro'), ('2ไธช็ฉบๆ ผ',), ('2', 'ไธช', '็ฉบๆ ผ'), ('2', 'ไธช', '็ฉบ', 'ๆ ผ'), ('2', 'ไธช็ฉบๆ ผ'), ('2', 'ไธช็ฉบ', 'ๆ ผ'), ('2ไธช', '็ฉบ', 'ๆ ผ'), ('2ไธช', '็ฉบๆ ผ'), ('2ไธช็ฉบ', 'ๆ ผ')])" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dict(tok.dict_combine.config[\"dictionary\"]).keys()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ๅ•่ฏไฝ็ฝฎ" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "HanLPๆ”ฏๆŒ่พ“ๅ‡บๆฏไธชๅ•่ฏๅœจๆ–‡ๆœฌไธญ็š„ๅŽŸๅง‹ไฝ็ฝฎ๏ผŒไปฅไพฟ็”จไบŽๆœ็ดขๅผ•ๆ“Ž็ญ‰ๅœบๆ™ฏใ€‚ๅœจ่ฏๆณ•ๅˆ†ๆžไธญ๏ผŒ้ž่ฏญ็ด ๅญ—็ฌฆ๏ผˆ็ฉบๆ ผใ€ๆข่กŒใ€ๅˆถ่กจ็ฌฆ็ญ‰๏ผ‰ไผš่ขซๅ‰”้™ค๏ผŒๆญคๆ—ถ้œ€่ฆ้ขๅค–็š„ไฝ็ฝฎไฟกๆฏๆ‰่ƒฝๅฎšไฝๆฏไธชๅ•่ฏ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[['2021', 0, 4], ['ๅนด', 5, 6], ['HanLPv2.1', 7, 16], ['ไธบ', 17, 18], ['็”Ÿไบง', 18, 20], ['็Žฏๅขƒ', 20, 22], ['ๅธฆๆฅ', 22, 24], ['ๆฌก', 24, 25], ['ไธ–ไปฃ', 25, 27], ['ๆœ€', 27, 28], ['ๅ…ˆ่ฟ›', 28, 30], ['็š„', 30, 31], ['ๅคš', 31, 32], ['่ฏญ็ง', 32, 34], ['NLP', 34, 37], ['ๆŠ€ๆœฏ', 37, 39], ['ใ€‚', 39, 40]]\n" + ] + } + ], + "source": [ + "tok.config.output_spans = True\n", + "sent = '2021 ๅนด\\nHanLPv2.1 ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚'\n", + "word_offsets = tok(sent)\n", + "print(word_offsets)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "่ฟ”ๅ›žๆ ผๅผไธบไธ‰ๅ…ƒ็ป„๏ผˆๅ•่ฏ๏ผŒๅ•่ฏ็š„่ตทๅง‹ไธ‹ๆ ‡๏ผŒๅ•่ฏ็š„็ปˆๆญขไธ‹ๆ ‡๏ผ‰๏ผŒไธ‹ๆ ‡ไปฅๅญ—็ฌฆ็บงๅˆซ่ฎก้‡ใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "for word, begin, end in word_offsets:\n", + " assert word == sent[begin:end]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ๅคš่ฏญ็งๆ”ฏๆŒ" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, 
+ "source": [ + "ๅพ—็›ŠไบŽ่ฏญ่จ€ๆ— ๅ…ณ็š„่ฎพ่ฎก๏ผŒไปฅๅŠๅคง่ง„ๆจกๅคš่ฏญ็ง่ฏญๆ–™ๅบ“๏ผŒๆœ€่ฟ‘HanLPๅ‘ๅธƒไบ†ๆ”ฏๆŒ[130็ง่ฏญ่จ€](https://hanlp.hankcs.com/docs/api/hanlp/pretrained/tok.html#hanlp.pretrained.tok.UD_TOK_MMINILMV2L12)็š„ๅ•ไปปๅŠกๅˆ†่ฏๅ™จใ€‚็”จๆณ•ไธŽไธญๆ–‡ๅˆ†่ฏๅ™จ็›ธๅŒ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "mul = hanlp.load(hanlp.pretrained.tok.UD_TOK_MMINILMV2L6)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[['In', '2021', ',', 'HanLPv2.1', 'delivers', 'state-of-the-art', 'multilingual', 'NLP', 'techniques', 'to', 'production', 'environments', '.'], ['2021ๅนด', 'ใ€', 'HanLPv2.1', 'ใฏ', 'ๆฌกไธ–ไปฃ', 'ใฎ', 'ๆœ€', 'ๅ…ˆ็ซฏ', 'ๅคš', '่จ€่ชž', 'NLP', 'ๆŠ€่ก“', 'ใ‚’', 'ๆœฌ็•ช', '็’ฐๅขƒ', 'ใซ', 'ๅฐŽๅ…ฅ', 'ใ—', 'ใพใ™', 'ใ€‚'], ['2021ๅนด', 'HanLPv2.1', 'ไธบ', '็”Ÿไบง', '็Žฏๅขƒ', 'ๅธฆๆฅ', 'ๆฌก', 'ไธ–ไปฃ', 'ๆœ€', 'ๅ…ˆ่ฟ›', '็š„', 'ๅคš่ฏญ็ง', 'NLP', 'ๆŠ€ๆœฏ', 'ใ€‚'], ['ๅฅˆ้ ˆใใฎใ“', 'ใฏ', '1973ๅนด', '11ๆœˆ', '28ๆ—ฅ', 'ใซ', 'ๅƒ่‘‰', '็œŒ', 'ๅ††็ฉบๅฑฑ', 'ใง', '็”Ÿใพใ‚Œ', 'ใ€', 'ใ‚ฒใƒผใƒ ', 'ๅˆถไฝœ', 'ไผš็คพ', 'ใ€Œ', 'ใƒŽใƒผใƒ„', 'ใ€', 'ใฎ', '่จญ็ซ‹', '่€…', 'ใ ', 'ใ€‚']]\n" + ] + } + ], + "source": [ + "print(mul([\n", + " 'In 2021, HanLPv2.1 delivers state-of-the-art multilingual NLP techniques to production environments.',\n", + " '2021ๅนดใ€HanLPv2.1ใฏๆฌกไธ–ไปฃใฎๆœ€ๅ…ˆ็ซฏๅคš่จ€่ชžNLPๆŠ€่ก“ใ‚’ๆœฌ็•ช็’ฐๅขƒใซๅฐŽๅ…ฅใ—ใพใ™ใ€‚',\n", + " '2021ๅนด HanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚',\n", + " 'ๅฅˆ้ ˆใใฎใ“ใฏ1973ๅนด11ๆœˆ28ๆ—ฅใซๅƒ่‘‰็œŒๅ††็ฉบๅฑฑใง็”Ÿใพใ‚Œใ€ใ‚ฒใƒผใƒ ๅˆถไฝœไผš็คพใ€ŒใƒŽใƒผใƒ„ใ€ใฎ่จญ็ซ‹่€…ใ ใ€‚'\n", + "]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "็›ฎๅ‰๏ผŒๅคš่ฏญ็งๅˆ†่ฏๅ™จ็š„ๆ•ˆๆžœๅนถไธๅฆ‚ๅ•่ฏญ็งๅฅฝใ€‚ๆฌข่ฟŽๅœจไฝ 
่‡ชๅทฑ็š„ๅ•่ฏญ็ง่ฏญๆ–™ไธŠ่‡ช่กŒ่ฎญ็ปƒๆ–ฐๆจกๅž‹๏ผŒไนŸๆฌข่ฟŽๅผ€ๆบไฝ ็š„่ฏญๆ–™ๅ’Œๆจกๅž‹ใ€‚" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "authorship_tag": "ABX9TyPxXzYAXgLUW5uKV7v0/2iP", + "collapsed_sections": [], + "name": "tok_stl.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/plugins/hanlp_demo/hanlp_demo/zh/train/finetune_ner.py b/plugins/hanlp_demo/hanlp_demo/zh/train/finetune_ner.py index 9086c338b..e6586fcc7 100644 --- a/plugins/hanlp_demo/hanlp_demo/zh/train/finetune_ner.py +++ b/plugins/hanlp_demo/hanlp_demo/zh/train/finetune_ner.py @@ -26,16 +26,21 @@ ) ner = TransformerNamedEntityRecognizer() -ner.fit( - trn_data=your_training_corpus, - dev_data=your_development_corpus, - save_dir=save_dir, - epochs=50, # Since the corpus is small, overfit it - finetune=hanlp.pretrained.ner.MSRA_NER_ELECTRA_SMALL_ZH, - # You MUST set the same parameters with the fine-tuning model: - average_subwords=True, - transformer='hfl/chinese-electra-180g-small-discriminator', -) +if not os.path.exists(save_dir): + print('Start fine-tuning ') + ner.fit( + trn_data=your_training_corpus, + dev_data=your_development_corpus, + save_dir=save_dir, + epochs=50, # Since the corpus is small, overfit it + finetune=hanlp.pretrained.ner.MSRA_NER_ELECTRA_SMALL_ZH, + # You MUST set the same parameters with the fine-tuning model: + average_subwords=True, + transformer='hfl/chinese-electra-180g-small-discriminator', + ) +else: + print('Load fine-tuned model') + ner = hanlp.load(save_dir) HanLP = hanlp.pipeline()\ 
.append(hanlp.load(hanlp.pretrained.tok.FINE_ELECTRA_SMALL_ZH), output_key='tok')\ diff --git a/plugins/hanlp_demo/hanlp_demo/zh/tst_restful.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/tst_restful.ipynb new file mode 100644 index 000000000..e0ee0d616 --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/tst_restful.ipynb @@ -0,0 +1,135 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WfGpInivS0fG" + }, + "source": [ + "

็‚นๅ‡ปไธ‹ๅˆ—ๅ›พๆ ‡ๅœจ็บฟ่ฟ่กŒHanLP

\n", + "
\n", + "\t\"Open\n", + "\t\"Open\n", + "
\n", + "\n", + "## ๅฎ‰่ฃ…" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IYwV-UkNNzFp" + }, + "source": [ + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎš๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1Uf_u7ddMhUt" + }, + "outputs": [], + "source": [ + "!pip install hanlp_restful -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pp-1KqEOOJ4t" + }, + "source": [ + "## ๅˆ›ๅปบๅฎขๆˆท็ซฏ" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "0tmKBu7sNAXX" + }, + "outputs": [], + "source": [ + "from hanlp_restful import HanLPClient\n", + "HanLP = HanLPClient('https://www.hanlp.com/api', auth=None, language='zh') # authไธๅกซๅˆ™ๅŒฟๅ๏ผŒzhไธญๆ–‡๏ผŒmulๅคš่ฏญ็ง" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EmZDmLn9aGxG" + }, + "source": [ + "#### ็”ณ่ฏท็ง˜้’ฅ\n", + "็”ฑไบŽๆœๅŠกๅ™จ็ฎ—ๅŠ›ๆœ‰้™๏ผŒๅŒฟๅ็”จๆˆทๆฏๅˆ†้’Ÿ้™2ๆฌก่ฐƒ็”จใ€‚ๅฆ‚ๆžœไฝ ้œ€่ฆๆ›ดๅคš่ฐƒ็”จๆฌกๆ•ฐ๏ผŒ[ๅปบ่ฎฎ็”ณ่ฏทๅ…่ดนๅ…ฌ็›ŠAPI็ง˜้’ฅauth](https://bbs.hanlp.com/t/hanlp2-1-restful-api/53)ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "elA_UyssOut_" + }, + "source": [ + "## ๆ–‡ๆœฌ้ฃŽๆ ผ่ฝฌๆข\n", + "่พ“ๅ…ฅ็Ÿญๆ–‡ๆœฌไปฅๅŠ็›ฎๆ ‡้ฃŽๆ ผ๏ผŒๆ‰ง่กŒๆ–‡ๆœฌ้ฃŽๆ ผ่ฝฌๆข๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "BqEmDMGGOtk3", + "outputId": "2a0d392f-b99a-4a18-fc7f-754e2abe2e34" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['ๅ›ฝๅฎถๅฏนไธญ็Ÿณๆฒนๅฏ„ไบˆๅทจๅคงๆœŸๆœ›ใ€‚', '่ฆ็”จๅˆ›ๆ–ฐๆŽจๅŠจ้ซ˜่ดจ้‡ๅ‘ๅฑ•ใ€‚']" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "HanLP.text_style_transfer(['ๅ›ฝๅฎถๅฏนไธญ็ŸณๆฒนๆŠฑๆœ‰ๅพˆๅคง็š„ๆœŸๆœ›.', '่ฆ็”จๅˆ›ๆ–ฐๅŽปๆŽจๅŠจ้ซ˜่ดจ้‡็š„ๅ‘ๅฑ•ใ€‚'],\n", + " target_style='gov_doc')" + ] + } + ], + "metadata": { + "accelerator": 
"GPU", + "colab": { + "collapsed_sections": [], + "name": "tst_restful.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/plugins/hanlp_demo/hanlp_demo/zh/tutorial.ipynb b/plugins/hanlp_demo/hanlp_demo/zh/tutorial.ipynb new file mode 100644 index 000000000..383b960fb --- /dev/null +++ b/plugins/hanlp_demo/hanlp_demo/zh/tutorial.ipynb @@ -0,0 +1,993 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "BZPSH4VkK7J2" + }, + "source": [ + "ๆฌข่ฟŽๆฅๅˆฐHanLPๅœจ็บฟไบคไบ’็Žฏๅขƒ๏ผŒ่ฟ™ๆ˜ฏไธ€ไธชJupyter่ฎฐไบ‹ๆœฌ๏ผŒๅฏไปฅ่พ“ๅ…ฅไปปๆ„Pythonไปฃ็ ๅนถๅœจ็บฟๆ‰ง่กŒใ€‚่ฏท็‚นๅ‡ปๅทฆไธŠ่ง’ใ€Runใ€‘ๆฅ่ฟ่กŒ่ฟ™็ฏ‡NLPๆ•™็จ‹ใ€‚\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XxPAiNwSK7J4" + }, + "source": [ + "## ๅฎ‰่ฃ…\n", + "้‡ไฝ“่ฃ่กฃ๏ผŒHanLPๆไพ›**RESTful**๏ผˆไบ‘็ซฏ๏ผ‰ๅ’Œ**native**๏ผˆๆœฌๅœฐ๏ผ‰ไธค็งAPI๏ผŒๅˆ†ๅˆซ้ขๅ‘่ฝป้‡็บงๅ’Œๆตท้‡็บงไธค็งๅœบๆ™ฏใ€‚ๆ— ่ฎบไฝ•็งAPIไฝ•็ง่ฏญ่จ€๏ผŒHanLPๆŽฅๅฃๅœจ่ฏญไน‰ไธŠไฟๆŒไธ€่‡ด๏ผŒไฝ ๅฏไปฅ**ไปป้€‰ไธ€็ง**APIๆฅ่ฟ่กŒๆœฌๆ•™็จ‹ใ€‚\n", + "\n", + "### ่ฝป้‡็บงRESTful API\n", + "\n", + "ไป…ๆ•ฐKB๏ผŒ้€‚ๅˆๆ•ๆทๅผ€ๅ‘ใ€็งปๅŠจAPP็ญ‰ๅœบๆ™ฏใ€‚็ฎ€ๅ•ๆ˜“็”จ๏ผŒๆ— ้œ€GPU้…็Žฏๅขƒ๏ผŒ**ๅผบ็ƒˆๆŽจ่**๏ผŒ็ง’้€Ÿๅฎ‰่ฃ…๏ผš\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "lgMa4kbfK7J5", + "outputId": "5bb662d8-1665-4bcc-c517-70d1c4bc4837" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: hanlp_restful in 
/usr/local/lib/python3.7/dist-packages (0.0.7)\n", + "Requirement already satisfied: hanlp-common in /usr/local/lib/python3.7/dist-packages (from hanlp_restful) (0.0.9)\n", + "Requirement already satisfied: phrasetree in /usr/local/lib/python3.7/dist-packages (from hanlp-common->hanlp_restful) (0.0.8)\n" + ] + } + ], + "source": [ + "!pip install hanlp_restful" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "N4G6GbNmK7J6" + }, + "source": [ + "ๅˆ›ๅปบๅฎขๆˆท็ซฏ๏ผŒๅกซๅ…ฅๆœๅŠกๅ™จๅœฐๅ€๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "3XM9-3-oK7J6" + }, + "outputs": [], + "source": [ + "from hanlp_restful import HanLPClient\n", + "HanLP = HanLPClient('https://www.hanlp.com/api', auth=None, language='zh') # authไธๅกซๅˆ™ๅŒฟๅ๏ผŒzhไธญๆ–‡๏ผŒmulๅคš่ฏญ็ง" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pbeFH9jmK7J7" + }, + "source": [ + "่ฐƒ็”จ`parse`ๆŽฅๅฃ๏ผŒไผ ๅ…ฅไธ€็ฏ‡ๆ–‡็ซ ๏ผŒๅพ—ๅˆฐHanLP็ฒพๅ‡†็š„ๅˆ†ๆž็ป“ๆžœใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mNJPvZ_3K7J7", + "outputId": "4048d0d6-2dad-4582-e327-f99338f8f72b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"tok/fine\": [\n", + " [\"2021ๅนด\", \"HanLPv2.1\", \"ไธบ\", \"็”Ÿไบง\", \"็Žฏๅขƒ\", \"ๅธฆๆฅ\", \"ๆฌก\", \"ไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš\", \"่ฏญ็ง\", \"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"],\n", + " [\"้˜ฟๅฉ†ไธป\", \"ๆฅๅˆฐ\", \"ๅŒ—ไบฌ\", \"็ซ‹ๆ–นๅบญ\", \"ๅ‚่ง‚\", \"่‡ช็„ถ\", \"่ฏญไน‰\", \"็ง‘ๆŠ€\", \"ๅ…ฌๅธ\", \"ใ€‚\"]\n", + " ],\n", + " \"tok/coarse\": [\n", + " [\"2021ๅนด\", \"HanLPv2.1\", \"ไธบ\", \"็”Ÿไบง็Žฏๅขƒ\", \"ๅธฆๆฅ\", \"ๆฌกไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš่ฏญ็ง\", \"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"],\n", + " [\"้˜ฟๅฉ†ไธป\", \"ๆฅๅˆฐ\", \"ๅŒ—ไบฌ็ซ‹ๆ–นๅบญ\", \"ๅ‚่ง‚\", \"่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ\", \"ใ€‚\"]\n", + " ],\n", + " \"pos/ctb\": [\n", + " [\"NT\", 
\"NR\", \"P\", \"NN\", \"NN\", \"VV\", \"JJ\", \"NN\", \"AD\", \"JJ\", \"DEG\", \"CD\", \"NN\", \"NR\", \"NN\", \"PU\"],\n", + " [\"NN\", \"VV\", \"NR\", \"NR\", \"VV\", \"NN\", \"NN\", \"NN\", \"NN\", \"PU\"]\n", + " ],\n", + " \"pos/pku\": [\n", + " [\"t\", \"nx\", \"p\", \"vn\", \"n\", \"v\", \"b\", \"n\", \"d\", \"a\", \"u\", \"a\", \"n\", \"nx\", \"n\", \"w\"],\n", + " [\"n\", \"v\", \"ns\", \"ns\", \"v\", \"n\", \"n\", \"n\", \"n\", \"w\"]\n", + " ],\n", + " \"pos/863\": [\n", + " [\"nt\", \"w\", \"p\", \"v\", \"n\", \"v\", \"a\", \"nt\", \"d\", \"a\", \"u\", \"a\", \"n\", \"ws\", \"n\", \"w\"],\n", + " [\"n\", \"v\", \"ns\", \"n\", \"v\", \"n\", \"n\", \"n\", \"n\", \"w\"]\n", + " ],\n", + " \"ner/msra\": [\n", + " [[\"2021ๅนด\", \"DATE\", 0, 1], [\"HanLPv2.1\", \"ORGANIZATION\", 1, 2]],\n", + " [[\"ๅŒ—ไบฌ็ซ‹ๆ–นๅบญ\", \"LOCATION\", 2, 4], [\"่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ\", \"ORGANIZATION\", 5, 9]]\n", + " ],\n", + " \"ner/pku\": [\n", + " [],\n", + " [[\"ๅŒ—ไบฌ็ซ‹ๆ–นๅบญ\", \"ns\", 2, 4], [\"่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ\", \"nt\", 5, 9]]\n", + " ],\n", + " \"ner/ontonotes\": [\n", + " [[\"2021ๅนด\", \"DATE\", 0, 1], [\"HanLPv2.1\", \"ORG\", 1, 2]],\n", + " [[\"ๅŒ—ไบฌ็ซ‹ๆ–นๅบญ\", \"FAC\", 2, 4], [\"่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ\", \"ORG\", 5, 9]]\n", + " ],\n", + " \"srl\": [\n", + " [[[\"2021ๅนด\", \"ARGM-TMP\", 0, 1], [\"HanLPv2.1\", \"ARG0\", 1, 2], [\"ไธบ็”Ÿไบง็Žฏๅขƒ\", \"ARG2\", 2, 5], [\"ๅธฆๆฅ\", \"PRED\", 5, 6], [\"ๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏ\", \"ARG1\", 6, 15]], [[\"ๆœ€\", \"ARGM-ADV\", 8, 9], [\"ๅ…ˆ่ฟ›\", \"PRED\", 9, 10], [\"ๆŠ€ๆœฏ\", \"ARG0\", 14, 15]]],\n", + " [[[\"้˜ฟๅฉ†ไธป\", \"ARG0\", 0, 1], [\"ๆฅๅˆฐ\", \"PRED\", 1, 2], [\"ๅŒ—ไบฌ็ซ‹ๆ–นๅบญ\", \"ARG1\", 2, 4]], [[\"้˜ฟๅฉ†ไธป\", \"ARG0\", 0, 1], [\"ๅ‚่ง‚\", \"PRED\", 4, 5], [\"่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ\", \"ARG1\", 5, 9]]]\n", + " ],\n", + " \"dep\": [\n", + " [[6, \"tmod\"], [6, \"nsubj\"], [6, \"prep\"], [5, \"nn\"], [3, \"pobj\"], [0, \"root\"], [8, \"amod\"], [15, \"nn\"], [10, \"advmod\"], 
[15, \"rcmod\"], [10, \"assm\"], [13, \"nummod\"], [15, \"nn\"], [15, \"nn\"], [6, \"dobj\"], [6, \"punct\"]],\n", + " [[2, \"nsubj\"], [0, \"root\"], [4, \"nn\"], [2, \"dobj\"], [2, \"conj\"], [9, \"nn\"], [9, \"nn\"], [9, \"nn\"], [5, \"dobj\"], [2, \"punct\"]]\n", + " ],\n", + " \"sdp\": [\n", + " [[[6, \"Time\"]], [[6, \"Exp\"]], [[5, \"mPrep\"]], [[5, \"Desc\"]], [[6, \"Datv\"]], [[13, \"dDesc\"]], [[0, \"Root\"], [8, \"Desc\"], [13, \"Desc\"]], [[15, \"Time\"]], [[10, \"mDegr\"]], [[15, \"Desc\"]], [[10, \"mAux\"]], [[8, \"Quan\"], [13, \"Quan\"]], [[15, \"Desc\"]], [[15, \"Nmod\"]], [[6, \"Pat\"]], [[6, \"mPunc\"]]],\n", + " [[[2, \"Agt\"], [5, \"Agt\"]], [[0, \"Root\"]], [[4, \"Loc\"]], [[2, \"Lfin\"]], [[2, \"ePurp\"]], [[8, \"Nmod\"]], [[9, \"Nmod\"]], [[9, \"Nmod\"]], [[5, \"Datv\"]], [[5, \"mPunc\"]]]\n", + " ],\n", + " \"con\": [\n", + " [\"TOP\", [[\"IP\", [[\"NP\", [[\"NT\", [\"2021ๅนด\"]]]], [\"NP\", [[\"NR\", [\"HanLPv2.1\"]]]], [\"VP\", [[\"PP\", [[\"P\", [\"ไธบ\"]], [\"NP\", [[\"NN\", [\"็”Ÿไบง\"]], [\"NN\", [\"็Žฏๅขƒ\"]]]]]], [\"VP\", [[\"VV\", [\"ๅธฆๆฅ\"]], [\"NP\", [[\"ADJP\", [[\"NP\", [[\"ADJP\", [[\"JJ\", [\"ๆฌก\"]]]], [\"NP\", [[\"NN\", [\"ไธ–ไปฃ\"]]]]]], [\"ADVP\", [[\"AD\", [\"ๆœ€\"]]]], [\"VP\", [[\"JJ\", [\"ๅ…ˆ่ฟ›\"]]]]]], [\"DEG\", [\"็š„\"]], [\"NP\", [[\"QP\", [[\"CD\", [\"ๅคš\"]]]], [\"NP\", [[\"NN\", [\"่ฏญ็ง\"]]]]]], [\"NP\", [[\"NR\", [\"NLP\"]], [\"NN\", [\"ๆŠ€ๆœฏ\"]]]]]]]]]], [\"PU\", [\"ใ€‚\"]]]]]],\n", + " [\"TOP\", [[\"IP\", [[\"NP\", [[\"NN\", [\"้˜ฟๅฉ†ไธป\"]]]], [\"VP\", [[\"VP\", [[\"VV\", [\"ๆฅๅˆฐ\"]], [\"NP\", [[\"NR\", [\"ๅŒ—ไบฌ\"]], [\"NR\", [\"็ซ‹ๆ–นๅบญ\"]]]]]], [\"VP\", [[\"VV\", [\"ๅ‚่ง‚\"]], [\"NP\", [[\"NN\", [\"่‡ช็„ถ\"]], [\"NN\", [\"่ฏญไน‰\"]], [\"NN\", [\"็ง‘ๆŠ€\"]], [\"NN\", [\"ๅ…ฌๅธ\"]]]]]]]], [\"PU\", [\"ใ€‚\"]]]]]]\n", + " ]\n", + "}\n" + ] + } + ], + "source": [ + "doc = 
HanLP.parse(\"2021ๅนดHanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธใ€‚\")\n", + "print(doc)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "w4E8Kn_nK7J8" + }, + "source": [ + "#### ๅฏ่ง†ๅŒ–\n", + "่พ“ๅ‡บ็ป“ๆžœๆ˜ฏไธ€ไธชๅฏไปฅ`json`ๅŒ–็š„`dict`๏ผŒ้”ฎไธบ[NLPไปปๅŠกๅ](https://hanlp.hankcs.com/docs/data_format.html#naming-convention)๏ผŒๅ€ผไธบๅˆ†ๆž็ป“ๆžœใ€‚ๅ…ณไบŽๆ ‡ๆณจ้›†ๅซไน‰๏ผŒ่ฏทๅ‚่€ƒ[ใ€Š่ฏญ่จ€ๅญฆๆ ‡ๆณจ่ง„่Œƒใ€‹](https://hanlp.hankcs.com/docs/annotations/index.html)ๅŠ[ใ€Šๆ ผๅผ่ง„่Œƒใ€‹](https://hanlp.hankcs.com/docs/data_format.html)ใ€‚ๆˆ‘ไปฌ่ดญไนฐใ€ๆ ‡ๆณจๆˆ–้‡‡็”จไบ†ไธ–็•ŒไธŠ้‡็บงๆœ€ๅคงใ€็ง็ฑปๆœ€ๅคš็š„่ฏญๆ–™ๅบ“็”จไบŽ่”ๅˆๅคš่ฏญ็งๅคšไปปๅŠกๅญฆไน ๏ผŒๆ‰€ไปฅHanLP็š„ๆ ‡ๆณจ้›†ไนŸๆ˜ฏ่ฆ†็›–้ขๆœ€ๅนฟ็š„ใ€‚้€š่ฟ‡`doc.pretty_print`๏ผŒๅฏไปฅๅœจ็ญ‰ๅฎฝๅญ—ไฝ“็Žฏๅขƒไธญๅพ—ๅˆฐๅฏ่ง†ๅŒ–๏ผŒไฝ ้œ€่ฆๅ–ๆถˆๆข่กŒๆ‰่ƒฝๅฏน้ฝๅฏ่ง†ๅŒ–็ป“ๆžœใ€‚ๆˆ‘ไปฌๅทฒ็ปๅ‘ๅธƒHTML็Žฏๅขƒ็š„ๅฏ่ง†ๅŒ–๏ผŒๅœจJupyter Notebookไธญ่‡ชๅŠจๅฏน้ฝไธญๆ–‡ใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 575 + }, + "id": "GZ79la4LK7J8", + "outputId": "b9bd5dc0-52f9-4b42-93fd-7c4e49214ace" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
Dep Tree     
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
 โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ 
 โ”‚โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ 
 โ”‚โ”‚โ”Œโ”€โ–บโ”Œโ”€โ”€โ”€โ”€โ”€ 
 โ”‚โ”‚โ”‚  โ”‚  โ”Œโ”€โ–บ 
 โ”‚โ”‚โ”‚  โ””โ”€โ–บโ””โ”€โ”€ 
โ”Œโ”ผโ”ดโ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
โ”‚โ”‚       โ”Œโ”€โ–บ 
โ”‚โ”‚  โ”Œโ”€โ”€โ”€โ–บโ””โ”€โ”€ 
โ”‚โ”‚  โ”‚    โ”Œโ”€โ–บ 
โ”‚โ”‚  โ”‚โ”Œโ”€โ”€โ–บโ”œโ”€โ”€ 
โ”‚โ”‚  โ”‚โ”‚   โ””โ”€โ–บ 
โ”‚โ”‚  โ”‚โ”‚   โ”Œโ”€โ–บ 
โ”‚โ”‚  โ”‚โ”‚โ”Œโ”€โ–บโ””โ”€โ”€ 
โ”‚โ”‚  โ”‚โ”‚โ”‚  โ”Œโ”€โ–บ 
โ”‚โ””โ”€โ–บโ””โ”ดโ”ดโ”€โ”€โ”ดโ”€โ”€ 
โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ 
Token     
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021ๅนด     
HanLPv2.1 
ไธบ         
็”Ÿไบง        
็Žฏๅขƒ        
ๅธฆๆฅ        
ๆฌก         
ไธ–ไปฃ        
ๆœ€         
ๅ…ˆ่ฟ›        
็š„         
ๅคš         
่ฏญ็ง        
NLP       
ๆŠ€ๆœฏ        
ใ€‚         
Relati 
โ”€โ”€โ”€โ”€โ”€โ”€ 
tmod   
nsubj  
prep   
nn     
pobj   
root   
amod   
nn     
advmod 
rcmod  
assm   
nummod 
nn     
nn     
dobj   
punct  
PoS 
โ”€โ”€โ”€ 
NT  
NR  
P   
NN  
NN  
VV  
JJ  
NN  
AD  
JJ  
DEG 
CD  
NN  
NR  
NN  
PU  
Tok       
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021ๅนด     
HanLPv2.1 
ไธบ         
็”Ÿไบง        
็Žฏๅขƒ        
ๅธฆๆฅ        
ๆฌก         
ไธ–ไปฃ        
ๆœ€         
ๅ…ˆ่ฟ›        
็š„         
ๅคš         
่ฏญ็ง        
NLP       
ๆŠ€ๆœฏ        
ใ€‚         
NER Type         
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
โ”€โ”€โ”€โ–บDATE         
โ”€โ”€โ”€โ–บORGANIZATION 
                 
                 
                 
                 
                 
                 
                 
                 
                 
                 
                 
                 
                 
                 
Tok       
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021ๅนด     
HanLPv2.1 
ไธบ         
็”Ÿไบง        
็Žฏๅขƒ        
ๅธฆๆฅ        
ๆฌก         
ไธ–ไปฃ        
ๆœ€         
ๅ…ˆ่ฟ›        
็š„         
ๅคš         
่ฏญ็ง        
NLP       
ๆŠ€ๆœฏ        
ใ€‚         
SRL PA1      
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
โ”€โ”€โ”€โ–บARGM-TMP 
โ”€โ”€โ”€โ–บARG0     
โ—„โ”€โ”          
  โ”œโ–บARG2     
โ—„โ”€โ”˜          
โ•Ÿโ”€โ”€โ–บPRED     
โ—„โ”€โ”          
  โ”‚          
  โ”‚          
  โ”‚          
  โ”œโ–บARG1     
  โ”‚          
  โ”‚          
  โ”‚          
โ—„โ”€โ”˜          
             
Tok       
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021ๅนด     
HanLPv2.1 
ไธบ         
็”Ÿไบง        
็Žฏๅขƒ        
ๅธฆๆฅ        
ๆฌก         
ไธ–ไปฃ        
ๆœ€         
ๅ…ˆ่ฟ›        
็š„         
ๅคš         
่ฏญ็ง        
NLP       
ๆŠ€ๆœฏ        
ใ€‚         
SRL PA2      
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
             
             
             
             
             
             
             
             
โ”€โ”€โ”€โ–บARGM-ADV 
โ•Ÿโ”€โ”€โ–บPRED     
             
             
             
             
โ”€โ”€โ”€โ–บARG0     
             
Tok       
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021ๅนด     
HanLPv2.1 
ไธบ         
็”Ÿไบง        
็Žฏๅขƒ        
ๅธฆๆฅ        
ๆฌก         
ไธ–ไปฃ        
ๆœ€         
ๅ…ˆ่ฟ›        
็š„         
ๅคš         
่ฏญ็ง        
NLP       
ๆŠ€ๆœฏ        
ใ€‚         
PoS    3       4       5       6       7       8       9 
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
NT โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”   
NR โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNPโ”€โ”€โ”€โ”€โ”ค   
P โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”                                       โ”‚   
NN โ”€โ”€โ”       โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บPP โ”€โ”€โ”€โ”       โ”‚   
NN โ”€โ”€โ”ดโ–บNP โ”€โ”€โ”€โ”˜                               โ”‚       โ”‚   
VV โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”       โ”‚       โ”‚   
JJ โ”€โ”€โ”€โ–บADJPโ”€โ”€โ”                       โ”‚       โ”œโ–บVPโ”€โ”€โ”€โ”€โ”ค   
NN โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”ดโ–บNP โ”€โ”€โ”€โ”               โ”‚       โ”‚       โ”‚   
AD โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บADVPโ”€โ”€โ”ผโ–บADJPโ”€โ”€โ”       โ”œโ–บVP โ”€โ”€โ”€โ”˜       โ”œโ–บIP
JJ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บVP โ”€โ”€โ”€โ”˜       โ”‚       โ”‚               โ”‚   
DEGโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค       โ”‚               โ”‚   
CD โ”€โ”€โ”€โ–บQP โ”€โ”€โ”€โ”               โ”œโ–บNP โ”€โ”€โ”€โ”˜               โ”‚   
NN โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNPโ”€โ”€โ”€โ”€โ”ค                       โ”‚   
NR โ”€โ”€โ”                       โ”‚                       โ”‚   
NN โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”˜                       โ”‚   
PU โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜   

Dep Tree     
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
         โ”Œโ”€โ–บ 
โ”Œโ”ฌโ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”ดโ”€โ”€ 
โ”‚โ”‚    โ”‚  โ”Œโ”€โ–บ 
โ”‚โ”‚    โ””โ”€โ–บโ””โ”€โ”€ 
โ”‚โ””โ”€โ–บโ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
โ”‚   โ”‚  โ”Œโ”€โ”€โ”€โ–บ 
โ”‚   โ”‚  โ”‚โ”Œโ”€โ”€โ–บ 
โ”‚   โ”‚  โ”‚โ”‚โ”Œโ”€โ–บ 
โ”‚   โ””โ”€โ–บโ””โ”ดโ”ดโ”€โ”€ 
โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ 
Tok 
โ”€โ”€โ”€ 
้˜ฟๅฉ†ไธป 
ๆฅๅˆฐ  
ๅŒ—ไบฌ  
็ซ‹ๆ–นๅบญ 
ๅ‚่ง‚  
่‡ช็„ถ  
่ฏญไน‰  
็ง‘ๆŠ€  
ๅ…ฌๅธ  
ใ€‚   
Relat 
โ”€โ”€โ”€โ”€โ”€ 
nsubj 
root  
nn    
dobj  
conj  
nn    
nn    
nn    
dobj  
punct 
Po 
โ”€โ”€ 
NN 
VV 
NR 
NR 
VV 
NN 
NN 
NN 
NN 
PU 
Tok 
โ”€โ”€โ”€ 
้˜ฟๅฉ†ไธป 
ๆฅๅˆฐ  
ๅŒ—ไบฌ  
็ซ‹ๆ–นๅบญ 
ๅ‚่ง‚  
่‡ช็„ถ  
่ฏญไน‰  
็ง‘ๆŠ€  
ๅ…ฌๅธ  
ใ€‚   
NER Type         
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
                 
                 
โ—„โ”€โ”              
โ—„โ”€โ”ดโ–บLOCATION     
                 
โ—„โ”€โ”              
  โ”‚              
  โ”œโ–บORGANIZATION 
โ—„โ”€โ”˜              
                 
Tok 
โ”€โ”€โ”€ 
้˜ฟๅฉ†ไธป 
ๆฅๅˆฐ  
ๅŒ—ไบฌ  
็ซ‹ๆ–นๅบญ 
ๅ‚่ง‚  
่‡ช็„ถ  
่ฏญไน‰  
็ง‘ๆŠ€  
ๅ…ฌๅธ  
ใ€‚   
SRL PA1  
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
โ”€โ”€โ”€โ–บARG0 
โ•Ÿโ”€โ”€โ–บPRED 
โ—„โ”€โ”      
โ—„โ”€โ”ดโ–บARG1 
         
         
         
         
         
         
Tok 
โ”€โ”€โ”€ 
้˜ฟๅฉ†ไธป 
ๆฅๅˆฐ  
ๅŒ—ไบฌ  
็ซ‹ๆ–นๅบญ 
ๅ‚่ง‚  
่‡ช็„ถ  
่ฏญไน‰  
็ง‘ๆŠ€  
ๅ…ฌๅธ  
ใ€‚   
SRL PA2  
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
โ”€โ”€โ”€โ–บARG0 
         
         
         
โ•Ÿโ”€โ”€โ–บPRED 
โ—„โ”€โ”      
  โ”‚      
  โ”œโ–บARG1 
โ—„โ”€โ”˜      
         
Tok 
โ”€โ”€โ”€ 
้˜ฟๅฉ†ไธป 
ๆฅๅˆฐ  
ๅŒ—ไบฌ  
็ซ‹ๆ–นๅบญ 
ๅ‚่ง‚  
่‡ช็„ถ  
่ฏญไน‰  
็ง‘ๆŠ€  
ๅ…ฌๅธ  
ใ€‚   
Po    3       4       5       6 
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
NNโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”   
VVโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”               โ”‚   
NRโ”€โ”€โ”       โ”œโ–บVP โ”€โ”€โ”€โ”       โ”‚   
NRโ”€โ”€โ”ดโ–บNP โ”€โ”€โ”€โ”˜       โ”‚       โ”‚   
VVโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”       โ”œโ–บVPโ”€โ”€โ”€โ”€โ”ค   
NNโ”€โ”€โ”       โ”‚       โ”‚       โ”œโ–บIP
NN  โ”‚       โ”œโ–บVP โ”€โ”€โ”€โ”˜       โ”‚   
NN  โ”œโ–บNP โ”€โ”€โ”€โ”˜               โ”‚   
NNโ”€โ”€โ”˜                       โ”‚   
PUโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜   
" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "doc.pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WIKyCLQJK7J9" + }, + "source": [ + "#### ็”ณ่ฏท็ง˜้’ฅ\n", + "็”ฑไบŽๆœๅŠกๅ™จ็ฎ—ๅŠ›ๆœ‰้™๏ผŒๅŒฟๅ็”จๆˆทๆฏๅˆ†้’Ÿ้™2ๆฌก่ฐƒ็”จใ€‚ๅฆ‚ๆžœไฝ ้œ€่ฆๆ›ดๅคš่ฐƒ็”จๆฌกๆ•ฐ๏ผŒ[ๅปบ่ฎฎ็”ณ่ฏทๅ…่ดนๅ…ฌ็›ŠAPI็ง˜้’ฅauth](https://bbs.hanlp.com/t/hanlp2-1-restful-api/53)ใ€‚" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PcZAZopQK7J9" + }, + "source": [ + "### ๆตท้‡็บงnative API\n", + "\n", + "ไพ่ต–PyTorchใ€TensorFlow็ญ‰ๆทฑๅบฆๅญฆไน ๆŠ€ๆœฏ๏ผŒ้€‚ๅˆ**ไธ“ไธš**NLPๅทฅ็จ‹ๅธˆใ€็ ”็ฉถ่€…ไปฅๅŠๆœฌๅœฐๆตท้‡ๆ•ฐๆฎๅœบๆ™ฏใ€‚่ฆๆฑ‚Python 3.6ไปฅไธŠ๏ผŒๆ”ฏๆŒWindows๏ผŒๆŽจ่*nixใ€‚ๅฏไปฅๅœจCPUไธŠ่ฟ่กŒ๏ผŒๆŽจ่GPU/TPUใ€‚\n", + "\n", + "ๆ— ่ฎบๆ˜ฏWindowsใ€Linux่ฟ˜ๆ˜ฏmacOS๏ผŒHanLP็š„ๅฎ‰่ฃ…ๅช้œ€ไธ€ๅฅ่ฏๆžๅฎšใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bjRdHxl1K7J-", + "outputId": "659d7920-c857-4eb8-f45f-dba84366688a" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: hanlp in /usr/local/lib/python3.7/dist-packages (2.1.0a54)\n", + "Requirement already satisfied: sentencepiece>=0.1.91torch>=1.6.0 in /usr/local/lib/python3.7/dist-packages (from hanlp) (0.1.96)\n", + "Requirement already satisfied: toposort==1.5 in /usr/local/lib/python3.7/dist-packages (from hanlp) (1.5)\n", + "Requirement already satisfied: alnlp in /usr/local/lib/python3.7/dist-packages (from hanlp) (1.0.0rc27)\n", + "Requirement already satisfied: hanlp-common>=0.0.9 in /usr/local/lib/python3.7/dist-packages (from hanlp) (0.0.9)\n", + "Requirement already satisfied: hanlp-downloader in /usr/local/lib/python3.7/dist-packages (from hanlp) (0.0.23)\n", + "Requirement already satisfied: hanlp-trie>=0.0.2 in 
/usr/local/lib/python3.7/dist-packages (from hanlp) (0.0.2)\n", + "Requirement already satisfied: transformers>=4.1.1 in /usr/local/lib/python3.7/dist-packages (from hanlp) (4.9.1)\n", + "Requirement already satisfied: termcolor in /usr/local/lib/python3.7/dist-packages (from hanlp) (1.1.0)\n", + "Requirement already satisfied: pynvml in /usr/local/lib/python3.7/dist-packages (from hanlp) (11.0.0)\n", + "Requirement already satisfied: phrasetree in /usr/local/lib/python3.7/dist-packages (from hanlp-common>=0.0.9->hanlp) (0.0.8)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers>=4.1.1->hanlp) (3.0.12)\n", + "Requirement already satisfied: sacremoses in /usr/local/lib/python3.7/dist-packages (from transformers>=4.1.1->hanlp) (0.0.45)\n", + "Requirement already satisfied: tokenizers<0.11,>=0.10.1 in /usr/local/lib/python3.7/dist-packages (from transformers>=4.1.1->hanlp) (0.10.3)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from transformers>=4.1.1->hanlp) (21.0)\n", + "Requirement already satisfied: huggingface-hub==0.0.12 in /usr/local/lib/python3.7/dist-packages (from transformers>=4.1.1->hanlp) (0.0.12)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.7/dist-packages (from transformers>=4.1.1->hanlp) (5.4.1)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers>=4.1.1->hanlp) (2019.12.20)\n", + "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers>=4.1.1->hanlp) (4.41.1)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers>=4.1.1->hanlp) (2.23.0)\n", + "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers>=4.1.1->hanlp) (4.6.1)\n", + "Requirement already satisfied: numpy>=1.17 in 
/usr/local/lib/python3.7/dist-packages (from transformers>=4.1.1->hanlp) (1.19.5)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from huggingface-hub==0.0.12->transformers>=4.1.1->hanlp) (3.7.4.3)\n", + "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->transformers>=4.1.1->hanlp) (2.4.7)\n", + "Requirement already satisfied: torch in /usr/local/lib/python3.7/dist-packages (from alnlp->hanlp) (1.9.0+cu102)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers>=4.1.1->hanlp) (3.5.0)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers>=4.1.1->hanlp) (1.24.3)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers>=4.1.1->hanlp) (3.0.4)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers>=4.1.1->hanlp) (2.10)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers>=4.1.1->hanlp) (2021.5.30)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers>=4.1.1->hanlp) (1.0.1)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers>=4.1.1->hanlp) (7.1.2)\n", + "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers>=4.1.1->hanlp) (1.15.0)\n" + ] + } + ], + "source": [ + "!pip install hanlp -U" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dHhIRwgqK7J-" + }, + "source": [ + "#### ๅŠ ่ฝฝๆจกๅž‹\n", + "HanLP็š„ๅทฅไฝœๆต็จ‹ๆ˜ฏๅ…ˆๅŠ ่ฝฝๆจกๅž‹๏ผŒๆจกๅž‹็š„ๆ 
‡็คบ็ฌฆๅญ˜ๅ‚จๅœจ`hanlp.pretrained`่ฟ™ไธชๅŒ…ไธญ๏ผŒๆŒ‰็…งNLPไปปๅŠกๅฝ’็ฑปใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "KHY6bsG_K7J-", + "outputId": "208c12b6-2702-4ee7-a03a-f053b7ad3479" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH': 'https://file.hankcs.com/hanlp/mtl/close_tok_pos_ner_srl_dep_sdp_con_electra_base_20210111_124519.zip',\n", + " 'CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_SMALL_ZH': 'https://file.hankcs.com/hanlp/mtl/close_tok_pos_ner_srl_dep_sdp_con_electra_small_20210111_124159.zip',\n", + " 'NPCMJ_UD_KYOTO_TOK_POS_CON_BERT_BASE_CHAR_JA': 'https://file.hankcs.com/hanlp/mtl/npcmj_ud_kyoto_tok_pos_ner_dep_con_srl_bert_base_char_ja_20210517_225654.zip',\n", + " 'OPEN_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH': 'https://file.hankcs.com/hanlp/mtl/open_tok_pos_ner_srl_dep_sdp_con_electra_base_20201223_201906.zip',\n", + " 'OPEN_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_SMALL_ZH': 'https://file.hankcs.com/hanlp/mtl/open_tok_pos_ner_srl_dep_sdp_con_electra_small_20201223_035557.zip',\n", + " 'UD_ONTONOTES_TOK_POS_LEM_FEA_NER_SRL_DEP_SDP_CON_MT5_SMALL': 'https://file.hankcs.com/hanlp/mtl/ud_ontonotes_tok_pos_lem_fea_ner_srl_dep_sdp_con_mt5_small_20210228_123458.zip',\n", + " 'UD_ONTONOTES_TOK_POS_LEM_FEA_NER_SRL_DEP_SDP_CON_XLMR_BASE': 'https://file.hankcs.com/hanlp/mtl/ud_ontonotes_tok_pos_lem_fea_ner_srl_dep_sdp_con_xlm_base_20210602_211620.zip'}" + ] + }, + "execution_count": 6, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "import hanlp\n", + "hanlp.pretrained.mtl.ALL # MTLๅคšไปปๅŠก๏ผŒๅ…ทไฝ“ไปปๅŠก่งๆจกๅž‹ๅ็งฐ๏ผŒ่ฏญ็ง่งๅ็งฐๆœ€ๅŽไธ€ไธชๅญ—ๆฎตๆˆ–็›ธๅบ”่ฏญๆ–™ๅบ“" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WDT3Hks0K7J_" + }, + "source": [ + "่ฐƒ็”จ`hanlp.load`่ฟ›่กŒๅŠ 
่ฝฝ๏ผŒๆจกๅž‹ไผš่‡ชๅŠจไธ‹่ฝฝๅˆฐๆœฌๅœฐ็ผ“ๅญ˜ใ€‚่‡ช็„ถ่ฏญ่จ€ๅค„็†ๅˆ†ไธบ่ฎธๅคšไปปๅŠก๏ผŒๅˆ†่ฏๅชๆ˜ฏๆœ€ๅˆ็บง็š„ไธ€ไธชใ€‚ไธŽๅ…ถๆฏไธชไปปๅŠกๅ•็‹ฌๅˆ›ๅปบไธ€ไธชๆจกๅž‹๏ผŒไธๅฆ‚ๅˆฉ็”จHanLP็š„่”ๅˆๆจกๅž‹ไธ€ๆฌกๆ€งๅฎŒๆˆๅคšไธชไปปๅŠก๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4Cj8a73rK7J_", + "outputId": "a92ac736-6e61-4949-8d35-56c773faf950" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [] + } + ], + "source": [ + "HanLP = hanlp.load(hanlp.pretrained.mtl.CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pBqH_My8K7J_" + }, + "source": [ + "## ๅคšไปปๅŠกๆ‰น้‡ๅˆ†ๆž\n", + "ๅฎขๆˆท็ซฏๅˆ›ๅปบๅฎŒๆฏ•๏ผŒๆˆ–่€…ๆจกๅž‹ๅŠ ่ฝฝๅฎŒๆฏ•ๅŽ๏ผŒๅฐฑๅฏไปฅไผ ๅ…ฅไธ€ไธชๆˆ–ๅคšไธชๅฅๅญ่ฟ›่กŒๅˆ†ๆžไบ†๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "B58npfkHK7J_", + "outputId": "69fed02d-39cb-4b4c-d2c8-d0edc25970ea" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"tok/fine\": [\n", + " [\"2021ๅนด\", \"HanLPv2.1\", \"ไธบ\", \"็”Ÿไบง\", \"็Žฏๅขƒ\", \"ๅธฆๆฅ\", \"ๆฌก\", \"ไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš\", \"่ฏญ็ง\", \"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"],\n", + " [\"้˜ฟๅฉ†ไธป\", \"ๆฅๅˆฐ\", \"ๅŒ—ไบฌ\", \"็ซ‹ๆ–นๅบญ\", \"ๅ‚่ง‚\", \"่‡ช็„ถ\", \"่ฏญไน‰\", \"็ง‘ๆŠ€\", \"ๅ…ฌๅธ\", \"ใ€‚\"]\n", + " ],\n", + " \"tok/coarse\": [\n", + " [\"2021ๅนด\", \"HanLPv2.1\", \"ไธบ\", \"็”Ÿไบง\", \"็Žฏๅขƒ\", \"ๅธฆๆฅ\", \"ๆฌกไธ–ไปฃ\", \"ๆœ€\", \"ๅ…ˆ่ฟ›\", \"็š„\", \"ๅคš่ฏญ็ง\", \"NLP\", \"ๆŠ€ๆœฏ\", \"ใ€‚\"],\n", + " [\"้˜ฟๅฉ†ไธป\", \"ๆฅๅˆฐ\", \"ๅŒ—ไบฌ็ซ‹ๆ–นๅบญ\", \"ๅ‚่ง‚\", \"่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ\", \"ใ€‚\"]\n", + " ],\n", + " \"pos/ctb\": [\n", + " [\"NT\", \"NR\", \"P\", \"NN\", \"NN\", \"VV\", \"JJ\", \"NN\", \"AD\", \"JJ\", \"DEG\", \"CD\", \"NN\", \"NR\", 
\"NN\", \"PU\"],\n", + " [\"NN\", \"VV\", \"NR\", \"NR\", \"VV\", \"NN\", \"NN\", \"NN\", \"NN\", \"PU\"]\n", + " ],\n", + " \"pos/pku\": [\n", + " [\"t\", \"nx\", \"p\", \"vn\", \"n\", \"v\", \"b\", \"n\", \"d\", \"a\", \"u\", \"a\", \"n\", \"nx\", \"n\", \"w\"],\n", + " [\"n\", \"v\", \"ns\", \"ns\", \"v\", \"n\", \"n\", \"n\", \"n\", \"w\"]\n", + " ],\n", + " \"pos/863\": [\n", + " [\"nt\", \"w\", \"p\", \"v\", \"n\", \"v\", \"a\", \"nt\", \"d\", \"a\", \"u\", \"a\", \"n\", \"ws\", \"n\", \"w\"],\n", + " [\"n\", \"v\", \"ns\", \"n\", \"v\", \"n\", \"n\", \"n\", \"n\", \"w\"]\n", + " ],\n", + " \"ner/msra\": [\n", + " [[\"2021ๅนด\", \"DATE\", 0, 1], [\"HanLPv2.1\", \"WWW\", 1, 2]],\n", + " [[\"ๅŒ—ไบฌ\", \"LOCATION\", 2, 3], [\"็ซ‹ๆ–นๅบญ\", \"LOCATION\", 3, 4], [\"่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ\", \"ORGANIZATION\", 5, 9]]\n", + " ],\n", + " \"ner/pku\": [\n", + " [],\n", + " [[\"ๅŒ—ไบฌ็ซ‹ๆ–นๅบญ\", \"ns\", 2, 4], [\"่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ\", \"nt\", 5, 9]]\n", + " ],\n", + " \"ner/ontonotes\": [\n", + " [[\"2021ๅนด\", \"DATE\", 0, 1], [\"HanLPv2.1\", \"ORG\", 1, 2]],\n", + " [[\"ๅŒ—ไบฌ็ซ‹ๆ–นๅบญ\", \"FAC\", 2, 4], [\"่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ\", \"ORG\", 5, 9]]\n", + " ],\n", + " \"srl\": [\n", + " [[[\"2021ๅนด\", \"ARGM-TMP\", 0, 1], [\"HanLPv2.1\", \"ARG0\", 1, 2], [\"ไธบ็”Ÿไบง็Žฏๅขƒ\", \"ARG2\", 2, 5], [\"ๅธฆๆฅ\", \"PRED\", 5, 6], [\"ๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏ\", \"ARG1\", 6, 15]], [[\"ๆœ€\", \"ARGM-ADV\", 8, 9], [\"ๅ…ˆ่ฟ›\", \"PRED\", 9, 10], [\"ๆŠ€ๆœฏ\", \"ARG0\", 14, 15]]],\n", + " [[[\"้˜ฟๅฉ†ไธป\", \"ARG0\", 0, 1], [\"ๆฅๅˆฐ\", \"PRED\", 1, 2], [\"ๅŒ—ไบฌ็ซ‹ๆ–นๅบญ\", \"ARG1\", 2, 4]], [[\"้˜ฟๅฉ†ไธป\", \"ARG0\", 0, 1], [\"ๅ‚่ง‚\", \"PRED\", 4, 5], [\"่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ\", \"ARG1\", 5, 9]]]\n", + " ],\n", + " \"dep\": [\n", + " [[6, \"tmod\"], [6, \"nsubj\"], [6, \"prep\"], [5, \"nn\"], [3, \"pobj\"], [0, \"root\"], [8, \"amod\"], [15, \"nn\"], [10, \"advmod\"], [15, \"rcmod\"], [10, \"assm\"], [13, \"nummod\"], [15, \"nn\"], [15, \"nn\"], [6, 
\"dobj\"], [6, \"punct\"]],\n", + " [[2, \"nsubj\"], [0, \"root\"], [4, \"nn\"], [2, \"dobj\"], [2, \"conj\"], [9, \"nn\"], [9, \"nn\"], [9, \"nn\"], [5, \"dobj\"], [2, \"punct\"]]\n", + " ],\n", + " \"sdp\": [\n", + " [[[6, \"Time\"]], [[6, \"Exp\"]], [[5, \"mPrep\"]], [[5, \"Desc\"]], [[6, \"Datv\"]], [[13, \"dDesc\"]], [[0, \"Root\"], [8, \"Desc\"], [13, \"Desc\"]], [[15, \"Time\"]], [[10, \"mDegr\"]], [[15, \"Desc\"]], [[10, \"mAux\"]], [[8, \"Quan\"], [13, \"Quan\"]], [[15, \"Desc\"]], [[15, \"Nmod\"]], [[6, \"Pat\"]], [[6, \"mPunc\"]]],\n", + " [[[2, \"Agt\"], [5, \"Agt\"]], [[0, \"Root\"]], [[4, \"Loc\"]], [[2, \"Lfin\"]], [[2, \"ePurp\"]], [[8, \"Nmod\"]], [[9, \"Nmod\"]], [[9, \"Nmod\"]], [[5, \"Datv\"]], [[5, \"mPunc\"]]]\n", + " ],\n", + " \"con\": [\n", + " [\"TOP\", [[\"IP\", [[\"NP\", [[\"NT\", [\"2021ๅนด\"]]]], [\"NP\", [[\"NR\", [\"HanLPv2.1\"]]]], [\"VP\", [[\"PP\", [[\"P\", [\"ไธบ\"]], [\"NP\", [[\"NN\", [\"็”Ÿไบง\"]], [\"NN\", [\"็Žฏๅขƒ\"]]]]]], [\"VP\", [[\"VV\", [\"ๅธฆๆฅ\"]], [\"NP\", [[\"ADJP\", [[\"NP\", [[\"ADJP\", [[\"JJ\", [\"ๆฌก\"]]]], [\"NP\", [[\"NN\", [\"ไธ–ไปฃ\"]]]]]], [\"ADVP\", [[\"AD\", [\"ๆœ€\"]]]], [\"VP\", [[\"JJ\", [\"ๅ…ˆ่ฟ›\"]]]]]], [\"DEG\", [\"็š„\"]], [\"NP\", [[\"QP\", [[\"CD\", [\"ๅคš\"]]]], [\"NP\", [[\"NN\", [\"่ฏญ็ง\"]]]]]], [\"NP\", [[\"NR\", [\"NLP\"]], [\"NN\", [\"ๆŠ€ๆœฏ\"]]]]]]]]]], [\"PU\", [\"ใ€‚\"]]]]]],\n", + " [\"TOP\", [[\"IP\", [[\"NP\", [[\"NN\", [\"้˜ฟๅฉ†ไธป\"]]]], [\"VP\", [[\"VP\", [[\"VV\", [\"ๆฅๅˆฐ\"]], [\"NP\", [[\"NR\", [\"ๅŒ—ไบฌ\"]], [\"NR\", [\"็ซ‹ๆ–นๅบญ\"]]]]]], [\"VP\", [[\"VV\", [\"ๅ‚่ง‚\"]], [\"NP\", [[\"NN\", [\"่‡ช็„ถ\"]], [\"NN\", [\"่ฏญไน‰\"]], [\"NN\", [\"็ง‘ๆŠ€\"]], [\"NN\", [\"ๅ…ฌๅธ\"]]]]]]]], [\"PU\", [\"ใ€‚\"]]]]]]\n", + " ]\n", + "}\n" + ] + } + ], + "source": [ + "doc = HanLP(['2021ๅนดHanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚', '้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธใ€‚'])\n", + "print(doc)" + ] + }, + { + "cell_type": 
"markdown", + "metadata": { + "id": "tvuxfWPYK7J_" + }, + "source": [ + "## ๅฏ่ง†ๅŒ–\n", + "่พ“ๅ‡บ็ป“ๆžœๆ˜ฏไธ€ไธชๅฏไปฅ`json`ๅŒ–็š„`dict`๏ผŒ้”ฎไธบ[NLPไปปๅŠกๅ](https://hanlp.hankcs.com/docs/data_format.html#naming-convention)๏ผŒๅ€ผไธบๅˆ†ๆž็ป“ๆžœใ€‚ๅ…ณไบŽๆ ‡ๆณจ้›†ๅซไน‰๏ผŒ่ฏทๅ‚่€ƒ[ใ€Š่ฏญ่จ€ๅญฆๆ ‡ๆณจ่ง„่Œƒใ€‹](https://hanlp.hankcs.com/docs/annotations/index.html)ๅŠ[ใ€Šๆ ผๅผ่ง„่Œƒใ€‹](https://hanlp.hankcs.com/docs/data_format.html)ใ€‚ๆˆ‘ไปฌ่ดญไนฐใ€ๆ ‡ๆณจๆˆ–้‡‡็”จไบ†ไธ–็•ŒไธŠ้‡็บงๆœ€ๅคงใ€็ง็ฑปๆœ€ๅคš็š„่ฏญๆ–™ๅบ“็”จไบŽ่”ๅˆๅคš่ฏญ็งๅคšไปปๅŠกๅญฆไน ๏ผŒๆ‰€ไปฅHanLP็š„ๆ ‡ๆณจ้›†ไนŸๆ˜ฏ่ฆ†็›–้ขๆœ€ๅนฟ็š„ใ€‚้€š่ฟ‡`doc.pretty_print`๏ผŒๅฏไปฅๅœจ็ญ‰ๅฎฝๅญ—ไฝ“็Žฏๅขƒไธญๅพ—ๅˆฐๅฏ่ง†ๅŒ–๏ผŒไฝ ้œ€่ฆๅ–ๆถˆๆข่กŒๆ‰่ƒฝๅฏน้ฝๅฏ่ง†ๅŒ–็ป“ๆžœใ€‚ๆˆ‘ไปฌๅทฒ็ปๅ‘ๅธƒHTML็Žฏๅขƒ็š„ๅฏ่ง†ๅŒ–๏ผŒๅœจJupyter Notebookไธญ่‡ชๅŠจๅฏน้ฝไธญๆ–‡ใ€‚" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 575 + }, + "id": "M8WxTdlAK7KA", + "outputId": "a027a302-74d8-48c9-b30d-45ebf8741c1e" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
Dep Tree     
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
 โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ 
 โ”‚โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ 
 โ”‚โ”‚โ”Œโ”€โ–บโ”Œโ”€โ”€โ”€โ”€โ”€ 
 โ”‚โ”‚โ”‚  โ”‚  โ”Œโ”€โ–บ 
 โ”‚โ”‚โ”‚  โ””โ”€โ–บโ””โ”€โ”€ 
โ”Œโ”ผโ”ดโ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
โ”‚โ”‚       โ”Œโ”€โ–บ 
โ”‚โ”‚  โ”Œโ”€โ”€โ”€โ–บโ””โ”€โ”€ 
โ”‚โ”‚  โ”‚    โ”Œโ”€โ–บ 
โ”‚โ”‚  โ”‚โ”Œโ”€โ”€โ–บโ”œโ”€โ”€ 
โ”‚โ”‚  โ”‚โ”‚   โ””โ”€โ–บ 
โ”‚โ”‚  โ”‚โ”‚   โ”Œโ”€โ–บ 
โ”‚โ”‚  โ”‚โ”‚โ”Œโ”€โ–บโ””โ”€โ”€ 
โ”‚โ”‚  โ”‚โ”‚โ”‚  โ”Œโ”€โ–บ 
โ”‚โ””โ”€โ–บโ””โ”ดโ”ดโ”€โ”€โ”ดโ”€โ”€ 
โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ 
Token     
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021ๅนด     
HanLPv2.1 
ไธบ         
็”Ÿไบง        
็Žฏๅขƒ        
ๅธฆๆฅ        
ๆฌก         
ไธ–ไปฃ        
ๆœ€         
ๅ…ˆ่ฟ›        
็š„         
ๅคš         
่ฏญ็ง        
NLP       
ๆŠ€ๆœฏ        
ใ€‚         
Relati 
โ”€โ”€โ”€โ”€โ”€โ”€ 
tmod   
nsubj  
prep   
nn     
pobj   
root   
amod   
nn     
advmod 
rcmod  
assm   
nummod 
nn     
nn     
dobj   
punct  
PoS 
โ”€โ”€โ”€ 
NT  
NR  
P   
NN  
NN  
VV  
JJ  
NN  
AD  
JJ  
DEG 
CD  
NN  
NR  
NN  
PU  
Tok       
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021ๅนด     
HanLPv2.1 
ไธบ         
็”Ÿไบง        
็Žฏๅขƒ        
ๅธฆๆฅ        
ๆฌก         
ไธ–ไปฃ        
ๆœ€         
ๅ…ˆ่ฟ›        
็š„         
ๅคš         
่ฏญ็ง        
NLP       
ๆŠ€ๆœฏ        
ใ€‚         
NER Type 
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
โ”€โ”€โ”€โ–บDATE 
โ”€โ”€โ”€โ–บWWW  
         
         
         
         
         
         
         
         
         
         
         
         
         
         
Tok       
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021ๅนด     
HanLPv2.1 
ไธบ         
็”Ÿไบง        
็Žฏๅขƒ        
ๅธฆๆฅ        
ๆฌก         
ไธ–ไปฃ        
ๆœ€         
ๅ…ˆ่ฟ›        
็š„         
ๅคš         
่ฏญ็ง        
NLP       
ๆŠ€ๆœฏ        
ใ€‚         
SRL PA1      
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
โ”€โ”€โ”€โ–บARGM-TMP 
โ”€โ”€โ”€โ–บARG0     
โ—„โ”€โ”          
  โ”œโ–บARG2     
โ—„โ”€โ”˜          
โ•Ÿโ”€โ”€โ–บPRED     
โ—„โ”€โ”          
  โ”‚          
  โ”‚          
  โ”‚          
  โ”œโ–บARG1     
  โ”‚          
  โ”‚          
  โ”‚          
โ—„โ”€โ”˜          
             
Tok       
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021ๅนด     
HanLPv2.1 
ไธบ         
็”Ÿไบง        
็Žฏๅขƒ        
ๅธฆๆฅ        
ๆฌก         
ไธ–ไปฃ        
ๆœ€         
ๅ…ˆ่ฟ›        
็š„         
ๅคš         
่ฏญ็ง        
NLP       
ๆŠ€ๆœฏ        
ใ€‚         
SRL PA2      
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
             
             
             
             
             
             
             
             
โ”€โ”€โ”€โ–บARGM-ADV 
โ•Ÿโ”€โ”€โ–บPRED     
             
             
             
             
โ”€โ”€โ”€โ–บARG0     
             
Tok       
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021ๅนด     
HanLPv2.1 
ไธบ         
็”Ÿไบง        
็Žฏๅขƒ        
ๅธฆๆฅ        
ๆฌก         
ไธ–ไปฃ        
ๆœ€         
ๅ…ˆ่ฟ›        
็š„         
ๅคš         
่ฏญ็ง        
NLP       
ๆŠ€ๆœฏ        
ใ€‚         
PoS    3       4       5       6       7       8       9 
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
NT โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”   
NR โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNPโ”€โ”€โ”€โ”€โ”ค   
P โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”                                       โ”‚   
NN โ”€โ”€โ”       โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บPP โ”€โ”€โ”€โ”       โ”‚   
NN โ”€โ”€โ”ดโ–บNP โ”€โ”€โ”€โ”˜                               โ”‚       โ”‚   
VV โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”       โ”‚       โ”‚   
JJ โ”€โ”€โ”€โ–บADJPโ”€โ”€โ”                       โ”‚       โ”œโ–บVPโ”€โ”€โ”€โ”€โ”ค   
NN โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”ดโ–บNP โ”€โ”€โ”€โ”               โ”‚       โ”‚       โ”‚   
AD โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บADVPโ”€โ”€โ”ผโ–บADJPโ”€โ”€โ”       โ”œโ–บVP โ”€โ”€โ”€โ”˜       โ”œโ–บIP
JJ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บVP โ”€โ”€โ”€โ”˜       โ”‚       โ”‚               โ”‚   
DEGโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค       โ”‚               โ”‚   
CD โ”€โ”€โ”€โ–บQP โ”€โ”€โ”€โ”               โ”œโ–บNP โ”€โ”€โ”€โ”˜               โ”‚   
NN โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNPโ”€โ”€โ”€โ”€โ”ค                       โ”‚   
NR โ”€โ”€โ”                       โ”‚                       โ”‚   
NN โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”˜                       โ”‚   
PU โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜   

Dep Tree     
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
         โ”Œโ”€โ–บ 
โ”Œโ”ฌโ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”ดโ”€โ”€ 
โ”‚โ”‚    โ”‚  โ”Œโ”€โ–บ 
โ”‚โ”‚    โ””โ”€โ–บโ””โ”€โ”€ 
โ”‚โ””โ”€โ–บโ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
โ”‚   โ”‚  โ”Œโ”€โ”€โ”€โ–บ 
โ”‚   โ”‚  โ”‚โ”Œโ”€โ”€โ–บ 
โ”‚   โ”‚  โ”‚โ”‚โ”Œโ”€โ–บ 
โ”‚   โ””โ”€โ–บโ””โ”ดโ”ดโ”€โ”€ 
โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ 
Tok 
โ”€โ”€โ”€ 
้˜ฟๅฉ†ไธป 
ๆฅๅˆฐ  
ๅŒ—ไบฌ  
็ซ‹ๆ–นๅบญ 
ๅ‚่ง‚  
่‡ช็„ถ  
่ฏญไน‰  
็ง‘ๆŠ€  
ๅ…ฌๅธ  
ใ€‚   
Relat 
โ”€โ”€โ”€โ”€โ”€ 
nsubj 
root  
nn    
dobj  
conj  
nn    
nn    
nn    
dobj  
punct 
Po 
โ”€โ”€ 
NN 
VV 
NR 
NR 
VV 
NN 
NN 
NN 
NN 
PU 
Tok 
โ”€โ”€โ”€ 
้˜ฟๅฉ†ไธป 
ๆฅๅˆฐ  
ๅŒ—ไบฌ  
็ซ‹ๆ–นๅบญ 
ๅ‚่ง‚  
่‡ช็„ถ  
่ฏญไน‰  
็ง‘ๆŠ€  
ๅ…ฌๅธ  
ใ€‚   
NER Type         
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
                 
                 
โ”€โ”€โ”€โ–บLOCATION     
โ”€โ”€โ”€โ–บLOCATION     
                 
โ—„โ”€โ”              
  โ”‚              
  โ”œโ–บORGANIZATION 
โ—„โ”€โ”˜              
                 
Tok 
โ”€โ”€โ”€ 
้˜ฟๅฉ†ไธป 
ๆฅๅˆฐ  
ๅŒ—ไบฌ  
็ซ‹ๆ–นๅบญ 
ๅ‚่ง‚  
่‡ช็„ถ  
่ฏญไน‰  
็ง‘ๆŠ€  
ๅ…ฌๅธ  
ใ€‚   
SRL PA1  
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
โ”€โ”€โ”€โ–บARG0 
โ•Ÿโ”€โ”€โ–บPRED 
โ—„โ”€โ”      
โ—„โ”€โ”ดโ–บARG1 
         
         
         
         
         
         
Tok 
โ”€โ”€โ”€ 
้˜ฟๅฉ†ไธป 
ๆฅๅˆฐ  
ๅŒ—ไบฌ  
็ซ‹ๆ–นๅบญ 
ๅ‚่ง‚  
่‡ช็„ถ  
่ฏญไน‰  
็ง‘ๆŠ€  
ๅ…ฌๅธ  
ใ€‚   
SRL PA2  
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
โ”€โ”€โ”€โ–บARG0 
         
         
         
โ•Ÿโ”€โ”€โ–บPRED 
โ—„โ”€โ”      
  โ”‚      
  โ”œโ–บARG1 
โ—„โ”€โ”˜      
         
Tok 
โ”€โ”€โ”€ 
้˜ฟๅฉ†ไธป 
ๆฅๅˆฐ  
ๅŒ—ไบฌ  
็ซ‹ๆ–นๅบญ 
ๅ‚่ง‚  
่‡ช็„ถ  
่ฏญไน‰  
็ง‘ๆŠ€  
ๅ…ฌๅธ  
ใ€‚   
Po    3       4       5       6 
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
NNโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”   
VVโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”               โ”‚   
NRโ”€โ”€โ”       โ”œโ–บVP โ”€โ”€โ”€โ”       โ”‚   
NRโ”€โ”€โ”ดโ–บNP โ”€โ”€โ”€โ”˜       โ”‚       โ”‚   
VVโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”       โ”œโ–บVPโ”€โ”€โ”€โ”€โ”ค   
NNโ”€โ”€โ”       โ”‚       โ”‚       โ”œโ–บIP
NN  โ”‚       โ”œโ–บVP โ”€โ”€โ”€โ”˜       โ”‚   
NN  โ”œโ–บNP โ”€โ”€โ”€โ”˜               โ”‚   
NNโ”€โ”€โ”˜                       โ”‚   
PUโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜   
" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "doc.pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_B2HDiZgK7KA" + }, + "source": [ + "## ๆŒ‡ๅฎšไปปๅŠก\n", + "็ฎ€ๆด็š„ๆŽฅๅฃไนŸๆ”ฏๆŒ็ตๆดป็š„ๅ‚ๆ•ฐ๏ผŒไปปๅŠก่ถŠๅฐ‘๏ผŒ้€Ÿๅบฆ่ถŠๅฟซใ€‚ๅฆ‚ๆŒ‡ๅฎšไป…ๆ‰ง่กŒๅˆ†่ฏ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "9Mnys4t2K7KA", + "outputId": "88d72a72-c095-4f6d-df0b-d881887087ce" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
้˜ฟๅฉ†ไธป ๆฅๅˆฐ ๅŒ—ไบฌ ็ซ‹ๆ–นๅบญ ๅ‚่ง‚ ่‡ช็„ถ ่ฏญไน‰ ็ง‘ๆŠ€ ๅ…ฌๅธ ใ€‚
" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "HanLP('้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธใ€‚', tasks='tok').pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "s5RkVkVkK7KA" + }, + "source": [ + "### ๆ‰ง่กŒ็ฒ—้ข—็ฒ’ๅบฆๅˆ†่ฏ" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "5R_PwELlK7KA", + "outputId": "5ce2c037-eb44-481f-9de2-dc0d4122e7c4" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
้˜ฟๅฉ†ไธป ๆฅๅˆฐ ๅŒ—ไบฌ็ซ‹ๆ–นๅบญ ๅ‚่ง‚ ่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ ใ€‚
" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "HanLP('้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธใ€‚', tasks='tok/coarse').pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pTrajkHEK7KB" + }, + "source": [ + "### ๆ‰ง่กŒๅˆ†่ฏๅ’ŒPKU่ฏๆ€งๆ ‡ๆณจ" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "kkkgVKFqK7KB", + "outputId": "e9f9879b-47ce-459a-e089-923de1c6436c" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
้˜ฟๅฉ†ไธป/n ๆฅๅˆฐ/v ๅŒ—ไบฌ/ns ็ซ‹ๆ–นๅบญ/ns ๅ‚่ง‚/v ่‡ช็„ถ/n ่ฏญไน‰/n ็ง‘ๆŠ€/n ๅ…ฌๅธ/n ใ€‚/w
" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "HanLP('้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธใ€‚', tasks='pos/pku').pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YLLTVY0RK7KB" + }, + "source": [ + "### ๆ‰ง่กŒ็ฒ—้ข—็ฒ’ๅบฆๅˆ†่ฏๅ’ŒPKU่ฏๆ€งๆ ‡ๆณจ" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "5qSlqbcfK7KB", + "outputId": "66944459-bc22-4bd9-e4af-4d2aba9316f3" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
้˜ฟๅฉ†ไธป/n ๆฅๅˆฐ/v ๅŒ—ไบฌ็ซ‹ๆ–นๅบญ/ns ๅ‚่ง‚/v ่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธ/n ใ€‚/w
" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "HanLP('้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธใ€‚', tasks=['tok/coarse', 'pos/pku'], skip_tasks='tok/fine').pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3nNojvHiK7KB" + }, + "source": [ + "### ๆ‰ง่กŒๅˆ†่ฏๅ’ŒMSRAๆ ‡ๅ‡†NER" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 225 + }, + "id": "tTVoEPiAK7KB", + "outputId": "b8dc8c24-3392-4712-d1b6-e2dc8b7710e8" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
Tok 
โ”€โ”€โ”€ 
้˜ฟๅฉ†ไธป 
ๆฅๅˆฐ  
ๅŒ—ไบฌ  
็ซ‹ๆ–นๅบญ 
ๅ‚่ง‚  
่‡ช็„ถ  
่ฏญไน‰  
็ง‘ๆŠ€  
ๅ…ฌๅธ  
ใ€‚   
NER Type        
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
                
                
โ”€โ”€โ”€โ–บLOCATION    
โ”€โ”€โ”€โ–บLOCATION    
                
โ—„โ”€โ”             
  โ”‚             
  โ”œโ–บORGANIZATION
โ—„โ”€โ”˜             
                
" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "HanLP('้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธใ€‚', tasks='ner/msra').pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uG2wYTfmK7KB" + }, + "source": [ + "### ๆ‰ง่กŒๅˆ†่ฏใ€่ฏๆ€งๆ ‡ๆณจๅ’Œไพๅญ˜ๅฅๆณ•ๅˆ†ๆž" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 225 + }, + "id": "WXl6f7zyK7KC", + "outputId": "8671e0e4-d0c3-40f4-a4db-ba9aaec225ab" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
Dep Tree     
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
         โ”Œโ”€โ–บ 
โ”Œโ”ฌโ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”ดโ”€โ”€ 
โ”‚โ”‚    โ”‚  โ”Œโ”€โ–บ 
โ”‚โ”‚    โ””โ”€โ–บโ””โ”€โ”€ 
โ”‚โ””โ”€โ–บโ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
โ”‚   โ”‚  โ”Œโ”€โ”€โ”€โ–บ 
โ”‚   โ”‚  โ”‚โ”Œโ”€โ”€โ–บ 
โ”‚   โ”‚  โ”‚โ”‚โ”Œโ”€โ–บ 
โ”‚   โ””โ”€โ–บโ””โ”ดโ”ดโ”€โ”€ 
โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ 
Tok 
โ”€โ”€โ”€ 
้˜ฟๅฉ†ไธป 
ๆฅๅˆฐ  
ๅŒ—ไบฌ  
็ซ‹ๆ–นๅบญ 
ๅ‚่ง‚  
่‡ช็„ถ  
่ฏญไน‰  
็ง‘ๆŠ€  
ๅ…ฌๅธ  
ใ€‚   
Relat 
โ”€โ”€โ”€โ”€โ”€ 
nsubj 
root  
nn    
dobj  
conj  
nn    
nn    
nn    
dobj  
punct 
Po
โ”€โ”€
NN
VV
NR
NR
VV
NN
NN
NN
NN
PU
" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "doc = HanLP('้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธใ€‚', tasks=['pos', 'dep'])\n", + "doc.pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ocxM3LsGK7KC" + }, + "source": [ + "่ฝฌๆขไธบCoNLLๆ ผๅผ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NtKmSB_0K7KC", + "outputId": "cc9245b3-32c2-4d35-88a8-a7d91127eca7" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\t้˜ฟๅฉ†ไธป\t_\tNN\t_\t_\t2\tnsubj\t_\t_\n", + "2\tๆฅๅˆฐ\t_\tVV\t_\t_\t0\troot\t_\t_\n", + "3\tๅŒ—ไบฌ\t_\tNR\t_\t_\t4\tnn\t_\t_\n", + "4\t็ซ‹ๆ–นๅบญ\t_\tNR\t_\t_\t2\tdobj\t_\t_\n", + "5\tๅ‚่ง‚\t_\tVV\t_\t_\t2\tconj\t_\t_\n", + "6\t่‡ช็„ถ\t_\tNN\t_\t_\t9\tnn\t_\t_\n", + "7\t่ฏญไน‰\t_\tNN\t_\t_\t9\tnn\t_\t_\n", + "8\t็ง‘ๆŠ€\t_\tNN\t_\t_\t9\tnn\t_\t_\n", + "9\tๅ…ฌๅธ\t_\tNN\t_\t_\t5\tdobj\t_\t_\n", + "10\tใ€‚\t_\tPU\t_\t_\t2\tpunct\t_\t_\n" + ] + } + ], + "source": [ + "print(doc.to_conll())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PNBo-kETK7KC" + }, + "source": [ + "### ๆ‰ง่กŒๅˆ†่ฏใ€่ฏๆ€งๆ ‡ๆณจๅ’Œ็Ÿญ่ฏญๆˆๅˆ†ๅˆ†ๆž" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 225 + }, + "id": "Ja8dib6XK7KC", + "outputId": "a972f5bb-ae23-47a9-cd9f-6070a5b39f50" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
Tok 
โ”€โ”€โ”€ 
้˜ฟๅฉ†ไธป 
ๆฅๅˆฐ  
ๅŒ—ไบฌ  
็ซ‹ๆ–นๅบญ 
ๅ‚่ง‚  
่‡ช็„ถ  
่ฏญไน‰  
็ง‘ๆŠ€  
ๅ…ฌๅธ  
ใ€‚   
Po    3       4       5       6 
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
NNโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”   
VVโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”               โ”‚   
NRโ”€โ”€โ”       โ”œโ–บVP โ”€โ”€โ”€โ”       โ”‚   
NRโ”€โ”€โ”ดโ–บNP โ”€โ”€โ”€โ”˜       โ”‚       โ”‚   
VVโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”       โ”œโ–บVPโ”€โ”€โ”€โ”€โ”ค   
NNโ”€โ”€โ”       โ”‚       โ”‚       โ”œโ–บIP
NN  โ”‚       โ”œโ–บVP โ”€โ”€โ”€โ”˜       โ”‚   
NN  โ”œโ–บNP โ”€โ”€โ”€โ”˜               โ”‚   
NNโ”€โ”€โ”˜                       โ”‚   
PUโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜   
" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "doc = HanLP('้˜ฟๅฉ†ไธปๆฅๅˆฐๅŒ—ไบฌ็ซ‹ๆ–นๅบญๅ‚่ง‚่‡ช็„ถ่ฏญไน‰็ง‘ๆŠ€ๅ…ฌๅธใ€‚', tasks=['pos', 'con'])\n", + "doc.pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Mg3DhvjhK7KC" + }, + "source": [ + "#### ๅฐ†็Ÿญ่ฏญ็ป“ๆž„ๆ ‘ไปฅbracketedๅฝขๅผๆ‰“ๅฐ" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "kE8iBZNUK7KC", + "outputId": "79e2a72d-e473-41ca-c054-9595a4dd5971" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(TOP\n", + " (IP\n", + " (NP (NN ้˜ฟๅฉ†ไธป))\n", + " (VP\n", + " (VP (VV ๆฅๅˆฐ) (NP (NR ๅŒ—ไบฌ) (NR ็ซ‹ๆ–นๅบญ)))\n", + " (VP (VV ๅ‚่ง‚) (NP (NN ่‡ช็„ถ) (NN ่ฏญไน‰) (NN ็ง‘ๆŠ€) (NN ๅ…ฌๅธ))))\n", + " (PU ใ€‚)))\n" + ] + } + ], + "source": [ + "print(doc['con']) # str(doc['con'])ไผšๅฐ†็Ÿญ่ฏญ็ป“ๆž„ๅˆ—่กจ่ฝฌๆขไธบๆ‹ฌๅทๅฝขๅผ" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfleaY_pK7KC" + }, + "source": [ + "ๅ…ณไบŽๆ ‡ๆณจ้›†ๅซไน‰๏ผŒ่ฏทๅ‚่€ƒ[ใ€Š่ฏญ่จ€ๅญฆๆ ‡ๆณจ่ง„่Œƒใ€‹](https://hanlp.hankcs.com/docs/annotations/index.html)ๅŠ[ใ€Šๆ ผๅผ่ง„่Œƒใ€‹](https://hanlp.hankcs.com/docs/data_format.html)ใ€‚ๆˆ‘ไปฌ่ดญไนฐใ€ๆ ‡ๆณจๆˆ–้‡‡็”จไบ†ไธ–็•ŒไธŠ้‡็บงๆœ€ๅคงใ€็ง็ฑปๆœ€ๅคš็š„่ฏญๆ–™ๅบ“็”จไบŽ่”ๅˆๅคš่ฏญ็งๅคšไปปๅŠกๅญฆไน ๏ผŒๆ‰€ไปฅHanLP็š„ๆ ‡ๆณจ้›†ไนŸๆ˜ฏ่ฆ†็›–้ขๆœ€ๅนฟ็š„ใ€‚\n", + "\n", + "## ๅคš่ฏญ็งๆ”ฏๆŒ\n", + "ๆ€ปไน‹๏ผŒๅฏไปฅ้€š่ฟ‡tasksๅ‚ๆ•ฐ็ตๆดป่ฐƒ็”จๅ„็งNLPไปปๅŠกใ€‚้™คไบ†ไธญๆ–‡่”ๅˆๆจกๅž‹ไน‹ๅค–๏ผŒไฝ ๅฏไปฅๅœจๆ–‡ๆกฃไธญ้€š่ฟ‡ๆ‰พๅˆฐ่ฎธๅคšๅ…ถไป–่ฏญ็ง็š„ๆจกๅž‹๏ผŒๆฏ”ๅฆ‚ๆ—ฅ่ฏญ๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "oJP8dvfvK7KD", + "outputId": "2262ccdb-7cf5-4859-8d6c-18300e54c22e" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + 
"text": [] + } + ], + "source": [ + "ja = hanlp.load(hanlp.pretrained.mtl.NPCMJ_UD_KYOTO_TOK_POS_CON_BERT_BASE_CHAR_JA)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 991 + }, + "id": "3WPvCbH2K7KD", + "outputId": "46a9435d-ed5b-47ef-99c6-71d7ee0fc6e8" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
Dep Tree       
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
           โ”Œโ”€โ–บ 
โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บโ”œโ”€โ”€ 
โ”‚          โ””โ”€โ–บ 
โ”‚   โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ 
โ”‚   โ”‚โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ 
โ”‚   โ”‚โ”‚     โ”Œโ”€โ–บ 
โ”‚   โ”‚โ”‚โ”Œโ”€โ”€โ”€โ–บโ”œโ”€โ”€ 
โ”‚   โ”‚โ”‚โ”‚    โ””โ”€โ–บ 
โ”‚   โ”‚โ”‚โ”‚โ”Œโ”€โ”€โ”€โ”€โ”€โ–บ 
โ”‚   โ”‚โ”‚โ”‚โ”‚โ”Œโ”€โ”€โ”€โ”€โ–บ 
โ”‚   โ”‚โ”‚โ”‚โ”‚โ”‚โ”Œโ”€โ”€โ”€โ–บ 
โ”‚   โ”‚โ”‚โ”‚โ”‚โ”‚โ”‚โ”Œโ”€โ”€โ–บ 
โ”‚   โ”‚โ”‚โ”‚โ”‚โ”‚โ”‚โ”‚โ”Œโ”€โ–บ 
โ”‚โ”Œโ”€โ–บโ””โ”ดโ”ดโ”ดโ”ดโ”ดโ”ดโ”ผโ”€โ”€ 
โ”‚โ”‚         โ””โ”€โ–บ 
โ”‚โ”‚         โ”Œโ”€โ–บ 
โ”‚โ”‚      โ”Œโ”€โ–บโ”œโ”€โ”€ 
โ”‚โ”‚      โ”‚  โ””โ”€โ–บ 
โ””โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”ฌโ”ฌโ”ฌโ”€โ”€ 
         โ”‚โ”‚โ””โ”€โ–บ 
         โ”‚โ””โ”€โ”€โ–บ 
         โ””โ”€โ”€โ”€โ–บ 
Token     
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021      
ๅนด         
ใ€         
HanLPv2.1 
ใฏ         
ๆฌก         
ไธ–ไปฃ        
ใฎ         
ๆœ€         
ๅ…ˆ็ซฏ        
ๅคš         
่จ€่ชž        
NLP       
ๆŠ€่ก“        
ใ‚’         
ๆœฌ็•ช        
็’ฐๅขƒ        
ใซ         
ๅฐŽๅ…ฅ        
ใ—         
ใพใ™        
ใ€‚         
Relation 
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
nummod   
obl      
punct    
compound 
case     
compound 
nmod     
case     
compound 
compound 
compound 
compound 
compound 
obj      
case     
compound 
obl      
case     
root     
aux      
aux      
punct    
PoS 
โ”€โ”€โ”€ 
NUM 
CL  
PU  
NPR 
P   
N   
N   
P   
N   
N   
NUM 
N   
N   
N   
P   
N   
N   
P   
VB  
VB0 
AX  
PU  
Tok       
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021      
ๅนด         
ใ€         
HanLPv2.1 
ใฏ         
ๆฌก         
ไธ–ไปฃ        
ใฎ         
ๆœ€         
ๅ…ˆ็ซฏ        
ๅคš         
่จ€่ชž        
NLP       
ๆŠ€่ก“        
ใ‚’         
ๆœฌ็•ช        
็’ฐๅขƒ        
ใซ         
ๅฐŽๅ…ฅ        
ใ—         
ใพใ™        
ใ€‚         
NER Type     
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
โ—„โ”€โ”          
โ—„โ”€โ”ดโ–บDATE     
             
โ”€โ”€โ”€โ–บARTIFACT 
             
             
             
             
             
             
             
             
             
             
             
             
             
             
             
             
             
             
Tok       
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021      
ๅนด         
ใ€         
HanLPv2.1 
ใฏ         
ๆฌก         
ไธ–ไปฃ        
ใฎ         
ๆœ€         
ๅ…ˆ็ซฏ        
ๅคš         
่จ€่ชž        
NLP       
ๆŠ€่ก“        
ใ‚’         
ๆœฌ็•ช        
็’ฐๅขƒ        
ใซ         
ๅฐŽๅ…ฅ        
ใ—         
ใพใ™        
ใ€‚         
SRL PA1  
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
         
         
         
         
         
โ”€โ”€โ”€โ–บไฟฎ้ฃพ   
โ•Ÿโ”€โ”€โ–บPRED 
         
         
         
         
         
         
         
         
         
         
         
         
         
         
         
Tok       
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021      
ๅนด         
ใ€         
HanLPv2.1 
ใฏ         
ๆฌก         
ไธ–ไปฃ        
ใฎ         
ๆœ€         
ๅ…ˆ็ซฏ        
ๅคš         
่จ€่ชž        
NLP       
ๆŠ€่ก“        
ใ‚’         
ๆœฌ็•ช        
็’ฐๅขƒ        
ใซ         
ๅฐŽๅ…ฅ        
ใ—         
ใพใ™        
ใ€‚         
SRL PA3  
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
         
         
         
         
         
         
         
         
โ—„โ”€โ”      
โ—„โ”€โ”ดโ–บไฟฎ้ฃพ   
         
โ•Ÿโ”€โ”€โ–บPRED 
         
         
         
         
         
         
         
         
         
         
Tok       
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021      
ๅนด         
ใ€         
HanLPv2.1 
ใฏ         
ๆฌก         
ไธ–ไปฃ        
ใฎ         
ๆœ€         
ๅ…ˆ็ซฏ        
ๅคš         
่จ€่ชž        
NLP       
ๆŠ€่ก“        
ใ‚’         
ๆœฌ็•ช        
็’ฐๅขƒ        
ใซ         
ๅฐŽๅ…ฅ        
ใ—         
ใพใ™        
ใ€‚         
SRL PA4  
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
         
         
         
         
         
โ—„โ”€โ”      
  โ”‚      
  โ”‚      
  โ”œโ–บไฟฎ้ฃพ   
  โ”‚      
โ—„โ”€โ”˜      
โ—„โ”€โ”      
โ—„โ”€โ”ดโ–บใƒŽ    
โ•Ÿโ”€โ”€โ–บPRED 
         
         
         
         
         
         
         
         
Tok       
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021      
ๅนด         
ใ€         
HanLPv2.1 
ใฏ         
ๆฌก         
ไธ–ไปฃ        
ใฎ         
ๆœ€         
ๅ…ˆ็ซฏ        
ๅคš         
่จ€่ชž        
NLP       
ๆŠ€่ก“        
ใ‚’         
ๆœฌ็•ช        
็’ฐๅขƒ        
ใซ         
ๅฐŽๅ…ฅ        
ใ—         
ใพใ™        
ใ€‚         
SRL PA5  
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
         
         
         
         
         
         
         
         
         
         
         
         
         
         
         
โ”€โ”€โ”€โ–บไฟฎ้ฃพ   
โ•Ÿโ”€โ”€โ–บPRED 
         
         
         
         
         
Tok       
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021      
ๅนด         
ใ€         
HanLPv2.1 
ใฏ         
ๆฌก         
ไธ–ไปฃ        
ใฎ         
ๆœ€         
ๅ…ˆ็ซฏ        
ๅคš         
่จ€่ชž        
NLP       
ๆŠ€่ก“        
ใ‚’         
ๆœฌ็•ช        
็’ฐๅขƒ        
ใซ         
ๅฐŽๅ…ฅ        
ใ—         
ใพใ™        
ใ€‚         
SRL PA6  
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
โ—„โ”€โ”      
  โ”œโ–บๆ™‚้–“   
โ—„โ”€โ”˜      
โ—„โ”€โ”      
โ—„โ”€โ”ดโ–บใ‚ฌ    
โ—„โ”€โ”      
  โ”‚      
  โ”‚      
  โ”‚      
  โ”‚      
  โ”œโ–บใƒฒ    
  โ”‚      
  โ”‚      
  โ”‚      
โ—„โ”€โ”˜      
โ—„โ”€โ”      
  โ”œโ–บใƒ‹    
โ—„โ”€โ”˜      
โ•Ÿโ”€โ”€โ–บPRED 
         
         
         
Tok       
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021      
ๅนด         
ใ€         
HanLPv2.1 
ใฏ         
ๆฌก         
ไธ–ไปฃ        
ใฎ         
ๆœ€         
ๅ…ˆ็ซฏ        
ๅคš         
่จ€่ชž        
NLP       
ๆŠ€่ก“        
ใ‚’         
ๆœฌ็•ช        
็’ฐๅขƒ        
ใซ         
ๅฐŽๅ…ฅ        
ใ—         
ใพใ™        
ใ€‚         
PoS    3         4        5       6       7       8 
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
NUMโ”€โ”€โ”                                              
CL โ”€โ”€โ”ดโ–บNUMCLPโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”   
PU โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค   
NPRโ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”€โ”€โ”                                โ”‚   
P โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”ดโ–บโ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บPPโ”€โ”€โ”€โ”€โ”ค   
N โ”€โ”€โ”€โ”                                          โ”‚   
N โ”€โ”€โ”€โ”ดโ–บNP โ”€โ”€โ”€โ”€โ”€โ”                                โ”‚   
P โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”ดโ–บPP โ”€โ”€โ”€โ”€โ”                       โ”‚   
N โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€   โ”‚                       โ”‚   
N โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”€โ”€โ”€โ–บCONJPโ”€โ”€โ”ค                       โ”‚   
NUMโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€   โ”œโ–บNML โ”€โ”€โ”               โ”‚   
N โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€   โ”‚       โ”‚               โ”œโ–บIP
N โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”˜       โ”œโ–บNP โ”€โ”€โ”€โ”       โ”‚   
N โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜       โ”œโ–บPPโ”€โ”€โ”€โ”€โ”ค   
P โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜       โ”‚   
N โ”€โ”€โ”€โ”                                          โ”‚   
N โ”€โ”€โ”€โ”ดโ–บNP โ”€โ”€โ”€โ”€โ”€โ”                                โ”‚   
P โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”ดโ–บโ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บPPโ”€โ”€โ”€โ”€โ”ค   
VB โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค   
VB0โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค   
AX โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค   
PU โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜   

Dep Tree       
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
           โ”Œโ”€โ–บ 
โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บโ”œโ”€โ”€ 
โ”‚          โ””โ”€โ–บ 
โ”‚      โ”Œโ”€โ”€โ”€โ”€โ”€โ–บ 
โ”‚      โ”‚โ”Œโ”€โ”€โ”€โ”€โ–บ 
โ”‚      โ”‚โ”‚โ”Œโ”€โ”€โ”€โ–บ 
โ”‚      โ”‚โ”‚โ”‚โ”Œโ”€โ”€โ–บ 
โ”‚      โ”‚โ”‚โ”‚โ”‚โ”Œโ”€โ–บ 
โ”‚   โ”Œโ”€โ–บโ””โ”ดโ”ดโ”ดโ”ผโ”€โ”€ 
โ”‚   โ”‚      โ””โ”€โ–บ 
โ”‚   โ”‚      โ”Œโ”€โ–บ 
โ”‚   โ”‚   โ”Œโ”€โ–บโ””โ”€โ”€ 
โ”‚   โ”‚   โ”‚  โ”Œโ”€โ–บ 
โ”‚   โ”‚โ”Œโ”€โ–บโ””โ”€โ”€โ”ผโ”€โ”€ 
โ”‚   โ”‚โ”‚     โ””โ”€โ–บ 
โ”‚โ”Œโ”€โ–บโ””โ”ดโ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€ 
โ”‚โ”‚         โ””โ”€โ–บ 
โ”‚โ”‚        โ”Œโ”€โ”€โ–บ 
โ”‚โ”‚        โ”‚โ”Œโ”€โ–บ 
โ”‚โ”‚   โ”Œโ”€โ–บโ”Œโ”ฌโ”ผโ”ผโ”€โ”€ 
โ”‚โ”‚   โ”‚  โ”‚โ”‚โ”‚โ””โ”€โ–บ 
โ”‚โ”‚   โ”‚  โ”‚โ”‚โ””โ”€โ”€โ–บ 
โ”‚โ”‚   โ”‚  โ”‚โ””โ”€โ”€โ”€โ–บ 
โ”‚โ”‚   โ”‚  โ””โ”€โ”€โ”€โ”€โ–บ 
โ”‚โ”‚   โ”‚     โ”Œโ”€โ–บ 
โ””โ”ดโ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”ฌโ”ผโ”€โ”€ 
          โ”‚โ””โ”€โ–บ 
          โ””โ”€โ”€โ–บ 
Toke 
โ”€โ”€โ”€โ”€ 
ๅฅˆ้ ˆ   
ใใฎใ“  
ใฏ    
1973 
ๅนด    
11   
ๆœˆ    
28   
ๆ—ฅ    
ใซ    
ๅƒ่‘‰   
็œŒ    
ๅ††็ฉบ   
ๅฑฑ    
ใง    
็”Ÿใพใ‚Œ  
ใ€    
ใ‚ฒใƒผใƒ   
ๅˆถไฝœ   
ไผš็คพ   
ใ€Œ    
ใƒŽใƒผใƒ„  
ใ€    
ใฎ    
่จญ็ซ‹   
่€…    
ใ     
ใ€‚    
Relation 
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
compound 
nsubj    
case     
compound 
compound 
compound 
compound 
nummod   
obl      
case     
compound 
nmod     
compound 
obl      
case     
acl      
punct    
compound 
compound 
nmod     
punct    
compound 
punct    
case     
compound 
root     
cop      
punct    
PoS 
โ”€โ”€โ”€ 
NPR 
NPR 
P   
NUM 
CL  
NUM 
CL  
NUM 
CL  
P   
NPR 
NPR 
NPR 
NPR 
P   
VB  
PU  
N   
N   
N   
PUL 
NPR 
PUR 
P   
N   
N   
AX  
PU  
Tok  
โ”€โ”€โ”€โ”€ 
ๅฅˆ้ ˆ   
ใใฎใ“  
ใฏ    
1973 
ๅนด    
11   
ๆœˆ    
28   
ๆ—ฅ    
ใซ    
ๅƒ่‘‰   
็œŒ    
ๅ††็ฉบ   
ๅฑฑ    
ใง    
็”Ÿใพใ‚Œ  
ใ€    
ใ‚ฒใƒผใƒ   
ๅˆถไฝœ   
ไผš็คพ   
ใ€Œ    
ใƒŽใƒผใƒ„  
ใ€    
ใฎ    
่จญ็ซ‹   
่€…    
ใ     
ใ€‚    
NER Type         
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
โ—„โ”€โ”              
โ—„โ”€โ”ดโ–บPERSON       
                 
โ—„โ”€โ”              
  โ”‚              
  โ”‚              
  โ”œโ–บDATE         
  โ”‚              
โ—„โ”€โ”˜              
                 
โ—„โ”€โ”              
  โ”‚              
  โ”œโ–บLOCATION     
โ—„โ”€โ”˜              
                 
                 
                 
                 
                 
                 
                 
โ”€โ”€โ”€โ–บORGANIZATION 
                 
                 
                 
                 
                 
                 
Tok  
โ”€โ”€โ”€โ”€ 
ๅฅˆ้ ˆ   
ใใฎใ“  
ใฏ    
1973 
ๅนด    
11   
ๆœˆ    
28   
ๆ—ฅ    
ใซ    
ๅƒ่‘‰   
็œŒ    
ๅ††็ฉบ   
ๅฑฑ    
ใง    
็”Ÿใพใ‚Œ  
ใ€    
ใ‚ฒใƒผใƒ   
ๅˆถไฝœ   
ไผš็คพ   
ใ€Œ    
ใƒŽใƒผใƒ„  
ใ€    
ใฎ    
่จญ็ซ‹   
่€…    
ใ     
ใ€‚    
SRL PA1  
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
         
         
         
         
         
         
         
         
         
         
โ—„โ”€โ”      
โ—„โ”€โ”ดโ–บใƒŽ๏ผŸ   
         
โ•Ÿโ”€โ”€โ–บPRED 
         
         
         
         
         
         
         
         
         
         
         
         
         
         
Tok  
โ”€โ”€โ”€โ”€ 
ๅฅˆ้ ˆ   
ใใฎใ“  
ใฏ    
1973 
ๅนด    
11   
ๆœˆ    
28   
ๆ—ฅ    
ใซ    
ๅƒ่‘‰   
็œŒ    
ๅ††็ฉบ   
ๅฑฑ    
ใง    
็”Ÿใพใ‚Œ  
ใ€    
ใ‚ฒใƒผใƒ   
ๅˆถไฝœ   
ไผš็คพ   
ใ€Œ    
ใƒŽใƒผใƒ„  
ใ€    
ใฎ    
่จญ็ซ‹   
่€…    
ใ     
ใ€‚    
SRL PA2  
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
โ—„โ”€โ”      
  โ”œโ–บใ‚ฌ    
โ—„โ”€โ”˜      
โ—„โ”€โ”      
  โ”‚      
  โ”‚      
  โ”œโ–บๆ™‚้–“   
  โ”‚      
  โ”‚      
โ—„โ”€โ”˜      
โ—„โ”€โ”      
  โ”‚      
  โ”œโ–บใƒ‡    
  โ”‚      
โ—„โ”€โ”˜      
โ•Ÿโ”€โ”€โ–บPRED 
         
         
         
         
         
         
         
         
         
         
         
         
Tok  
โ”€โ”€โ”€โ”€ 
ๅฅˆ้ ˆ   
ใใฎใ“  
ใฏ    
1973 
ๅนด    
11   
ๆœˆ    
28   
ๆ—ฅ    
ใซ    
ๅƒ่‘‰   
็œŒ    
ๅ††็ฉบ   
ๅฑฑ    
ใง    
็”Ÿใพใ‚Œ  
ใ€    
ใ‚ฒใƒผใƒ   
ๅˆถไฝœ   
ไผš็คพ   
ใ€Œ    
ใƒŽใƒผใƒ„  
ใ€    
ใฎ    
่จญ็ซ‹   
่€…    
ใ     
ใ€‚    
SRL PA3  
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
         
         
         
         
         
         
         
         
         
         
         
         
         
         
         
         
         
โ—„โ”€โ”      
โ—„โ”€โ”ดโ–บใƒŽ    
โ•Ÿโ”€โ”€โ–บPRED 
         
         
         
         
         
         
         
         
Tok  
โ”€โ”€โ”€โ”€ 
ๅฅˆ้ ˆ   
ใใฎใ“  
ใฏ    
1973 
ๅนด    
11   
ๆœˆ    
28   
ๆ—ฅ    
ใซ    
ๅƒ่‘‰   
็œŒ    
ๅ††็ฉบ   
ๅฑฑ    
ใง    
็”Ÿใพใ‚Œ  
ใ€    
ใ‚ฒใƒผใƒ   
ๅˆถไฝœ   
ไผš็คพ   
ใ€Œ    
ใƒŽใƒผใƒ„  
ใ€    
ใฎ    
่จญ็ซ‹   
่€…    
ใ     
ใ€‚    
SRL PA4  
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
โ—„โ”€โ”      
  โ”œโ–บใ‚ฌ    
โ—„โ”€โ”˜      
         
         
         
         
         
         
         
         
         
         
         
         
         
         
โ—„โ”€โ”      
  โ”‚      
  โ”‚      
  โ”œโ–บใƒฒ    
  โ”‚      
  โ”‚      
โ—„โ”€โ”˜      
โ•Ÿโ”€โ”€โ–บPRED 
         
         
         
Tok  
โ”€โ”€โ”€โ”€ 
ๅฅˆ้ ˆ   
ใใฎใ“  
ใฏ    
1973 
ๅนด    
11   
ๆœˆ    
28   
ๆ—ฅ    
ใซ    
ๅƒ่‘‰   
็œŒ    
ๅ††็ฉบ   
ๅฑฑ    
ใง    
็”Ÿใพใ‚Œ  
ใ€    
ใ‚ฒใƒผใƒ   
ๅˆถไฝœ   
ไผš็คพ   
ใ€Œ    
ใƒŽใƒผใƒ„  
ใ€    
ใฎ    
่จญ็ซ‹   
่€…    
ใ     
ใ€‚    
SRL PA5  
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
โ—„โ”€โ”      
  โ”œโ–บใ‚ฌ    
โ—„โ”€โ”˜      
         
         
         
         
         
         
         
         
         
         
         
         
         
         
         
         
         
         
         
         
         
         
โ•Ÿโ”€โ”€โ–บPRED 
         
         
Tok  
โ”€โ”€โ”€โ”€ 
ๅฅˆ้ ˆ   
ใใฎใ“  
ใฏ    
1973 
ๅนด    
11   
ๆœˆ    
28   
ๆ—ฅ    
ใซ    
ๅƒ่‘‰   
็œŒ    
ๅ††็ฉบ   
ๅฑฑ    
ใง    
็”Ÿใพใ‚Œ  
ใ€    
ใ‚ฒใƒผใƒ   
ๅˆถไฝœ   
ไผš็คพ   
ใ€Œ    
ใƒŽใƒผใƒ„  
ใ€    
ใฎ    
่จญ็ซ‹   
่€…    
ใ     
ใ€‚    
PoS    3         4       5       6       7       8       9       10      11
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
NPRโ”€โ”€โ”                                                                     
NPRโ”€โ”€โ”ดโ–บNP โ”€โ”€โ”€โ”€โ”€โ”                                                           
P โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บPP โ”€โ”€โ”€โ”   
NUMโ”€โ”€โ”                                                                 โ”‚   
CL โ”€โ”€โ”ดโ–บNUMCLPโ”€โ”€โ”                                                       โ”‚   
NUMโ”€โ”€โ”         โ”‚                                                       โ”‚   
CL โ”€โ”€โ”ดโ–บNUMCLPโ”€โ”€โ”ผโ–บNP โ”€โ”€โ”€โ”                                               โ”‚   
NUMโ”€โ”€โ”         โ”‚       โ”‚                                               โ”‚   
CL โ”€โ”€โ”ดโ–บNUMCLPโ”€โ”€โ”˜       โ”œโ–บPP โ”€โ”€โ”€โ”                                       โ”‚   
P โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜       โ”‚                                       โ”‚   
NPRโ”€โ”€โ”                         โ”‚                                       โ”‚   
NPRโ”€โ”€โ”ดโ–บPP โ”€โ”€โ”€โ”€โ”€โ”               โ”‚                                       โ”‚   
NPRโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€    โ”œโ–บNP โ”€โ”€โ”€โ”       โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บIPโ”€โ”€โ”€โ”€โ”ค   
NPRโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”˜       โ”œโ–บPPโ”€โ”€โ”€โ”€โ”ค                                       โ”‚   
P โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜       โ”‚                                       โ”‚   
VB โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜                                       โ”œโ–บIP
PU โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค   
N โ”€โ”€โ”€โ”                                                                 โ”‚   
N โ”€โ”€โ”€โ”ดโ–บNP โ”€โ”€โ”€โ”€โ”€โ”€โ–บPRN โ”€โ”€โ”                                               โ”‚   
N โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ–บNP โ”€โ”€โ”€โ”€โ–บPRN โ”€โ”€โ”                               โ”‚   
PULโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค                               โ”‚   
NPRโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ–บNP โ”€โ”€โ”€โ”                       โ”‚   
PURโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜       โ”œโ–บPP โ”€โ”€โ”€โ”               โ”‚   
P โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜       โ”œโ–บIP โ”€โ”€โ”€โ”       โ”‚   
N โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜       โ”œโ–บNPโ”€โ”€โ”€โ”€โ”ค   
N โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜       โ”‚   
AX โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค   
PU โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜   
" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "ja(['2021ๅนดใ€HanLPv2.1ใฏๆฌกไธ–ไปฃใฎๆœ€ๅ…ˆ็ซฏๅคš่จ€่ชžNLPๆŠ€่ก“ใ‚’ๆœฌ็•ช็’ฐๅขƒใซๅฐŽๅ…ฅใ—ใพใ™ใ€‚',\n", + " 'ๅฅˆ้ ˆใใฎใ“ใฏ1973ๅนด11ๆœˆ28ๆ—ฅใซๅƒ่‘‰็œŒๅ††็ฉบๅฑฑใง็”Ÿใพใ‚Œใ€ใ‚ฒใƒผใƒ ๅˆถไฝœไผš็คพใ€ŒใƒŽใƒผใƒ„ใ€ใฎ่จญ็ซ‹่€…ใ ใ€‚',]).pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NifrOGlNK7KD" + }, + "source": [ + "ไปฅๅŠๆ”ฏๆŒ[130็ง่ฏญ่จ€](https://hanlp.hankcs.com/docs/api/hanlp/pretrained/mtl.html#hanlp.pretrained.mtl.UD_ONTONOTES_TOK_POS_LEM_FEA_NER_SRL_DEP_SDP_CON_MMINILMV2L6)็š„ๅคš่ฏญ็ง่”ๅˆๆจกๅž‹๏ผš" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "ae-4j5sbK7KD", + "outputId": "2777cc5d-c1c5-4091-b754-0c220dafea8a" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [] + }, + { + "data": { + "text/html": [ + "
Dep Tree   
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
       โ”Œโ”€โ–บ 
    โ”Œโ”€โ–บโ”œโ”€โ”€ 
    โ”‚  โ””โ”€โ–บ 
    โ”‚  โ”Œโ”€โ–บ 
โ”Œโ”ฌโ”ฌโ”€โ”ดโ”€โ”€โ”ดโ”€โ”€ 
โ”‚โ”‚โ”‚  โ”Œโ”€โ”€โ”€โ–บ 
โ”‚โ”‚โ”‚  โ”‚โ”Œโ”€โ”€โ–บ 
โ”‚โ”‚โ”‚  โ”‚โ”‚โ”Œโ”€โ–บ 
โ”‚โ”‚โ””โ”€โ–บโ””โ”ดโ”ดโ”€โ”€ 
โ”‚โ”‚    โ”Œโ”€โ”€โ–บ 
โ”‚โ”‚    โ”‚โ”Œโ”€โ–บ 
โ”‚โ””โ”€โ”€โ”€โ–บโ””โ”ดโ”€โ”€ 
โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ 
Token            
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
In               
2021             
,                
HanLPv2.1        
delivers         
state-of-the-art 
multilingual     
NLP              
techniques       
to               
production       
environments     
.                
Relation 
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
case     
obl      
punct    
nsubj    
root     
amod     
amod     
compound 
obj      
case     
compound 
obl      
punct    
Lemma            
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
in               
2021             
,                
HANlpv2.1        
deliver          
state-of-the-art 
multilingual     
NLP              
technique        
to               
production       
environment      
.                
PoS   
โ”€โ”€โ”€โ”€โ”€ 
ADP   
NUM   
PUNCT 
PROPN 
VERB  
ADJ   
ADJ   
PROPN 
NOUN  
ADP   
NOUN  
NOUN  
PUNCT 
Tok              
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
In               
2021             
,                
HanLPv2.1        
delivers         
state-of-the-art 
multilingual     
NLP              
techniques       
to               
production       
environments     
.                
NER Type        
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
                
โ”€โ”€โ”€โ–บDATE        
                
โ”€โ”€โ”€โ–บWORK_OF_ART 
                
                
                
                
                
                
                
                
                
Tok              
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
In               
2021             
,                
HanLPv2.1        
delivers         
state-of-the-art 
multilingual     
NLP              
techniques       
to               
production       
environments     
.                
SRL PA1      
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
โ—„โ”€โ”          
โ—„โ”€โ”ดโ–บARGM-TMP 
             
โ”€โ”€โ”€โ–บARG0     
โ•Ÿโ”€โ”€โ–บPRED     
             
             
             
             
โ—„โ”€โ”          
  โ”œโ–บARG2     
โ—„โ”€โ”˜          
             
Tok              
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
In               
2021             
,                
HanLPv2.1        
delivers         
state-of-the-art 
multilingual     
NLP              
techniques       
to               
production       
environments     
.                
PoS      3       4       5       6
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
ADP โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”                  
NUM โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บPP โ”€โ”€โ”€โ”  
PUNCTโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค  
PROPNโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNPโ”€โ”€โ”€โ”€โ”ค  
VERB โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”       โ”‚  
ADJ โ”€โ”€โ”€โ”               โ”‚       โ”‚  
ADJ    โ”‚               โ”‚       โ”‚  
PROPN  โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNPโ”€โ”€โ”€โ”€โ”ผโ–บVPโ”€โ”€โ”€โ”€โ”ผโ–บS
NOUN โ”€โ”€โ”˜               โ”‚       โ”‚  
ADP โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”       โ”‚       โ”‚  
NOUN โ”€โ”€โ”       โ”œโ–บPP โ”€โ”€โ”€โ”˜       โ”‚  
NOUN โ”€โ”€โ”ดโ–บNP โ”€โ”€โ”€โ”˜               โ”‚  
PUNCTโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜  

Dep Tree      
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
          โ”Œโ”€โ–บ 
โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บโ”œโ”€โ”€ 
โ”‚         โ””โ”€โ–บ 
โ”‚โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บโ”Œโ”€โ”€ 
โ”‚โ”‚        โ””โ”€โ–บ 
โ”‚โ”‚        โ”Œโ”€โ–บ 
โ”‚โ”‚   โ”Œโ”€โ”€โ”€โ–บโ”œโ”€โ”€ 
โ”‚โ”‚   โ”‚    โ””โ”€โ–บ 
โ”‚โ”‚   โ”‚โ”Œโ”€โ”€โ”€โ”€โ”€โ–บ 
โ”‚โ”‚   โ”‚โ”‚โ”Œโ”€โ”€โ”€โ”€โ–บ 
โ”‚โ”‚   โ”‚โ”‚โ”‚โ”Œโ”€โ”€โ”€โ–บ 
โ”‚โ”‚   โ”‚โ”‚โ”‚โ”‚โ”Œโ”€โ”€โ–บ 
โ”‚โ”‚   โ”‚โ”‚โ”‚โ”‚โ”‚โ”Œโ”€โ–บ 
โ”‚โ”‚โ”Œโ”€โ–บโ””โ”ดโ”ดโ”ดโ”ดโ”ผโ”€โ”€ 
โ”‚โ”‚โ”‚       โ””โ”€โ–บ 
โ”‚โ”‚โ”‚       โ”Œโ”€โ–บ 
โ”‚โ”‚โ”‚    โ”Œโ”€โ–บโ”œโ”€โ”€ 
โ”‚โ”‚โ”‚    โ”‚  โ””โ”€โ–บ 
โ””โ”ดโ”ดโ”€โ”€โ”€โ”€โ”ดโ”€โ”ฌโ”ฌโ”€โ”€ 
         โ”‚โ””โ”€โ–บ 
         โ””โ”€โ”€โ–บ 
Token     
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021      
ๅนด         
ใ€         
HanLPv2.1 
ใฏ         
ๆฌก         
ไธ–ไปฃ        
ใฎ         
ๆœ€         
ๅ…ˆ็ซฏ        
ๅคš         
่จ€่ชž        
NLP       
ๆŠ€่ก“        
ใ‚’         
ๆœฌ็•ช        
็’ฐๅขƒ        
ใซ         
ๅฐŽๅ…ฅ        
ใ—ใพใ™       
ใ€‚         
Relation 
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
nummod   
obl      
punct    
nsubj    
case     
compound 
nmod     
case     
compound 
compound 
compound 
compound 
compound 
obj      
case     
compound 
obl      
case     
root     
aux      
punct    
Lemma     
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021      
ๅนด         
ใ€         
HANLPV2.1 
ใฏ         
ๆฌก         
ไธ–ไปฃ        
ใฎ         
ๆœ€         
ๅ…ˆ็ซฏ        
ๅคš         
่จ€่ชž        
NLP       
ๆŠ€่ก“        
ใ‚’         
ๆœฌ็•ช        
็’ฐๅขƒ        
ใซ         
ๅฐŽๅ…ฅ        
ใ—ใพใ™       
ใ€‚         
PoS   
โ”€โ”€โ”€โ”€โ”€ 
NUM   
NOUN  
PUNCT 
NOUN  
ADP   
NOUN  
NOUN  
ADP   
NOUN  
NOUN  
NOUN  
NOUN  
NOUN  
NOUN  
ADP   
NOUN  
NOUN  
ADP   
VERB  
AUX   
PUNCT 
Tok       
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021      
ๅนด         
ใ€         
HanLPv2.1 
ใฏ         
ๆฌก         
ไธ–ไปฃ        
ใฎ         
ๆœ€         
ๅ…ˆ็ซฏ        
ๅคš         
่จ€่ชž        
NLP       
ๆŠ€่ก“        
ใ‚’         
ๆœฌ็•ช        
็’ฐๅขƒ        
ใซ         
ๅฐŽๅ…ฅ        
ใ—ใพใ™       
ใ€‚         
NER Type 
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
โ—„โ”€โ”      
โ—„โ”€โ”ดโ–บDATE 
         
         
         
         
         
         
         
         
         
         
         
         
         
         
         
         
         
         
         
Tok       
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021      
ๅนด         
ใ€         
HanLPv2.1 
ใฏ         
ๆฌก         
ไธ–ไปฃ        
ใฎ         
ๆœ€         
ๅ…ˆ็ซฏ        
ๅคš         
่จ€่ชž        
NLP       
ๆŠ€่ก“        
ใ‚’         
ๆœฌ็•ช        
็’ฐๅขƒ        
ใซ         
ๅฐŽๅ…ฅ        
ใ—ใพใ™       
ใ€‚         
PoS      3       4       5       6       7       8       9 
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
NUM โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”   
NOUN โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค   
PUNCTโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค   
NOUN โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค   
ADP โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”                       โ”‚   
NOUN โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค                       โ”‚   
NOUN โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค                       โ”‚   
ADP โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ–บVP โ”€โ”€โ”€โ”€โ–บVP โ”€โ”€โ”€โ”€โ–บIPโ”€โ”€โ”€โ”€โ”ค   
NOUN โ”€โ”€โ”€โ–บADJPโ”€โ”€โ”               โ”‚                       โ”‚   
NOUN โ”€โ”€โ”€โ–บADJPโ”€โ”€โ”ดโ–บADJPโ”€โ”€โ”       โ”‚                       โ”‚   
NOUN โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บADJPโ”€โ”€โ”ดโ–บADJPโ”€โ”€โ”˜                       โ”œโ–บIP
NOUN โ”€โ”€โ”                                               โ”‚   
NOUN   โ”œโ–บNP โ”€โ”€โ”€โ”                                       โ”‚   
NOUN โ”€โ”€โ”˜       โ”œโ–บNP โ”€โ”€โ”€โ”                               โ”‚   
ADP โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜       โ”‚                               โ”‚   
NOUN โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ–บNP โ”€โ”€โ”€โ”                       โ”‚   
NOUN โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜       โ”œโ–บNP โ”€โ”€โ”€โ”               โ”‚   
ADP โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บPP โ”€โ”€โ”€โ”˜       โ”‚               โ”‚   
VERB โ”€โ”€โ”                               โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNPโ”€โ”€โ”€โ”€โ”ค   
AUX โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บVP โ”€โ”€โ”€โ”˜               โ”‚   
PUNCTโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜   

Dep Tree     
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
         โ”Œโ”€โ–บ 
   โ”Œโ”€โ”€โ”€โ”€โ–บโ””โ”€โ”€ 
   โ”‚โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ–บ 
   โ”‚โ”‚   โ”Œโ”€โ”€โ–บ 
   โ”‚โ”‚   โ”‚โ”Œโ”€โ–บ 
   โ”‚โ”‚โ”Œโ”€โ–บโ””โ”ดโ”€โ”€ 
โ”Œโ”ฌโ”€โ”ดโ”ดโ”ดโ”€โ”€โ”€โ”€โ”€โ”€ 
โ”‚โ”‚  โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ–บ 
โ”‚โ”‚  โ”‚    โ”Œโ”€โ–บ 
โ”‚โ”‚  โ”‚โ”Œโ”€โ”€โ–บโ””โ”€โ”€ 
โ”‚โ”‚  โ”‚โ”‚   โ”Œโ”€โ–บ 
โ”‚โ”‚  โ”‚โ”‚โ”Œโ”€โ–บโ””โ”€โ”€ 
โ”‚โ”‚  โ”‚โ”‚โ”‚  โ”Œโ”€โ–บ 
โ”‚โ””โ”€โ–บโ””โ”ดโ”ดโ”€โ”€โ”ดโ”€โ”€ 
โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บ 
Token     
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021      
ๅนด         
HanLPv2.1 
ไธบ         
็”Ÿไบง        
็Žฏๅขƒ        
ๅธฆๆฅ        
ๆฌกไธ–ไปฃ       
ๆœ€         
ๅ…ˆ่ฟ›็š„       
ๅคš         
่ฏญ็ง        
NLP       
ๆŠ€ๆœฏ        
ใ€‚         
Relation  
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
nummod    
nmod:tmod 
nsubj     
case      
nmod      
obl       
root      
nmod      
advmod    
amod      
nummod    
nmod      
nmod      
obj       
punct     
Lemma     
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021      
ๅนด         
HANlpv2.1 
ไธบ         
็”Ÿไบง        
็Žฏๅขƒ        
ๅธฆๆฅ        
ๆฌกไธ–ไปฃ       
ๆœ€         
ๅ…ˆ่ฟ›็š„       
ๅคš         
่ฏญ็ง        
NLP       
ๆŠ€ๆœฏ        
ใ€‚         
PoS   
โ”€โ”€โ”€โ”€โ”€ 
NUM   
NOUN  
X     
ADP   
NOUN  
NOUN  
VERB  
NOUN  
ADV   
ADJ   
NUM   
NOUN  
X     
NOUN  
PUNCT 
Tok       
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021      
ๅนด         
HanLPv2.1 
ไธบ         
็”Ÿไบง        
็Žฏๅขƒ        
ๅธฆๆฅ        
ๆฌกไธ–ไปฃ       
ๆœ€         
ๅ…ˆ่ฟ›็š„       
ๅคš         
่ฏญ็ง        
NLP       
ๆŠ€ๆœฏ        
ใ€‚         
NER Type   
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
โ—„โ”€โ”        
โ—„โ”€โ”ดโ–บDATE   
โ”€โ”€โ”€โ–บPERSON 
           
           
           
           
           
           
           
           
           
           
           
           
Tok       
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021      
ๅนด         
HanLPv2.1 
ไธบ         
็”Ÿไบง        
็Žฏๅขƒ        
ๅธฆๆฅ        
ๆฌกไธ–ไปฃ       
ๆœ€         
ๅ…ˆ่ฟ›็š„       
ๅคš         
่ฏญ็ง        
NLP       
ๆŠ€ๆœฏ        
ใ€‚         
SRL PA1      
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
โ—„โ”€โ”          
โ—„โ”€โ”ดโ–บARGM-TMP 
             
             
             
             
โ•Ÿโ”€โ”€โ–บPRED     
             
             
             
             
             
             
             
             
Tok       
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ 
2021      
ๅนด         
HanLPv2.1 
ไธบ         
็”Ÿไบง        
็Žฏๅขƒ        
ๅธฆๆฅ        
ๆฌกไธ–ไปฃ       
ๆœ€         
ๅ…ˆ่ฟ›็š„       
ๅคš         
่ฏญ็ง        
NLP       
ๆŠ€ๆœฏ        
ใ€‚         
PoS      3       4       5       6       7       8 
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
NUM โ”€โ”€โ”€โ”                                           
NOUN โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”   
X โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNPโ”€โ”€โ”€โ”€โ”ค   
ADP โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”                               โ”‚   
NOUN โ”€โ”€โ”       โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บPP โ”€โ”€โ”€โ”       โ”‚   
NOUN โ”€โ”€โ”ดโ–บNP โ”€โ”€โ”€โ”˜                       โ”‚       โ”‚   
VERB โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”       โ”œโ–บVPโ”€โ”€โ”€โ”€โ”ค   
NOUN โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บADJPโ”€โ”€โ”       โ”‚       โ”‚       โ”‚   
ADV โ”€โ”€โ”€โ”€โ–บADVPโ”€โ”€โ”       โ”‚       โ”œโ–บVP โ”€โ”€โ”€โ”˜       โ”œโ–บIP
ADJ โ”€โ”€โ”€โ”€โ–บADJPโ”€โ”€โ”ดโ–บADJPโ”€โ”€โ”ค       โ”‚               โ”‚   
NUM โ”€โ”€โ”€โ”€โ–บQP โ”€โ”€โ”€โ”       โ”œโ–บNP โ”€โ”€โ”€โ”˜               โ”‚   
NOUN โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”ดโ–บNPโ”€โ”€โ”€โ”€โ”ค                       โ”‚   
X โ”€โ”€โ”€โ”€โ”€โ”               โ”‚                       โ”‚   
NOUN โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–บNP โ”€โ”€โ”€โ”˜                       โ”‚   
PUNCTโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜   
" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "from hanlp.utils.torch_util import gpus_available\n", + "if gpus_available(): # ๅปบ่ฎฎๅœจGPUไธŠ่ฟ่กŒXLMR_BASE๏ผŒๅฆๅˆ™่ฟ่กŒminiๆจกๅž‹\n", + " mul = hanlp.load(hanlp.pretrained.mtl.UD_ONTONOTES_TOK_POS_LEM_FEA_NER_SRL_DEP_SDP_CON_XLMR_BASE)\n", + "else:\n", + " if 'ja' in globals(): # Binderๅ†…ๅญ˜ๅชๆœ‰2G๏ผŒ้‡Šๆ”พๅทฒๅŠ ่ฝฝ็š„ๆจกๅž‹\n", + " del ja\n", + " mul = hanlp.load(hanlp.pretrained.mtl.UD_ONTONOTES_TOK_POS_LEM_FEA_NER_SRL_DEP_SDP_CON_MMINILMV2L6)\n", + "mul(['In 2021, HanLPv2.1 delivers state-of-the-art multilingual NLP techniques to production environments.',\n", + " '2021ๅนดใ€HanLPv2.1ใฏๆฌกไธ–ไปฃใฎๆœ€ๅ…ˆ็ซฏๅคš่จ€่ชžNLPๆŠ€่ก“ใ‚’ๆœฌ็•ช็’ฐๅขƒใซๅฐŽๅ…ฅใ—ใพใ™ใ€‚',\n", + " '2021ๅนด HanLPv2.1ไธบ็”Ÿไบง็Žฏๅขƒๅธฆๆฅๆฌกไธ–ไปฃๆœ€ๅ…ˆ่ฟ›็š„ๅคš่ฏญ็งNLPๆŠ€ๆœฏใ€‚']).pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0QV_93CjK7KD" + }, + "source": [ + "ไฝ ๅฏไปฅๅœจไธ‹้ข่พ“ๅ…ฅไฝ ๆƒณๆ‰ง่กŒ็š„ไปฃ็ ~" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "tutorial.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +}