From d041d9a6016a85dbb4ad86587a30c54f0a769ac9 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Thu, 8 Feb 2024 14:41:25 +0100 Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?= =?UTF-8?q?l=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.5 --- .bumpversion.cfg | 6 ++--- .github/workflows/test.yml | 2 +- Makefile | 2 +- README.md | 10 ++++---- examples/hello-bert-mask/config.yaml | 2 +- pyproject.toml | 4 ++-- .../__init__.py | 24 ++++++++++--------- tests/test_version.py | 4 ++-- 8 files changed, 28 insertions(+), 26 deletions(-) rename src/{sparv_bert_neighbour => bert_neighbour}/__init__.py (82%) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 5ea14d4..09ec7b9 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -8,10 +8,10 @@ allow_dirty = False search = version = "{current_version}" replace = version = "{new_version}" -[bumpversion:file:src/sparv_bert_neighbour/__init__.py] +[bumpversion:file:src/bert_neighbour/__init__.py] search = __version__ = "{current_version}" replace = __version__ = "{new_version}" [bumpversion:file:tests/test_version.py] -search = assert sparv_bert_neighbour.__version__ == "{current_version}" -replace = assert sparv_bert_neighbour.__version__ == "{new_version}" +search = assert bert_neighbour.__version__ == "{current_version}" +replace = assert bert_neighbour.__version__ == "{new_version}" diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 334fc1d..44804ee 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -206,7 +206,7 @@ jobs: name: pypi_files path: dist - - run: rm -r src/sparv_bert_neighbour + - run: rm -r src/bert_neighbour - run: pip install typing-extensions - run: pip install -r tests/requirements-testing.txt - run: pip install sparv-bert-neighbour-plugin --no-index --no-deps --find-links dist --force-reinstall diff --git a/Makefile b/Makefile index 2c02b77..adca1ae 100644 --- a/Makefile +++ b/Makefile @@ -50,7 +50,7 @@ help: PLATFORM := `uname -o` REPO := "sparv-bert-neighbour-plugin" -PROJECT_SRC := "src/sparv_bert_neighbour" +PROJECT_SRC := "src/bert_neighbour" ifeq (${VIRTUAL_ENV},) VENV_NAME = .venv diff --git a/README.md b/README.md index e0095a8..e2e6844 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ annotation exclusively by adding it as the only annotation to export under `xml_ ```yaml xml_export: annotations: - - :sparv_bert_neighbour.transformer-neighbour + - :bert_neighbour.transformer-neighbour ``` To use it together with other annotations you might add it under `export`: @@ -35,7 +35,7 @@ To use it together with other annotations you might add it under `export`: ```yaml export: annotations: - - :sparv_bert_neighbour.transformer-neighbour + - :bert_neighbour.transformer-neighbour ... ``` @@ -50,7 +50,7 @@ You can configure this plugin by choosing a huggingface model, huggingface trans The model defaults to [`KBLab/bert-base-swedish-cased`](https://huggingface.co/KBLab/bert-base-swedish-cased) but can be configured in `config.yaml`: ```yaml -sparv_bert_neighbour: +bert_neighbour: model: "KBLab/bert-base-swedish-cased" ``` @@ -59,7 +59,7 @@ sparv_bert_neighbour: The tokenizer defaults to [`KBLab/bert-base-swedish-cased`](https://huggingface.co/KBLab/bert-base-swedish-cased) but can be configured in `config.yaml`: ```yaml -sparv_bert_neighbour: +bert_neighbour: tokenizer: "KBLab/bert-base-swedish-cased" ``` @@ -68,6 +68,6 @@ sparv_bert_neighbour: The number of neighbours defaults to `5` but can be configured in `config.yaml`: ```yaml -sparv_bert_neighbour: +bert_neighbour: num_neighbours: 5 ``` diff --git a/examples/hello-bert-mask/config.yaml b/examples/hello-bert-mask/config.yaml index 06860f6..9fccd0d 100644 --- a/examples/hello-bert-mask/config.yaml +++ b/examples/hello-bert-mask/config.yaml @@ -10,7 +10,7 @@ export: - - - :stanza.pos - - :sparv_bert_neighbour.transformer-neighbour + - :bert_neighbour.transformer-neighbour sparv: compression: none diff --git a/pyproject.toml b/pyproject.toml index e687d02..08e1fac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ requires = ["hatchling"] build-backend = "hatchling.build" [project.entry-points."sparv.plugin"] -sparv_bert_neighbour = "sparv_bert_neighbour" +bert_neighbour = "bert_neighbour" [project.urls] Homepage = "https://github.com/spraakbanken/sparv-bert-neighbour-plugin" @@ -55,7 +55,7 @@ dev-dependencies = [ exclude = ["/.github", "/docs"] [tool.hatch.build.targets.wheel] -packages = ["src/sparv_bert_neighbour"] +packages = ["src/bert_neighbour"] [tool.hatch.metadata] allow-direct-references = true diff --git a/src/sparv_bert_neighbour/__init__.py b/src/bert_neighbour/__init__.py similarity index 82% rename from src/sparv_bert_neighbour/__init__.py rename to src/bert_neighbour/__init__.py index 20b0118..6cdcad9 100644 --- a/src/sparv_bert_neighbour/__init__.py +++ b/src/bert_neighbour/__init__.py @@ -18,17 +18,17 @@ __config__ = [ Config( - "sparv_bert_neighbour.model", + "bert_neighbour.model", description="Huggingface pretrained model name", default="KBLab/bert-base-swedish-cased", ), Config( - "sparv_bert_neighbour.tokenizer", + "bert_neighbour.tokenizer", description="HuggingFace pretrained tokenizer name", default="KBLab/bert-base-swedish-cased", ), Config( - "sparv_bert_neighbour.num_neighbours", + "bert_neighbour.num_neighbours", description="The number of neighbours to list", default=5, ), @@ -46,22 +46,22 @@ ) def annotate_masked_bert( out_neighbour: Output = Output( - ":sparv_bert_neighbour.transformer-neighbour", + ":bert_neighbour.transformer-neighbour", cls="transformer_neighbour", description="Transformer neighbours from masked BERT (format: '|:|...|)", ), word: Annotation = Annotation(""), sentence: Annotation = Annotation(""), - model_name: str = Config("sparv_bert_neighbour.model"), - tokenizer_name: str = Config("sparv_bert_neighbour.tokenizer"), - num_neighbours_str: str = Config("sparv_bert_neighbour.num_neighbours"), + model_name: str = Config("bert_neighbour.model"), + tokenizer_name: str = Config("bert_neighbour.tokenizer"), + num_neighbours_str: str = Config("bert_neighbour.num_neighbours"), ) -> None: logger.info("annotate_masked_bert") try: num_neighbours = int(num_neighbours_str) except ValueError as exc: raise SparvErrorMessage( - f"'sparv_bert_neighbour.num_neighbours' must contain an 'int' got: '{num_neighbours_str}'" + f"'bert_neighbour.num_neighbours' must contain an 'int' got: '{num_neighbours_str}'" ) from exc tokenizer = BertTokenizer.from_pretrained(tokenizer_name) model = BertForMaskedLM.from_pretrained(model_name) @@ -78,9 +78,11 @@ def annotate_masked_bert( token_indices = list(sent) for token_index_to_mask in token_indices: sent_to_tag = TOK_SEP.join( - "[MASK]" - if token_index == token_index_to_mask - else token_word[token_index] + ( + "[MASK]" + if token_index == token_index_to_mask + else token_word[token_index] + ) for token_index in sent ) diff --git a/tests/test_version.py b/tests/test_version.py index cb3f7a2..612b484 100644 --- a/tests/test_version.py +++ b/tests/test_version.py @@ -1,5 +1,5 @@ -import sparv_bert_neighbour +import bert_neighbour def test_version() -> None: - assert sparv_bert_neighbour.__version__ == "0.2.1" + assert bert_neighbour.__version__ == "0.2.1"