Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/pypi2021'
Browse files Browse the repository at this point in the history
  • Loading branch information
searchivarius committed Mar 2, 2023
2 parents fa768e8 + 467c642 commit d847ac1
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 10 deletions.
1 change: 0 additions & 1 deletion flexneuart/ir_datasets/spacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ def __init__(self, input_fields : list,
:param model_name a name of the spacy model to use, e.g., en_core_web_sm
:param stop_word_file the name of the stop word file
:param remove_punct a bool flag indicating if the punctuation tokens need to be removed
:param sent_split a bool flag indicating if sentence splitting is necessary
:param keep_only_alpha_num a bool flag indicating if we need to keep only alpha-numeric characters
:param enable_pos a bool flag that enables POS tagging (which, e.g., can improve lemmatization)
"""
Expand Down
5 changes: 2 additions & 3 deletions flexneuart/text_proc/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def __init__(self, model_name):
:param model_name: a name of the spacy model to use, e.g., en_core_web_sm
"""
self._nlp = spacy.load(model_name, disable=[SPACY_NER, SPACY_PARSER, SPACY_POS])
self._nlp.add_pipe(self._nlp.create_pipe("sentencizer"))
self._nlp.add_pipe("sentencizer")

def __call__(self, text):
"""A thin wrapper that merely calls spacy.
Expand Down Expand Up @@ -111,8 +111,7 @@ def __init__(self, model_name, stop_words,

self._nlp = spacy.load(model_name, disable=disable_list)
if sent_split:
sentencizer = self._nlp.create_pipe("sentencizer")
self._nlp.add_pipe(sentencizer)
self._nlp.add_pipe("sentencizer")

self._removePunct = remove_punct
self._stopWords = frozenset([w.lower() for w in stop_words])
Expand Down
12 changes: 7 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@ torch
torchtext

transformers
# we fix Spacy version, though in the future we would have to upgrade too
spacy==2.2.3
spacy
sentence-transformers
sentencepiece
krovetzstemmer
Expand All @@ -30,6 +29,9 @@ typing-extensions

# Colbert specific
ujson
# Deeberta specific, might fail with older
# protobuf versions.
protobuf==3.20

# DeBERTa specific: might fail with older protobuf versions.
# However, let's not install it by default; newer
# transformers versions should install protobuf on their own.
#protobuf==3.20

Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
]
},
{
"part_name": "all",
"part_name": "queries_all",
"dataset_name": "trec-robust04",
"is_query": true,
"src_attributes": [
Expand Down

0 comments on commit d847ac1

Please sign in to comment.