Skip to content

Commit

Permalink
auto lookahead
Browse files: browse the repository at this point in the history
  • Loading branch information
advancehs committed Mar 3, 2023
1 parent d7390fe commit de5c689
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions in addana/__init__.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -289,12 +289,12 @@ def transform(location_strs, umap=myumap, index=[], cut=False, lookahead=8, pos_
from .exceptions import InputTypeNotSuportException
raise InputTypeNotSuportException(
'location_strs参数必须为可迭代的类型(比如list, Series等实现了__iter__方法的对象)')
lookahead = 16 if location_strs>8 else 16

result = pd.DataFrame(
[_handle_one_record(addr, umap, cut, lookahead, pos_sensitive, open_warning) for addr in location_strs],
[_handle_one_record(addr, umap, cut, 16 if len(addr)>8 else 8, pos_sensitive, open_warning) for addr in location_strs],
index=index) \
if index else pd.DataFrame(
[_handle_one_record(addr, umap, cut, lookahead, pos_sensitive, open_warning) for addr in location_strs])
[_handle_one_record(addr, umap, cut, 16 if len(addr)>8 else 8, pos_sensitive, open_warning) for addr in location_strs])
# 这句的唯一作用是让列的顺序好看一些
if pos_sensitive:
return result.loc[:, ('省', '市', '区', '地名', '省_pos', '市_pos', '区_pos')]
Expand All @@ -315,7 +315,7 @@ def _handle_one_record(addr, umap, cut, lookahead, pos_sensitive, open_warning):
return empty

# 地名提取
pca, left_addr = _extract_addr(addr, cut, lookahead)
pca, left_addr = _extract_addr(addr, cut, 16 if len(addr)>8 else 8)
# 填充市
_fill_city(pca, umap, open_warning)
# 填充省
Expand Down Expand Up @@ -369,7 +369,7 @@ def _extract_addr(addr, cut, lookahead):
Returns:
[sheng, shi, qu, (sheng_pos, shi_pos, qu_pos)], addr
"""
return _jieba_extract(addr) if cut else _full_text_extract(addr, lookahead)
return _jieba_extract(addr) if cut else _full_text_extract(addr, 16 if len(addr)>8 else 8)


def _jieba_extract(addr):
Expand Down Expand Up @@ -435,7 +435,7 @@ def _defer_set():
# 用于设置pca属性的函数
defer_fun = None
# length为从起始位置开始的长度,从中提取出最长的地址
for length in range(1, lookahead + 1):
for length in range(1, 16 if len(addr)>8 else 8 + 1):
end_pos = i + length
if end_pos > len(addr):
break
Expand Down

0 comments on commit de5c689

Please sign in to comment.