Skip to content

Commit

Permalink
Update files
Browse files Browse the repository at this point in the history
  • Loading branch information
SWHL committed Nov 20, 2023
1 parent f3f87cb commit 1aed9be
Show file tree
Hide file tree
Showing 10 changed files with 159 additions and 39 deletions.
13 changes: 13 additions & 0 deletions .github/FUNDING.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# These are supported funding model platforms

github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
patreon: # Replace with a single Patreon username
open_collective: # Replace with a single Open Collective username
ko_fi: # Replace with a single Ko-fi username
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
liberapay: # Replace with a single Liberapay username
issuehunt: # Replace with a single IssueHunt username
otechie: # Replace with a single Otechie username
lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
custom: https://raw.githubusercontent.com/RapidAI/.github/6db6b6b9273f3151094a462a61fbc8e88564562c/assets/Sponsor.png
17 changes: 17 additions & 0 deletions .github/ISSUE_TEMPLATE/bug_report.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
---
name: 🐞 Bug
about: Bug
title: 'Bug'
labels: 'Bug'
assignees: ''

---

请提供下述完整信息以便快速定位问题
(Please provide the following information to quickly locate the problem)
- **系统环境/System Environment**:
- **使用的是哪门语言的程序/Which programming language**:
- **使用当前库的版本/Use version**:
- **可复现问题的demo和文件/Demo of reproducible problems**:
- **完整报错/Complete Error Message**:
- **可能的解决方案/Possible solutions**:
8 changes: 8 additions & 0 deletions .github/ISSUE_TEMPLATE/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Issue-template chooser settings: blank issues are disabled, and questions
# and feature requests are redirected to the repository's GitHub Discussions.
blank_issues_enabled: false
contact_links:
  - name: ❓ Questions
    url: https://github.com/RapidAI/TableStructureRec/discussions/categories/q-a
    about: Please use the community forum for help and questions regarding TableStructureRec
  - name: 💡 Feature requests and ideas
    url: https://github.com/RapidAI/TableStructureRec/discussions/new?category=feature-requests
    about: Please vote for and post new feature ideas in the community forum
77 changes: 77 additions & 0 deletions .github/workflows/lineless_table_rec.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Runs the unit tests and, if they pass, builds the lineless_table_rec wheel
# and uploads it to Test PyPI. Triggered by pushing a tag that starts with "v".
name: Push lineless_table_rec to pypi

on:
  push:
    # branches: [ main ]
    # paths:
    #   - 'label_convert/**'
    #   - 'docs/doc_whl.md'
    #   - 'setup.py'
    #   - '.github/workflows/gen_whl_to_pypi.yml'
    tags:
      - v*

jobs:
  UnitTesting:
    runs-on: ubuntu-latest
    steps:
      - name: Pull latest code
        uses: actions/checkout@v3

      # NOTE(review): confirm Python 3.7 is still provided for the
      # ubuntu-latest runner image used by this job.
      - name: Set up Python 3.7
        uses: actions/setup-python@v4
        with:
          python-version: '3.7'
          architecture: 'x64'

      - name: Display Python version
        run: python -c "import sys; print(sys.version)"

      - name: Unit tests
        run: |
          pip install -r requirements.txt
          pip install pytest
          wget https://github.com/RapidAI/TableStructureRec/releases/download/v0.0.0/lineless_table_rec_models.zip
          unzip lineless_table_rec_models.zip
          mv lineless_table_rec_models/*.onnx lineless_table_rec/models/
          pytest tests/test*.py

  GenerateWHL_PushPyPi:
    needs: UnitTesting
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3

      - name: Set up Python 3.7
        uses: actions/setup-python@v4
        with:
          python-version: '3.7'
          architecture: 'x64'

      - name: Run setup.py
        env:
          # The commit message is attacker-influenced text. Passing it through
          # an environment variable (instead of expanding ${{ ... }} inside the
          # script) prevents it from being interpreted as shell code.
          COMMIT_MESSAGE: ${{ github.event.head_commit.message }}
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install wheel get_pypi_latest_version
          wget https://github.com/RapidAI/TableStructureRec/releases/download/v0.0.0/lineless_table_rec_models.zip
          unzip lineless_table_rec_models.zip
          mv lineless_table_rec_models/*.onnx lineless_table_rec/models/
          python setup_lineless_table_rec.py bdist_wheel "$COMMIT_MESSAGE"

      # NOTE(review): the action reference was garbled in the source
      # ("pypa/[email protected]"); gh-action-pypi-publish@v1.5.0 is assumed
      # here - confirm the intended version before merging.
      - name: Publish distribution 📦 to Test PyPI
        uses: pypa/gh-action-pypi-publish@v1.5.0
        with:
          password: ${{ secrets.TEST_PYPI_API_TOKEN }}
          repository_url: https://test.pypi.org/legacy/
          packages_dir: dist/

      # - name: Publish distribution 📦 to PyPI
      #   uses: pypa/gh-action-pypi-publish@v1.5.0
      #   with:
      #     password: ${{ secrets.PYPI_API_TOKEN }}
      #     packages_dir: dist/
11 changes: 4 additions & 7 deletions demo.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# -*- encoding: utf-8 -*-
# @Author: SWHL
# @Contact: [email protected]
import time
from pathlib import Path

from lineless_table_rec import LinelessTableRecognition
Expand All @@ -10,13 +9,11 @@
img_path = "tests/test_files/test.jpg"
img_name = Path(img_path).stem

s = time.perf_counter()
table_str = engine(img_path)
elapse = time.perf_counter() - s
table_str, elapse = engine(img_path)

print(table_str)
print(elapse)
with open(f"temp/{img_name}.html", "w", encoding="utf-8") as f:
f.write(table_str)
# with open(f"temp/{img_name}.html", "w", encoding="utf-8") as f:
# f.write(table_str)

print("ok")
# print("ok")
Empty file added docs/doc_lineless_table_rec.md
Empty file.
17 changes: 7 additions & 10 deletions lineless_table_rec/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,11 @@ def __init__(
self.ocr = RapidOCR()

def __call__(self, content: Dict[str, Any]) -> str:
ss = time.perf_counter()
img = self.load_img(content)

s = time.perf_counter()
ocr_res, _ = self.ocr(img)
ocr_elapse = time.perf_counter() - s
print(f"ocr elapse:{ocr_elapse:.5f}")

ss = time.perf_counter()
input_info = self.preprocess(img)
try:
polygons, slct_logi = self.infer(input_info)
Expand All @@ -65,14 +62,13 @@ def __call__(self, content: Dict[str, Any]) -> str:
logi_points = self.sort_logi_by_polygons(
sorted_polygons, polygons, logi_points
)

table_str = plot_html_table(logi_points, cell_box_map)
table_elapse = time.perf_counter() - ss
print(f"table rec: {table_elapse:.4f}")
return table_str, table_elapse
except Exception:
logging.warning(traceback.format_exc())
return ""
else:
table_str = plot_html_table(logi_points, cell_box_map)
return table_str
return "", 0.0

def preprocess(self, img: np.ndarray) -> Dict[str, Any]:
height, width = img.shape[:2]
Expand Down Expand Up @@ -160,8 +156,9 @@ def main():
args = parser.parse_args()

table_rec = LinelessTableRecognition()
table_str = table_rec(args.img_path)
table_str, elapse = table_rec(args.img_path)
print(table_str)
print(f"cost: {elapse:.5f}")


if __name__ == "__main__":
Expand Down
8 changes: 7 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,7 @@
rapidocr_onnxruntime
numpy>=1.21.6
onnxruntime>=1.14.1
opencv_contrib_python>=4.8.1.78
pytest>=7.1.2
rapidocr_onnxruntime>=1.3.8
setuptools>=61.2.0
shapely>=2.0.2
45 changes: 25 additions & 20 deletions setup.py → setup_lineless_table_rec.py
Original file line number Diff line number Diff line change
@@ -1,45 +1,50 @@
# -*- encoding: utf-8 -*-
# @Author: SWHL
# @Contact: [email protected]
import re
import sys
from typing import List
from typing import List, Union
from pathlib import Path
from get_pypi_latest_version import GetPyPiLatestVersion

import setuptools


def extract_version(message: str) -> str:
    """Return the first dotted version number found in *message*.

    A version is two or more runs of digits joined by dots, such as
    ``1.2`` or ``0.10.3``.

    Args:
        message: Free-form text to scan.

    Returns:
        The first matching version string, or ``""`` when there is none.
    """
    # re.search stops at the first hit; findall would scan the whole message
    # only for everything after the first result to be thrown away.
    matched = re.search(r"\d+\.(?:\d+\.)*\d+", message)
    return matched.group(0) if matched else ""

def get_readme() -> str:
    """Return the text of ``docs/doc_lineless_table_rec.md`` beside this file."""
    # Resolve relative to this script so the result does not depend on the
    # current working directory.
    doc_file = Path(__file__).resolve().parent / "docs" / "doc_lineless_table_rec.md"
    return doc_file.read_text(encoding="utf-8")

def read_txt(txt_path: Union[Path, str]) -> List[str]:
    """Read a UTF-8 text file and return its lines without trailing newlines.

    Args:
        txt_path: Location of the text file, as a ``str`` or ``pathlib.Path``.

    Returns:
        One string per line of the file, with trailing newline characters
        removed. An empty file yields an empty list.
    """
    with open(txt_path, "r", encoding="utf-8") as f:
        # Iterate the handle exactly once: a second pass over the same handle
        # would see an exhausted iterator and produce an empty list.
        return [line.rstrip("\n") for line in f]


MODULE_NAME = "lineless_table_rec"

if len(sys.argv) > 2:
argv_str = "".join(sys.argv[2:])
version = extract_version(argv_str)
else:
version = "2."
obtainer = GetPyPiLatestVersion()
try:
latest_version = obtainer(MODULE_NAME)
except Exception:
latest_version = "0.0.0"

VERSION_NUM = obtainer.version_add_one(latest_version)

if len(sys.argv) > 2:
match_str = " ".join(sys.argv[2:])
matched_versions = obtainer.extract_version(match_str)
if matched_versions:
VERSION_NUM = matched_versions
sys.argv = sys.argv[:2]

setuptools.setup(
name=MODULE_NAME,
version=version,
version=VERSION_NUM,
platforms="Any",
description="无线表格还原库",
description="",
author="SWHL",
author_email="[email protected]",
install_requires=read_txt("requirements.txt"),
Expand Down
2 changes: 1 addition & 1 deletion tests/test_lore.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def test_input_normal(img_path, table_str_len, td_nums):
img_path = test_file_dir / img_path
img = cv2.imread(str(img_path))

table_str = table_recog(img)
table_str, _ = table_recog(img)

assert len(table_str) >= table_str_len
assert table_str.count("td") == td_nums

0 comments on commit 1aed9be

Please sign in to comment.