From 1aed9be0a89bda2c72d90a3cc228c9bd7b3aaf51 Mon Sep 17 00:00:00 2001 From: SWHL Date: Mon, 20 Nov 2023 22:08:17 +0800 Subject: [PATCH] Update files --- .github/FUNDING.yml | 13 ++++ .github/ISSUE_TEMPLATE/bug_report.yaml | 17 ++++++ .github/ISSUE_TEMPLATE/config.yml | 8 +++ .github/workflows/lineless_table_rec.yml | 77 ++++++++++++++++++++++++ demo.py | 11 ++-- docs/doc_lineless_table_rec.md | 0 lineless_table_rec/main.py | 17 +++--- requirements.txt | 8 ++- setup.py => setup_lineless_table_rec.py | 45 ++++++++------ tests/test_lore.py | 2 +- 10 files changed, 159 insertions(+), 39 deletions(-) create mode 100644 .github/FUNDING.yml create mode 100644 .github/ISSUE_TEMPLATE/bug_report.yaml create mode 100644 .github/ISSUE_TEMPLATE/config.yml create mode 100644 .github/workflows/lineless_table_rec.yml create mode 100644 docs/doc_lineless_table_rec.md rename setup.py => setup_lineless_table_rec.py (55%) diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000..9efae38 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,13 @@ +# These are supported funding model platforms + +github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] +patreon: # Replace with a single Patreon username +open_collective: # Replace with a single Open Collective username +ko_fi: # Replace with a single Ko-fi username +tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel +community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry +liberapay: # Replace with a single Liberapay username +issuehunt: # Replace with a single IssueHunt username +otechie: # Replace with a single Otechie username +lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry +custom: https://raw.githubusercontent.com/RapidAI/.github/6db6b6b9273f3151094a462a61fbc8e88564562c/assets/Sponsor.png diff --git a/.github/ISSUE_TEMPLATE/bug_report.yaml 
b/.github/ISSUE_TEMPLATE/bug_report.yaml new file mode 100644 index 0000000..5758947 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yaml @@ -0,0 +1,17 @@ +--- +name: 🐞 Bug +about: Bug +title: 'Bug' +labels: 'Bug' +assignees: '' + +--- + +请提供下述完整信息以便快速定位问题 +(Please provide the following information to quickly locate the problem) +- **系统环境/System Environment**: +- **使用的是哪门语言的程序/Which programming language**: +- **使用当前库的版本/Use version**: +- **可复现问题的demo和文件/Demo of reproducible problems**: +- **完整报错/Complete Error Message**: +- **可能的解决方案/Possible solutions**: \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..5a766be --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,8 @@ +blank_issues_enabled: false +contact_links: + - name: ❓ Questions + url: https://github.com/RapidAI/TableStructureRec/discussions/categories/q-a + about: Please use the community forum for help and questions regarding TableStructureRec Docs + - name: 💡 Feature requests and ideas + url: https://github.com/RapidAI/TableStructureRec/discussions/new?category=feature-requests + about: Please vote for and post new feature ideas in the community forum diff --git a/.github/workflows/lineless_table_rec.yml b/.github/workflows/lineless_table_rec.yml new file mode 100644 index 0000000..eaccf5e --- /dev/null +++ b/.github/workflows/lineless_table_rec.yml @@ -0,0 +1,77 @@ +name: Push lineless_table_rec to pypi + +on: + push: + # branches: [ main ] + # paths: + # - 'label_convert/**' + # - 'docs/doc_whl.md' + # - 'setup.py' + # - '.github/workflows/gen_whl_to_pypi.yml' + tags: + - v* + +jobs: + UnitTesting: + runs-on: ubuntu-latest + steps: + - name: Pull latest code + uses: actions/checkout@v3 + + - name: Set up Python 3.7 + uses: actions/setup-python@v4 + with: + python-version: '3.7' + architecture: 'x64' + + - name: Display Python version + run: python -c "import sys; print(sys.version)" + 
+ - name: Unit testings + run: | + pip install -r requirements.txt + pip install pytest + + wget https://github.com/RapidAI/TableStructureRec/releases/download/v0.0.0/lineless_table_rec_models.zip + unzip lineless_table_rec_models.zip + mv lineless_table_rec_models/*.onnx lineless_table_rec/models/ + + pytest tests/test*.py + + GenerateWHL_PushPyPi: + needs: UnitTesting + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python 3.7 + uses: actions/setup-python@v4 + with: + python-version: '3.7' + architecture: 'x64' + + - name: Run setup.py + run: | + pip install -r requirements.txt + python -m pip install --upgrade pip + pip install wheel get_pypi_latest_version + + wget https://github.com/RapidAI/TableStructureRec/releases/download/v0.0.0/lineless_table_rec_models.zip + unzip lineless_table_rec_models.zip + mv lineless_table_rec_models/*.onnx lineless_table_rec/models/ + + python setup_lineless_table_rec.py bdist_wheel ${{ github.event.head_commit.message }} + + - name: Publish distribution 📦 to Test PyPI + uses: pypa/gh-action-pypi-publish@v1.5.0 + with: + password: ${{ secrets.TEST_PYPI_API_TOKEN }} + repository_url: https://test.pypi.org/legacy/ + packages_dir: dist/ + + # - name: Publish distribution 📦 to PyPI + # uses: pypa/gh-action-pypi-publish@v1.5.0 + # with: + # password: ${{ secrets.PYPI_API_TOKEN }} + # packages_dir: dist/ diff --git a/demo.py b/demo.py index 1836491..ae4a5a9 100644 --- a/demo.py +++ b/demo.py @@ -1,7 +1,6 @@ # -*- encoding: utf-8 -*- # @Author: SWHL # @Contact: liekkaskono@163.com -import time from pathlib import Path from lineless_table_rec import LinelessTableRecognition @@ -10,13 +9,11 @@ img_path = "tests/test_files/test.jpg" img_name = Path(img_path).stem -s = time.perf_counter() -table_str = engine(img_path) -elapse = time.perf_counter() - s +table_str, elapse = engine(img_path) print(table_str) print(elapse) -with open(f"temp/{img_name}.html", "w", encoding="utf-8") as f: - f.write(table_str) 
+# with open(f"temp/{img_name}.html", "w", encoding="utf-8") as f: +# f.write(table_str) -print("ok") +# print("ok") diff --git a/docs/doc_lineless_table_rec.md b/docs/doc_lineless_table_rec.md new file mode 100644 index 0000000..e69de29 diff --git a/lineless_table_rec/main.py b/lineless_table_rec/main.py index eae730c..3e6de06 100644 --- a/lineless_table_rec/main.py +++ b/lineless_table_rec/main.py @@ -44,14 +44,11 @@ def __init__( self.ocr = RapidOCR() def __call__(self, content: Dict[str, Any]) -> str: + ss = time.perf_counter() img = self.load_img(content) - s = time.perf_counter() ocr_res, _ = self.ocr(img) - ocr_elapse = time.perf_counter() - s - print(f"ocr elapse:{ocr_elapse:.5f}") - ss = time.perf_counter() input_info = self.preprocess(img) try: polygons, slct_logi = self.infer(input_info) @@ -65,14 +62,13 @@ def __call__(self, content: Dict[str, Any]) -> str: logi_points = self.sort_logi_by_polygons( sorted_polygons, polygons, logi_points ) + + table_str = plot_html_table(logi_points, cell_box_map) table_elapse = time.perf_counter() - ss - print(f"table rec: {table_elapse:.4f}") + return table_str, table_elapse except Exception: logging.warning(traceback.format_exc()) - return "" - else: - table_str = plot_html_table(logi_points, cell_box_map) - return table_str + return "", 0.0 def preprocess(self, img: np.ndarray) -> Dict[str, Any]: height, width = img.shape[:2] @@ -160,8 +156,9 @@ def main(): args = parser.parse_args() table_rec = LinelessTableRecognition() - table_str = table_rec(args.img_path) + table_str, elapse = table_rec(args.img_path) print(table_str) + print(f"cost: {elapse:.5f}") if __name__ == "__main__": diff --git a/requirements.txt b/requirements.txt index b734151..dd3b987 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,7 @@ -rapidocr_onnxruntime +numpy>=1.21.6 +onnxruntime>=1.14.1 +opencv_contrib_python>=4.8.1.78 +pytest>=7.1.2 +rapidocr_onnxruntime>=1.3.8 +setuptools>=61.2.0 +shapely>=2.0.2 diff --git a/setup.py 
b/setup_lineless_table_rec.py similarity index 55% rename from setup.py rename to setup_lineless_table_rec.py index adacd1d..7654aee 100644 --- a/setup.py +++ b/setup_lineless_table_rec.py @@ -1,45 +1,50 @@ # -*- encoding: utf-8 -*- # @Author: SWHL # @Contact: liekkaskono@163.com -import re import sys -from typing import List +from typing import List, Union +from pathlib import Path +from get_pypi_latest_version import GetPyPiLatestVersion import setuptools -def extract_version(message: str) -> str: - pattern = r"\d+\.(?:\d+\.)*\d+" - matched_versions = re.findall(pattern, message) - if matched_versions: - return matched_versions[0] - return "" - +def get_readme() -> str: + root_dir = Path(__file__).resolve().parent + readme_path = str(root_dir / "docs" / "doc_lineless_table_rec.md") + with open(readme_path, "r", encoding="utf-8") as f: + readme = f.read() + return readme -def read_txt(txt_path: str) -> List: - if not isinstance(txt_path, str): - txt_path = str(txt_path) +def read_txt(txt_path: Union[Path, str]) -> List[str]: with open(txt_path, "r", encoding="utf-8") as f: - data = list(map(lambda x: x.rstrip("\n"), f)) + data = [v.rstrip("\n") for v in f] return data MODULE_NAME = "lineless_table_rec" -if len(sys.argv) > 2: - argv_str = "".join(sys.argv[2:]) - version = extract_version(argv_str) -else: - version = "2." 
+obtainer = GetPyPiLatestVersion() +try: + latest_version = obtainer(MODULE_NAME) +except Exception: + latest_version = "0.0.0" +VERSION_NUM = obtainer.version_add_one(latest_version) + +if len(sys.argv) > 2: + match_str = " ".join(sys.argv[2:]) + matched_versions = obtainer.extract_version(match_str) + if matched_versions: + VERSION_NUM = matched_versions sys.argv = sys.argv[:2] setuptools.setup( name=MODULE_NAME, - version=version, + version=VERSION_NUM, platforms="Any", - description="无线表格还原库", + description="", author="SWHL", author_email="liekkaskono@163.com", install_requires=read_txt("requirements.txt"), diff --git a/tests/test_lore.py b/tests/test_lore.py index 6f59632..0d09e52 100644 --- a/tests/test_lore.py +++ b/tests/test_lore.py @@ -30,7 +30,7 @@ def test_input_normal(img_path, table_str_len, td_nums): img_path = test_file_dir / img_path img = cv2.imread(str(img_path)) - table_str = table_recog(img) + table_str, _ = table_recog(img) assert len(table_str) >= table_str_len assert table_str.count("td") == td_nums