Update files

RapidAI · Nov 20, 2023 · 1aed9be · 1aed9be
1 parent f3f87cb
commit 1aed9be
Show file tree

Hide file tree

Showing 10 changed files with 159 additions and 39 deletions.
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
@@ -0,0 +1,13 @@
+# These are supported funding model platforms
+
+github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
+patreon: # Replace with a single Patreon username
+open_collective: # Replace with a single Open Collective username
+ko_fi: # Replace with a single Ko-fi username
+tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
+community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
+liberapay: # Replace with a single Liberapay username
+issuehunt: # Replace with a single IssueHunt username
+otechie: # Replace with a single Otechie username
+lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
+custom: https://raw.githubusercontent.com/RapidAI/.github/6db6b6b9273f3151094a462a61fbc8e88564562c/assets/Sponsor.png
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yaml b/.github/ISSUE_TEMPLATE/bug_report.yaml
@@ -0,0 +1,17 @@
+---
+name: 🐞 Bug
+about: Bug
+title: 'Bug'
+labels: 'Bug'
+assignees: ''
+
+---
+
+请提供下述完整信息以便快速定位问题
+(Please provide the following information to quickly locate the problem)
+- **系统环境/System Environment**:
+- **使用的是哪门语言的程序/Which programming language**:
+- **使用当前库的版本/Use version**:
+- **可复现问题的demo和文件/Demo of reproducible problems**:
+- **完整报错/Complete Error Message**:
+- **可能的解决方案/Possible solutions**:
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,8 @@
+blank_issues_enabled: false
+contact_links:
+  - name: ❓ Questions
+    url: https://github.com/RapidAI/TableStructureRec/discussions/categories/q-a
+    about: Please use the community forum for help and questions regarding TableStructureRec
+  - name: 💡 Feature requests and ideas
+    url: https://github.com/RapidAI/TableStructureRec/discussions/new?category=feature-requests
+    about: Please vote for and post new feature ideas in the community forum
diff --git a/.github/workflows/lineless_table_rec.yml b/.github/workflows/lineless_table_rec.yml
@@ -0,0 +1,77 @@
+name: Push lineless_table_rec to pypi
+
+on:
+  push:
+    # branches: [ main ]
+    # paths:
+      # - 'label_convert/**'
+      # - 'docs/doc_whl.md'
+      # - 'setup.py'
+      # - '.github/workflows/gen_whl_to_pypi.yml'
+    tags:
+      - v*
+
+jobs:
+  UnitTesting:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Pull latest code
+        uses: actions/checkout@v3
+
+      - name: Set up Python 3.7
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.7'
+          architecture: 'x64'
+
+      - name: Display Python version
+        run: python -c "import sys; print(sys.version)"
+
+      - name: Unit tests
+        run: |
+          pip install -r requirements.txt
+          pip install pytest
+
+          wget https://github.com/RapidAI/TableStructureRec/releases/download/v0.0.0/lineless_table_rec_models.zip
+          unzip lineless_table_rec_models.zip
+          mv lineless_table_rec_models/*.onnx lineless_table_rec/models/
+
+          pytest tests/test*.py
+
+  GenerateWHL_PushPyPi:
+    needs: UnitTesting
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Set up Python 3.7
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.7'
+          architecture: 'x64'
+
+      - name: Run setup.py
+        run: |
+          pip install -r requirements.txt
+          python -m pip install --upgrade pip
+          pip install wheel get_pypi_latest_version
+
+          wget https://github.com/RapidAI/TableStructureRec/releases/download/v0.0.0/lineless_table_rec_models.zip
+          unzip lineless_table_rec_models.zip
+          mv lineless_table_rec_models/*.onnx lineless_table_rec/models/
+
+          python setup_lineless_table_rec.py bdist_wheel ${{ github.event.head_commit.message }}
+
+      - name: Publish distribution 📦 to Test PyPI
+        uses: pypa/gh-action-pypi-publish@v1.5.0
+        with:
+          password: ${{ secrets.TEST_PYPI_API_TOKEN }}
+          repository_url: https://test.pypi.org/legacy/
+          packages_dir:  dist/
+
+      # - name: Publish distribution 📦 to PyPI
+      #   uses: pypa/gh-action-pypi-publish@v1.5.0
+      #   with:
+      #     password: ${{ secrets.PYPI_API_TOKEN }}
+      #     packages_dir: dist/
diff --git a/demo.py b/demo.py
@@ -1,7 +1,6 @@
 # -*- encoding: utf-8 -*-
 # @Author: SWHL
 # @Contact: [email protected]
-import time
 from pathlib import Path
 
 from lineless_table_rec import LinelessTableRecognition
@@ -10,13 +9,11 @@
 img_path = "tests/test_files/test.jpg"
 img_name = Path(img_path).stem
 
-s = time.perf_counter()
-table_str = engine(img_path)
-elapse = time.perf_counter() - s
+table_str, elapse = engine(img_path)
 
 print(table_str)
 print(elapse)
-with open(f"temp/{img_name}.html", "w", encoding="utf-8") as f:
-    f.write(table_str)
+# with open(f"temp/{img_name}.html", "w", encoding="utf-8") as f:
+#     f.write(table_str)
 
-print("ok")
+# print("ok")
diff --git a/docs/doc_lineless_table_rec.md b/docs/doc_lineless_table_rec.md
diff --git a/lineless_table_rec/main.py b/lineless_table_rec/main.py
@@ -44,14 +44,11 @@ def __init__(
         self.ocr = RapidOCR()
 
     def __call__(self, content: Dict[str, Any]) -> str:
+        ss = time.perf_counter()
         img = self.load_img(content)
 
-        s = time.perf_counter()
         ocr_res, _ = self.ocr(img)
-        ocr_elapse = time.perf_counter() - s
-        print(f"ocr elapse:{ocr_elapse:.5f}")
 
-        ss = time.perf_counter()
         input_info = self.preprocess(img)
         try:
             polygons, slct_logi = self.infer(input_info)
@@ -65,14 +62,13 @@ def __call__(self, content: Dict[str, Any]) -> str:
             logi_points = self.sort_logi_by_polygons(
                 sorted_polygons, polygons, logi_points
             )
+
+            table_str = plot_html_table(logi_points, cell_box_map)
             table_elapse = time.perf_counter() - ss
-            print(f"table rec: {table_elapse:.4f}")
+            return table_str, table_elapse
         except Exception:
             logging.warning(traceback.format_exc())
-            return ""
-        else:
-            table_str = plot_html_table(logi_points, cell_box_map)
-            return table_str
+            return "", 0.0
 
     def preprocess(self, img: np.ndarray) -> Dict[str, Any]:
         height, width = img.shape[:2]
@@ -160,8 +156,9 @@ def main():
     args = parser.parse_args()
 
     table_rec = LinelessTableRecognition()
-    table_str = table_rec(args.img_path)
+    table_str, elapse = table_rec(args.img_path)
     print(table_str)
+    print(f"cost: {elapse:.5f}")
 
 
 if __name__ == "__main__":

diff --git a/requirements.txt b/requirements.txt
@@ -1 +1,7 @@
-rapidocr_onnxruntime
+numpy>=1.21.6
+onnxruntime>=1.14.1
+opencv_contrib_python>=4.8.1.78
+pytest>=7.1.2
+rapidocr_onnxruntime>=1.3.8
+setuptools>=61.2.0
+shapely>=2.0.2
diff --git a/setup.py → setup_lineless_table_rec.py b/setup.py → setup_lineless_table_rec.py
@@ -1,45 +1,50 @@
 # -*- encoding: utf-8 -*-
 # @Author: SWHL
 # @Contact: [email protected]
-import re
 import sys
-from typing import List
+from typing import List, Union
+from pathlib import Path
+from get_pypi_latest_version import GetPyPiLatestVersion
 
 import setuptools
 
 
-def extract_version(message: str) -> str:
-    pattern = r"\d+\.(?:\d+\.)*\d+"
-    matched_versions = re.findall(pattern, message)
-    if matched_versions:
-        return matched_versions[0]
-    return ""
-
+def get_readme() -> str:
+    root_dir = Path(__file__).resolve().parent
+    readme_path = str(root_dir / "docs" / "doc_lineless_table_rec.md")
+    with open(readme_path, "r", encoding="utf-8") as f:
+        readme = f.read()
+    return readme
 
-def read_txt(txt_path: str) -> List:
-    if not isinstance(txt_path, str):
-        txt_path = str(txt_path)
 
+def read_txt(txt_path: Union[Path, str]) -> List[str]:
     with open(txt_path, "r", encoding="utf-8") as f:
-        data = list(map(lambda x: x.rstrip("\n"), f))
+        data = [v.rstrip("\n") for v in f]
     return data
 
 
 MODULE_NAME = "lineless_table_rec"
 
-if len(sys.argv) > 2:
-    argv_str = "".join(sys.argv[2:])
-    version = extract_version(argv_str)
-else:
-    version = "2."
+obtainer = GetPyPiLatestVersion()
+try:
+    latest_version = obtainer(MODULE_NAME)
+except Exception:
+    latest_version = "0.0.0"
 
+VERSION_NUM = obtainer.version_add_one(latest_version)
+
+if len(sys.argv) > 2:
+    match_str = " ".join(sys.argv[2:])
+    matched_versions = obtainer.extract_version(match_str)
+    if matched_versions:
+        VERSION_NUM = matched_versions
 sys.argv = sys.argv[:2]
 
 setuptools.setup(
     name=MODULE_NAME,
-    version=version,
+    version=VERSION_NUM,
     platforms="Any",
-    description="无线表格还原库",
+    description="",
     author="SWHL",
     author_email="[email protected]",
     install_requires=read_txt("requirements.txt"),

diff --git a/tests/test_lore.py b/tests/test_lore.py
@@ -30,7 +30,7 @@ def test_input_normal(img_path, table_str_len, td_nums):
     img_path = test_file_dir / img_path
     img = cv2.imread(str(img_path))
 
-    table_str = table_recog(img)
+    table_str, _ = table_recog(img)
 
     assert len(table_str) >= table_str_len
     assert table_str.count("td") == td_nums