Skip to content

Commit

Permalink
Merge branch 'main' into typos
Browse files Browse the repository at this point in the history
  • Loading branch information
kba committed Apr 22, 2024
2 parents 0e677fa + 33b4cd6 commit 35832ac
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 30 deletions.
7 changes: 3 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,18 @@ jobs:
fail-fast: false
matrix:
python-version:
# - '3.7'
- '3.8'
- '3.9'
# - '3.10'
# - '3.11'
- '3.10'
- '3.11'
os:
- ubuntu-22.04
# - macos-latest

steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install and test
Expand Down
19 changes: 18 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,22 @@ Versioned according to [Semantic Versioning](http://semver.org/).

## Unreleased

## [0.6.0] - 2024-04-22

Changed:

* rename parameter `replace_textstyle` -> `overwrite_style`, #8, #9
* rename parameter `network` -> `model`, #8, #9
* parameter `overwrite_text`: If true, replace all existing textequivs, else (default) just add a textequiv, #8, #9
* parameter `overwrite_style`: if true (default), replace the `fontFamily` attribute of an existing textstyle, or create a new style if none exists, #8, #9
* parameter `min_score_style`: Score between 0 and 100, font classification results below this score will not be serialized or used for OCR, default: 0, #8, #9

Fixed:

* Require OCR-D/core v2.64.1+ with proper support for `importlib{.,_}metadata`, #10
* CI: Use most recent actions, #15
* missing top-level `__init__.py`, #12

## [0.5.2] - 2024-02-01

Fixed:
Expand All @@ -22,6 +38,7 @@ Fixed:
- First release in ocrd_all

<!-- link-labels -->
[0.5.1]: ../../compare/v0.5.2...v0.5.1
[0.6.0]: ../../compare/v0.5.2...v0.6.0
[0.5.2]: ../../compare/v0.5.1...v0.5.2
[0.5.1]: ../../compare/v0.5.0...v0.5.1
[0.5.0]: ../../compare/v0.5.0...HEAD
Empty file added ocrd_froc/__init__.py
Empty file.
21 changes: 17 additions & 4 deletions ocrd_froc/ocrd-tool.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"version": "0.5.2",
"version": "0.6.0",
"git_url": "https://github.com/OCR-D/ocrd_froc",
"tools": {
"ocrd-froc-recognize": {
Expand All @@ -21,13 +21,26 @@
"enum": ["none", "SelOCR", "COCR", "adaptive"],
"default": "none"
},
"replace_textstyle": {
"description": "Whether to replace existing textStyle",
"overwrite_style": {
"description": "Whether to overwrite existing TextStyle/@fontFamily attributes",
"type": "boolean",
"required": false,
"default": true
},
"network": {
"min_score_style": {
"description": "The minimum score of a font classification to be serialized/used as input for OCR",
"type": "number",
"format": "float",
"required": false,
"default": 0
},
"overwrite_text": {
"description": "Whether to remove any existing TextEquiv before adding text",
"type": "boolean",
"required": false,
"default": false
},
"model": {
"description": "The file name of the neural network to use, including sufficient path information. Defaults to the model bundled with ocrd_froc.",
"type": "string",
"required": false
Expand Down
46 changes: 26 additions & 20 deletions ocrd_froc/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Wrap FROC as an ocrd.Processor
"""
import os
from typing import List, Tuple

from ocrd import Processor
from ocrd_utils import (
Expand All @@ -21,7 +22,7 @@
from ocrd_modelfactory import page_from_file
from .froc import Froc

OCRD_TOOL = loads(resource_string(__name__, 'ocrd-tool.json'))
OCRD_TOOL = loads(resource_string(__package__, 'ocrd-tool.json'))

class FROCProcessor(Processor):

Expand All @@ -35,29 +36,21 @@ def __init__(self, *args, **kwargs):

def setup(self):

if 'network' not in self.parameter:
self.parameter['network'] = str(resource_filename(f'ocrd_froc.models', 'default.froc'))
if 'model' not in self.parameter:
self.parameter['model'] = str(resource_filename(f'ocrd_froc.models', 'default.froc'))

network_file = self.resolve_resource(self.parameter['network'])
self.froc = Froc.load(network_file)
model = self.resolve_resource(self.parameter['model'])
self.froc = Froc.load(model)

def _process_segment(self, segment, image):
textStyle = segment.get_TextStyle()
if textStyle and self.parameter['replace_textstyle']:
textStyle = None
segment.set_TextStyle(textStyle)
if not textStyle:
textStyle = TextStyleType()
segment.set_TextStyle(textStyle)

ocr_method = self.parameter['ocr_method']

result = {}

if ocr_method != 'COCR':

result = self.froc.classify(image)
classification_result = ''
fonts_detected : List[Tuple[str, float]] = []

font_class_priors = self.parameter['font_class_priors']
output_font = True
Expand Down Expand Up @@ -87,12 +80,21 @@ def _process_segment(self, segment, image):
score = round(100 * score)
if score <= 0:
continue
if classification_result != '':
classification_result += ', '
classification_result += '%s:%d' % (typegroup, score)
fonts_detected.append((typegroup, score))

classification_result = ', '.join([
f'{family}:{score}' \
for family, score in fonts_detected \
if score > self.parameter['min_score_style']
])

if output_font:
textStyle.set_fontFamily(classification_result)
textStyle = segment.get_TextStyle()
if not textStyle or self.parameter['overwrite_style']:
if not textStyle:
textStyle = TextStyleType()
segment.set_TextStyle(textStyle)
textStyle.set_fontFamily(classification_result)


if ocr_method == 'COCR':
Expand All @@ -111,8 +113,12 @@ def _process_segment(self, segment, image):
method=ocr_method,
classification_result=result,
fast_cocr=fast_cocr,
adaptive_threshold=adaptive_threshold)
segment.set_TextEquiv([TextEquivType(Unicode=transcription, conf=score)])
adaptive_treshold=adaptive_threshold)

if self.parameter['overwrite_text']:
segment.set_TextEquiv([TextEquivType(Unicode=transcription, conf=score)])
else:
segment.add_TextEquiv(TextEquivType(Unicode=transcription, conf=score))


def process(self): # type: ignore
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ocrd >= 2.22.3
ocrd >= 2.64.1
pandas
scikit-image
torch >= 1.4.0
Expand Down

0 comments on commit 35832ac

Please sign in to comment.