From 3bde7cb37ff54458f4f13e55ccf08f5e8b43cbda Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Sat, 26 Jun 2021 13:50:00 +0200 Subject: [PATCH 1/2] init from constructor not process(), use conventional name setup() --- ocrd_calamari/recognize.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/ocrd_calamari/recognize.py b/ocrd_calamari/recognize.py index 43b8930..9cb2238 100644 --- a/ocrd_calamari/recognize.py +++ b/ocrd_calamari/recognize.py @@ -39,9 +39,14 @@ def __init__(self, *args, **kwargs): kwargs['ocrd_tool'] = OCRD_TOOL['tools'][TOOL] kwargs['version'] = '%s (calamari %s, tensorflow %s)' % (OCRD_TOOL['version'], calamari_version, tensorflow_version) super(CalamariRecognize, self).__init__(*args, **kwargs) + if hasattr(self, 'output_file_grp'): + # processing context + self.setup() - def _init_calamari(self): - + def setup(self): + """ + Set up the model prior to processing. + """ if not self.parameter.get('checkpoint', None) and self.parameter.get('checkpoint_dir', None): resolved = self.resolve_resource(self.parameter['checkpoint_dir']) self.parameter['checkpoint'] = '%s/*.ckpt.json' % resolved @@ -69,8 +74,6 @@ def process(self): assert_file_grp_cardinality(self.input_file_grp, 1) assert_file_grp_cardinality(self.output_file_grp, 1) - self._init_calamari() - for (n, input_file) in enumerate(self.input_files): page_id = input_file.pageId or input_file.ID log.info("INPUT FILE %i / %s", n, page_id) From 4c6d6655e12bc7f63b8a446eace26be2a459d357 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Sat, 26 Jun 2021 13:50:20 +0200 Subject: [PATCH 2/2] improve process() docstring --- ocrd_calamari/recognize.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/ocrd_calamari/recognize.py b/ocrd_calamari/recognize.py index 9cb2238..3cdd26f 100644 --- a/ocrd_calamari/recognize.py +++ b/ocrd_calamari/recognize.py @@ -67,7 +67,31 @@ def setup(self): def process(self): """ - Performs the 
recognition. + Perform text recognition with Calamari on the workspace. + + For each page of the input file group, open and deserialize input PAGE-XML + and its respective images. Then iterate over the element hierarchy down to + the line level. + + For each textline, retrieve a segment image according to the layout annotation + (from an existing ``AlternativeImage``, or by cropping into the higher-level + images, and deskewing when applicable). + + If the line element contained any previous text results or word segmentation, + delete them. + + Convert the line image to a Numpy array and pass it to the recognizer. Aggregate + character results on the line level, stripping leading and trailing white space, + and selecting the best hypothesis for each position. Annotate the resulting + TextEquiv string and (average) confidence on the line segment. + + If ``textequiv_level`` is ``word`` or ``glyph``, then additionally create word + level segments by splitting at white space characters, using the vertical + line coordinates and horizontal white space boundaries. In the case of ``glyph``, + create glyph level segments as well, adding all alternative character hypotheses + down to ``glyph_conf_cutoff`` confidence threshold. + + Produce a new PAGE output file by serialising the resulting hierarchy. """ log = getLogger('processor.CalamariRecognize')