Merge pull request #82 from kba/spec30

Adapt to spec 1.2.0
OCR-D · Jun 4, 2018 · 60cbb4a · 60cbb4a
2 parents c18f1ae + f37dcfd
commit 60cbb4a
Show file tree

Hide file tree

Showing 8 changed files with 68 additions and 61 deletions.
diff --git a/ocrd/decorators.py b/ocrd/decorators.py
@@ -35,6 +35,7 @@ def cli(mets_url):
         click.option('-g', '--group-id', help="mets:file GROUPID"),
         click.option('-o', '--output-mets', help="METS URL to write resulting METS to"),
         click.option('-p', '--parameter', type=click.Path()),
+        click.option('-J', '--dump-json', help="Dump tool description as JSON and exit", is_flag=True, default=False),
         click.option('-l', '--log-level', help="Log level", type=click.Choice(['OFF', 'ERROR', 'WARN', 'INFO', 'DEBUG', 'TRACE']), default='INFO'),
     ]
     for param in params:

diff --git a/ocrd/model/ocrd_swagger.py b/ocrd/model/ocrd_swagger.py
@@ -22,7 +22,7 @@ def _clone(obj):
 
 class OcrdSwagger(object):
     """
-    Representing a Swagger OAI 2 schema.
+    Representing a Swagger OAI 3 schema.
     """
 
     @staticmethod
@@ -45,7 +45,7 @@ def from_ocrd_tools(swagger_template, *ocrd_tool):
         for ocrd_tool_file in ocrd_tool:
             with codecs.open(ocrd_tool_file, encoding='utf-8') as f:
                 ocrd_json = json.load(f)
-                for tool in ocrd_json['tools']:
+                for tool in ocrd_json['tools'].values():
                     OcrdSwagger._add_paths_for_tool(swagger, tool)
 
         return swagger
@@ -54,7 +54,7 @@ def from_ocrd_tools(swagger_template, *ocrd_tool):
     def _add_paths_for_tool(swagger, tool):
 
         # e.g. /preprocessing/binarization/kraken-binarize
-        p = "/%s/%s" % (tool['step'], tool['executable'].replace('ocrd_', '').replace('ocrd-', ''))
+        p = "/%s" % (tool['executable'].replace('ocrd_', '').replace('ocrd-', ''))
 
         # parameters are optional
         if 'parameterSchema' not in tool:
@@ -69,24 +69,24 @@ def _add_paths_for_tool(swagger, tool):
 
         # POST /ocrd/processor/{{ PROCESSOR_NAME }}
         post = _clone(OCRD_OAS3_SPEC['paths']['/ocrd/processor']['post'])
-        post['tags'] = tool['tags']
+        post['tags'] = tool['categories']
         post['summary'] = tool['summary']
         post['description'] = tool['description']
         post['requestBody']['content']['multipart/form-data']['schema'] = _clone(OCRD_OAS3_SPEC['components']['schemas']['processors'])
         post['requestBody']['content']['multipart/form-data']['schema']['parameters'] = tool['parameterSchema']
 
         get_schema = _clone(GET_SCHEMA)
-        get_schema['tags'] = tool['tags']
+        get_schema['tags'] = tool['categories']
         swagger['paths'][p] = {
             'post': post,
             'get': get_schema
         }
 
         # GET /ocrd/processor/{{ PROCESSOR_NAME }}/{_id}
         get = _clone(OCRD_OAS3_SPEC['paths']['/ocrd/processor/jobid/{jobID}']['get'])
-        get['tags'] = tool['tags']
+        get['tags'] = tool['categories']
         delete = _clone(OCRD_OAS3_SPEC['paths']['/ocrd/processor/jobid/{jobID}']['delete'])
-        delete['tags'] = tool['tags']
+        delete['tags'] = tool['categories']
         swagger['paths']["%s/{_id}" % p] = {
             'delete': delete,
             'get': get

diff --git a/ocrd/model/yaml/ocrd_tool.schema.yml b/ocrd/model/yaml/ocrd_tool.schema.yml
@@ -13,53 +13,57 @@ properties:
     description: DockerHub image
     type: string
   tools:
-    type: array
-    items:
-      type: object
-      additionalProperties: false
-      required:
-        - description
-        - step
-        - executable
-      properties:
-        executable:
-          description: The name of the CLI executable in $PATH
-          type: string
-        parameterSchema:
-          description: JSON Schema for the parameters.json file
-          type: object
-        description:
-          description: Concise description what the tool does
-          type: string
-        step:
-          description: Step in the OCR-D functional model for this tool
-          type: string
-          enum:
-            - preprocessing/characterization
-            - preprocessing/optimization
-            - preprocessing/optimization/cropping
-            - preprocessing/optimization/deskewing
-            - preprocessing/optimization/despeckling
-            - preprocessing/optimization/dewarping
-            - preprocessing/optimization/binarization
-            - preprocessing/optimization/grayscale_normalization
-            - recognition/text-recognition
-            - recognition/font-identification
-            - layout/segmentation
-            - layout/segmentation/region
-            - layout/segmentation/line
-            - layout/segmentation/word
-            - layout/segmentation/classification
-            - layout/analysis
-        tags:
-          description: Tools belong to this category, representing modules within the OCR-D project structure
-          type: array
-          items:
+    type: object
+    additionalProperties: false
+    patternProperties:
+      'ocrd-.*':
+        type: object
+        additionalProperties: false
+        required:
+          - description
+          - steps
+          - executable
+          - categories
+        properties:
+          executable:
+            description: The name of the CLI executable in $PATH
             type: string
-            enum:
-              - Image preprocessing
-              - Layout analysis
-              - Text recognition and optimization
-              - Model training
-              - Long-term preservation
-              - Quality assurance
+          parameters:
+            description: Object describing the parameters of a tool. Keys are parameter names, values sub-schemas.
+            type: object
+          description:
+            description: Concise description of what the tool does
+          categories:
+            description: Tools belong to these categories, representing modules within the OCR-D project structure
+            type: array
+            items:
+              type: string
+              enum:
+                - Image preprocessing
+                - Layout analysis
+                - Text recognition and optimization
+                - Model training
+                - Long-term preservation
+                - Quality assurance
+          steps:
+            description: This tool can be used at these steps in the OCR-D functional model
+            type: array
+            items:
+              type: string
+              enum:
+                - preprocessing/characterization
+                - preprocessing/optimization
+                - preprocessing/optimization/cropping
+                - preprocessing/optimization/deskewing
+                - preprocessing/optimization/despeckling
+                - preprocessing/optimization/dewarping
+                - preprocessing/optimization/binarization
+                - preprocessing/optimization/grayscale_normalization
+                - recognition/text-recognition
+                - recognition/font-identification
+                - layout/segmentation
+                - layout/segmentation/region
+                - layout/segmentation/line
+                - layout/segmentation/word
+                - layout/segmentation/classification
+                - layout/analysis
diff --git a/ocrd/processor/base.py b/ocrd/processor/base.py
@@ -39,6 +39,8 @@ def run_processor(
         fname = workspace.download_url(parameter)
         with open(fname, 'r') as param_json_file:
             parameter = json.load(param_json_file)
+    else:
+        parameter = {}
     log.debug("Running processor %s", processorClass)
     processor = processorClass(workspace, ocrd_tool=ocrd_tool, input_file_grp=input_file_grp, output_file_grp=output_file_grp, parameter=parameter)
     log.debug("Processor instance %s", processor)

diff --git a/ocrd/resolver.py b/ocrd/resolver.py
@@ -248,7 +248,7 @@ def add_files_to_mets(self, convention, mets, directory):
                     fileGrp + '_' + upper(Basename of file without extension) == mets:file @ID
                     File in root folder == mets:fileGrp @USE == 'OCR-D-IMG'
                     Extension ==> mets.file @MIMETYPE
-                        .tif => image/tif
+                        .tif => image/tiff
                         .png => image/png
                         .jpg => image/jpg
                         .xml => image/xml

diff --git a/ocrd/validator.py b/ocrd/validator.py
@@ -157,7 +157,7 @@ def _validate_mets_unique_identifier(self):
             self.report.add_error("METS has no unique identifier")
 
     def _validate_pixel_density(self):
-        for f in self.mets.find_files(mimetype='image/tif'):
+        for f in self.mets.find_files(mimetype='image/tiff'):
             exif = self.workspace.resolve_image_exif(f.url)
             for k in ['xResolution', 'yResolution']:
                 v = exif.__dict__.get(k)

diff --git a/test/model/test_ocrd_mets.py b/test/model/test_ocrd_mets.py
@@ -17,7 +17,7 @@ def test_file_groups(self):
     def test_find_files(self):
         self.assertEqual(len(self.mets.find_files(fileGrp='OCR-D-IMG')), 2, '2 files in "OCR-D-IMG"')
         self.assertEqual(len(self.mets.find_files(groupId='FILE_0001_IMAGE')), 17, '17 files with GROUPID "FILE_0001_IMAGE"')
-        self.assertEqual(len(self.mets.find_files(mimetype='image/tif')), 12, '12 image/tif')
+        self.assertEqual(len(self.mets.find_files(mimetype='image/tiff')), 12, '12 image/tiff')
         self.assertEqual(len(self.mets.find_files(mimetype=MIMETYPE_PAGE)), 20, '20 ' + MIMETYPE_PAGE)
         self.assertEqual(len(self.mets.find_files()), 34, '34 files total')
 

diff --git a/test/model/test_ocrd_page.py b/test/model/test_ocrd_page.py
@@ -23,10 +23,10 @@ def setUp(self):
     def test_from_file(self):
         f = ocrd_file.OcrdFile(
             None,
-            mimetype='image/tif',
+            mimetype='image/tiff',
             local_filename=assets.path_to('kant_aufklaerung_1784/kant_aufklaerung_1784_0017.tif')
         )
-        self.assertEqual(f.mimetype, 'image/tif')
+        self.assertEqual(f.mimetype, 'image/tiff')
         p = ocrd_page.from_file(f)
         self.assertEqual(p.get_Page().imageWidth, 1457)