Skip to content

Commit

Permalink
Merge pull request #82 from kba/spec30
Browse files (browse the repository at this point in the history)
Adapt to spec 1.2.0
  • Loading branch information
kba authored Jun 4, 2018
2 parents c18f1ae + f37dcfd commit 60cbb4a
Show file tree
Hide file tree
Showing 8 changed files with 68 additions and 61 deletions.
1 change: 1 addition & 0 deletions ocrd/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def cli(mets_url):
click.option('-g', '--group-id', help="mets:file GROUPID"),
click.option('-o', '--output-mets', help="METS URL to write resulting METS to"),
click.option('-p', '--parameter', type=click.Path()),
click.option('-J', '--dump-json', help="Dump tool description as JSON and exit", is_flag=True, default=False),
click.option('-l', '--log-level', help="Log level", type=click.Choice(['OFF', 'ERROR', 'WARN', 'INFO', 'DEBUG', 'TRACE']), default='INFO'),
]
for param in params:
Expand Down
14 changes: 7 additions & 7 deletions ocrd/model/ocrd_swagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def _clone(obj):

class OcrdSwagger(object):
"""
Representing a Swagger OAI 2 schema.
Representing a Swagger OAI 3 schema.
"""

@staticmethod
Expand All @@ -45,7 +45,7 @@ def from_ocrd_tools(swagger_template, *ocrd_tool):
for ocrd_tool_file in ocrd_tool:
with codecs.open(ocrd_tool_file, encoding='utf-8') as f:
ocrd_json = json.load(f)
for tool in ocrd_json['tools']:
for tool in ocrd_json['tools'].values():
OcrdSwagger._add_paths_for_tool(swagger, tool)

return swagger
Expand All @@ -54,7 +54,7 @@ def from_ocrd_tools(swagger_template, *ocrd_tool):
def _add_paths_for_tool(swagger, tool):

# e.g. /preprocessing/binarization/kraken-binarize
p = "/%s/%s" % (tool['step'], tool['executable'].replace('ocrd_', '').replace('ocrd-', ''))
p = "/%s" % (tool['executable'].replace('ocrd_', '').replace('ocrd-', ''))

# parameters are optional
if 'parameterSchema' not in tool:
Expand All @@ -69,24 +69,24 @@ def _add_paths_for_tool(swagger, tool):

# POST /ocrd/processor/{{ PROCESSOR_NAME }}
post = _clone(OCRD_OAS3_SPEC['paths']['/ocrd/processor']['post'])
post['tags'] = tool['tags']
post['tags'] = tool['categories']
post['summary'] = tool['summary']
post['description'] = tool['description']
post['requestBody']['content']['multipart/form-data']['schema'] = _clone(OCRD_OAS3_SPEC['components']['schemas']['processors'])
post['requestBody']['content']['multipart/form-data']['schema']['parameters'] = tool['parameterSchema']

get_schema = _clone(GET_SCHEMA)
get_schema['tags'] = tool['tags']
get_schema['tags'] = tool['categories']
swagger['paths'][p] = {
'post': post,
'get': get_schema
}

# GET /ocrd/processor/{{ PROCESSOR_NAME }}/{_id}
get = _clone(OCRD_OAS3_SPEC['paths']['/ocrd/processor/jobid/{jobID}']['get'])
get['tags'] = tool['tags']
get['tags'] = tool['categories']
delete = _clone(OCRD_OAS3_SPEC['paths']['/ocrd/processor/jobid/{jobID}']['delete'])
delete['tags'] = tool['tags']
delete['tags'] = tool['categories']
swagger['paths']["%s/{_id}" % p] = {
'delete': delete,
'get': get
Expand Down
102 changes: 53 additions & 49 deletions ocrd/model/yaml/ocrd_tool.schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,53 +13,57 @@ properties:
description: DockerHub image
type: string
tools:
type: array
items:
type: object
additionalProperties: false
required:
- description
- step
- executable
properties:
executable:
description: The name of the CLI executable in $PATH
type: string
parameterSchema:
description: JSON Schema for the parameters.json file
type: object
description:
description: Concise description what the tool does
type: string
step:
description: Step in the OCR-D functional model for this tool
type: string
enum:
- preprocessing/characterization
- preprocessing/optimization
- preprocessing/optimization/cropping
- preprocessing/optimization/deskewing
- preprocessing/optimization/despeckling
- preprocessing/optimization/dewarping
- preprocessing/optimization/binarization
- preprocessing/optimization/grayscale_normalization
- recognition/text-recognition
- recognition/font-identification
- layout/segmentation
- layout/segmentation/region
- layout/segmentation/line
- layout/segmentation/word
- layout/segmentation/classification
- layout/analysis
tags:
description: Tools belong to this category, representing modules within the OCR-D project structure
type: array
items:
type: object
additionalProperties: false
patternProperties:
'ocrd-.*':
type: object
additionalProperties: false
required:
- description
- steps
- executable
- categories
properties:
executable:
description: The name of the CLI executable in $PATH
type: string
enum:
- Image preprocessing
- Layout analysis
- Text recognition and optimization
- Model training
- Long-term preservation
- Quality assurance
parameters:
description: Object describing the parameters of a tool. Keys are parameter names, values sub-schemas.
type: object
description:
description: Concise description of what the tool does
categories:
description: Tools belong to these categories, representing modules within the OCR-D project structure
type: array
items:
type: string
enum:
- Image preprocessing
- Layout analysis
- Text recognition and optimization
- Model training
- Long-term preservation
- Quality assurance
steps:
description: This tool can be used at these steps in the OCR-D functional model
type: array
items:
type: string
enum:
- preprocessing/characterization
- preprocessing/optimization
- preprocessing/optimization/cropping
- preprocessing/optimization/deskewing
- preprocessing/optimization/despeckling
- preprocessing/optimization/dewarping
- preprocessing/optimization/binarization
- preprocessing/optimization/grayscale_normalization
- recognition/text-recognition
- recognition/font-identification
- layout/segmentation
- layout/segmentation/region
- layout/segmentation/line
- layout/segmentation/word
- layout/segmentation/classification
- layout/analysis
2 changes: 2 additions & 0 deletions ocrd/processor/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ def run_processor(
fname = workspace.download_url(parameter)
with open(fname, 'r') as param_json_file:
parameter = json.load(param_json_file)
else:
parameter = {}
log.debug("Running processor %s", processorClass)
processor = processorClass(workspace, ocrd_tool=ocrd_tool, input_file_grp=input_file_grp, output_file_grp=output_file_grp, parameter=parameter)
log.debug("Processor instance %s", processor)
Expand Down
2 changes: 1 addition & 1 deletion ocrd/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ def add_files_to_mets(self, convention, mets, directory):
fileGrp + '_' + upper(Basename of file without extension) == mets:file @ID
File in root folder == mets:fileGrp @USE == 'OCR-D-IMG'
Extension ==> mets.file @MIMETYPE
.tif => image/tif
.tif => image/tiff
.png => image/png
.jpg => image/jpg
.xml => image/xml
Expand Down
2 changes: 1 addition & 1 deletion ocrd/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ def _validate_mets_unique_identifier(self):
self.report.add_error("METS has no unique identifier")

def _validate_pixel_density(self):
for f in self.mets.find_files(mimetype='image/tif'):
for f in self.mets.find_files(mimetype='image/tiff'):
exif = self.workspace.resolve_image_exif(f.url)
for k in ['xResolution', 'yResolution']:
v = exif.__dict__.get(k)
Expand Down
2 changes: 1 addition & 1 deletion test/model/test_ocrd_mets.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_file_groups(self):
def test_find_files(self):
self.assertEqual(len(self.mets.find_files(fileGrp='OCR-D-IMG')), 2, '2 files in "OCR-D-IMG"')
self.assertEqual(len(self.mets.find_files(groupId='FILE_0001_IMAGE')), 17, '17 files with GROUPID "FILE_0001_IMAGE"')
self.assertEqual(len(self.mets.find_files(mimetype='image/tif')), 12, '12 image/tif')
self.assertEqual(len(self.mets.find_files(mimetype='image/tiff')), 12, '12 image/tiff')
self.assertEqual(len(self.mets.find_files(mimetype=MIMETYPE_PAGE)), 20, '20 ' + MIMETYPE_PAGE)
self.assertEqual(len(self.mets.find_files()), 34, '34 files total')

Expand Down
4 changes: 2 additions & 2 deletions test/model/test_ocrd_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ def setUp(self):
def test_from_file(self):
f = ocrd_file.OcrdFile(
None,
mimetype='image/tif',
mimetype='image/tiff',
local_filename=assets.path_to('kant_aufklaerung_1784/kant_aufklaerung_1784_0017.tif')
)
self.assertEqual(f.mimetype, 'image/tif')
self.assertEqual(f.mimetype, 'image/tiff')
p = ocrd_page.from_file(f)
self.assertEqual(p.get_Page().imageWidth, 1457)

Expand Down

0 comments on commit 60cbb4a

Please sign in to comment.