Skip to content

Commit

Permalink
v0.2.0: Use 2018 PAGE NS
Browse files Browse the repository at this point in the history
  • Loading branch information
kba committed May 8, 2018
1 parent 12878e9 commit 1a21dcf
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 23 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ Changed
Fixed
Removed

## [0.2.0] - 2018-05-08

Changed:
* Use 2018 PAGE namespace http://schema.primaresearch.org/PAGE/gts/pagecontent/2018-07-15, PRImA-Research-Lab/PAGE-XML#4

## [0.1.0] - 2018-04-26

Changed:
Expand Down Expand Up @@ -47,6 +52,7 @@ Fixed
Initial Release

<!-- link-labels -->
[0.2.0]: ../../compare/v0.1.0...v0.2.0
[0.1.0]: ../../compare/v0.0.7...v0.1.0
[0.0.7]: ../../compare/v0.0.6...v0.0.7
[0.0.6]: ../../compare/v0.0.5...v0.0.6
Expand Down
2 changes: 1 addition & 1 deletion ocrd/constants.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import yaml
from pkg_resources import resource_string

VERSION = '0.1.0'
VERSION = '0.2.0'

TMP_PREFIX = 'pyocrd-'

Expand Down
4 changes: 2 additions & 2 deletions ocrd/model/ocrd_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
TextLineType,
WordType,
)
from ocrd.constants import NAMESPACES, VERSION
from ocrd.constants import NAMESPACES, VERSION, MIMETYPE_PAGE
from ocrd.model.ocrd_exif import OcrdExif

def to_xml(el):
Expand Down Expand Up @@ -54,7 +54,7 @@ def from_file(input_file):
# print("PARSING PARSING '%s'" % input_file)
if input_file.mimetype.startswith('image'):
return page_from_image(input_file)
elif input_file.mimetype == 'text/page+xml':
elif input_file.mimetype == MIMETYPE_PAGE:
return parse(input_file.local_filename, silence=True)
else:
raise Exception("Unsupported mimetype '%s'" % input_file.mimetype)
87 changes: 69 additions & 18 deletions ocrd/model/ocrd_page_generateds.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-

#
# Generated Fri Apr 27 17:03:45 2018 by generateDS.py version 2.29.11.
# Generated Tue May 8 12:14:28 2018 by generateDS.py version 2.29.11.
# Python 3.6.3 (default, Oct 3 2017, 21:45:48) [GCC 7.2.0]
#
# Command line options:
Expand Down Expand Up @@ -1309,16 +1309,17 @@ class PageType(GeneratedsSuper):
definitions override the page-level definition) Inner-block
order of text lines (in addition to “readingDirection” which is
the inner-text line order of words and characters) (lower-level
definitions override the page-level definition)"""
definitions override the page-level definition) Confidence value
for whole page (between 0 and 1)"""
subclass = None
superclass = None
def __init__(self, imageFilename=None, imageWidth=None, imageHeight=None, imageXResolution=None, imageYResolution=None, imageResolutionUnit=None, custom=None, type_=None, primaryLanguage=None, secondaryLanguage=None, primaryScript=None, secondaryScript=None, readingDirection=None, textLineOrder=None, AlternativeImage=None, Border=None, PrintSpace=None, ReadingOrder=None, Layers=None, Relations=None, UserDefined=None, Labels=None, TextRegion=None, ImageRegion=None, LineDrawingRegion=None, GraphicRegion=None, TableRegion=None, ChartRegion=None, MapRegion=None, SeparatorRegion=None, MathsRegion=None, ChemRegion=None, MusicRegion=None, AdvertRegion=None, NoiseRegion=None, UnknownRegion=None):
def __init__(self, imageFilename=None, imageWidth=None, imageHeight=None, imageXResolution=None, imageYResolution=None, imageResolutionUnit=None, custom=None, type_=None, primaryLanguage=None, secondaryLanguage=None, primaryScript=None, secondaryScript=None, readingDirection=None, textLineOrder=None, conf=None, AlternativeImage=None, Border=None, PrintSpace=None, ReadingOrder=None, Layers=None, Relations=None, UserDefined=None, Labels=None, TextRegion=None, ImageRegion=None, LineDrawingRegion=None, GraphicRegion=None, TableRegion=None, ChartRegion=None, MapRegion=None, SeparatorRegion=None, MathsRegion=None, ChemRegion=None, MusicRegion=None, AdvertRegion=None, NoiseRegion=None, UnknownRegion=None):
self.original_tagname_ = None
self.imageFilename = _cast(None, imageFilename)
self.imageWidth = _cast(int, imageWidth)
self.imageHeight = _cast(int, imageHeight)
self.imageXResolution = _cast(int, imageXResolution)
self.imageYResolution = _cast(int, imageYResolution)
self.imageXResolution = _cast(float, imageXResolution)
self.imageYResolution = _cast(float, imageYResolution)
self.imageResolutionUnit = _cast(None, imageResolutionUnit)
self.custom = _cast(None, custom)
self.type_ = _cast(None, type_)
Expand All @@ -1328,6 +1329,7 @@ def __init__(self, imageFilename=None, imageWidth=None, imageHeight=None, imageX
self.secondaryScript = _cast(None, secondaryScript)
self.readingDirection = _cast(None, readingDirection)
self.textLineOrder = _cast(None, textLineOrder)
self.conf = _cast(float, conf)
if AlternativeImage is None:
self.AlternativeImage = []
else:
Expand Down Expand Up @@ -1529,6 +1531,8 @@ def get_readingDirection(self): return self.readingDirection
def set_readingDirection(self, readingDirection): self.readingDirection = readingDirection
def get_textLineOrder(self): return self.textLineOrder
def set_textLineOrder(self, textLineOrder): self.textLineOrder = textLineOrder
def get_conf(self): return self.conf
def set_conf(self, conf): self.conf = conf
def hasContent_(self):
if (
self.AlternativeImage or
Expand Down Expand Up @@ -1590,10 +1594,10 @@ def exportAttributes(self, outfile, level, already_processed, namespace_='pc:',
outfile.write(' imageHeight="%s"' % self.gds_format_integer(self.imageHeight, input_name='imageHeight'))
if self.imageXResolution is not None and 'imageXResolution' not in already_processed:
already_processed.add('imageXResolution')
outfile.write(' imageXResolution="%s"' % self.gds_format_integer(self.imageXResolution, input_name='imageXResolution'))
outfile.write(' imageXResolution="%s"' % self.gds_format_float(self.imageXResolution, input_name='imageXResolution'))
if self.imageYResolution is not None and 'imageYResolution' not in already_processed:
already_processed.add('imageYResolution')
outfile.write(' imageYResolution="%s"' % self.gds_format_integer(self.imageYResolution, input_name='imageYResolution'))
outfile.write(' imageYResolution="%s"' % self.gds_format_float(self.imageYResolution, input_name='imageYResolution'))
if self.imageResolutionUnit is not None and 'imageResolutionUnit' not in already_processed:
already_processed.add('imageResolutionUnit')
outfile.write(' imageResolutionUnit=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.imageResolutionUnit), input_name='imageResolutionUnit')), ))
Expand Down Expand Up @@ -1621,6 +1625,9 @@ def exportAttributes(self, outfile, level, already_processed, namespace_='pc:',
if self.textLineOrder is not None and 'textLineOrder' not in already_processed:
already_processed.add('textLineOrder')
outfile.write(' textLineOrder=%s' % (quote_attrib(self.textLineOrder), ))
if self.conf is not None and 'conf' not in already_processed:
already_processed.add('conf')
outfile.write(' conf="%s"' % self.gds_format_float(self.conf, input_name='conf'))
def exportChildren(self, outfile, level, namespace_='pc:', name_='PageType', fromsubclass_=False, pretty_print=True):
if pretty_print:
eol_ = '\n'
Expand Down Expand Up @@ -1700,16 +1707,16 @@ def buildAttributes(self, node, attrs, already_processed):
if value is not None and 'imageXResolution' not in already_processed:
already_processed.add('imageXResolution')
try:
self.imageXResolution = int(value)
self.imageXResolution = float(value)
except ValueError as exp:
raise_parse_error(node, 'Bad integer attribute: %s' % exp)
raise ValueError('Bad float/double attribute (imageXResolution): %s' % exp)
value = find_attr_value_('imageYResolution', node)
if value is not None and 'imageYResolution' not in already_processed:
already_processed.add('imageYResolution')
try:
self.imageYResolution = int(value)
self.imageYResolution = float(value)
except ValueError as exp:
raise_parse_error(node, 'Bad integer attribute: %s' % exp)
raise ValueError('Bad float/double attribute (imageYResolution): %s' % exp)
value = find_attr_value_('imageResolutionUnit', node)
if value is not None and 'imageResolutionUnit' not in already_processed:
already_processed.add('imageResolutionUnit')
Expand Down Expand Up @@ -1746,6 +1753,13 @@ def buildAttributes(self, node, attrs, already_processed):
if value is not None and 'textLineOrder' not in already_processed:
already_processed.add('textLineOrder')
self.textLineOrder = value
value = find_attr_value_('conf', node)
if value is not None and 'conf' not in already_processed:
already_processed.add('conf')
try:
self.conf = float(value)
except ValueError as exp:
raise ValueError('Bad float/double attribute (conf): %s' % exp)
def buildChildren(self, child_, node, nodeName_, fromsubclass_=False):
if nodeName_ == 'AlternativeImage':
obj_ = AlternativeImageType.factory()
Expand Down Expand Up @@ -2929,11 +2943,12 @@ def buildChildren(self, child_, node, nodeName_, fromsubclass_=False):


class GridPointsType(GeneratedsSuper):
"""Points with x,y coordinates."""
"""Points with x,y coordinates. The grid row index"""
subclass = None
superclass = None
def __init__(self, points=None):
def __init__(self, index=None, points=None):
self.original_tagname_ = None
self.index = _cast(int, index)
self.points = _cast(None, points)
def factory(*args_, **kwargs_):
if CurrentSubclassModule_ is not None:
Expand All @@ -2946,6 +2961,8 @@ def factory(*args_, **kwargs_):
else:
return GridPointsType(*args_, **kwargs_)
factory = staticmethod(factory)
def get_index(self): return self.index
def set_index(self, index): self.index = index
def get_points(self): return self.points
def set_points(self, points): self.points = points
def hasContent_(self):
Expand Down Expand Up @@ -2976,6 +2993,9 @@ def export(self, outfile, level, namespace_='pc:', name_='GridPointsType', names
else:
outfile.write('/>%s' % (eol_, ))
def exportAttributes(self, outfile, level, already_processed, namespace_='pc:', name_='GridPointsType'):
if self.index is not None and 'index' not in already_processed:
already_processed.add('index')
outfile.write(' index="%s"' % self.gds_format_integer(self.index, input_name='index'))
if self.points is not None and 'points' not in already_processed:
already_processed.add('points')
outfile.write(' points=%s' % (quote_attrib(self.points), ))
Expand All @@ -2989,6 +3009,13 @@ def build(self, node):
self.buildChildren(child, node, nodeName_)
return self
def buildAttributes(self, node, attrs, already_processed):
value = find_attr_value_('index', node)
if value is not None and 'index' not in already_processed:
already_processed.add('index')
try:
self.index = int(value)
except ValueError as exp:
raise_parse_error(node, 'Bad integer attribute: %s' % exp)
value = find_attr_value_('points', node)
if value is not None and 'points' not in already_processed:
already_processed.add('points')
Expand Down Expand Up @@ -4510,11 +4537,13 @@ def buildChildren(self, child_, node, nodeName_, fromsubclass_=False):


class BaselineType(GeneratedsSuper):
"""Confidence value (between 0 and 1)"""
subclass = None
superclass = None
def __init__(self, points=None):
def __init__(self, points=None, conf=None):
self.original_tagname_ = None
self.points = _cast(None, points)
self.conf = _cast(float, conf)
def factory(*args_, **kwargs_):
if CurrentSubclassModule_ is not None:
subclass = getSubclassFromModule_(
Expand All @@ -4528,6 +4557,8 @@ def factory(*args_, **kwargs_):
factory = staticmethod(factory)
def get_points(self): return self.points
def set_points(self, points): self.points = points
def get_conf(self): return self.conf
def set_conf(self, conf): self.conf = conf
def hasContent_(self):
if (

Expand Down Expand Up @@ -4559,6 +4590,9 @@ def exportAttributes(self, outfile, level, already_processed, namespace_='pc:',
if self.points is not None and 'points' not in already_processed:
already_processed.add('points')
outfile.write(' points=%s' % (quote_attrib(self.points), ))
if self.conf is not None and 'conf' not in already_processed:
already_processed.add('conf')
outfile.write(' conf="%s"' % self.gds_format_float(self.conf, input_name='conf'))
def exportChildren(self, outfile, level, namespace_='pc:', name_='BaselineType', fromsubclass_=False, pretty_print=True):
pass
def build(self, node):
Expand All @@ -4573,6 +4607,13 @@ def buildAttributes(self, node, attrs, already_processed):
if value is not None and 'points' not in already_processed:
already_processed.add('points')
self.points = value
value = find_attr_value_('conf', node)
if value is not None and 'conf' not in already_processed:
already_processed.add('conf')
try:
self.conf = float(value)
except ValueError as exp:
raise ValueError('Bad float/double attribute (conf): %s' % exp)
def buildChildren(self, child_, node, nodeName_, fromsubclass_=False):
pass
# end class BaselineType
Expand Down Expand Up @@ -4664,7 +4705,7 @@ class RelationType(GeneratedsSuper):
"""One-to-one relation between to layout object. Use 'link' for loose
relations and 'join' for strong relations (where something is
fragmented for instance). Examples for 'link': caption - image
floating - paragraph paragraph - paragraph (when a pragraph is
floating - paragraph paragraph - paragraph (when a paragraph is
split across columns and the last word of the first paragraph
DOES NOT continue in the second paragraph) drop-cap - paragraph
(when the drop-cap is a whole word) Examples for 'join': word -
Expand All @@ -4675,8 +4716,9 @@ class RelationType(GeneratedsSuper):
generic use"""
subclass = None
superclass = None
def __init__(self, type_=None, custom=None, comments=None, Labels=None, SourceRegionRef=None, TargetRegionRef=None):
def __init__(self, id=None, type_=None, custom=None, comments=None, Labels=None, SourceRegionRef=None, TargetRegionRef=None):
self.original_tagname_ = None
self.id = _cast(None, id)
self.type_ = _cast(None, type_)
self.custom = _cast(None, custom)
self.comments = _cast(None, comments)
Expand Down Expand Up @@ -4706,6 +4748,8 @@ def get_SourceRegionRef(self): return self.SourceRegionRef
def set_SourceRegionRef(self, SourceRegionRef): self.SourceRegionRef = SourceRegionRef
def get_TargetRegionRef(self): return self.TargetRegionRef
def set_TargetRegionRef(self, TargetRegionRef): self.TargetRegionRef = TargetRegionRef
def get_id(self): return self.id
def set_id(self, id): self.id = id
def get_type(self): return self.type_
def set_type(self, type_): self.type_ = type_
def get_custom(self): return self.custom
Expand Down Expand Up @@ -4743,6 +4787,9 @@ def export(self, outfile, level, namespace_='pc:', name_='RelationType', namespa
else:
outfile.write('/>%s' % (eol_, ))
def exportAttributes(self, outfile, level, already_processed, namespace_='pc:', name_='RelationType'):
if self.id is not None and 'id' not in already_processed:
already_processed.add('id')
outfile.write(' id=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.id), input_name='id')), ))
if self.type_ is not None and 'type_' not in already_processed:
already_processed.add('type_')
outfile.write(' type=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.type_), input_name='type')), ))
Expand Down Expand Up @@ -4771,6 +4818,10 @@ def build(self, node):
self.buildChildren(child, node, nodeName_)
return self
def buildAttributes(self, node, attrs, already_processed):
value = find_attr_value_('id', node)
if value is not None and 'id' not in already_processed:
already_processed.add('id')
self.id = value
value = find_attr_value_('type', node)
if value is not None and 'type' not in already_processed:
already_processed.add('type')
Expand Down Expand Up @@ -8189,7 +8240,7 @@ def parse(inFileName, silence=False):
sys.stdout.write('<?xml version="1.0" ?>\n')
rootObj.export(
sys.stdout, 0, name_=rootTag,
namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2017-07-15"',
namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2018-07-15"',
pretty_print=True)
return rootObj

Expand Down Expand Up @@ -8240,7 +8291,7 @@ def parseString(inString, silence=False):
sys.stdout.write('<?xml version="1.0" ?>\n')
rootObj.export(
sys.stdout, 0, name_=rootTag,
namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2017-07-15"')
namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2018-07-15"')
return rootObj


Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

setup(
name='ocrd',
version='0.1.0',
version='0.2.0',
description='OCR-D framework',
long_description=README,
author='Kay-Michael Würzner, Konstantin Baierer',
Expand Down
3 changes: 2 additions & 1 deletion test/model/test_ocrd_mets.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from test.base import TestCase, main, assets

from ocrd.constants import MIMETYPE_PAGE
from ocrd.model import OcrdMets

class TestOcrdMets(TestCase):
Expand All @@ -17,7 +18,7 @@ def test_find_files(self):
self.assertEqual(len(self.mets.find_files(fileGrp='OCR-D-IMG')), 2, '2 files in "OCR-D-IMG"')
self.assertEqual(len(self.mets.find_files(groupId='FILE_0001_IMAGE')), 17, '17 files with GROUPID "FILE_0001_IMAGE"')
self.assertEqual(len(self.mets.find_files(mimetype='image/tif')), 12, '12 image/tif')
self.assertEqual(len(self.mets.find_files(mimetype='text/xml')), 22, '22 text/xml')
self.assertEqual(len(self.mets.find_files(mimetype=MIMETYPE_PAGE)), 20, '20 ' + MIMETYPE_PAGE)
self.assertEqual(len(self.mets.find_files()), 34, '34 files total')

def test_add_group(self):
Expand Down

0 comments on commit 1a21dcf

Please sign in to comment.