From 969c9253b71caba9e87154b127376e3d49b9142b Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Thu, 26 Jul 2018 12:54:10 +0200 Subject: [PATCH 1/5] fix exif resolution, test tif,png,jpg,jp2 --- ocrd/model/ocrd_exif.py | 13 +++++++++++-- test/model/test_exif.py | 32 +++++++++++++++++++++++++++----- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/ocrd/model/ocrd_exif.py b/ocrd/model/ocrd_exif.py index 570308a6f..f223c5fda 100644 --- a/ocrd/model/ocrd_exif.py +++ b/ocrd/model/ocrd_exif.py @@ -77,14 +77,23 @@ def from_filename(image_filename): raise Exception("Must pass 'image_filename' to OcrdExif.from_filename") with exiftool.ExifTool() as et: exif_props = et.get_metadata(image_filename) + # import json + # print(json.dumps(exif_props, indent=2)) return OcrdExif(exif_props) def __init__(self, props): - for selfattr in ['width', 'height', 'xResolution', 'yResolution']: - for prefix in ['EXIF', 'File', 'PNG', 'JFIF']: + for selfattr in ['width', 'height']: + for prefix in ['EXIF', 'File', 'PNG', 'JFIF', 'Jpeg2000']: prop = "%s:Image%s" % (prefix, selfattr[0].upper() + selfattr[1:]) if prop in props: setattr(self, selfattr, props[prop]) + break + for selfattr in ['XResolution', 'YResolution']: + for prefix in ['EXIF', 'File', 'PNG', 'JFIF', 'Jpeg2000']: + prop = "%s:%s" % (prefix, selfattr) + if prop in props: + setattr(self, selfattr[0].lower() + selfattr[1:], props[prop]) + break for requiredattr in ['width', 'height']: if getattr(self, requiredattr) is None: raise Exception("Failed to determine image %s" % requiredattr) diff --git a/test/model/test_exif.py b/test/model/test_exif.py index aef93c082..86774f2f7 100644 --- a/test/model/test_exif.py +++ b/test/model/test_exif.py @@ -1,14 +1,36 @@ -from ocrd.model import OcrdExif - from test.base import TestCase, main, assets -TEST_IMG = assets.path_to('SBB0000F29300010000/00000001.tif') +from ocrd.model import OcrdExif +# pylint: disable=no-member class TestOcrdExif(TestCase): - def runTest(self): - exif = OcrdExif.from_filename(TEST_IMG) + def test_tiff(self): + exif = OcrdExif.from_filename(assets.path_to('SBB0000F29300010000/00000001.tif')) self.assertEqual(exif.width, 2875) self.assertEqual(exif.height, 3749) + self.assertEqual(exif.xResolution, 300) + self.assertEqual(exif.yResolution, 300) + + def test_png(self): + exif = OcrdExif.from_filename(assets.path_to('kant_aufklaerung_1784-binarized/kant_aufklaerung_1784_0020.bin.png')) + self.assertEqual(exif.width, 1457) + self.assertEqual(exif.height, 2084) + self.assertEqual(exif.xResolution, 0) + self.assertEqual(exif.yResolution, 0) + + def test_jpg(self): + exif = OcrdExif.from_filename(assets.path_to('leptonica_samples/1555.007.jpg')) + self.assertEqual(exif.width, 944) + self.assertEqual(exif.height, 1472) + self.assertEqual(exif.xResolution, 1) + self.assertEqual(exif.yResolution, 1) + + def test_jp2(self): + exif = OcrdExif.from_filename(assets.path_to('kant_aufklaerung_1784-jp2/kant_aufklaerung_1784_0020.jp2')) + self.assertEqual(exif.width, 1457) + self.assertEqual(exif.height, 2084) + # self.assertEqual(exif.xResolution, 1) + # self.assertEqual(exif.yResolution, 1) if __name__ == '__main__': main() From 015832233000e89619bc3f0edc9dd9e2196aba3f Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Thu, 26 Jul 2018 16:07:44 +0200 Subject: [PATCH 2/5] replace exiftool with native python, fix #71 --- ocrd/model/ocrd_exif.py | 124 +++++++++------------------------------- ocrd/utils.py | 1 + test/model/test_exif.py | 26 +++++++-- 3 files changed, 50 insertions(+), 101 deletions(-) diff --git a/ocrd/model/ocrd_exif.py b/ocrd/model/ocrd_exif.py index f223c5fda..e5ca20eb4 100644 --- a/ocrd/model/ocrd_exif.py +++ b/ocrd/model/ocrd_exif.py @@ -1,70 +1,4 @@ -import exiftool - -EXIF_COMPRESSION_METHODS = { - 1: "Uncompressed", - 2: "CCITT 1D", - 3: "T4/Group 3 Fax", - 4: "T6/Group 4 Fax", - 5: "LZW", - 6: "JPEG (old-style)", - 7: "JPEG", - 8: "Adobe Deflate", - 9: "JBIG B&W", - 10: "JBIG Color", - 99: "JPEG", - 262: "Kodak 262", - 32766: "Next", - 32767: "Sony ARW Compressed", - 32769: "Packed RAW", - 32770: "Samsung SRW Compressed", - 32771: "CCIRLEW", - 32772: "Samsung SRW Compressed 2", - 32773: "PackBits", - 32809: "Thunderscan", - 32867: "Kodak KDC Compressed", - 32895: "IT8CTPAD", - 32896: "IT8LW", - 32897: "IT8MP", - 32898: "IT8BL", - 32908: "PixarFilm", - 32909: "PixarLog", - 32946: "Deflate", - 32947: "DCS", - 34661: "JBIG", - 34676: "SGILog", - 34677: "SGILog24", - 34712: "JPEG 2000", - 34713: "Nikon NEF Compressed", - 34715: "JBIG2 TIFF FX", - 34718: "Microsoft Document Imaging (MDI) Binary Level Codec", - 34719: "Microsoft Document Imaging (MDI) Progressive Transform Codec", - 34720: "Microsoft Document Imaging (MDI) Vector", - 34892: "Lossy JPEG", - 65000: "Kodak DCR Compressed", - 65535: "Pentax PEF Compressed", -} - -EXIF_PHOTOMETRICINTERPRETATION_VALUES = { - 0: "WhiteIsZero", - 1: "BlackIsZero", - 2: "RGB", - 3: "RGB Palette", - 4: "Transparency Mask", - 5: "CMYK", - 6: "YCbCr", - 8: "CIELab", - 9: "ICCLab", - 10: "ITULab", - 32803: "Color Filter Array", - 32844: "Pixar LogL", - 32845: "Pixar LogLuv", - 34892: "Linear Raw", -} - -EXIF_RESOLUTIONUNIT_VALUES = { - 2: "inches", - 3: "cm", -} +import PIL class OcrdExif(object): """ @@ -75,36 +9,34 @@ class OcrdExif(object): def from_filename(image_filename): if image_filename is None: raise Exception("Must pass 'image_filename' to OcrdExif.from_filename") - with exiftool.ExifTool() as et: - exif_props = et.get_metadata(image_filename) - # import json - # print(json.dumps(exif_props, indent=2)) - return OcrdExif(exif_props) + return OcrdExif(PIL.Image.open(image_filename)) - def __init__(self, props): - for selfattr in ['width', 'height']: - for prefix in ['EXIF', 'File', 'PNG', 'JFIF', 'Jpeg2000']: - prop = "%s:Image%s" % (prefix, selfattr[0].upper() + selfattr[1:]) - if prop in props: - setattr(self, selfattr, props[prop]) - break - for selfattr in ['XResolution', 'YResolution']: - for prefix in ['EXIF', 'File', 'PNG', 'JFIF', 'Jpeg2000']: - prop = "%s:%s" % (prefix, selfattr) - if prop in props: - setattr(self, selfattr[0].lower() + selfattr[1:], props[prop]) - break - for requiredattr in ['width', 'height']: - if getattr(self, requiredattr) is None: - raise Exception("Failed to determine image %s" % requiredattr) - setattr(self, 'xResolution', getattr(self, 'xResolution', 0)) - setattr(self, 'yResolution', getattr(self, 'yResolution', 0)) - if "EXIF:Compression" in props: - self.compression = EXIF_COMPRESSION_METHODS.get(props["EXIF:Compression"], "Unknown") - if "EXIF:PhotometricInterpretation" in props: - self.photometricInterpretation = EXIF_PHOTOMETRICINTERPRETATION_VALUES.get(props["EXIF:PhotometricInterpretation"], "Unknown") - if "EXIF:ResolutionUnit" in props: - self.resolutionUnit = "%s" % EXIF_RESOLUTIONUNIT_VALUES.get(props["EXIF:ResolutionUnit"], "None") + def __init__(self, img): + # print(img.__dict__) + self.width = img.width + self.height = img.height + self.photometricInterpretation = img.mode + for prop in ['compression', 'photometric_interpretation']: + setattr(self, prop, img.info[prop] if prop in img.info else None) + if img.format == 'TIFF' and 'dpi' in img.info: + self.xResolution = img.info['dpi'][0] + self.yResolution = img.info['dpi'][1] + self.resolutionUnit = 'cm' if img.tag[296] == 3 else 'inches' + elif img.format == 'JPEG': + self.xResolution = img.info['jfif_density'][0] + self.yResolution = img.info['jfif_density'][1] + self.resolutionUnit = img.info['jfif_unit'] + elif img.format == 'PNG' and 'dpi' in img.info: + print(img.info['pnginfo']) + self.xResolution = img.info['dpi'][0] + self.yResolution = img.info['dpi'][1] + else: + # if img.format == 'JPEG2000': + # import sys + # print('JPEG 2000 not supported yet :(', file=sys.stderr) + self.xResolution = -1 + self.yResolution = -1 + self.resolutionUnit = 'inches' def to_xml(self): ret = '' diff --git a/ocrd/utils.py b/ocrd/utils.py index 081f2c285..1d5f22144 100644 --- a/ocrd/utils.py +++ b/ocrd/utils.py @@ -21,6 +21,7 @@ # logging.getLogger('ocrd.resolver').setLevel(logging.INFO) logging.getLogger('ocrd.resolver.download_to_directory').setLevel(logging.INFO) logging.getLogger('ocrd.resolver.add_files_to_mets').setLevel(logging.INFO) +logging.getLogger('PIL').setLevel(logging.INFO) def getLogger(*args, **kwargs): return logging.getLogger(*args, **kwargs) diff --git a/test/model/test_exif.py b/test/model/test_exif.py index 86774f2f7..8f9e6e0a5 100644 --- a/test/model/test_exif.py +++ b/test/model/test_exif.py @@ -1,4 +1,6 @@ from test.base import TestCase, main, assets +from PIL import Image + from ocrd.model import OcrdExif # pylint: disable=no-member @@ -10,13 +12,25 @@ def test_tiff(self): self.assertEqual(exif.height, 3749) self.assertEqual(exif.xResolution, 300) self.assertEqual(exif.yResolution, 300) + self.assertEqual(exif.compression, 'jpeg') + self.assertEqual(exif.photometricInterpretation, 'RGB') - def test_png(self): + def test_png1(self): exif = OcrdExif.from_filename(assets.path_to('kant_aufklaerung_1784-binarized/kant_aufklaerung_1784_0020.bin.png')) self.assertEqual(exif.width, 1457) self.assertEqual(exif.height, 2084) - self.assertEqual(exif.xResolution, 0) - self.assertEqual(exif.yResolution, 0) + self.assertEqual(exif.xResolution, -1) + self.assertEqual(exif.yResolution, -1) + self.assertEqual(exif.compression, None) + self.assertEqual(exif.photometricInterpretation, 'L') + + def test_png2(self): + exif = OcrdExif.from_filename(assets.path_to('scribo-test/orig.sauvola.png')) + self.assertEqual(exif.width, 2097) + self.assertEqual(exif.height, 3062) + self.assertEqual(exif.xResolution, -1) + self.assertEqual(exif.yResolution, -1) + self.assertEqual(exif.photometricInterpretation, '1') def test_jpg(self): exif = OcrdExif.from_filename(assets.path_to('leptonica_samples/1555.007.jpg')) @@ -24,13 +38,15 @@ def test_jpg(self): self.assertEqual(exif.height, 1472) self.assertEqual(exif.xResolution, 1) self.assertEqual(exif.yResolution, 1) + self.assertEqual(exif.photometricInterpretation, 'RGB') def test_jp2(self): exif = OcrdExif.from_filename(assets.path_to('kant_aufklaerung_1784-jp2/kant_aufklaerung_1784_0020.jp2')) self.assertEqual(exif.width, 1457) self.assertEqual(exif.height, 2084) - # self.assertEqual(exif.xResolution, 1) - # self.assertEqual(exif.yResolution, 1) + self.assertEqual(exif.xResolution, -1) + self.assertEqual(exif.yResolution, -1) + self.assertEqual(exif.photometricInterpretation, 'RGB') if __name__ == '__main__': main() From 5790f8c6a3846fc47b8352c091669d7e779a31dc Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Thu, 26 Jul 2018 16:08:14 +0200 Subject: [PATCH 3/5] :memo: drop references to exiftool installation/ ocrd-exiftool --- Makefile | 1 - README.rst | 1 - requirements.txt | 1 - setup.py | 1 - 4 files changed, 4 deletions(-) diff --git a/Makefile b/Makefile index 3bc202829..f7020aa7f 100644 --- a/Makefile +++ b/Makefile @@ -41,7 +41,6 @@ deps-ubuntu: sudo apt install -y \ python3 \ python3-pip \ - libimage-exiftool-perl \ libxml2-utils # Install python deps via pip diff --git a/README.rst b/README.rst index 2297bff62..b7ed21067 100644 --- a/README.rst +++ b/README.rst @@ -21,7 +21,6 @@ To bootstrap the tool, you'll need installed (Ubuntu packages): * Python (``python``) * pip (``python-pip``) -* exiftool (``libimage-exiftool-perl``) * libxml2-utils for xmllint (``libxml2-utils``) To install system-wide: diff --git a/requirements.txt b/requirements.txt index 9cfaf7341..4c249d961 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,5 @@ click requests -ocrd-pyexiftool lxml Pillow numpy diff --git a/setup.py b/setup.py index a4ad66553..afabf2743 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,6 @@ 'jsonschema', 'lxml', 'numpy', - 'ocrd-pyexiftool', 'opencv-python', 'pyyaml', 'requests', From 51e12c3e0ccaa1237bf849b802e4166bb3ed82b0 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Thu, 26 Jul 2018 18:23:37 +0200 Subject: [PATCH 4/5] default dpi value should be 1, not -1 --- ocrd/model/ocrd_exif.py | 4 ++-- test/model/test_exif.py | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ocrd/model/ocrd_exif.py b/ocrd/model/ocrd_exif.py index e5ca20eb4..fbc2e22cf 100644 --- a/ocrd/model/ocrd_exif.py +++ b/ocrd/model/ocrd_exif.py @@ -34,8 +34,8 @@ def __init__(self, img): # if img.format == 'JPEG2000': # import sys # print('JPEG 2000 not supported yet :(', file=sys.stderr) - self.xResolution = -1 - self.yResolution = -1 + self.xResolution = 1 + self.yResolution = 1 self.resolutionUnit = 'inches' def to_xml(self): diff --git a/test/model/test_exif.py b/test/model/test_exif.py index 8f9e6e0a5..da324702b 100644 --- a/test/model/test_exif.py +++ b/test/model/test_exif.py @@ -19,8 +19,8 @@ def test_png1(self): exif = OcrdExif.from_filename(assets.path_to('kant_aufklaerung_1784-binarized/kant_aufklaerung_1784_0020.bin.png')) self.assertEqual(exif.width, 1457) self.assertEqual(exif.height, 2084) - self.assertEqual(exif.xResolution, -1) - self.assertEqual(exif.yResolution, -1) + self.assertEqual(exif.xResolution, 1) + self.assertEqual(exif.yResolution, 1) self.assertEqual(exif.compression, None) self.assertEqual(exif.photometricInterpretation, 'L') @@ -28,8 +28,8 @@ def test_png2(self): exif = OcrdExif.from_filename(assets.path_to('scribo-test/orig.sauvola.png')) self.assertEqual(exif.width, 2097) self.assertEqual(exif.height, 3062) - self.assertEqual(exif.xResolution, -1) - self.assertEqual(exif.yResolution, -1) + self.assertEqual(exif.xResolution, 1) + self.assertEqual(exif.yResolution, 1) self.assertEqual(exif.photometricInterpretation, '1') def test_jpg(self): @@ -44,8 +44,8 @@ def test_jp2(self): exif = OcrdExif.from_filename(assets.path_to('kant_aufklaerung_1784-jp2/kant_aufklaerung_1784_0020.jp2')) self.assertEqual(exif.width, 1457) self.assertEqual(exif.height, 2084) - self.assertEqual(exif.xResolution, -1) - self.assertEqual(exif.yResolution, -1) + self.assertEqual(exif.xResolution, 1) + self.assertEqual(exif.yResolution, 1) self.assertEqual(exif.photometricInterpretation, 'RGB') if __name__ == '__main__': From 88c49e5cc4665d794897bcfcb5bdb1271759e419 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Thu, 26 Jul 2018 18:32:24 +0200 Subject: [PATCH 5/5] :memo: changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7826e57dd..3e19be966 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ Versioned according to [Semantic Versioning](http://semver.org/). ## Unreleased +Changed: + + * Remove dependency on `exiftool`, #71, #150 + ## [0.7.0] - 2018-07-25 Changed: