Skip to content

Commit

Permalink
Merge branch 'replace-exiftool'
Browse files Browse the repository at this point in the history
Conflicts:
	CHANGELOG.md
	Makefile
	README.rst
  • Loading branch information
kba committed Jul 26, 2018
2 parents a8c14f8 + 88c49e5 commit c36730c
Show file tree
Hide file tree
Showing 8 changed files with 74 additions and 97 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Versioned according to [Semantic Versioning](http://semver.org/).
Changed:

* Remove dependency on `xmllint` command line tool, #72, #151
* Remove dependency on `exiftool`, #71, #150

## [0.7.0] - 2018-07-25

Expand Down
3 changes: 1 addition & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,7 @@ DOCKER_TAG = 'ocrd/pyocrd'
deps-ubuntu:
sudo apt install -y \
python3 \
python3-pip \
libimage-exiftool-perl
python3-pip

# Install python deps via pip
deps-pip:
Expand Down
1 change: 0 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ To bootstrap the tool, you'll need installed (Ubuntu packages):

* Python (``python``)
* pip (``python-pip``)
* exiftool (``libimage-exiftool-perl``)

To install system-wide:

Expand Down
115 changes: 28 additions & 87 deletions ocrd/model/ocrd_exif.py
Original file line number Diff line number Diff line change
@@ -1,70 +1,4 @@
import exiftool

EXIF_COMPRESSION_METHODS = {
1: "Uncompressed",
2: "CCITT 1D",
3: "T4/Group 3 Fax",
4: "T6/Group 4 Fax",
5: "LZW",
6: "JPEG (old-style)",
7: "JPEG",
8: "Adobe Deflate",
9: "JBIG B&W",
10: "JBIG Color",
99: "JPEG",
262: "Kodak 262",
32766: "Next",
32767: "Sony ARW Compressed",
32769: "Packed RAW",
32770: "Samsung SRW Compressed",
32771: "CCIRLEW",
32772: "Samsung SRW Compressed 2",
32773: "PackBits",
32809: "Thunderscan",
32867: "Kodak KDC Compressed",
32895: "IT8CTPAD",
32896: "IT8LW",
32897: "IT8MP",
32898: "IT8BL",
32908: "PixarFilm",
32909: "PixarLog",
32946: "Deflate",
32947: "DCS",
34661: "JBIG",
34676: "SGILog",
34677: "SGILog24",
34712: "JPEG 2000",
34713: "Nikon NEF Compressed",
34715: "JBIG2 TIFF FX",
34718: "Microsoft Document Imaging (MDI) Binary Level Codec",
34719: "Microsoft Document Imaging (MDI) Progressive Transform Codec",
34720: "Microsoft Document Imaging (MDI) Vector",
34892: "Lossy JPEG",
65000: "Kodak DCR Compressed",
65535: "Pentax PEF Compressed",
}

EXIF_PHOTOMETRICINTERPRETATION_VALUES = {
0: "WhiteIsZero",
1: "BlackIsZero",
2: "RGB",
3: "RGB Palette",
4: "Transparency Mask",
5: "CMYK",
6: "YCbCr",
8: "CIELab",
9: "ICCLab",
10: "ITULab",
32803: "Color Filter Array",
32844: "Pixar LogL",
32845: "Pixar LogLuv",
34892: "Linear Raw",
}

EXIF_RESOLUTIONUNIT_VALUES = {
2: "inches",
3: "cm",
}
import PIL

class OcrdExif(object):
"""
Expand All @@ -75,27 +9,34 @@ class OcrdExif(object):
def from_filename(image_filename):
    """
    Build an OcrdExif from the image file at ``image_filename``.

    Arguments:
        image_filename: path of the image to inspect; must not be ``None``.

    Returns:
        An ``OcrdExif`` constructed from the opened image.

    Raises:
        Exception: if ``image_filename`` is ``None``.
    """
    if image_filename is None:
        raise Exception("Must pass 'image_filename' to OcrdExif.from_filename")
    # A bare ``import PIL`` does not load the ``PIL.Image`` submodule, so
    # import it explicitly instead of relying on another module having
    # done so already.
    from PIL import Image
    # Only metadata is read by OcrdExif, so the file handle can be released
    # as soon as the object has been constructed.
    with Image.open(image_filename) as img:
        return OcrdExif(img)

def __init__(self, props):
for selfattr in ['width', 'height', 'xResolution', 'yResolution']:
for prefix in ['EXIF', 'File', 'PNG', 'JFIF']:
prop = "%s:Image%s" % (prefix, selfattr[0].upper() + selfattr[1:])
if prop in props:
setattr(self, selfattr, props[prop])
for requiredattr in ['width', 'height']:
if getattr(self, requiredattr) is None:
raise Exception("Failed to determine image %s" % requiredattr)
setattr(self, 'xResolution', getattr(self, 'xResolution', 0))
setattr(self, 'yResolution', getattr(self, 'yResolution', 0))
if "EXIF:Compression" in props:
self.compression = EXIF_COMPRESSION_METHODS.get(props["EXIF:Compression"], "Unknown")
if "EXIF:PhotometricInterpretation" in props:
self.photometricInterpretation = EXIF_PHOTOMETRICINTERPRETATION_VALUES.get(props["EXIF:PhotometricInterpretation"], "Unknown")
if "EXIF:ResolutionUnit" in props:
self.resolutionUnit = "%s" % EXIF_RESOLUTIONUNIT_VALUES.get(props["EXIF:ResolutionUnit"], "None")
def __init__(self, img):
    """
    Copy size, colour mode and resolution metadata from an opened image.

    Arguments:
        img: image object providing ``width``, ``height``, ``mode``,
            ``format`` and an ``info`` mapping (``from_filename`` passes
            the result of ``PIL.Image.open``). For TIFF images the ``tag``
            directory is consulted for the resolution unit.

    Sets ``width``, ``height``, ``photometricInterpretation`` (the image
    mode), ``compression`` and ``photometric_interpretation`` (``None`` when
    absent from ``img.info``), ``xResolution``, ``yResolution`` and
    ``resolutionUnit`` (``'inches'`` or ``'cm'``).
    """
    info = img.info
    self.width = img.width
    self.height = img.height
    self.photometricInterpretation = img.mode
    for prop in ['compression', 'photometric_interpretation']:
        setattr(self, prop, info.get(prop))

    # Fallback used whenever the image carries no usable resolution
    # metadata (e.g. JPEG 2000, or a JPEG without a JFIF header), so that
    # all three resolution attributes always exist.
    self.xResolution = 1
    self.yResolution = 1
    self.resolutionUnit = 'inches'

    if img.format == 'TIFF' and 'dpi' in info:
        self.xResolution = info['dpi'][0]
        self.yResolution = info['dpi'][1]
        # Tag 296 is the TIFF ResolutionUnit field (3 means centimetres).
        # It is optional, so a missing tag must not abort construction.
        # NOTE(review): the legacy ``tag`` directory may wrap values in a
        # tuple, in which case this comparison is never true -- confirm
        # whether ``tag_v2`` should be used instead.
        try:
            unit = img.tag[296]
        except (AttributeError, KeyError):
            unit = None
        self.resolutionUnit = 'cm' if unit == 3 else 'inches'
    elif img.format == 'JPEG' and 'jfif_density' in info:
        self.xResolution = info['jfif_density'][0]
        self.yResolution = info['jfif_density'][1]
        # JFIF unit codes: 1 = dots per inch, 2 = dots per cm. Any other
        # value (0 = no unit, or a missing key) keeps the 'inches' default
        # so that resolutionUnit is a string in every branch.
        self.resolutionUnit = {1: 'inches', 2: 'cm'}.get(info.get('jfif_unit'), 'inches')
    elif img.format == 'PNG' and 'dpi' in info:
        # The 'dpi' entry is taken to be dots per inch, so the default
        # 'inches' unit set above is kept.
        self.xResolution = info['dpi'][0]
        self.yResolution = info['dpi'][1]

def to_xml(self):
ret = '<exif>'
Expand Down
1 change: 1 addition & 0 deletions ocrd/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
# logging.getLogger('ocrd.resolver').setLevel(logging.INFO)
logging.getLogger('ocrd.resolver.download_to_directory').setLevel(logging.INFO)
logging.getLogger('ocrd.resolver.add_files_to_mets').setLevel(logging.INFO)
logging.getLogger('PIL').setLevel(logging.INFO)

def getLogger(*args, **kwargs):
    """
    Return the standard-library logger selected by the given arguments.

    All positional and keyword arguments are forwarded unchanged to
    ``logging.getLogger``.
    """
    logger = logging.getLogger(*args, **kwargs)
    return logger
Expand Down
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
click
requests
ocrd-pyexiftool
lxml
Pillow
numpy
Expand Down
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
'jsonschema',
'lxml',
'numpy',
'ocrd-pyexiftool',
'opencv-python',
'pyyaml',
'requests',
Expand Down
48 changes: 43 additions & 5 deletions test/model/test_exif.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,52 @@
from ocrd.model import OcrdExif

from test.base import TestCase, main, assets
TEST_IMG = assets.path_to('SBB0000F29300010000/00000001.tif')
from PIL import Image

from ocrd.model import OcrdExif

# pylint: disable=no-member
class TestOcrdExif(TestCase):

def runTest(self):
exif = OcrdExif.from_filename(TEST_IMG)
def test_tiff(self):
exif = OcrdExif.from_filename(assets.path_to('SBB0000F29300010000/00000001.tif'))
self.assertEqual(exif.width, 2875)
self.assertEqual(exif.height, 3749)
self.assertEqual(exif.xResolution, 300)
self.assertEqual(exif.yResolution, 300)
self.assertEqual(exif.compression, 'jpeg')
self.assertEqual(exif.photometricInterpretation, 'RGB')

def test_png1(self):
exif = OcrdExif.from_filename(assets.path_to('kant_aufklaerung_1784-binarized/kant_aufklaerung_1784_0020.bin.png'))
self.assertEqual(exif.width, 1457)
self.assertEqual(exif.height, 2084)
self.assertEqual(exif.xResolution, 1)
self.assertEqual(exif.yResolution, 1)
self.assertEqual(exif.compression, None)
self.assertEqual(exif.photometricInterpretation, 'L')

def test_png2(self):
exif = OcrdExif.from_filename(assets.path_to('scribo-test/orig.sauvola.png'))
self.assertEqual(exif.width, 2097)
self.assertEqual(exif.height, 3062)
self.assertEqual(exif.xResolution, 1)
self.assertEqual(exif.yResolution, 1)
self.assertEqual(exif.photometricInterpretation, '1')

def test_jpg(self):
exif = OcrdExif.from_filename(assets.path_to('leptonica_samples/1555.007.jpg'))
self.assertEqual(exif.width, 944)
self.assertEqual(exif.height, 1472)
self.assertEqual(exif.xResolution, 1)
self.assertEqual(exif.yResolution, 1)
self.assertEqual(exif.photometricInterpretation, 'RGB')

def test_jp2(self):
exif = OcrdExif.from_filename(assets.path_to('kant_aufklaerung_1784-jp2/kant_aufklaerung_1784_0020.jp2'))
self.assertEqual(exif.width, 1457)
self.assertEqual(exif.height, 2084)
self.assertEqual(exif.xResolution, 1)
self.assertEqual(exif.yResolution, 1)
self.assertEqual(exif.photometricInterpretation, 'RGB')

if __name__ == '__main__':
main()

0 comments on commit c36730c

Please sign in to comment.