Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Update #186

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 51 additions & 53 deletions EXIF.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python3
#
#
#AliElTop
# Library to extract Exif information from digital camera image files.
# https://github.com/ianare/exif-py
#
Expand Down Expand Up @@ -63,66 +63,64 @@ def get_args() -> argparse.Namespace:
return args


def main(args) -> None:
def main(args: argparse.Namespace) -> None:
"""Extract tags based on options (args)."""

exif_log.setup_logger(args.debug, args.color)

# output info for each file
for filename in args.files:
# avoid errors when printing to console
escaped_fn = escaped_fn = filename.encode(
sys.getfilesystemencoding(), 'surrogateescape'
).decode()
escaped_fn = filename.encode(sys.getfilesystemencoding(), 'surrogateescape').decode()

file_start = timeit.default_timer()
try:
img_file = open(escaped_fn, 'rb')
except IOError:
logger.error("'%s' is unreadable", escaped_fn)
continue
logger.info('Opening: %s', escaped_fn)

tag_start = timeit.default_timer()

# get the tags
data = process_file(
img_file,
stop_tag=args.stop_tag,
details=args.detailed,
strict=args.strict,
debug=args.debug,
extract_thumbnail=args.detailed
)

tag_stop = timeit.default_timer()

if not data:
logger.warning('No EXIF information found')
print()
continue

if 'JPEGThumbnail' in data:
logger.info('File has JPEG thumbnail')
del data['JPEGThumbnail']
if 'TIFFThumbnail' in data:
logger.info('File has TIFF thumbnail')
del data['TIFFThumbnail']

tag_keys = list(data.keys())
tag_keys.sort()

for i in tag_keys:
try:
logger.info('%s (%s): %s', i, FIELD_TYPES[data[i].field_type][2], data[i].printable)
except:
logger.error("%s : %s", i, str(data[i]))

file_stop = timeit.default_timer()

logger.debug("Tags processed in %s seconds", tag_stop - tag_start)
logger.debug("File processed in %s seconds", file_stop - file_start)
print()
with open(escaped_fn, 'rb') as img_file:
logger.info('Opening: %s', escaped_fn)

tag_start = timeit.default_timer()

data = process_file(
img_file,
stop_tag=args.stop_tag,
details=args.detailed,
strict=args.strict,
debug=args.debug,
extract_thumbnail=args.detailed
)

tag_stop = timeit.default_timer()

if not data:
logger.warning('No EXIF information found')
logger.info('')
continue

if 'JPEGThumbnail' in data:
logger.info('File has JPEG thumbnail')
del data['JPEGThumbnail']
if 'TIFFThumbnail' in data:
logger.info('File has TIFF thumbnail')
del data['TIFFThumbnail']

tag_keys = list(data.keys())
tag_keys.sort()

for i in tag_keys:
try:
field_type_info = FIELD_TYPES.get(data[i].field_type, ('Unknown', 'Unknown', 'Unknown'))
logger.info('%s (%s): %s', i, field_type_info[2], data[i].printable)
except Exception as e:
logger.error("%s : %s", i, str(e))

file_stop = timeit.default_timer()

logger.debug("Tags processed in %s seconds", tag_stop - tag_start)
logger.debug("File processed in %s seconds", file_stop - file_start)
logger.info('')

except FileNotFoundError:
logger.error("'%s' not found", escaped_fn)
except Exception as e:
logger.error("An error occurred while processing '%s': %s", escaped_fn, str(e))


if __name__ == '__main__':
Expand Down
118 changes: 26 additions & 92 deletions exifread/heic.py
Original file line number Diff line number Diff line change
@@ -1,78 +1,42 @@
# Find Exif data in an HEIC file.

# As of 2019, the latest standard seems to be "ISO/IEC 14496-12:2015"
# There are many different related standards (QuickTime, MOV, MP4, etc.).
# See https://en.wikipedia.org/wiki/ISO_base_media_file_format for more details.

# We parse just enough of the ISO format to locate the Exif data in the file.
# Inside the 'meta' box are two directories we need:
# 1) the 'iinf' box contains 'infe' records, we look for the item_id for 'Exif'.
# 2) once we have the item_id, we find a matching entry in the 'iloc' box, which
# gives us position and size information.

import struct
from typing import Any, List, Dict, Callable, BinaryIO, Optional

from exifread.exif_log import get_logger

logger = get_logger()


class WrongBox(Exception):
pass

class BoxVersion(Exception):
pass

class BadSize(Exception):
pass


class Box:
version = 0
minor_version = 0
item_count = 0
size = 0
after = 0
pos = 0
compat = [] # type: List
base_offset = 0
# this is full of boxes, but not in a predictable order.
subs = {} # type: Dict[str, Box]
locs = {} # type: Dict
exif_infe = None # type: Optional[Box]
item_id = 0
item_type = b''
item_name = b''
item_protection_index = 0
major_brand = b''
offset_size = 0
length_size = 0
base_offset_size = 0
index_size = 0
flags = 0

def __init__(self, name: str):
self.name = name

def __repr__(self) -> str:
return "<box '%s'>" % self.name

def set_sizes(self, offset: int, length: int, base_offset: int, index: int):
self.offset_size = offset
self.length_size = length
self.base_offset_size = base_offset
self.index_size = index

def set_full(self, vflags: int):
"""
ISO boxes come in 'old' and 'full' variants.
The 'full' variant contains version and flags information.
"""
self.version = vflags >> 24
self.flags = vflags & 0x00ffffff

self.version = 0
self.minor_version = 0
self.item_count = 0
self.size = 0
self.after = 0
self.pos = 0
self.compat = []
self.base_offset = 0
self.subs = {}
self.locs = {}
self.exif_infe = None
self.item_id = 0
self.item_type = b''
self.item_name = b''
self.item_protection_index = 0
self.major_brand = b''
self.offset_size = 0
self.length_size = 0
self.base_offset_size = 0
self.index_size = 0
self.flags = 0

class HEICExifFinder:

def __init__(self, file_handle: BinaryIO):
self.file_handle = file_handle

Expand All @@ -81,12 +45,7 @@ def get(self, nbytes: int) -> bytes:
if not read:
raise EOFError
if len(read) != nbytes:
msg = "get(nbytes={nbytes}) found {read} bytes at position {pos}".format(
nbytes=nbytes,
read=len(read),
pos=self.file_handle.tell()
)
raise BadSize(msg)
raise BadSize
return read

def get16(self) -> int:
Expand All @@ -105,7 +64,6 @@ def get_int4x2(self) -> tuple:
return num0, num1

def get_int(self, size: int) -> int:
"""some fields have variant-sized data."""
if size == 2:
return self.get16()
if size == 4:
Expand All @@ -118,7 +76,7 @@ def get_int(self, size: int) -> int:

def get_string(self) -> bytes:
read = []
while 1:
while True:
char = self.get(1)
if char == b'\x00':
break
Expand All @@ -131,10 +89,8 @@ def next_box(self) -> Box:
kind = self.get(4).decode('ascii')
box = Box(kind)
if size == 0:
# signifies 'to the end of the file', we shouldn't see this.
raise NotImplementedError
if size == 1:
# 64-bit size follows type.
size = self.get64()
box.size = size - 16
box.after = pos + size
Expand Down Expand Up @@ -171,7 +127,6 @@ def parse_box(self, box: Box) -> Box:
probe = self.get_parser(box)
if probe is not None:
probe(box)
# in case anything is left unread
self.file_handle.seek(box.after)
return box

Expand All @@ -192,9 +147,6 @@ def _parse_meta(self, meta: Box):
if psub is not None:
psub(box)
meta.subs[box.name] = box
else:
logger.debug('HEIC: skipping %r', box)
# skip any unparsed data
self.skip(box)

def _parse_infe(self, box: Box):
Expand All @@ -207,7 +159,6 @@ def _parse_infe(self, box: Box):
box.item_protection_index = self.get16()
box.item_type = self.get(4)
box.item_name = self.get_string()
# ignore the rest

def _parse_iinf(self, box: Box):
self.get_full(box)
Expand All @@ -216,7 +167,6 @@ def _parse_iinf(self, box: Box):
for _ in range(count):
infe = self.expect_parse('infe')
if infe.item_type == b'Exif':
logger.debug("HEIC: found Exif 'infe' box")
box.exif_infe = infe
break

Expand All @@ -229,22 +179,14 @@ def _parse_iloc(self, box: Box):
box.item_count = self.get16()
elif box.version == 2:
box.item_count = self.get32()
else:
raise BoxVersion(2, box.version)
box.locs = {}
logger.debug('HEIC: %d iloc items', box.item_count)
for _ in range(box.item_count):
if box.version < 2:
item_id = self.get16()
elif box.version == 2:
item_id = self.get32()
else:
# notreached
raise BoxVersion(2, box.version)
if box.version in (1, 2):
# ignore construction_method
self.get16()
# ignore data_reference_index
self.get16()
box.base_offset = self.get_int(box.base_offset_size)
extent_count = self.get16()
Expand All @@ -259,23 +201,15 @@ def _parse_iloc(self, box: Box):

def find_exif(self) -> tuple:
ftyp = self.expect_parse('ftyp')
meta = self.expect_parse('meta')
assert ftyp.major_brand == b'heic'
assert ftyp.minor_version == 0
meta = self.expect_parse('meta')
assert meta.subs['iinf'].exif_infe is not None
item_id = meta.subs['iinf'].exif_infe.item_id
extents = meta.subs['iloc'].locs[item_id]
logger.debug('HEIC: found Exif location.')
# we expect the Exif data to be in one piece.
assert len(extents) == 1
pos, _ = extents[0]
# looks like there's a kind of pseudo-box here.
self.file_handle.seek(pos)
# the payload of the "Exif" item may start with either
# b'\xFF\xE1\xSS\xSSExif\x00\x00' (with APP1 marker, e.g. Android Q)
# or
# b'Exif\x00\x00' (without APP1 marker, e.g. iOS)
# according to "ISO/IEC 23008-12, 2017-12", both of them are legal
exif_tiff_header_offset = self.get32()
assert exif_tiff_header_offset >= 6
assert self.get(exif_tiff_header_offset)[-6:] == b'Exif\x00\x00'
Expand Down