forked from x4nth055/ethical-hacking-tools-python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmetadata.py
65 lines (55 loc) · 1.94 KB
/
metadata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import ffmpeg
from tinytag import TinyTag
import sys
from pprint import pprint # for printing Python dictionaries in a human-readable way
from PIL import Image
from PIL.ExifTags import TAGS
import sys
import pikepdf
def get_media_metadata(media_file):
    """Collect metadata of an audio/video file into a single dict.

    Two sources are combined: the tag dictionary produced by TinyTag and
    the first entry of the "streams" list returned by ``ffmpeg.probe``.
    When both sources provide the same key, the ffprobe value wins because
    it is merged last.
    """
    # ffprobe is queried first; only the first reported stream is kept
    probe_result = ffmpeg.probe(media_file)
    first_stream = probe_result["streams"][0]
    # tags as seen by TinyTag
    tag_values = TinyTag.get(media_file).as_dict()
    # start from the TinyTag values, then overlay the ffprobe stream fields
    combined = dict(tag_values)
    combined.update(first_stream)
    return combined
def get_image_metadata(image_file):
    """Return basic properties and EXIF fields of an image as one dict.

    Args:
        image_file: Value passed straight to ``PIL.Image.open``.

    Returns:
        A dict that starts with the basic properties ("Filename",
        "Image Size", "Image Height", "Image Width", "Image Format",
        "Image Mode", "Image is Animated", "Frames in Image") followed by
        one entry per EXIF field. EXIF entries are keyed by the tag name
        found in ``PIL.ExifTags.TAGS``, or by the raw tag id when the id
        is not in that table. EXIF values of type ``bytes`` are decoded
        to ``str``.
    """
    # the context manager releases the underlying file handle even when
    # reading the metadata raises
    with Image.open(image_file) as image:
        # basic properties exposed directly by the image object
        info_dict = {
            "Filename": image.filename,
            "Image Size": image.size,
            "Image Height": image.height,
            "Image Width": image.width,
            "Image Format": image.format,
            "Image Mode": image.mode,
            # not every image object defines these two attributes,
            # so fall back to the single-frame defaults
            "Image is Animated": getattr(image, "is_animated", False),
            "Frames in Image": getattr(image, "n_frames", 1),
        }
        # extract EXIF data while the file is still open
        exifdata = image.getexif()
        for tag_id in exifdata:
            # get the tag name, instead of the human-unreadable tag id
            tag = TAGS.get(tag_id, tag_id)
            data = exifdata.get(tag_id)
            if isinstance(data, bytes):
                # binary EXIF payloads are often not valid UTF-8; replace
                # undecodable bytes rather than abort with UnicodeDecodeError
                data = data.decode(errors="replace")
            info_dict[tag] = data
    return info_dict
def get_pdf_metadata(pdf_file):
    """Return the document-information entries of a PDF as a plain dict.

    The file is opened with ``pikepdf.Pdf.open`` and the contents of its
    ``docinfo`` attribute are copied into a new ``dict``.
    """
    document = pikepdf.Pdf.open(pdf_file)
    # docinfo is where pikepdf exposes the document's metadata entries
    metadata = dict(document.docinfo)
    return metadata
if __name__ == "__main__":
    # exit with a usage message instead of an IndexError traceback when the
    # file argument is missing
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <file>")
    file = sys.argv[1]
    # compare extensions case-insensitively so names such as "REPORT.PDF"
    # or "photo.JPG" reach the right extractor
    lowered_name = file.lower()
    if lowered_name.endswith(".pdf"):
        print(get_pdf_metadata(file))
    elif lowered_name.endswith((".jpg", ".jpeg")):
        pprint(get_image_metadata(file))
    else:
        # everything else is treated as an audio/video file
        pprint(get_media_metadata(file))