forked from AmritSd/video-search-deep-learning
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvidcaption.py
71 lines (49 loc) · 2.01 KB
/
vidcaption.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from IPython.display import Image
from matplotlib import pyplot as plt
import pandas as pd, numpy as np
pd.options.display.float_format = '{:,.2f}'.format
from google.cloud import vision
import io
import warnings
warnings.simplefilter("ignore")
import os
import cv2
import re
import ntpath
from google.cloud import vision
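# Example only (prefer calling set_gcp_key at runtime); note the raw string for Windows backslashes:
# os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = r"C:\Users\amrit\Documents\GitHub\Video-search\electron-app\public\keyFile.json"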
def set_gcp_key(key_filepath):
    """Set the service-account key file used for Google Cloud credentials.

    Stores the path in the ``GOOGLE_APPLICATION_CREDENTIALS`` environment
    variable of the current process.

    Args:
        key_filepath: Location of the JSON key file, given as ``str``,
            ``bytes`` or any ``os.PathLike`` object (e.g. ``pathlib.Path``).
    """
    # os.environ accepts only str values; fsdecode normalises PathLike and
    # bytes inputs and returns str inputs unchanged.
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = os.fsdecode(key_filepath)
def CloudVisionTextExtractor(handwritings):
    """Run Google Cloud Vision document text detection on a single image.

    Args:
        handwritings: Image array accepted by ``cv2.imencode`` (for example a
            frame returned by ``cv2.VideoCapture.read``).

    Returns:
        The response object returned by
        ``ImageAnnotatorClient.document_text_detection``.

    Raises:
        ValueError: If ``handwritings`` is ``None`` or cannot be PNG-encoded.
        RuntimeError: If the Vision API reports an error for the image.
    """
    if handwritings is None:
        # A failed VideoCapture.read() yields None; fail with a clear message
        # instead of an opaque OpenCV argument error.
        raise ValueError("handwritings must be an image array, got None")

    # Convert the image from a NumPy array to PNG bytes for submission to
    # Google Cloud Vision.
    encoded_ok, encoded_image = cv2.imencode('.png', handwritings)
    if not encoded_ok:
        raise ValueError("could not encode the image as PNG")
    image = vision.Image(content=encoded_image.tobytes())

    # Feed the image to the Google Cloud Vision API.
    client = vision.ImageAnnotatorClient()
    response = client.document_text_detection(image=image)

    # Per-image failures are reported inside the response rather than raised
    # by the client, so surface them instead of returning an empty annotation.
    if response.error.message:
        raise RuntimeError(
            "Cloud Vision document_text_detection failed: {}".format(
                response.error.message
            )
        )
    return response
def getTextFromVisionResponse(response):
    """Flatten a document-text-detection response into one string.

    Walks pages -> blocks -> paragraphs -> words of
    ``response.full_text_annotation``, rebuilds each word by concatenating the
    ``text`` of its symbols, and joins the words with single spaces.

    Args:
        response: Object exposing ``full_text_annotation.pages`` with the
            nested ``blocks`` / ``paragraphs`` / ``words`` / ``symbols``
            attributes read below.

    Returns:
        The space-separated words in traversal order; ``''`` when the
        response contains no words.
    """
    def _iter_words():
        # Yield every word in document order.
        for page in response.full_text_annotation.pages:
            for block in page.blocks:
                for paragraph in block.paragraphs:
                    yield from paragraph.words

    return ' '.join(
        ''.join(symbol.text for symbol in word.symbols)
        for word in _iter_words()
    )
def get_vid_text(timestamp_dict):
    """Extract the text visible in selected frames of one or more videos.

    For every video, seeks to each requested timestamp, grabs the frame,
    sends it through ``CloudVisionTextExtractor`` and converts the response
    to plain text with ``getTextFromVisionResponse``.

    Args:
        timestamp_dict: Mapping of video file path -> iterable of timestamps
            in seconds (they are multiplied by 1000 before being used as a
            millisecond position).

    Returns:
        A dict mapping each file path to a ``{timestamp: text}`` dict. A
        timestamp whose frame could not be read maps to ``''``.
    """
    vid_caption_dict = {}
    for filepath, timestamps in timestamp_dict.items():
        vidcap = cv2.VideoCapture(str(filepath))
        caption_dict = {}
        try:
            for timestamp in timestamps:
                # CAP_PROP_POS_MSEC expects milliseconds.
                vidcap.set(cv2.CAP_PROP_POS_MSEC, int(timestamp * 1000))
                success, image = vidcap.read()
                if not success:
                    # Unopened file or seek past the end: there is no frame
                    # to analyse, so record an empty caption and keep going
                    # rather than passing None to the encoder.
                    caption_dict[timestamp] = ''
                    continue
                response = CloudVisionTextExtractor(image)
                caption_dict[timestamp] = getTextFromVisionResponse(response)
        finally:
            # Always free the underlying file/decoder handle.
            vidcap.release()
        vid_caption_dict[filepath] = caption_dict
    return vid_caption_dict