-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathdoc_iden.py
31 lines (20 loc) · 876 Bytes
/
doc_iden.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from lib.gvision_extract_features import extract_features
from lib.direct_matcher import direct_match
from lib.spacy_nlp import spacy_nlp_match
import sys
import os
from lib.doc_category import find_best_doc_category
from lib.training_data import save_for_training
def main_doc_iden(file_name):
    """Identify the document category of a file and report the outcome.

    Runs ``extract_features`` on ``file_name``, feeds the extracted features
    to ``direct_match``, and lets ``find_best_doc_category`` choose a result
    from those predictions. The outcome is printed to stdout and returned.

    Args:
        file_name: Passed unchanged to ``extract_features`` (presumably the
            path of the document image -- confirm against the caller).

    Returns:
        A ``(category, confidence_percent)`` tuple when a category was
        found, where ``confidence_percent`` is the second element of the
        result multiplied by 100; otherwise ``("No_Match", 0)``.
    """
    extracted = extract_features(file_name)
    # Only the direct matcher contributes predictions here; the spaCy NLP
    # matcher is not combined into the prediction list.
    candidates = direct_match(extracted)
    best = find_best_doc_category(candidates)

    # Guard clause: a falsy result means no predefined category matched.
    if not best:
        print('File not matched with any predefined categories\n')
        return ("No_Match", 0)

    category = best[0]
    confidence_pct = best[1] * 100  # scale the score to a percentage
    print('File identified as {} with {:.2f}% confidence\n'.format(category, confidence_pct))
    return (category, confidence_pct)
#save_for_training(image_name = sys.argv[1] + ".jpg")