Skip to content

Commit

Permalink
chg: [tools] add reprocess title + CEDetector
Browse files (browse the repository at this point in the history)
  • Loading branch information
Terrtia committed Oct 16, 2024
1 parent: 1112615 | commit: 3f78457
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 9 deletions.
2 changes: 2 additions & 0 deletions bin/lib/objects/ail_objects.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -341,6 +341,8 @@ def obj_iterator(obj_type, filters):
return Pgps.get_all_pgps_objects(filters=filters)
elif obj_type == 'message':
return chats_viewer.get_messages_iterator(filters=filters)
elif obj_type == 'title':
return Titles.Titles().get_iterator()


def card_objs_iterators(filters):
Expand Down
27 changes: 19 additions & 8 deletions bin/modules/CEDetector.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -111,17 +111,28 @@ def test_detection():
if not is_detected:
not_detected.add(domain)

print()
print()
print()
print()
for domain in not_detected:
dom = Domain(domain)
print('-----------', domain)
# print('-----------', domain)
for h in dom.get_correlation('title').get('title', []):
print(Title(h[1:]).get_content().lower())
print()
print()
c = Title(h[1:]).get_content().lower()
if c == '404 not found':
lt = []
dom = Domain(domain)
print('-----------', domain)
for hi in dom.get_correlation('title').get('title', []):
print(Title(hi[1:]).get_content().lower())
ci = Title(hi[1:]).get_content().lower()
if ci != '404 not found' and ci not in []:
lt.append(ci)
if lt:
print('-----------', domain)
for ti in lt:
print(ti)
print()
print()

# Tag.delete_object_tag(tag, 'domain', domain)


if __name__ == "__main__":
Expand Down
4 changes: 3 additions & 1 deletion tools/reprocess_objects.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -29,11 +29,13 @@
# from modules.Onion import Onion
# from modules.Telegram import Telegram

from modules.CEDetector import CEDetector
from modules.Languages import Languages
from modules.OcrExtractor import OcrExtractor
from modules.QrCodeReader import QrCodeReader

MODULES = {
'CEDetector': CEDetector,
'Languages': Languages,
'OcrExtractor': OcrExtractor,
'QrCodeReader': QrCodeReader
Expand Down Expand Up @@ -70,7 +72,7 @@ def reprocess_message_objects(object_type, module_name=None):
obj_type = args.type
if not is_object_type(obj_type):
raise Exception(f'Invalid Object Type: {obj_type}')
if obj_type not in ['image', 'item', 'message', 'screenshot']:
if obj_type not in ['image', 'item', 'message', 'screenshot', 'title']:
raise Exception(f'Currently not supported Object Type: {obj_type}')

modulename = args.module
Expand Down

0 comments on commit 3f78457

Please sign in to comment.