diff --git a/src/MAGIST/NLP/AudioTranscriber.py b/src/MAGIST/NLP/AudioTranscriber.py index 83c1bf2..6b139de 100644 --- a/src/MAGIST/NLP/AudioTranscriber.py +++ b/src/MAGIST/NLP/AudioTranscriber.py @@ -19,10 +19,6 @@ def __init__(self, config): :param config: The config file(config.json). """ - self.log = MainLogger(config).StandardLogger("GoogleAudioTranscriber") - self.r = sr.Recognizer() - self.m = sr.Microphone() - root_log = MainLogger(config) self.log = root_log.StandardLogger("GoogleAudioTranscriber") # Create a script specific logging instance @@ -34,6 +30,9 @@ def microphone_listener(self): :return: The transcription of the audio as a string. """ + self.r = sr.Recognizer() + self.m = sr.Microphone() + with self.m as source: self.log.info("GoogleAudioTranscriber Listening...") audio = self.r.listen(source) @@ -56,6 +55,10 @@ def file_transcriber(self, file): :return: The transcription of the audio as a string. """ + + self.r = sr.Recognizer() + self.m = sr.Microphone() + file = pathlib.Path(file) file = file.resolve() # Find absolute path from a relative one. file = str(file) @@ -76,6 +79,4 @@ def file_transcriber(self, file): self.log.info("Time taken: " + str(end - start)) - return transcription - - + return transcription \ No newline at end of file diff --git a/src/MAGIST/NeuralDB/PrimaryNeuralDB.py b/src/MAGIST/NeuralDB/PrimaryNeuralDB.py index 9d7ab51..1a2c5d7 100644 --- a/src/MAGIST/NeuralDB/PrimaryNeuralDB.py +++ b/src/MAGIST/NeuralDB/PrimaryNeuralDB.py @@ -7,6 +7,7 @@ import json import pathlib import time +import re from ..Utils.LogMaster.log_init import MainLogger @@ -34,7 +35,6 @@ def __init__(self, config, db_client): self.db_string = i["db_search_zone"] except KeyError: pass - print(self.db_string) def recreate_db(self): """Recreate the databases and collections @@ -47,6 +47,8 @@ def recreate_db(self): time.sleep(1) self.log.warning("Resetting and recreating all databases and tables...") + self.dbs = [] + for d in self.db_string: if d == "vision": self.vision = self.client['VisionDB'] @@ -57,6 +59,8 @@ def recreate_db(self): self.obj_users = self.vision["ObjectUsers"] self.log.info("Vision database is included in NeuralDB search.") + + self.dbs.append(self.vision) if d == "nlp": self.nlp = self.client["NLP"] @@ -64,6 +68,8 @@ def recreate_db(self): self.word_location = self.nlp["WordLocation"] self.log.info("NLP database is included in NeuralDB search.") + + self.dbs.append(self.nlp) if d == "common": self.common = self.client["Common"] @@ -71,6 +77,8 @@ def recreate_db(self): self.log.info("Common database is included in NeuralDB search.") + self.dbs.append(self.common) + try: if self.vision is None: self.log.warning("Vision database was not included from NeuralDB search.") @@ -169,7 +177,7 @@ def search_obj_details(self, obj_name): for i in d.list_collection_names(): self.log.info(f" ===> Collection: {i}") - for j in self.vision[i].find({"obj_name": {"$regex": obj_name}}): + for j in self.vision[i].find({"obj_name": re.compile(rf"\b{obj_name}\b", re.IGNORECASE)}): self.log.info(f" ===> {j}") data.append(j) return data @@ -190,7 +198,7 @@ def search_obj_desc(self, keyword): for i in d.list_collection_names(): self.log.info(f" ===> Collection: {i}") - for j in self.vision[i].find({"obj_desc" : {"$regex" : keyword}}): + for j in self.vision[i].find({"obj_desc" : re.compile(rf"\b{keyword}\b", re.IGNORECASE)}): self.log.info(f" ===> {j}") data.append(j) return data @@ -211,7 +219,7 @@ def search_obj_location(self, location): for i in d.list_collection_names(): self.log.info(f" ===> Collection: {i}") - for j in self.vision[i].find({"obj_location" : {"$regex" : location}}): + for j in self.vision[i].find({"obj_location" : re.compile(rf"\b{location}\b", re.IGNORECASE)}): self.log.info(f" ===> {j}") data.append(j) return data @@ -232,7 +240,7 @@ def search_obj_user(self, user): for i in d.list_collection_names(): self.log.info(f" ===> Collection: {i}") - for j in self.vision[i].find({"user_name" : {"$regex" : user}}): + for j in self.vision[i].find({"user_name" : re.compile(rf"\b{user}\b", re.IGNORECASE)}): self.log.info(f" ===> {j}") data.append(j) return data @@ -257,7 +265,7 @@ def search_word_details(self, word): for i in d.list_collection_names(): self.log.info(f" ===> Collection: {i}") - for j in self.vision[i].find({"word_name" : {"$regex" : word}}): + for j in self.vision[i].find({"word_name" : re.compile(rf"\b{word}\b", re.IGNORECASE)}): self.log.info(f" ===> {j}") data.append(j) return data @@ -278,7 +286,7 @@ def search_word_desc(self, keyword): for i in d.list_collection_names(): self.log.info(f" ===> Collection: {i}") - for j in self.vision[i].find({"word_desc" : {"$regex" : keyword}}): + for j in self.vision[i].find({"word_desc" : re.compile(rf"\b{keyword}\b", re.IGNORECASE)}): self.log.info(f" ===> {j}") data.append(j) return data @@ -299,7 +307,7 @@ def search_word_location(self, location): for i in d.list_collection_names(): self.log.info(f" ===> Collection: {i}") - for j in self.vision[i].find({"word_location" : {"$regex" : location}}): + for j in self.vision[i].find({"word_location" : re.compile(rf"\b{location}\b", re.IGNORECASE)}): self.log.info(f" ===> {j}") data.append(j) return data @@ -320,8 +328,7 @@ def search_word_relation(self, relation): for i in d.list_collection_names(): self.log.info(f" ===> Collection: {i}") - for j in self.vision[i].find({"word_relation" : {"$regex" : relation}}): + for j in self.vision[i].find({"word_relation" : re.compile(rf"\b{relation}\b", re.IGNORECASE)}): self.log.info(f" ===> {j}") data.append(j) return data - diff --git a/src/MAGIST/TaskManagment/ThreadedQueue.py b/src/MAGIST/TaskManagment/ThreadedQueue.py index 123b148..37d306b 100644 --- a/src/MAGIST/TaskManagment/ThreadedQueue.py +++ b/src/MAGIST/TaskManagment/ThreadedQueue.py @@ -7,6 +7,8 @@ import threading import uuid import numpy as np +import pathlib +import json from ..Utils.LogMaster.log_init import MainLogger @@ -27,6 +29,17 @@ def __init__(self, config): self.function_returns = [] + config = pathlib.Path(config) + config = config.resolve() # Find absolute path from a relative one. + f = open(config) + config = json.load(f) + + for i in config['task_management']: + try: + self.worker_threads = i["num_of_worker_threads"] + except KeyError: + pass + def __worker(self): """The worker thread. This actually executes the tasks in the queue. """ @@ -82,7 +95,8 @@ def detach_thread(self): """ # Turn-on the worker thread. - threading.Thread(target=self.__worker, daemon=True).start() + for i in range(self.worker_threads): + threading.Thread(target=self.__worker, daemon=True).start() self.log.info("Thread created and daemonized. Queue started...") def join_thread(self): diff --git a/src/main.py b/src/main.py index 3a088be..6c791ad 100644 --- a/src/main.py +++ b/src/main.py @@ -1,5 +1,6 @@ from MAGIST.Vision.UnsupervisedModels.img_cluster import RoughCluster import time, os +import numpy as np cluster = RoughCluster("config.json") @@ -29,7 +30,7 @@ def dummy(a, b, c): time.sleep(30) for l in labels: - queue.put_queue(dummy, l, 200, "Data/", name=f"Downloading {l}", priority=priority) + queue.put_queue(scraper.download_raw_img_dataset, l, 10, "Data/", name=f"Downloading {l}", priority=priority) priority += 1 @@ -39,12 +40,12 @@ def dummy(a, b, c): -for l in labels: - path = os.path.join("Data", l) - slicer.image_integrity_verification(path, delete_invalid=True) - slicer.resizer((500, 500), path) - coordinates = slicer.coordinate_compute((500, 500), (100, 100)) - slicer.crop_segments(coordinates, path, "Sliced", l) +# for l in labels: +# path = os.path.join("Data", l) +# slicer.image_integrity_verification(path, delete_invalid=True) +# slicer.resizer((500, 500), path) +# coordinates = slicer.coordinate_compute((500, 500), (100, 100)) +# slicer.crop_segments(coordinates, path, "Sliced", l) @@ -53,7 +54,7 @@ def dummy(a, b, c): cnn = MAGIST_CNN("config.json") -queue.put_queue(cnn, name="MAGIST_CNN_Trainer", priority=10) +# queue.put_queue(cnn, name="MAGIST_CNN_Trainer", priority=10) from MAGIST.Utils.WebScraper.wikipedia import WikipediaScraper from MAGIST.NeuralDB.MongoUtils import AdminUtils @@ -72,4 +73,30 @@ def dummy(a, b, c): description = wiki.get_summary(l) neural_db.insert_obj_desc(l, description) -queue.join_thread() + +from MAGIST.NLP.AudioTranscriber import GoogleAudioTranscriber + +transcriber = GoogleAudioTranscriber("config.json") + +text = transcriber.microphone_listener() + +from MAGIST.NLP.SelfAttention import TextPreprocessing + +selfattention = TextPreprocessing("config.json") + +selected = [] +for i in selfattention.__call__(text): + if i[2] == "Good": + selected.append(i[1]) + + +search_res = [] +for i in selected: + res = neural_db.search_obj_details(i) + if res != []: + search_res.append(res) + +search_res = np.array(search_res) +search_res = np.squeeze(search_res) + +print(search_res[0][3]) \ No newline at end of file