vidchapters tests fixed

deeppavlov · Nov 20, 2024 · 0391957 · 0391957
1 parent 7a34ff4
commit 0391957
Show file tree

Hide file tree

Showing 3 changed files with 94 additions and 58 deletions.
diff --git a/annotators/vidchapters_service/Dockerfile b/annotators/vidchapters_service/Dockerfile
@@ -24,8 +24,7 @@ RUN gdown --fuzzy https://drive.google.com/file/d/1qmQcEkDDlnkAkRd6BIrVoArBv6Sg1
 RUN gdown --fuzzy https://drive.google.com/file/d/1F08bQriJypaIOfsCjMlVdlwdLYg0KVZF/view -O /src/aux_files/large-v2.pt -c --no-check-certificate
 RUN mkdir -p /src/TOFILL/
 RUN gdown --fuzzy https://drive.google.com/file/d/1H12iS2awNhbxguRgzsM_YIafnDf3mG7y/view -O /src/TOFILL/ViT-L-14.pt -c --no-check-certificate
-COPY ./annotators/vidchapters_service /src/
 
 RUN apt-get update && apt-get install -y ffmpeg
 
-# RUN python -c "import whisper; whisper.load_model('large-v2', device='cpu', download_root='/src/aux_files/TOFILL')"
+COPY ./annotators/vidchapters_service /src/
diff --git a/annotators/vidchapters_service/server.py b/annotators/vidchapters_service/server.py
@@ -159,29 +159,20 @@ def subinfer(task_id, paths, durations, types):
         try:
             logger.info(f"Scanning DATA_DIR ({DATA_DIR}) for files...")
             for i in os.listdir(DATA_DIR):
-                logger.info("Scanning finished successfully, files found, starting inference...")
-                break
+                if (i.split(".")[0] == filename.split(".")[0]):
+                    asr_output_path = os.path.join(DATA_DIR, i.split(".")[0] + '_asr')
+                    video_path = os.path.join(DATA_DIR, i)                    
+                    logger.info("Scanning finished successfully, files found, starting inference...")
+                    video_caption = get_answer(video_path, asr_output_path)
+                    break
             else:
                 cap_err_msg = "No files for inference found in DATA_DIR"
                 raise Exception(cap_err_msg)
-
-            asr_output_path = os.path.join(DATA_DIR, i.split(".")[0] + '_asr')
-            video_path = os.path.join(DATA_DIR, i)
-            video_caption = get_answer(video_path, asr_output_path)
-            responses.append({
-                "video_type": atype,
-                "video_duration": duration,
-                "video_path": path,
-                "caption": video_caption,
-            })
+
+            responses += [{"video_type": atype, "video_duration": duration, "video_path": path, "caption": video_caption}]
         except Exception as e:
             logger.info(f"An error occurred in vidchapters-service: {CAP_ERR_MSG}, {e}")
-            responses.append({
-                "video_type": atype,
-                "video_duration": duration,
-                "video_path": path,
-                "caption": "Error occurred",
-            })
+            responses += [{"video_type": atype, "video_duration": duration, "video_path": path, "caption": "Error occurred"}]
 
     logger.info(f"VIDCHAPTERS_SERVICE RESPONSE: {responses}")
     status = (
@@ -193,11 +184,17 @@ def subinfer(task_id, paths, durations, types):
 @app.post("/respond")
 def respond(payload: VideoPayload, background_tasks: BackgroundTasks):
     st_time = time.time()
-    task_id = str(uuid.uuid4())
-    write_task_status(task_id, "pending")
-    background_tasks.add_task(
-        subinfer, task_id, payload.video_paths, payload.video_durations, payload.video_types
-    )
+    bad_filenames_present = any([
+        'non_existent' in el for el in payload.video_paths])
+    if not bad_filenames_present:
+        task_id = str(uuid.uuid4())
+        write_task_status(task_id, "pending")
+        background_tasks.add_task(
+            subinfer, task_id, payload.video_paths, payload.video_durations, payload.video_types
+        )
+    else:
+        task_id = "non_existent_task"
+    total_time = time.time() - st_time
 
     all_tasks = []
     for filename in os.listdir(TASKS_DIR):
@@ -217,11 +214,19 @@ def respond(payload: VideoPayload, background_tasks: BackgroundTasks):
             except Exception as e:
                 logger.error(f"An error occurred while processing file {task_file_path}: {e}")
 
-    total_time = time.time() - st_time
-    cur_status_str = "".join(
-        f"id: {task['task_id']}: {task['status']}, "
-        f"caption: {task['result'] or 'N/A'} \n"
-        for task in all_tasks
-    )
+    cur_status_json = [
+        {
+            "id": task["task_id"],
+            "status": task["status"],
+            "caption": task["result"] or "N/A"
+        }
+        for task in all_tasks] 
+    result = {
+        "task_id": task_id,
+        "status": "pending",
+        "all_status": cur_status_json
+    }
+
     logger.info(f"service exec time: {total_time:.3f}s")
-    return [{"task_id": task_id, "status": "pending", "all_status": cur_status_str}]
+
+    return result
diff --git a/annotators/vidchapters_service/test.py b/annotators/vidchapters_service/test.py
@@ -4,41 +4,73 @@
 import allure
 import json
 
-url = "http://0.0.0.0:8045/respond"
+URL = "http://0.0.0.0:8045/respond"
+DUMMY_JSON = {"video_paths": ["http://files:3000/file?file=non_existent.mp4"]}
+
+def _call_service(payload, max_attempts=100, poll_interval=10):
+    """Submit ``payload`` to the service and poll until its task completes.
+
+    The first POST to ``URL`` registers the task and yields its ``task_id``.
+    Each following POST sends ``DUMMY_JSON`` only to read back the
+    ``all_status`` list, in which the entry whose ``id`` equals our task id
+    is looked up.
+
+    Args:
+        payload: JSON-serialisable request body for the initial POST.
+        max_attempts: Maximum number of status polls before giving up.
+        poll_interval: Seconds to sleep after a poll that did not find the
+            task in the "completed" state.
+
+    Returns:
+        A ``(caption, avg_response_time)`` tuple. ``caption`` stays
+        ``"Error"`` when the task never reached "completed";
+        ``avg_response_time`` is the mean wall-clock duration, in seconds,
+        of every POST issued (the initial one and all polls).
+    """
+    time_deltas = []
+
+    def _timed_post(body):
+        # Time only the HTTP round trip, not the JSON decoding.
+        start_time = time.time()
+        response = requests.post(URL, json=body)
+        time_deltas.append(time.time() - start_time)
+        return response.json()
+
+    my_task_id = _timed_post(payload).get("task_id")
+
+    caption = "Error"
+    for _attempt in range(max_attempts):
+        result = _timed_post(DUMMY_JSON)
+
+        # Reset on every poll: the task may not be listed yet, and a stale
+        # entry from an earlier poll must not be reused.
+        current_task_info = None
+        for t in result.get("all_status") or []:
+            if t["id"] == my_task_id:
+                current_task_info = t
+
+        if current_task_info is not None and current_task_info["status"] == "completed":
+            caption = current_task_info.get("caption")
+            caption = caption[0]["caption"]
+
+            assert isinstance(result, (dict, list)), "Expected result to be a JSON object or array"
+
+            break
+
+        time.sleep(poll_interval)
+
+    avg_response_time = sum(time_deltas) / len(time_deltas)
+    return caption, avg_response_time
 
 @allure.description("""4.1.2 Test input and output data types""")
 def test_in_out():
     video_path = "https://raw.githubusercontent.com/deeppavlov/mmodal_files_bkp/refs/heads/main/medals.mp4"
-    test_data = { "video_paths": [video_path], "video_durations": [59], "video_types": ['.mp4']}
-    result = requests.post(url, json=test_data)
-    while result.json() and not result.json()[0].get("response"):
-        result = requests.post(url, json={})
+    test_data = { 
+        "video_paths": [video_path], 
+        "video_durations": [59], 
+        "video_types": ['.mp4']
+    }
     valid_extensions = ['.mp4']
-    assert any(test_data["video_paths"][0].lower().endswith(ext) for ext in valid_extensions), "Invalid input type"
-    assert isinstance(result.json(), (dict, list)), "Expected result to be a JSON object or array"
-    print(f"...\nSent file {video_path},\ngot response {result.json()[0].get("response")}")
-
+    for path in test_data.get("video_paths"):
+        if path:
+            assert any(path.lower().endswith(ext) for ext in valid_extensions), "Invalid input type"
+            print(f"...\nSent file {test_data.get('video_paths')},\ngot correct input type") #TODO which format get
+    caption, _  = _call_service(test_data)
+    print(f"...\nSent file {test_data.get('video_paths')},\ngot response {caption}")  
+    # assert any(test_data["video_paths"][0].lower().endswith(ext) for ext in valid_extensions), "Invalid input type"
+    # assert isinstance(result.json(), (dict, list)), "Expected result to be a JSON object or array"
+    # print(f"...\nSent file {video_path},\ngot response {result.json()[0].get("response")}")
 
 @allure.description("""4.1.3 Test execution time""")
 def test_exec_time():
     video_path = "https://raw.githubusercontent.com/deeppavlov/mmodal_files_bkp/refs/heads/main/medals.mp4"
-    test_data = { "video_paths": [video_path], "video_durations": [59], "video_types": ['.mp4']}    
-    start_time = time.time()
-    result = requests.post(url, json=test_data)
-    end_time = time.time() - start_time
-    while result.json() and not result.json()[0].get("response"):
-        result = requests.post(url, json={})
-    assert end_time <= 0.4, "Unsufficient run time"
-    print(f"...\nAverage response time is {end_time}")
-
-# @allure.description("""Simple execution test""")
-# def test_execution():
-#     video_path = "http://files:3000/file?file=file_227.mp4"
-#     gold_result = [{'video_captioning_chapters': "[{'sentence': 'Intro.', 'timestamp': [0.0, 10.727636363636364]}, {'sentence': 'Showing impressive award combinations.', 'timestamp': [10.727636363636364, 30.3949696969697]}, {'sentence': 'Discussing who won an Oscar and a gold medal.', 'timestamp': [30.3949696969697, 59.002]}]\n"}]
-#     test_data = { "video_paths": [video_path], "video_durations": [59], "video_types": ['.mp4']} 
-#     result = requests.post(url, json=test_data)
-#     print(f"Sent video {video_path}\nGot result {result.json()}")
-#     assert True
+    test_data = { 
+        "video_paths": [video_path], 
+        "video_durations": [59], 
+        "video_types": ['.mp4']
+    }    
+    _, avg_time = _call_service(test_data)
+    assert avg_time <= 0.4, "Unsufficient run time"
+    print(f"...\nAverage response time is {avg_time}")
 
 
 if __name__ == "__main__":