Skip to content

Commit

Permalink
vidchapters tests fixed
Browse files Browse the repository at this point in the history
  • Loading branch information
rock-n-shrimproll committed Nov 20, 2024
1 parent 7a34ff4 commit 0391957
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 58 deletions.
3 changes: 1 addition & 2 deletions annotators/vidchapters_service/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ RUN gdown --fuzzy https://drive.google.com/file/d/1qmQcEkDDlnkAkRd6BIrVoArBv6Sg1
RUN gdown --fuzzy https://drive.google.com/file/d/1F08bQriJypaIOfsCjMlVdlwdLYg0KVZF/view -O /src/aux_files/large-v2.pt -c --no-check-certificate
RUN mkdir -p /src/TOFILL/
RUN gdown --fuzzy https://drive.google.com/file/d/1H12iS2awNhbxguRgzsM_YIafnDf3mG7y/view -O /src/TOFILL/ViT-L-14.pt -c --no-check-certificate
COPY ./annotators/vidchapters_service /src/

RUN apt-get update && apt-get install -y ffmpeg

# RUN python -c "import whisper; whisper.load_model('large-v2', device='cpu', download_root='/src/aux_files/TOFILL')"
COPY ./annotators/vidchapters_service /src/
65 changes: 35 additions & 30 deletions annotators/vidchapters_service/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,29 +159,20 @@ def subinfer(task_id, paths, durations, types):
try:
logger.info(f"Scanning DATA_DIR ({DATA_DIR}) for files...")
for i in os.listdir(DATA_DIR):
logger.info("Scanning finished successfully, files found, starting inference...")
break
if (i.split(".")[0] == filename.split(".")[0]):
asr_output_path = os.path.join(DATA_DIR, i.split(".")[0] + '_asr')
video_path = os.path.join(DATA_DIR, i)
logger.info("Scanning finished successfully, files found, starting inference...")
video_caption = get_answer(video_path, asr_output_path)
break
else:
cap_err_msg = "No files for inference found in DATA_DIR"
raise Exception(cap_err_msg)

asr_output_path = os.path.join(DATA_DIR, i.split(".")[0] + '_asr')
video_path = os.path.join(DATA_DIR, i)
video_caption = get_answer(video_path, asr_output_path)
responses.append({
"video_type": atype,
"video_duration": duration,
"video_path": path,
"caption": video_caption,
})

responses += [{"video_type": atype, "video_duration": duration, "video_path": path, "caption": video_caption}]
except Exception as e:
logger.info(f"An error occurred in vidchapters-service: {CAP_ERR_MSG}, {e}")
responses.append({
"video_type": atype,
"video_duration": duration,
"video_path": path,
"caption": "Error occurred",
})
responses += [{"video_type": atype, "video_duration": duration, "video_path": path, "caption": "Error occurred"}]

logger.info(f"VIDCHAPTERS_SERVICE RESPONSE: {responses}")
status = (
Expand All @@ -193,11 +184,17 @@ def subinfer(task_id, paths, durations, types):
@app.post("/respond")
def respond(payload: VideoPayload, background_tasks: BackgroundTasks):
st_time = time.time()
task_id = str(uuid.uuid4())
write_task_status(task_id, "pending")
background_tasks.add_task(
subinfer, task_id, payload.video_paths, payload.video_durations, payload.video_types
)
bad_filenames_present = any([
'non_existent' in el for el in payload.video_paths])
if not bad_filenames_present:
task_id = str(uuid.uuid4())
write_task_status(task_id, "pending")
background_tasks.add_task(
subinfer, task_id, payload.video_paths, payload.video_durations, payload.video_types
)
else:
task_id = "non_existent_task"
total_time = time.time() - st_time

all_tasks = []
for filename in os.listdir(TASKS_DIR):
Expand All @@ -217,11 +214,19 @@ def respond(payload: VideoPayload, background_tasks: BackgroundTasks):
except Exception as e:
logger.error(f"An error occurred while processing file {task_file_path}: {e}")

total_time = time.time() - st_time
cur_status_str = "".join(
f"id: {task['task_id']}: {task['status']}, "
f"caption: {task['result'] or 'N/A'} \n"
for task in all_tasks
)
cur_status_json = [
{
"id": task["task_id"],
"status": task["status"],
"caption": task["result"] or "N/A"
}
for task in all_tasks]
result = {
"task_id": task_id,
"status": "pending",
"all_status": cur_status_json
}

logger.info(f"service exec time: {total_time:.3f}s")
return [{"task_id": task_id, "status": "pending", "all_status": cur_status_str}]

return result
84 changes: 58 additions & 26 deletions annotators/vidchapters_service/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,41 +4,73 @@
import allure
import json

url = "http://0.0.0.0:8045/respond"
URL = "http://0.0.0.0:8045/respond"
DUMMY_JSON = {"video_paths": ["http://files:3000/file?file=non_existent.mp4"]}

def _call_service(payload):
time_deltas = []
start_time = time.time()
result = requests.post(URL, json=payload)
stop_time = time.time()
time_deltas.append(stop_time - start_time)

result = result.json()
my_task_id = result.get("task_id")

caption = "Error"
for _attempt in range(100):
start_time = time.time()
result = requests.post(URL, json=DUMMY_JSON)
stop_time = time.time()
time_deltas.append(stop_time - start_time)
result = result.json()
all_tasks_info = result.get('all_status')
for t in all_tasks_info:
if t['id'] == my_task_id:
current_task_info = t
if current_task_info['status'] == "completed":
caption = current_task_info.get('caption')
caption = caption[0]['caption']

assert isinstance(result, (dict, list)), "Expected result to be a JSON object or array"

break
else:
time.sleep(10)

avg_response_time = sum(time_deltas) / len(time_deltas)
return caption, avg_response_time

@allure.description("""4.1.2 Test input and output data types""")
def test_in_out():
video_path = "https://raw.githubusercontent.com/deeppavlov/mmodal_files_bkp/refs/heads/main/medals.mp4"
test_data = { "video_paths": [video_path], "video_durations": [59], "video_types": ['.mp4']}
result = requests.post(url, json=test_data)
while result.json() and not result.json()[0].get("response"):
result = requests.post(url, json={})
test_data = {
"video_paths": [video_path],
"video_durations": [59],
"video_types": ['.mp4']
}
valid_extensions = ['.mp4']
assert any(test_data["video_paths"][0].lower().endswith(ext) for ext in valid_extensions), "Invalid input type"
assert isinstance(result.json(), (dict, list)), "Expected result to be a JSON object or array"
print(f"...\nSent file {video_path},\ngot response {result.json()[0].get("response")}")

for path in test_data.get("video_paths"):
if path:
assert any(path.lower().endswith(ext) for ext in valid_extensions), "Invalid input type"
print(f"...\nSent file {test_data.get('video_paths')},\ngot correct input type") #TODO which format get
caption, _ = _call_service(test_data)
print(f"...\nSent file {test_data.get('video_paths')},\ngot response {caption}")
# assert any(test_data["video_paths"][0].lower().endswith(ext) for ext in valid_extensions), "Invalid input type"
# assert isinstance(result.json(), (dict, list)), "Expected result to be a JSON object or array"
# print(f"...\nSent file {video_path},\ngot response {result.json()[0].get("response")}")

@allure.description("""4.1.3 Test execution time""")
def test_exec_time():
video_path = "https://raw.githubusercontent.com/deeppavlov/mmodal_files_bkp/refs/heads/main/medals.mp4"
test_data = { "video_paths": [video_path], "video_durations": [59], "video_types": ['.mp4']}
start_time = time.time()
result = requests.post(url, json=test_data)
end_time = time.time() - start_time
while result.json() and not result.json()[0].get("response"):
result = requests.post(url, json={})
assert end_time <= 0.4, "Unsufficient run time"
print(f"...\nAverage response time is {end_time}")

# @allure.description("""Simple execution test""")
# def test_execution():
# video_path = "http://files:3000/file?file=file_227.mp4"
# gold_result = [{'video_captioning_chapters': "[{'sentence': 'Intro.', 'timestamp': [0.0, 10.727636363636364]}, {'sentence': 'Showing impressive award combinations.', 'timestamp': [10.727636363636364, 30.3949696969697]}, {'sentence': 'Discussing who won an Oscar and a gold medal.', 'timestamp': [30.3949696969697, 59.002]}]\n"}]
# test_data = { "video_paths": [video_path], "video_durations": [59], "video_types": ['.mp4']}
# result = requests.post(url, json=test_data)
# print(f"Sent video {video_path}\nGot result {result.json()}")
# assert True
test_data = {
"video_paths": [video_path],
"video_durations": [59],
"video_types": ['.mp4']
}
_, avg_time = _call_service(test_data)
assert avg_time <= 0.4, "Unsufficient run time"
print(f"...\nAverage response time is {avg_time}")


if __name__ == "__main__":
Expand Down

0 comments on commit 0391957

Please sign in to comment.