-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
143 lines (113 loc) · 4.7 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import json
import time
from PIL import Image
import streamlit as st
from utils import constant, prompt, response_model
from logic.grounding import (
extract_show_segment_info,
init_sam,
generate_mask,
save_cropped_images,
cropped_image,
generate_captioner,
caption_image,
save_to_txt,
init_openai,
execute_llm,
draw_selected_segment,
save_temp_image,
reset_segmentation_output,
show_table_stats
)
# Page-level options: title, icon, and a sidebar that starts expanded.
_PAGE_OPTIONS = {
    "page_title": "LLM Grounding Demo",
    "page_icon": "🔍",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_OPTIONS)

# Seed session state only for keys that are not stored yet, so values that
# already exist in the session are left untouched.
for _state_key, _initial_value in (
    ("is_running", False),
    ("system_prompt", prompt.SYSTEM_PROMPT),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
def start_analyze(image_file, user_goal):
    """Run the grounding pipeline and render its result in the Streamlit page.

    The pipeline clears previous segmentation output, generates masks for
    ``constant.TEMP_IMAGE``, crops and saves the segments, captions them,
    asks the LLM to pick a segment for ``user_goal``, and finally draws the
    selected segment together with a table of stage timings.

    Args:
        image_file: The uploaded image; it is only displayed here. The
            pipeline itself reads ``constant.TEMP_IMAGE`` (presumably written
            by the caller via ``save_temp_image`` -- verify against caller).
        user_goal: Goal text forwarded to ``execute_llm``.

    Returns:
        None. The function is a no-op when either argument is ``None``.
    """
    if image_file is None or user_goal is None:
        return

    st.subheader("**Processed Image**")
    st.image(image_file,
             caption="Webpage Screenshot", use_container_width=True)

    with st.spinner("Analyzing image..."):
        st.session_state.is_running = True
        try:
            print("Reset previous segmentation...")
            reset_segmentation_output()

            print("Loading SAM models...")
            sam_mask = init_sam()

            print("Generating masks...")
            masks = generate_mask(constant.TEMP_IMAGE, sam_mask)

            # perf_counter is monotonic, so the measured durations cannot be
            # skewed by wall-clock adjustments.
            print("Cropping images...")
            crop_time_start = time.perf_counter()
            cropped_images = cropped_image(constant.TEMP_IMAGE, masks)
            save_cropped_images(cropped_images)
            crop_time_inference = time.perf_counter() - crop_time_start

            print("Loading captioner models...")
            processor, model = generate_captioner(is_quantized=True)

            print("Generating segmentation caption...")
            caption_time_start = time.perf_counter()
            captions = caption_image(processor, model)
            caption_time_inference = time.perf_counter() - caption_time_start

            if captions is not None:
                print("Segmentation caption generated")
                print("Saving captions to txt...")
                save_to_txt(captions)
            else:
                # NOTE(review): the run continues without saved captions, as
                # before; confirm whether the LLM step should be skipped.
                print("Error generating captions")

            print("Initializing LLM...")
            openai = init_openai()

            print("Thinking...")
            llm_time_start = time.perf_counter()
            result = execute_llm(openai, user_goal, [])
            llm_time_inference = time.perf_counter() - llm_time_start

            # The reply is expected to be a JSON document; report a bad reply
            # in the UI instead of letting the exception escape.
            try:
                extract_json = json.loads(result.choices[0].message.content)
            except (json.JSONDecodeError, TypeError) as err:
                print(f"Error parsing LLM response: {err}")
                st.error("The LLM response could not be parsed as JSON.")
                return

            print("Showing selected segment...")
            st.subheader("**Grounding Result**")
            seg_index, coordinates = extract_show_segment_info(
                masks, extract_json)
            result_img = draw_selected_segment(seg_index, masks, coordinates)
            st.image(result_img, caption="Selected Segment",
                     use_container_width=True)

            print("\nProcess completed!")
            print(
                f"Time taken to crop images: {crop_time_inference:.4f} seconds")
            print(
                f"Time taken to generate captions: {caption_time_inference:.4f} seconds")
            print(f"Time taken to analyze: {llm_time_inference:.8f} seconds")
            show_table_stats(crop_time_inference,
                             caption_time_inference, llm_time_inference)
        finally:
            # Always clear the flag: a failure in any stage must not leave the
            # session marked as running, which would block later runs.
            st.session_state.is_running = False
# Decoded upload. Stays None when nothing is uploaded or the file cannot be
# opened, so the run trigger below can test it instead of hitting a NameError.
image = None

# Sidebar: collect the user goal, the screenshot and an optional prompt override.
with st.sidebar:
    st.title('Settings')
    user_goal = st.text_input(
        label="**User Goal**", placeholder="Enter specific user goals here")
    image_file = st.file_uploader(
        "**Upload an Image**", type=["jpg", "jpeg", "png"])
    if image_file is not None:
        try:
            image = Image.open(image_file)
        except Exception as e:
            # UI boundary: report any decode failure to the user and carry on.
            st.error("The uploaded file is not a valid image.")
            st.error(f"Error: {e}")
    with st.expander("**Current Prompt**"):
        # NOTE(review): this shows the default prompt, not the override stored
        # in st.session_state.system_prompt -- confirm which one is intended.
        st.markdown(prompt.SYSTEM_PROMPT)
    new_prompt = st.text_area(
        "**Update Prompt (Optional)**", placeholder="Enter a new prompt")
    analyze_clicked = st.button("Run Analyze")
    if analyze_clicked and user_goal and image_file and new_prompt:
        # new_prompt is non-empty here, so no fallback to the default is needed.
        st.session_state.system_prompt = new_prompt
        st.success("Prompt updated successfully!")

st.title('LLM Grounding Demo')
st.divider()

# Start only with a goal, a successfully decoded image, and no run in progress.
if (analyze_clicked and user_goal and image is not None
        and not st.session_state.is_running):
    save_temp_image(image)
    start_analyze(image_file, user_goal)
else:
    st.info("Please input user goals and upload an image to start analyzing!")