Update index.html

MICV-yonsei · Nov 27, 2024 · 472ac15
1 parent f4e4cfa
commit 472ac15
Showing 1 changed file with 4 additions and 14 deletions.
diff --git a/cass/index.html b/cass/index.html
@@ -313,16 +313,12 @@ <h3 class="title is-3 has-text-centered">Method</h3>
 
         <br>
         <h4 class="title is-4 has-text-centered">Overall Pipeline</h4>
-        <div class="content">
-          <div style="text-align: center;">
+        <div class="content has-text-centered">
             <img src="./static/images/overview.png" alt="Main figure" width="75%">
-          </div>
-          <div style="display: flex; justify-content: center;">
             <p style="max-width: 800px; text-align: center;">
               We present CASS, an object-level Context-Aware training-free open-vocabulary Semantic Segmentation model. 
               Our method distills the vision foundation model's (VFM) object-level contextual spectral graph into CLIP's attention and refines query text embeddings towards object-specific semantics.
             </p>
-          </div>
         </div>
 
         <br>
@@ -339,8 +335,7 @@ <h4 class="title is-4 has-text-centered">Spectral Object-Level Context Distillat
         <h4 class="title is-4 has-text-centered">Object Presence-Driven Object-Level Context</h4>
         <div class="content has-text-centered">
           <img src="./static/images/OTA.png" alt="Main figure" width="80%">
-          <div style="display: flex; justify-content: center;">
-            <p style="max-width: 800px; text-align: center;">
+            <p>
               Detailed illustration of our object presence prior-guided text embedding adjustment module.
               The CLIP text encoder generates text embeddings for each object class, and the object presence prior is derived from both visual and text embeddings. 
               Within hierarchically defined class groups, text embeddings are selected based on object presence prior, then refined in an object-specific direction to align with components likely present in the image.
@@ -364,17 +359,12 @@ <h3 class="title is-3 has-text-centered">Visualization</h3>
 
         <br>
         <h4 class="title is-4 has-text-centered">Effect of Spectral Object-Level Context Distillation</h4>
-        <div class="content">
-          <div style="text-align: center;">
+        <div class="content has-text-centered">
             <img src="./static/images/attention.png" alt="attention visualization" width="80%">
-          </div>
-          <div style="display: flex; justify-content: center;">
-            <p style="max-width: 800px; text-align: justify;">
+            <p>
               Attention score visualization for various query points. Left: Vanilla CLIP (A<sub>CLIP</sub>) shows noisy, unfocused attention. Center: VFM-to-CLIP distillation without low-rank eigenscaling shows partial object grouping with limited detail. Right: Incorporating our low-rank eigenscaling captures object-level context, improving grouping within a single object.
             </p>
-          </div>
         </div>
-
       </div>
     </div>