<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="description" | ||
content="Deformable Neural Radiance Fields creates free-viewpoint portraits (nerfies) from casually captured videos."> | ||
<meta name="keywords" content="Nerfies, D-NeRF, NeRF"> | ||
<meta name="viewport" content="width=device-width, initial-scale=1"> | ||
<title>Rethinking Interactive Image Segmentation with Low Latency, High Quality, and Diverse Prompts</title> | ||
|
||
<!-- Global site tag (gtag.js) - Google Analytics --> | ||
<script async src="https://www.googletagmanager.com/gtag/js?id=G-PYVRSFMDRL"></script> | ||
<script> | ||
window.dataLayer = window.dataLayer || []; | ||
|
||
function gtag() { | ||
dataLayer.push(arguments); | ||
} | ||
|
||
gtag('js', new Date()); | ||
|
||
gtag('config', 'G-PYVRSFMDRL'); | ||
</script> | ||
|
||
<link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro" | ||
rel="stylesheet"> | ||
|
||
<link rel="stylesheet" href="./static/css/bulma.min.css"> | ||
<link rel="stylesheet" href="./static/css/bulma-carousel.min.css"> | ||
<link rel="stylesheet" href="./static/css/bulma-slider.min.css"> | ||
<link rel="stylesheet" href="./static/css/fontawesome.all.min.css"> | ||
<link rel="stylesheet" | ||
href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css"> | ||
<link rel="stylesheet" href="./static/css/index.css"> | ||
<link rel="icon" href="./static/images/favicon.svg"> | ||
|
||
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script> | ||
<script defer src="./static/js/fontawesome.all.min.js"></script> | ||
<script src="./static/js/bulma-carousel.min.js"></script> | ||
<script src="./static/js/bulma-slider.min.js"></script> | ||
<script src="./static/js/index.js"></script> | ||
</head> | ||
<body>

<section class="hero">
  <div class="hero-body">
    <div class="container is-max-desktop">
      <div class="columns is-centered">
        <div class="column has-text-centered">
          <h2 class="title is-2 publication-title">Rethinking Interactive Image Segmentation <br> with Low Latency, High Quality, and Diverse Prompts</h2>
          <div class="is-size-5 publication-authors">
            <span class="author-block">
              <a href="https://sites.google.com/cs.unc.edu/qinliu/home">Qin Liu</a>,</span>
            <span class="author-block">
              <a href="https://j-min.io">Jaemin Cho</a>,</span>
            <span class="author-block">
              <a href="https://www.cs.unc.edu/~mbansal/">Mohit Bansal</a>,</span>
            <span class="author-block">
              <a href="https://biag.cs.unc.edu/">Marc Niethammer</a></span>
          </div>

          <div class="is-size-5 publication-authors">
            <span class="author-block">University of North Carolina at Chapel Hill</span> <br>
            <span class="author-block">CVPR 2024</span>
          </div>

          <div class="column has-text-centered">
            <div class="publication-links">
              <!-- PDF Link. -->
              <span class="link-block">
                <a href="https://arxiv.org/abs/2404.00741"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                    <i class="fas fa-file-pdf"></i>
                  </span>
                  <span>Paper</span>
                </a>
              </span>
              <!-- Code Link. -->
              <span class="link-block">
                <a href="https://github.com/uncbiag/SegNext"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                    <i class="fab fa-github"></i>
                  </span>
                  <span>Code</span>
                </a>
              </span>
            </div>
          </div>
        </div>
      </div>
    </div>
  </div>
</section>

<section class="hero teaser">
  <div class="container is-max-desktop">
    <div class="hero-body">
      <center><img src="./static/images/teaser.png" width="750"></center>
      Conceptual comparison between our approach, <span class="dnerf">SegNext</span>, and prior state-of-the-art methods, SimpleClick and
      SAM, on the interactive segmentation task. Our method combines the best of both worlds, delivering interactive segmentation with low
      latency, high quality, and diverse prompts.
    </div>
  </div>
</section>

<section class="hero teaser">
  <div class="container is-max-desktop">
    <div class="hero-body">
      <center>
        <img src="./static/images/medal.gif" width="220">
        <img src="./static/images/bicyclestand.gif" width="317">
        <img src="./static/images/crack.gif" width="220">
      </center>
      <center>Demos of <span class="dnerf">SegNext</span> for high-quality segmentation on HQSeg-44K with one click.</center>
    </div>
  </div>
</section>

<section class="section">
  <div class="container is-max-desktop">
    <div class="columns is-centered has-text-centered">
      <div class="column is-four-fifths">
        <h2 class="title is-3">Abstract</h2>
        <div class="content has-text-justified">
          <p>
            The goal of interactive image segmentation is to delineate specific regions
            within an image via visual or language prompts. Low-latency and high-quality
            interactive segmentation with diverse prompts remains challenging for existing
            specialist and generalist models. Specialist models, with their limited
            prompts and task-specific designs, suffer from high latency because the image
            must be recomputed whenever the prompt is updated, due to the joint
            encoding of image and visual prompts. Generalist models, exemplified by the
            Segment Anything Model (SAM), have recently excelled in prompt diversity and
            efficiency, lifting image segmentation into the foundation-model era.
            However, for high-quality segmentations, SAM still lags behind state-of-the-art
            specialist models despite being trained with 100&times; more
            segmentation masks.
          </p>
          <p>
            In this work, we delve into the
            architectural differences between the two types of models.
            We observe that dense representation and fusion of visual
            prompts are the key design choices contributing to the high
            segmentation quality of specialist models. In light of this,
            we reintroduce this dense design into generalist models, to facilitate the
            development of generalist models with high segmentation quality. To densely
            represent diverse visual prompts, we propose to use a dense map that captures
            five prompt types: clicks, boxes, polygons, scribbles, and masks.
          </p>
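          <p>
            For illustration, the snippet below is a minimal sketch (not the paper's released code) of how such diverse
            prompts could be rasterized into one dense map; the channel layout and helper names are our assumptions.
          </p>
          <pre><code>import numpy as np

def rasterize_prompts(h, w, clicks=(), boxes=(), prev_mask=None):
    """Hypothetical 3-channel dense prompt map (illustrative layout).

    clicks: iterable of (y, x, is_positive); boxes: iterable of (y0, x0, y1, x1).
    Polygons and scribbles would be painted analogously as filled or drawn regions.
    """
    dense = np.zeros((3, h, w), dtype=np.float32)
    for y, x, positive in clicks:
        # Channel 0 holds positive prompts, channel 1 negative ones.
        dense[0 if positive else 1, y, x] = 1.0
    for y0, x0, y1, x1 in boxes:
        # A box becomes a filled rectangle in the positive channel.
        dense[0, y0:y1, x0:x1] = 1.0
    if prev_mask is not None:
        # Channel 2 carries an optional previous-mask prompt.
        dense[2] = prev_mask.astype(np.float32)
    return dense

# Example: one positive click plus a box prompt on a 64x64 grid.
dense = rasterize_prompts(64, 64, clicks=[(10, 12, True)], boxes=[(20, 20, 40, 40)])</code></pre>
          <p>
            Because every prompt type lands in the same map, editing a prompt only changes this cheap
            rasterization step rather than the image encoding.
          </p>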
          <p>
            Thus, we propose SegNext, a next-generation interactive
            segmentation approach offering low latency, high quality,
            and diverse prompt support. Our method outperforms current state-of-the-art methods on HQSeg-44K and DAVIS,
            both quantitatively and qualitatively.
          </p>
        </div>
      </div>
    </div>
  </div>
</section>

<section class="section">
  <div class="container is-max-desktop">
    <center><h2 class="title is-3">Method</h2></center>
    <center><img src="./static/images/method.png"></center>
    SegNext overview. We use a three-channel dense map to represent five diverse visual prompts: clicks, boxes, polygons, scribbles,
    and masks. The embeddings of the image and the visual prompts are fused by element-wise addition, followed by an enhanced fusion via one
    or two self-attention blocks. The language prompt is encoded as a vector by CLIP, which then queries the image embedding via
    cross-attention blocks to produce the mask embeddings. A lightweight decoder processes the mask embeddings into the segmentation.
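
    Below is a minimal PyTorch-style sketch of this fusion path, not the released implementation;
    module sizes, names, and the decoder structure are illustrative assumptions.
    <pre><code>import torch
import torch.nn as nn

class DenseFusionSketch(nn.Module):
    """Hypothetical sketch: dense prompt fusion plus an optional language query."""

    def __init__(self, embed_dim=256, num_heads=8, num_sa_blocks=2):
        super().__init__()
        # Project the 3-channel dense prompt map to the image-embedding width.
        self.prompt_embed = nn.Conv2d(3, embed_dim, kernel_size=1)
        # One or two self-attention blocks for the enhanced dense fusion.
        self.sa_blocks = nn.ModuleList(
            nn.TransformerEncoderLayer(embed_dim, num_heads, batch_first=True)
            for _ in range(num_sa_blocks)
        )
        # Cross-attention: a CLIP text vector queries the fused image tokens.
        self.cross_attn = nn.MultiheadAttention(embed_dim, num_heads, batch_first=True)
        # Lightweight decoder: upsample the mask embeddings to mask logits.
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(embed_dim, 64, kernel_size=2, stride=2),
            nn.GELU(),
            nn.Conv2d(64, 1, kernel_size=1),
        )

    def forward(self, image_emb, dense_prompt, text_vec=None):
        # image_emb: (B, C, H, W) from the image encoder;
        # dense_prompt: (B, 3, H, W), resized to the embedding resolution.
        b, c, h, w = image_emb.shape
        fused = image_emb + self.prompt_embed(dense_prompt)  # element-wise addition
        tokens = fused.flatten(2).transpose(1, 2)            # (B, H*W, C)
        for blk in self.sa_blocks:                           # enhanced dense fusion
            tokens = blk(tokens)
        if text_vec is not None:                             # (B, C) CLIP sentence vector
            query = text_vec.unsqueeze(1)                    # (B, 1, C)
            attended, _ = self.cross_attn(query, tokens, tokens)
            tokens = tokens + attended                       # inject the language cue
        mask_emb = tokens.transpose(1, 2).reshape(b, c, h, w)
        return self.decoder(mask_emb)                        # (B, 1, 2H, 2W) logits</code></pre>
    Note that in this sketch the image embedding is computed once and reused: editing a prompt only re-runs
    the cheap prompt embedding, fusion, and decoder, which is where the low latency comes from.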
  </div>
</section>

<section class="section">
  <div class="container is-max-desktop">
    <center><h2 class="title is-3">Experiments</h2></center><br>
    <center><img src="./static/images/result_click_curve.png"></center>
    Click-based segmentation evaluation on HQSeg-44K and DAVIS. Across varying numbers of clicks, our method consistently outperforms
    existing competitive approaches. The metric is mean Intersection over Union (mIoU).

    <br><br><br>

    <center><img src="./static/images/result_quanti_table.png"></center>
    Quantitative comparison with existing methods on HQSeg-44K and DAVIS. We compare against two types of baselines: specialist and
    generalist models. Our model achieves performance comparable to the specialist baselines but with significantly lower latency; it also
    achieves latency and segmentation quality comparable to the generalist models despite being trained on much
    less segmentation data. “HQ” denotes the HQSeg-44K dataset; “SA×2” denotes that the model has two self-attention blocks for dense fusion.

    <br><br><br>

    <center><img src="./static/images/result_qualitative.png"></center>
    Qualitative results with diverse prompts. Left: an example from DAVIS. Right: three examples from HQSeg-44K. The results
    were obtained by a user providing all the prompts with our best-performing model.

  </div>
</section>

<section class="section" id="BibTeX">
  <div class="container is-max-desktop content">
    <h2 class="title">BibTeX</h2>
    <pre><code>@inproceedings{liu2024rethinking,
  author    = {Liu, Qin and Cho, Jaemin and Bansal, Mohit and Niethammer, Marc},
  title     = {Rethinking Interactive Image Segmentation with Low Latency, High Quality, and Diverse Prompts},
  booktitle = {CVPR},
  year      = {2024},
}</code></pre>
  </div>
</section>

</body>
</html>