-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.html
569 lines (430 loc) · 25.1 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=2">
<meta name="theme-color" content="#222">
<meta name="generator" content="Hexo 5.4.0">
<link rel="apple-touch-icon" sizes="180x180" href="/images/apple-touch-icon-next.png">
<link rel="icon" type="image/png" sizes="32x32" href="/images/favicon-32x32-next.png">
<link rel="icon" type="image/png" sizes="16x16" href="/images/favicon-16x16-next.png">
<link rel="mask-icon" href="/images/logo.svg" color="#222">
<link rel="stylesheet" href="/css/main.css">
<link rel="stylesheet" href="/lib/font-awesome/css/all.min.css">
<script id="hexo-configurations">
// Global namespace object: reuse window.NexT when it is already defined,
// otherwise start from an empty object.
var NexT = window.NexT || {};
// Site-wide settings exposed as a global object literal (hostname, root,
// scheme, version, sidebar, copycode, back2top, bookmark, comments, algolia,
// localsearch, motion, ...). The inline script further down this page reads
// CONFIG.comments.storage from it.
var CONFIG = {"hostname":"example.com","root":"/","scheme":"Muse","version":"7.8.0","exturl":false,"sidebar":{"position":"left","display":"post","padding":18,"offset":12,"onmobile":false},"copycode":{"enable":false,"show_result":false,"style":null},"back2top":{"enable":true,"sidebar":false,"scrollpercent":false},"bookmark":{"enable":false,"color":"#222","save":"auto"},"fancybox":false,"mediumzoom":false,"lazyload":false,"pangu":false,"comments":{"style":"tabs","active":null,"storage":true,"lazyload":false,"nav":null},"algolia":{"hits":{"per_page":10},"labels":{"input_placeholder":"Search for Posts","hits_empty":"We didn't find any results for the search: ${query}","hits_stats":"${hits} results found in ${time} ms"}},"localsearch":{"enable":false,"trigger":"auto","top_n_per_article":1,"unescape":false,"preload":false},"motion":{"enable":true,"async":false,"transition":{"post_block":"fadeIn","post_header":"slideDownIn","post_body":"slideDownIn","coll_header":"slideLeftIn","sidebar":"slideUpIn"}}};
</script>
<meta property="og:type" content="website">
<meta property="og:title" content="阿翟的炼丹之旅">
<meta property="og:url" content="http://example.com/index.html">
<meta property="og:site_name" content="阿翟的炼丹之旅">
<meta property="og:locale" content="zh_CN">
<meta property="article:author" content="ZhaiYukun">
<meta name="twitter:card" content="summary">
<link rel="canonical" href="http://example.com/">
<script id="page-configurations">
// Per-page settings attached to the global CONFIG object.
// Reference for the underlying template variables:
// https://hexo.io/docs/variables.html
CONFIG.page = {
sidebar: "",
isHome : true,
isPost : false,
// NOTE(review): 'cn' is not a standard language tag (Chinese is usually
// 'zh-CN'); presumably it comes from the site's language setting — confirm.
lang : 'cn'
};
</script>
<title>阿翟的炼丹之旅</title>
<noscript>
<style>
.use-motion .brand,
.use-motion .menu-item,
.sidebar-inner,
.use-motion .post-block,
.use-motion .pagination,
.use-motion .comments,
.use-motion .post-header,
.use-motion .post-body,
.use-motion .collection-header { opacity: initial; }
.use-motion .site-title,
.use-motion .site-subtitle {
opacity: initial;
top: initial;
}
.use-motion .logo-line-before i { left: initial; }
.use-motion .logo-line-after i { right: initial; }
</style>
</noscript>
</head>
<body itemscope itemtype="http://schema.org/WebPage">
<div class="container use-motion">
<div class="headband"></div>
<header class="header" itemscope itemtype="http://schema.org/WPHeader">
<div class="header-inner"><div class="site-brand-container">
<div class="site-nav-toggle">
<div class="toggle" aria-label="تشغيل شريط التصفح">
<span class="toggle-line toggle-line-first"></span>
<span class="toggle-line toggle-line-middle"></span>
<span class="toggle-line toggle-line-last"></span>
</div>
</div>
<div class="site-meta">
<a href="/" class="brand" rel="start">
<span class="logo-line-before"><i></i></span>
<h1 class="site-title">阿翟的炼丹之旅</h1>
<span class="logo-line-after"><i></i></span>
</a>
<p class="site-subtitle" itemprop="description">修炼境界:筑基期</p>
</div>
<div class="site-nav-right">
<div class="toggle popup-trigger">
</div>
</div>
</div>
<nav class="site-nav">
<ul id="menu" class="main-menu menu">
<li class="menu-item menu-item-home">
<a href="/" rel="section"><i class="fa fa-home fa-fw"></i>Home</a>
</li>
<li class="menu-item menu-item-archives">
<a href="/archives/" rel="section"><i class="fa fa-archive fa-fw"></i>الأرشيفات</a>
</li>
</ul>
</nav>
</div>
</header>
<div class="back-to-top">
<i class="fa fa-arrow-up"></i>
<span>0%</span>
</div>
<main class="main">
<div class="main-inner">
<div class="content-wrap">
<div class="content index posts-expand">
<article itemscope itemtype="http://schema.org/Article" class="post-block" lang="zh-CN">
<link itemprop="mainEntityOfPage" href="http://example.com/2021/08/04/MaskTrack-RCNN%EF%BC%9AVideo-Instance-Segmentation/">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="image" content="/images/avatar.gif">
<meta itemprop="name" content="ZhaiYukun">
<meta itemprop="description" content="">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="阿翟的炼丹之旅">
</span>
<header class="post-header">
<h2 class="post-title" itemprop="name headline">
<a href="/2021/08/04/MaskTrack-RCNN%EF%BC%9AVideo-Instance-Segmentation/" class="post-title-link" itemprop="url">MaskTrack-RCNN:Video Instance Segmentation</a>
</h2>
<div class="post-meta">
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar"></i>
</span>
<span class="post-meta-item-text">نُشر في</span>
<time title="أُنشأ: 2021-08-04 20:11:16" itemprop="dateCreated datePublished" datetime="2021-08-04T20:11:16+08:00">2021-08-04</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar-check"></i>
</span>
<span class="post-meta-item-text">عُدل في</span>
<time title="عُدل: 2021-08-05 12:46:26" itemprop="dateModified" datetime="2021-08-05T12:46:26+08:00">2021-08-05</time>
</span>
</div>
</header>
<div class="post-body" itemprop="articleBody">
<h1 id="MaskTrack-RCNN《Video-Instance-Segmentation》"><a href="#MaskTrack-RCNN《Video-Instance-Segmentation》" class="headerlink" title="MaskTrack-RCNN《Video Instance Segmentation》"></a>MaskTrack-RCNN《Video Instance Segmentation》</h1><h1 id="论文解读"><a href="#论文解读" class="headerlink" title="论文解读"></a>论文解读</h1><p>论文地址:<a target="_blank" rel="noopener" href="http://openaccess.thecvf.com/content_ICCV_2019/papers/Yang_Video_Instance_Segmentation_ICCV_2019_paper.pdf">http://openaccess.thecvf.com/content_ICCV_2019/papers/Yang_Video_Instance_Segmentation_ICCV_2019_paper.pdf</a></p>
<p>代码地址:<a target="_blank" rel="noopener" href="https://github.com/youtubevos/MaskTrackRCNN">https://github.com/youtubevos/MaskTrackRCNN</a></p>
<p>数据集地址:<a target="_blank" rel="noopener" href="https://competitions.codalab.org/competitions/2898">https://competitions.codalab.org/competitions/2898</a></p>
<h3 id="相似任务区别"><a href="#相似任务区别" class="headerlink" title="相似任务区别"></a>相似任务区别</h3><table>
<thead>
<tr>
<th>任务</th>
<th>定义</th>
<th>区别</th>
</tr>
</thead>
<tbody><tr>
<td>VSS(Video Semantic Segmentation)</td>
<td>在每一帧进行语义分割</td>
<td>没有考虑不同帧之间目标实例的匹配</td>
</tr>
<tr>
<td>VOS (Video Object Segmentation)</td>
<td>半监督:给定第一帧的mask,将目标从视频背景中分离出来 无监督:不需要给第一帧mask,不需要区分实例,只需要分割出单个目标即可</td>
<td>没有考虑实例信息</td>
</tr>
<tr>
<td>VOD (Video Object Detection)</td>
<td>检测目标,但是不需要分割</td>
<td>没有分割和追踪</td>
</tr>
<tr>
<td>VOT(video Object Tracking)</td>
<td>DBT(Detection-Based Tracking):在每一帧进行目标检测,再利用目标检测的结果来进行目标跟踪 DFT(Detection-Free Tracking):在第一帧给定初始框,无需检测器进行追踪</td>
<td>只进行检测,没有分割</td>
</tr>
</tbody></table>
<hr>
<h2 id="要解决什么问题"><a href="#要解决什么问题" class="headerlink" title="要解决什么问题?"></a>要解决什么问题?</h2><ol>
<li>实例分割仅局限于图像领域,未扩展到视频领域。</li>
<li>没有合适的针对视频实例分割的数据集(无benchmark)。</li>
<li>相较于传统图像实例分割,视频实例分割要考虑在不同帧之间实例的追踪。</li>
</ol>
<hr>
<h2 id="如何解决该问题?"><a href="#如何解决该问题?" class="headerlink" title="如何解决该问题?"></a>如何解决该问题?</h2><ol>
<li>提出了新的计算机视觉任务:视频实例分割VIS</li>
</ol>
<blockquote>
<p>目标是同时检测、分割和追踪视频中的实例。</p>
</blockquote>
<p><img src="https://gitee.com/gumengyihan/photo/raw/master//pic/Untitled.png" alt="MaskTrack-RCNN%EF%BC%9AVideo%20Instance%20Segmentation%20665c684dd5594646a53aaca569ed0d34/Untitled.png"></p>
<ol>
<li>提出了大规模基准数据集YouTube-VIS,包含2883个高分辨率YouTube视频,40个种类,131k个高质量实例掩码标注。</li>
<li>提出了新的算法MaskTrackRCNN,在Mask R-CNN的基础上添加追踪分支。预测的实例信息被保存在外部存储器中,之后会和其它帧的实例进行匹配。</li>
</ol>
<hr>
<h2 id="具体实现细节?"><a href="#具体实现细节?" class="headerlink" title="具体实现细节?"></a>具体实现细节?</h2><h3 id="1-首先定义任务VIS:"><a href="#1-首先定义任务VIS:" class="headerlink" title="(1) 首先定义任务VIS:"></a>(1) 首先定义任务VIS:</h3><ol>
<li>给定预定义类别集合C, K是种类数量</li>
</ol>
<p><img src="https://gitee.com/gumengyihan/photo/raw/master//pic/Untitled%201.png" alt="MaskTrack-RCNN%EF%BC%9AVideo%20Instance%20Segmentation%20665c684dd5594646a53aaca569ed0d34/Untitled%201.png"></p>
<ol>
<li><p>给定视频序列T帧,假设该视频序列有N个属于C的目标</p>
</li>
<li><p>对于每个物体i,ci表示其类别</p>
<p><img src="https://gitee.com/gumengyihan/photo/raw/master//pic/Untitled%202.png" alt="MaskTrack-RCNN%EF%BC%9AVideo%20Instance%20Segmentation%20665c684dd5594646a53aaca569ed0d34/Untitled%202.png"></p>
</li>
<li><p>对于每个目标i,下式表示其在视频序列中的分割掩码,p属于[1,T],q属于[p,T],分别表示开始和结束时间</p>
<p><img src="https://gitee.com/gumengyihan/photo/raw/master//pic/Untitled%203.png" alt="MaskTrack-RCNN%EF%BC%9AVideo%20Instance%20Segmentation%20665c684dd5594646a53aaca569ed0d34/Untitled%203.png"></p>
</li>
<li><p>对于每一个视频序列,模型生成H个实例的预测信息,对于其中一个实例j,预测信息应包括:类别,置信度,掩码序列。(在最后生成预测结果时,p取第一帧,q取最后一帧,如果某些帧该实例未存在,则该帧mask为NULL)</p>
</li>
</ol>
<p><img src="https://gitee.com/gumengyihan/photo/raw/master//pic/Untitled%204.png" alt="MaskTrack-RCNN%EF%BC%9AVideo%20Instance%20Segmentation%20665c684dd5594646a53aaca569ed0d34/Untitled%204.png"></p>
<p><img src="https://gitee.com/gumengyihan/photo/raw/master//pic/Untitled%205.png" alt="MaskTrack-RCNN%EF%BC%9AVideo%20Instance%20Segmentation%20665c684dd5594646a53aaca569ed0d34/Untitled%205.png"></p>
<p><img src="https://gitee.com/gumengyihan/photo/raw/master//pic/Untitled%206.png" alt="MaskTrack-RCNN%EF%BC%9AVideo%20Instance%20Segmentation%20665c684dd5594646a53aaca569ed0d34/Untitled%206.png"></p>
<h3 id="2-评测标准"><a href="#2-评测标准" class="headerlink" title="(2) 评测标准"></a>(2) 评测标准</h3><p>使用AP和AR进行评测,但IOU的计算稍有不同,因为是针对一个视频序列进行评测,因此把某个实例GT的mask序列和预测的mask序列都扩展到从视频帧开始到结束的时间段,如果某一帧不存在,就padding为0。</p>
<p><img src="https://gitee.com/gumengyihan/photo/raw/master//pic/Untitled%207.png" alt="MaskTrack-RCNN%EF%BC%9AVideo%20Instance%20Segmentation%20665c684dd5594646a53aaca569ed0d34/Untitled%207.png"></p>
<h3 id="3-YouTube-VIS"><a href="#3-YouTube-VIS" class="headerlink" title="(3) YouTube-VIS"></a>(3) YouTube-VIS</h3><ul>
<li>每个视频长度为3-6s,速度为30fps,每1s取5帧图像,也就是每个视频大约15-30张图像。</li>
<li>共2883个视频(采样自包含4453个视频的YouTube-VOS)。</li>
<li>总共有40个种类,131k个实例标注。</li>
<li>在YouTube-VOS数据集的基础上采样并重新标注得到的。</li>
</ul>
<h3 id="4-MaskTrack-R-CNN算法"><a href="#4-MaskTrack-R-CNN算法" class="headerlink" title="(4) MaskTrack R-CNN算法"></a>(4) MaskTrack R-CNN算法</h3><p><img src="https://gitee.com/gumengyihan/photo/raw/master//pic/Untitled%208.png" alt="MaskTrack-RCNN%EF%BC%9AVideo%20Instance%20Segmentation%20665c684dd5594646a53aaca569ed0d34/Untitled%208.png"></p>
<ul>
<li>在Mask R-CNN原有的分类、回归和掩码生成分支上,增加了第四个分支,利用外部的存储空间(external memory)来追踪不同帧之间的实例。追踪分支主要是利用物体的外观相似性(appearance similarity)来进行实例之间的匹配。同时,本文也提出了一种简单却有效的方法来结合其它线索,比如语义一致性(semantic consistency)和空间关联(spatial correlation)。</li>
<li>推理阶段使用的在线推理的方法(只知道当前帧前面帧的信息,而不知道后面帧的信息)。</li>
</ul>
<hr>
<ol>
<li>追踪分支</li>
</ol>
<blockquote>
<p>目的:给每个候选框分配实例标签(给实例标一个序号,以区分不同实例)。</p>
</blockquote>
<ul>
<li><p>假设已经有了N个确认的实例,给新的候选框分配标签就只有两种可能</p>
<p>(a) 新的候选框就是N个实例之一</p>
<p>(b) 新的候选框是未曾出现的实例</p>
</li>
<li><p>从而将问题转化为:多标签分类问题(N+1类,N代表已经确认的实例,1代表未曾出现的实例,用数字0表示),将标签分配给候选框的概率可以定义为下式(衡量两个实例的相似度)</p>
</li>
</ul>
<p><img src="https://gitee.com/gumengyihan/photo/raw/master//pic/Untitled%209.png" alt="MaskTrack-RCNN%EF%BC%9AVideo%20Instance%20Segmentation%20665c684dd5594646a53aaca569ed0d34/Untitled%209.png"></p>
<ul>
<li>其中,fi代表新的候选框的特征,fj代表已经确认的N个候选框中第j个候选框的特征,它们都是一维向量(追踪分支用两层全连接层实现,输入是候选框的RoIAlign特征,输出是新的特征)。既然之前帧已经识别出的实例的特征已经经过计算,可以利用外部空间来存储其新特征提高效率。</li>
<li>当一个新的候选框被分配实例标签时,我们会动态更新外部空间(如果候选框已经属于某个存在的实例,我们会将memory中的该实例的特征更新,换成最新的)。</li>
<li>如果候选物体被分配为标签0,那么将他的特征嵌入memory中,并将所有已经识别出的实例标签增加1。loss</li>
</ul>
<hr>
<ul>
<li>该分支的损失使用交叉熵损失,yi是GT</li>
</ul>
<p><img src="https://gitee.com/gumengyihan/photo/raw/master//pic/Untitled%2010.png" alt="MaskTrack-RCNN%EF%BC%9AVideo%20Instance%20Segmentation%20665c684dd5594646a53aaca569ed0d34/Untitled%2010.png"></p>
<ul>
<li>总体网络的损失:</li>
</ul>
<p><img src="https://gitee.com/gumengyihan/photo/raw/master//pic/Untitled%2011.png" alt="MaskTrack-RCNN%EF%BC%9AVideo%20Instance%20Segmentation%20665c684dd5594646a53aaca569ed0d34/Untitled%2011.png"></p>
<hr>
<ol>
<li>训练过程(训练追踪分支)</li>
</ol>
<blockquote>
<p>随机从一个视频中选取两帧,第一帧为参考帧,第二帧为查询帧</p>
</blockquote>
<ul>
<li>reference frame 参考帧:不通过网络得到候选框,而是直接从GT的实例区域中提取特征,将他们保存到外部空间</li>
<li>query frame 查询帧:在第一阶段产生候选框,将IOU大于70%的候选框作为正样本,送入追踪分支,进行实例匹配。</li>
</ul>
<hr>
<ol>
<li>结合其它线索</li>
</ol>
<blockquote>
<p>仅用外观特征相似度来计算实例匹配是否不太够,那么能否结合其它视觉线索,比如语义一致性、空间关联和检测置信度共同作为匹配标准?</p>
</blockquote>
<ul>
<li>具体来说对于一个候选框i,bi、ci、si分别代表预测的bbox,class和score。bn、cn代表匹配的在外部空间中的实例的bbox和class,那么就可以定义分配分数(pi在上面有定义)</li>
</ul>
<p><img src="https://gitee.com/gumengyihan/photo/raw/master//pic/Untitled%2012.png" alt="MaskTrack-RCNN%EF%BC%9AVideo%20Instance%20Segmentation%20665c684dd5594646a53aaca569ed0d34/Untitled%2012.png"></p>
<ul>
<li>其中关于class的函数是一个脉冲函数,当类别相同时返回1,不同时返回0。</li>
</ul>
<p>注意,分配分数只适用于在测试阶段,训练时仍只使用外观相似度</p>
<hr>
<ol>
<li>推理过程</li>
</ol>
<ul>
<li>给出一个新的视频,外部空间设为空,已经识别出的实例个数设置为0。我们的方法会以online的方式顺序的处理每一帧。每一帧会产生很多预测的实例,这些实例先经过NMS(阈值为0.5),然后经过过滤的预测框会和之前帧产生的实例进行匹配。</li>
</ul>
<p>注意:为了防止歧义,匹配的实例是之前所有帧产生的实例,而不仅仅是上一帧。</p>
<ul>
<li>在预测第一帧时,其所有实例都会被送入memory中存储,对于我们的方法,一个标签可能会被分配给多个预测的实例,这与常识相违背(一个视频中的每一个实例只能有一个),因此我们只保留得分最高的实例。</li>
<li>当处理完所有帧,会产生一系列实例的预测,每个预测包含一个独一无二的实例标签、一个视频序列的二值分割图像、类别标签和检测置信度。我们将检测置信度取平均值作为整个视频序列中该实例的置信度,使用投票法获得该实例的类别。</li>
</ul>
<p><img src="https://gitee.com/gumengyihan/photo/raw/master//pic/Untitled%2013.png" alt="MaskTrack-RCNN%EF%BC%9AVideo%20Instance%20Segmentation%20665c684dd5594646a53aaca569ed0d34/Untitled%2013.png"></p>
<hr>
<h1 id="效果如何?"><a href="#效果如何?" class="headerlink" title="效果如何?"></a>效果如何?</h1><h3 id="1-数据集划分"><a href="#1-数据集划分" class="headerlink" title="1. 数据集划分"></a>1. 数据集划分</h3><table>
<thead>
<tr>
<th>集合</th>
<th>数量</th>
<th>性质</th>
</tr>
</thead>
<tbody><tr>
<td>训练集</td>
<td>2238</td>
<td></td>
</tr>
<tr>
<td>验证集</td>
<td>302</td>
<td>保证每个种类至少4个实例</td>
</tr>
<tr>
<td>测试集</td>
<td>343</td>
<td>保证每个种类至少4个实例</td>
</tr>
</tbody></table>
<h3 id="2-参数:"><a href="#2-参数:" class="headerlink" title="2. 参数:"></a>2. 参数:</h3><ul>
<li>backbone:resnet-50-fpn(在coco上预训练)</li>
<li>跟踪分支:两个全连接层(第一层将7*7*256的特征图→1024维向量,第二层将1024→1024)</li>
<li>epochs:12</li>
<li>lr:0.05 [8, 11](缩小10倍)</li>
<li>超参数:a:1,b:2,y:10</li>
<li>输入图像大小:640*360</li>
</ul>
<h3 id="3-结果"><a href="#3-结果" class="headerlink" title="3. 结果"></a>3. 结果</h3><p>在单块nvidia 1080ti上推理速度可以达到20FPS</p>
<p><img src="https://gitee.com/gumengyihan/photo/raw/master//pic/Untitled%2014.png" alt="MaskTrack-RCNN%EF%BC%9AVideo%20Instance%20Segmentation%20665c684dd5594646a53aaca569ed0d34/Untitled%2014.png"></p>
<h3 id="4-badcase分析:"><a href="#4-badcase分析:" class="headerlink" title="4. badcase分析:"></a>4. badcase分析:</h3><p><img src="https://gitee.com/gumengyihan/photo/raw/master//pic/Untitled%2015.png" alt="MaskTrack-RCNN%EF%BC%9AVideo%20Instance%20Segmentation%20665c684dd5594646a53aaca569ed0d34/Untitled%2015.png"></p>
<p>e:deer鹿在不同的姿势下其外形变化很大,算法将其视为两类不同的物体。</p>
<p>d:在水箱中有很多🐟移动,互相形成遮挡,算法在第二帧和第三帧将两个互相遮挡的🐟视为了同一条🐟,影响了后续的识别。</p>
<h3 id="5-消融实验:"><a href="#5-消融实验:" class="headerlink" title="5. 消融实验:"></a>5. 消融实验:</h3><p><img src="https://gitee.com/gumengyihan/photo/raw/master//pic/Untitled%2016.png" alt="MaskTrack-RCNN%EF%BC%9AVideo%20Instance%20Segmentation%20665c684dd5594646a53aaca569ed0d34/Untitled%2016.png"></p>
<ul>
<li>对不同视觉线索对该方法的影响做了消融实验,分别有:检测置信度,bbox的iou,类别一致性</li>
</ul>
<p>实验发现,iou和类别一致性影响最大(❓但是,明明看实验结果iou影响不大)</p>
<ul>
<li>解释:iou与空间联系有关,类别一致性提供了强有力的约束(一个视频序列中,一个实例的label应该不发生改变)。</li>
<li>但是,过度依赖这些视觉线索会造成错误的预测,因此我们只在测试阶段将其作为软限制,而不在训练阶段使用它们。</li>
</ul>
<hr>
<h2 id="总结"><a href="#总结" class="headerlink" title="总结"></a>总结</h2><p>本文的贡献点主要在于提出了新的视觉任务。</p>
</div>
<footer class="post-footer">
<div class="post-eof"></div>
</footer>
</article>
</div>
<script>
// When the tabs announce themselves, re-open the comment tab the visitor
// used last time (kept in localStorage under 'comments_active'), falling
// back to the configured default when nothing has been stored.
window.addEventListener('tabs:register', () => {
  const commentSettings = CONFIG.comments;
  let preferredTab = commentSettings.activeClass;
  if (commentSettings.storage) {
    preferredTab = localStorage.getItem('comments_active') || preferredTab;
  }
  // No stored or configured preference: leave the tabs as rendered.
  if (!preferredTab) return;
  const tabLink = document.querySelector(`a[href="#comment-${preferredTab}"]`);
  if (tabLink) tabLink.click();
});

// Remember which comment tab was opened, but only when storage is enabled.
if (CONFIG.comments.storage) {
  window.addEventListener('tabs:click', ({ target }) => {
    // Only panes that belong to the comment tab group are recorded.
    if (target.matches('.tabs-comment .tab-content .tab-pane')) {
      // The second class on the pane is the value that gets stored.
      localStorage.setItem('comments_active', target.classList[1]);
    }
  });
}
</script>
</div>
<div class="toggle sidebar-toggle">
<span class="toggle-line toggle-line-first"></span>
<span class="toggle-line toggle-line-middle"></span>
<span class="toggle-line toggle-line-last"></span>
</div>
<aside class="sidebar">
<div class="sidebar-inner">
<ul class="sidebar-nav motion-element">
<li class="sidebar-nav-toc">
المحتويات
</li>
<li class="sidebar-nav-overview">
عام
</li>
</ul>
<!--noindex-->
<div class="post-toc-wrap sidebar-panel">
</div>
<!--/noindex-->
<div class="site-overview-wrap sidebar-panel">
<div class="site-author motion-element" itemprop="author" itemscope itemtype="http://schema.org/Person">
<p class="site-author-name" itemprop="name">ZhaiYukun</p>
<div class="site-description" itemprop="description"></div>
</div>
<div class="site-state-wrap motion-element">
<nav class="site-state">
<div class="site-state-item site-state-posts">
<a href="/archives/">
<span class="site-state-item-count">1</span>
<span class="site-state-item-name">المقالات</span>
</a>
</div>
<div class="site-state-item site-state-tags">
<span class="site-state-item-count">2</span>
<span class="site-state-item-name">الوسوم</span>
</div>
</nav>
</div>
</div>
</div>
</aside>
<div id="sidebar-dimmer"></div>
</div>
</main>
<footer class="footer">
<div class="footer-inner">
<div class="copyright">
©
<span itemprop="copyrightYear">2021</span>
<span class="with-love">
<i class="fa fa-heart"></i>
</span>
<span class="author" itemprop="copyrightHolder">ZhaiYukun</span>
</div>
<div class="powered-by">تطبيق الموقع <a href="https://hexo.io/" class="theme-link" rel="noopener" target="_blank">Hexo</a> & <a href="https://muse.theme-next.org/" class="theme-link" rel="noopener" target="_blank">NexT.Muse</a>
</div>
</div>
</footer>
</div>
<script src="/lib/anime.min.js"></script>
<script src="/lib/velocity/velocity.min.js"></script>
<script src="/lib/velocity/velocity.ui.min.js"></script>
<script src="/js/utils.js"></script>
<script src="/js/motion.js"></script>
<script src="/js/schemes/muse.js"></script>
<script src="/js/next-boot.js"></script>
<script src="/live2dw/lib/L2Dwidget.min.js?094cbace49a39548bed64abff5988b05"></script><script>L2Dwidget.init({"model":{"jsonPath":"/live2dw/assets/hibiki.model.json"},"display":{"position":"right","width":150,"height":400},"mobile":{"show":false},"log":false,"pluginJsPath":"lib/","pluginModelPath":"assets/","pluginRootPath":"live2dw/","tagMode":false});</script></body>
</html>