-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.html
1214 lines (839 loc) · 201 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Enjoy Short Life</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta property="og:type" content="website">
<meta property="og:title" content="Enjoy Short Life">
<meta property="og:url" content="https://sunyasheng.github.io/index.html">
<meta property="og:site_name" content="Enjoy Short Life">
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="Enjoy Short Life">
<link rel="alternate" href="/atom.xml" title="Enjoy Short Life" type="application/atom+xml">
<link rel="icon" href="/favicon.png">
<link href="https://fonts.googleapis.com/css?family=Source+Code+Pro" rel="stylesheet">
<link rel="stylesheet" href="/css/style.css">
</head>
<body>
<div id="container">
<div id="wrap">
<header id="header">
<div id="banner"></div>
<div id="header-outer" class="outer">
<div id="header-title" class="inner">
<h1 id="logo-wrap">
<a href="/" id="logo">Enjoy Short Life</a>
</h1>
</div>
<div id="header-inner" class="inner">
<nav id="main-nav">
<a id="main-nav-toggle" class="nav-icon"></a>
<a class="main-nav-link" href="/">Home</a>
<a class="main-nav-link" href="/archives">Archives</a>
</nav>
<nav id="sub-nav">
<a id="nav-rss-link" class="nav-icon" href="/atom.xml" title="RSS Feed"></a>
<a id="nav-search-btn" class="nav-icon" title="Search"></a>
</nav>
<div id="search-form-wrap">
<form action="//google.com/search" method="get" accept-charset="UTF-8" class="search-form"><input type="search" name="q" class="search-form-input" placeholder="Search"><button type="submit" class="search-form-submit"></button><input type="hidden" name="sitesearch" value="https://sunyasheng.github.io"></form>
</div>
</div>
</div>
</header>
<div class="outer">
<section id="main">
<article id="post-Byesian-Summary" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-meta">
<a href="/2018/04/21/Byesian-Summary/" class="article-date">
<time datetime="2018-04-21T07:48:11.000Z" itemprop="datePublished">2018-04-21</time>
</a>
</div>
<div class="article-inner">
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2018/04/21/Byesian-Summary/">Summary of Bayesian Theory</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<p>$$X \sim Beta(\alpha,\beta)$$<br>$$p(x;\alpha,\beta)=\frac{x^{\alpha-1}(1-x)^{\beta-1}}{B(\alpha,\beta)}$$<br>Note that if \(\alpha=1, \beta=1\), the Beta distribution boils down to the uniform distribution. Formally,<br>$$Beta(1,1) = U(0,1)$$</p>
<p>$$\theta \sim InvGamma(\alpha, \beta)$$ $$p(\theta) \propto \theta^{-\alpha-1}e^{-\frac{\beta}{\theta}} $$</p>
<p>$$\theta \sim Gamma(\alpha, \beta)$$ $$p(\theta) \propto \theta^{\alpha-1}e^{-\beta \theta} $$</p>
<h3 id="Conjugate-Prior"><a href="#Conjugate-Prior" class="headerlink" title="Conjugate Prior"></a>Conjugate Prior</h3><table>
<thead>
<tr>
<th style="text-align:center">prior distribution</th>
<th style="text-align:center">likelihood</th>
<th style="text-align:center">posterior distribution</th>
</tr>
</thead>
<tbody>
<tr>
<td style="text-align:center">\(Beta(\alpha,\beta)\)</td>
<td style="text-align:center">\(B(n,p)\)</td>
<td style="text-align:center">\(Beta(\alpha+y,\beta+n-y)\) for \(y\) successes in \(n\) trials</td>
</tr>
<tr>
<td style="text-align:center">\(Normal(m_0, \sigma_0)\)</td>
<td style="text-align:center">\(Normal(\theta, \sigma)\) (\(\theta\) unknown)</td>
<td style="text-align:center">Normal Distribution (Weighted)</td>
</tr>
<tr>
<td style="text-align:center">\(InvGamma(\alpha, \beta)\)</td>
<td style="text-align:center">\(Normal(\theta, \sigma)\) (\(\sigma\) unknown)</td>
<td style="text-align:center">InvGamma( \(\alpha + 0.5, \beta + \frac{(y-m)^2}{2}\) )</td>
</tr>
<tr>
<td style="text-align:center">\(Gamma(\alpha, \beta)\)</td>
<td style="text-align:center">\(Poisson(\theta)\)</td>
<td style="text-align:center">\(Gamma(\alpha+y, \beta + 1)\)</td>
</tr>
<tr>
<td style="text-align:center">\(Gamma(\alpha, \beta)\)</td>
<td style="text-align:center">\(Exponential(\theta)\)</td>
<td style="text-align:center">\(Gamma(\alpha+1, \beta + y)\)</td>
</tr>
</tbody>
</table>
<h3 id="Exponential-Family"><a href="#Exponential-Family" class="headerlink" title="Exponential Family"></a>Exponential Family</h3><p>Exponential family is a family of distribution in the form of<br>$$p(y|\theta)=f(y)g(\theta)e^{\phi (\theta) u(y)}$$<br>\(\phi (\theta)\) is the natural parameter of the family.<br>\(u(y)\) is sufficient statistics of \(\theta\), the likelihood function depends on \(y\) only through \(u(y)\).</p>
<p>The conjugate prior of exponential family is $$\theta \sim g(\theta)^a e^{\phi (\theta) b}$$.<br>The \(a\) and \(b\) are parameters of this distribution.<br>In the posterior distribution, the parameters become \(a+1\) and \(b+u(y)\) respectively.</p>
<h3 id="Uninformative-Prior"><a href="#Uninformative-Prior" class="headerlink" title="Uninformative Prior"></a>Uninformative Prior</h3><h4 id="Improper-prior"><a href="#Improper-prior" class="headerlink" title="Improper prior"></a>Improper prior</h4><p>\(\int p(\theta) = \infty \), \(p(\theta) \propto 1\), \(\theta \in (- \infty, + \infty)\)<br>It may cause the posterior distribution non-integrable</p>
<h4 id="Jeffery’s-invariance-prior"><a href="#Jeffery’s-invariance-prior" class="headerlink" title="Jeffery’s invariance prior"></a>Jeffery’s invariance prior</h4><p>The uninformative prior is supposed to independent of parameterization. It implies that prior is variable-invariant.<br>Define Fisher Information<br>$$J(\theta) = E[(\frac{dlog(p(y|\theta))}{d\theta})^2] = - E[\frac{d^2log(p(y|\theta))}{d\theta^2}]$$<br>Prior is \(p(\theta) = \theta^{-0.5}(1-\theta)^{-0.5}\) when \(p(y|\theta) \propto \theta^y (1-\theta)^{n-y}\) .<br>Prior is $$p(\theta) \propto \frac{1}{\theta} $$ when $$p(y|\theta) \propto \theta^{-0.5}exp(-\frac{(y-m)^2}{2\theta}) $$.<br>Prior is $$p(\theta) \propto 1 $$ when $$p(y|\theta) \propto exp(-\frac{(y-\theta)^2}{2\tau}) $$.</p>
<h3 id="Multi-Parameter-Model"><a href="#Multi-Parameter-Model" class="headerlink" title="Multi Parameter Model"></a>Multi Parameter Model</h3><table>
<thead>
<tr>
<th style="text-align:left">prior</th>
<th style="text-align:center">model</th>
<th style="text-align:right">posterior</th>
</tr>
</thead>
<tbody>
<tr>
<td style="text-align:left">\(p(\theta_1, \theta_2)\)</td>
<td style="text-align:center">\(p(y \mid \theta_1, \theta_2)\)</td>
<td style="text-align:right">\(p(\theta_1, \theta_2 \mid y)\)</td>
</tr>
</tbody>
</table>
<p>If we are only interested in \(\theta_1\), we call \(\theta_2\) the nuisance parameter. The existence of the nuisance parameter adds uncertainty to this model. Formally,<br>$$p(\theta_1 \mid y) = \int p(\theta_1, \theta_2 \mid y) d\theta_2 = \int p(\theta_1 \mid \theta_2, y)p(\theta_2 \mid y) d\theta_2$$<br>The final \(p(\theta_1 \mid y)\) is a mixture of many \(p(\theta_1 \mid \theta_2, y)\) parameterized by \(\theta_2\).</p>
<h3 id="Uninformative-Prior-in-Multi-Parameter-Model"><a href="#Uninformative-Prior-in-Multi-Parameter-Model" class="headerlink" title="Uninformative Prior in Multi Parameter Model"></a>Uninformative Prior in Multi Parameter Model</h3><h4 id="Jeffery’s-Prior"><a href="#Jeffery’s-Prior" class="headerlink" title="Jeffery’s Prior"></a>Jeffery’s Prior</h4><p>Now suppose that our model is a Normal Distribution. \(p(y \mid \theta) \sim N(\mu, \tau^2)\)<br>Based on Jeffery’s Prior, prior independence between the multiple variables can be derived.<br>$$p(\mu, \tau^2) = p(\mu)p(\tau^2) = 1 * \frac{1}{\tau^2}$$<br>The posterior becomes $$p(\mu, \tau^2 \mid y) \propto \tau^{-n-2} exp[-\frac{S^2+n(\hat y - \mu)^2}{2\tau^2}]$$ $$S^2 = \sum_i (y_i - \hat y)^2$$<br>we get</p>
<table>
<thead>
<tr>
<th style="text-align:left">\(p(\mu \mid \tau^2, y)\)</th>
<th style="text-align:center">\(p(\tau^2 \mid y)\)</th>
<th style="text-align:right">\(p(\mu \mid y)\)</th>
</tr>
</thead>
<tbody>
<tr>
<td style="text-align:left">\(N(\hat y, \frac{\tau^2}{n})\)</td>
<td style="text-align:center">\(invKappa(n-1, S^2)\)</td>
<td style="text-align:right">\(t_{n-1} (\hat y, \frac{S^2}{n})\)</td>
</tr>
</tbody>
</table>
<h3 id="Theoretical-Assurance-of-Bayesian-Model-Correctness"><a href="#Theoretical-Assurance-of-Bayesian-Model-Correctness" class="headerlink" title="Theoretical Assurance of Bayesian Model Correctness"></a>Theoretical Assurance of Bayesian Model Correctness</h3><p>With an infinite number of data, the posterior distribution of \(\theta\) will collapse to the true \(\theta^\star\).</p>
<h4 id="Requirements"><a href="#Requirements" class="headerlink" title="Requirements"></a>Requirements</h4><ol>
<li>\((y_1, y_2, …, y_n) \sim p(y \mid \theta^\star)\) iid</li>
<li>There exist \(\theta_0\) such that \(D_{KL}(p(y \mid \theta^\star), p(y \mid \theta))\) is minimized. \(p(y \mid \theta)\) is our proposed model.</li>
</ol>
<h3 id="Proof"><a href="#Proof" class="headerlink" title="Proof"></a>Proof</h3><p>Convergence of the posterior for discrete parameter space, if our parameter space H is finite and \(p(\theta = \theta_0) > 0\), \(p(\theta = \theta_0 \mid y) \to 1, n \to \infty\)<br>Proof:<br>$$log(\frac{p(\theta \mid y)}{p(\theta_0 \mid y)}) = log(\frac{p(\theta)p(y \mid \theta)}{p(\theta_0)p(y \mid \theta_0)})$$<br>$$ = log \frac{p(\theta)}{p(\theta_0)} + log \frac{p(y \mid \theta)}{p(y \mid \theta_0)} $$ $$ = log \frac{p(\theta)}{p(\theta_0)} + \sum_i^n log \frac{p(y_i \mid \theta)}{p(y_i \mid \theta_0)}$$</p>
<p>$$ E(log \frac{p(y_i \mid \theta)}{p(y_i \mid \theta_0)}) = E_f(log(\frac{f}{p(y_i \mid \theta_0)})) - E_f(log(\frac{f}{p(y_i \mid \theta)})) < 0$$<br>\(f = p(y \mid \theta^\star)\) is the true distribution.<br>Therefore, the right side of this equation diverges to negative infinity as n goes to infinity. Plus, the prior \(p(\theta_0)\) is not zero. We obtain \(p(\theta \mid y) \to 0, \forall \theta \ne \theta_0 \Rightarrow p(\theta_0 \mid y) \to 1 \)</p>
<p>Specifically, \(p(\theta \mid y) \to N(\theta_0, \frac{1}{n J(\theta_0)})\) under some regularity conditions.</p>
<h3 id="Decision-of-hyperparameter-Prior-Distribution"><a href="#Decision-of-hyperparameter-Prior-Distribution" class="headerlink" title="Decision of hyperparameter(Prior Distribution)"></a>Decision of hyperparameter(Prior Distribution)</h3><h4 id="Hierarchical-Bayes-Full-Bayes"><a href="#Hierarchical-Bayes-Full-Bayes" class="headerlink" title="Hierarchical Bayes(Full Bayes)"></a>Hierarchical Bayes(Full Bayes)</h4><p>Make the prior hyperparameter also probabilistic (recursive).<br>$$p(\theta, \alpha \mid y) \propto p(\theta, \alpha) p(y \mid \theta, \alpha) \propto p(y \mid \theta) p(\theta \mid \alpha) p(\alpha)$$ $$p(\theta \mid y) = \int p(\theta, \alpha \mid y) d \alpha$$</p>
<h4 id="Empirical-Bayes"><a href="#Empirical-Bayes" class="headerlink" title="Empirical Bayes"></a>Empirical Bayes</h4><p>$$\hat \alpha = argmax p(y \mid \alpha) = \int p(y \mid \theta) p(\theta \mid \alpha) d\theta$$ $$p(\theta \mid y) = \int p(\theta \mid \alpha, y) p(\alpha \mid y) d\alpha = \sum_{i=1}^n w_i p(\theta \mid \alpha_i, y) $$<br>Empirical distribution chooses largest probabilistic distribution \( \hat \alpha = \alpha_i \), but full bayes mixes these distributions.</p>
<h5 id="stein’s-example"><a href="#stein’s-example" class="headerlink" title="stein’s example"></a>stein’s example</h5><p>$$p(y\mid \theta) \sim N(\theta,\sigma^2 I_n)$$ $$p(\theta) \sim N(0,\tau^2 I_n)$$ $$ p(\theta \mid y) \sim N((1 - \frac{\sigma^2}{\sigma^2 + \tau^2})y, \frac{\sigma^2 \tau^2}{\sigma^2+\tau^2} I_n) $$<br>Denote the point estimate \(\hat A = \frac{1}{\sigma^2 + \tau^2}\). Its unbiased estimate is \(\hat A = \frac{n-2}{y^Ty}\). \(\hat \theta_{EB} = (1-\sigma^2 \hat A)y\). This empirical estimate is called the James-Stein estimate, denoted by \(\theta_{JS}\).<br>In this case, the components of \(\theta\) are not independent because \(\hat A\) depends on all the data and affects \(\hat \theta_{EB}\).<br>Back to statistics, \(\theta\) is supposed to be estimated by \(y\). Formally,<br>$$\hat \theta = y$$<br>Here comes the question. What kind of estimate is supposed to be an excellent estimate? The smaller \(R(\theta, \hat \theta)\) is, the better the estimate is. (Usually, the estimation error can be divided into two parts, variance and bias. For an unbiased estimate, the bias equals zero). 
Here we specify \(\sigma \) to 1.<br>$$R(\theta, \hat \theta_{JS}) = E[(\theta - \hat \theta_{JS})^2] $$<br>$$= E[(\theta - y + y \frac{n-2}{\parallel y \parallel ^2})^2] = E[(\theta - y)^2] + 2(n-2) E[\frac{(\theta -y)^Ty}{\parallel y \parallel ^2}] + (n-2)^2 E[\frac{1}{\parallel y \parallel^2}]$$<br>Introduce Stein’s Lemma:<br>$$E[(\theta_i - y_i)h(y)] = -E[\frac{\partial h(y)}{\partial y_i} ]$$<br>Let<br>$$h(y)= \frac{y_i}{\parallel y \parallel_2^2}$$<br>We obtain<br>$$E[\frac{(\theta - y)^T y}{\parallel y \parallel ^2}] = \sum_i^n E[\frac{(\theta_i -y_i)y_i}{\parallel y \parallel ^2}] $$ $$= -\sum_i^n E[\frac{1}{\parallel y \parallel^2} - \frac{2y_i^2}{\parallel y \parallel^4}] = - (n-2) E[\frac{1}{\parallel y \parallel^2}]$$<br>To sum up,<br>$$ R(\theta, \hat \theta_{JS}) = E[(\theta - y)^2] - (n-2)^2 E[\frac{1}{\parallel y \parallel^2}] < E[(\theta - y)^2]$$<br>It implies that the JS estimator is a better estimate. But it seems a little weird that we could obtain a better result by just adding more components to \(y\) (boosting \(n\)). That’s why this theory was broadly criticized when it was first published. </p>
<h3 id="MC-Markov-Chain"><a href="#MC-Markov-Chain" class="headerlink" title="MC(Markov Chain)"></a>MC(Markov Chain)</h3><p>A stochastic process is called a Markov Process when it satisfies these two conditions.</p>
<ol>
<li>\(p(x^{i} \mid x^{i-1}, x^{i-2}, … , x^{1}) = T(x^{i} \mid x^{i-1})\)</li>
<li>\(T \triangleq T(x^{i}\mid x^{i-1})\) is invariant for all i.</li>
</ol>
<p>A Markov Process will converge to an invariant distribution when it satisfies two conditions.</p>
<ol>
<li>Irreducibility</li>
<li>Aperiodicity</li>
</ol>
<p>A sufficient but not necessary condition for a chain to converge to an invariant distribution is the detailed balance condition.<br>$$P(x^i)T(x^{i-1}\mid x^{i}) = P(x^{i-1})T(x^i \mid x^{i-1})$$<br>Metropolis-Hasting algorithm is just sampling and rejecting to force the sample to walk towards high probability place. This process satisfies the detailed balance condition such that it finally converges to the original probability distribution.</p>
<h4 id="Gibbs-Sampling"><a href="#Gibbs-Sampling" class="headerlink" title="Gibbs Sampling"></a>Gibbs Sampling</h4><p>The proposed distribution is defined as<br>$$ q(x^\star \mid x^i) = p(x_j^{\star} \mid x_{-j}^{i}) 1[x_{-j}^{\star} = x_{-j}^i]$$</p>
<h3 id="MCMC-Markov-Chain-Monte-Carlo"><a href="#MCMC-Markov-Chain-Monte-Carlo" class="headerlink" title="MCMC(Markov Chain Monte-Carlo)"></a>MCMC(Markov Chain Monte-Carlo)</h3><h4 id="Adaptive-MCMC"><a href="#Adaptive-MCMC" class="headerlink" title="Adaptive MCMC"></a>Adaptive MCMC</h4><h5 id="Independence-Sampler"><a href="#Independence-Sampler" class="headerlink" title="Independence Sampler"></a>Independence Sampler</h5><p>Intuitively, we accept our proposed sample with a higher probability when the proposal distribution is more similar to the original probability distribution. If the proposal distribution is Gaussian, it requires that the mean and covariance are sufficient to describe the original probability distribution. Therefore, the algorithm can be stated as: first sample the data following the traditional routine to estimate the mean and covariance, and then make use of these estimated parameters to do the independence sampling. </p>
<h5 id="Adaptive-Metropolis-Algorithm"><a href="#Adaptive-Metropolis-Algorithm" class="headerlink" title="Adaptive Metropolis Algorithm"></a>Adaptive Metropolis Algorithm</h5><p>Recall that in the Metropolis Algorithm, the proposed sample is a random walk around \(x_n\). Formally, \(x^\star = x_n + \epsilon\), \(\epsilon \sim N(0, \sigma^2 I_n)\). When the dimensions of the variable are dependent and not isotropic, the random walk behaves poorly. A better covariance \(C_t\) is required to describe the original distribution. So the proposal distribution is defined as<br>$$ p(x^\star \mid x_n) = N(x_n, \sigma^2 C_t)$$ </p>
<p>The \(C_t\) is updated at every time t following<br>$$Cov(X_0, X_1, … , X_t) = \frac{\sum_{i=1}^{t-1}(x_ix_i^{T} - ((t-1)+1)\bar x_{t-1}\bar x_{t-1}^T)}{t}$$.<br>$$\bar x_{t-1} = \frac{\sum_{i=0}^{t-1} x_i}{t}$$. </p>
<p>Or we could compute \(C_t\) recursively, $$C_{t+1} = \frac{(t-1)C_t}{t} + \frac{t\bar x_{t-1} \bar x_{t-1}^T - (t+1)\bar x_t \bar x_t ^T + x_t x_t^T}{t}$$ </p>
<p>Although the Markov Property cannot be guaranteed here, it has been proven that this algorithm will finally converge to the original probability distribution without using the detailed balance condition.</p>
<h3 id="Metrics-to-Evaluate-the-MCMC-Algorithm"><a href="#Metrics-to-Evaluate-the-MCMC-Algorithm" class="headerlink" title="Metrics to Evaluate the MCMC Algorithm"></a>Metrics to Evaluate the MCMC Algorithm</h3><p>The data generated by the MCMC algorithm are correlated, violating the independent-sampling assumption we expected. But how many independent samples are there, or what is the effective sample size among all the samples? The ESS(effective sample size) is empirically computed following<br>$$ESS = \frac{n}{\sum_{\tau=1}^n R(\tau)}$$</p>
<p>The self-correlation is used to evaluate the MCMC.<br>$$ R(s,t) = \frac{E(X_t - \mu_t)(X_s - \mu_s)}{\sigma_s \sigma_t}$$.<br>In Markov Chain, \(\mu = const, \sigma = const\). So the \(R(s,t) = R(t-s) = R(\tau)\). In matlab, \(autocorr(x, size)\) is defined to compute \(R(\tau) \sim \tau\) and \(mhsample()\) to do Metropolis Sampling. Usually, \(R(\tau)\) vanishes slowly when the accept probability is too low or too high. Empirically, the \(R(\tau)\) vanishes quickly when the accept probability belongs to 0.3 to 0.7 under an appropriate step size.</p>
<h3 id="Decision-Theory"><a href="#Decision-Theory" class="headerlink" title="Decision Theory"></a>Decision Theory</h3><p>Parameter Estimation is essentially a decision theory problem. Find a \(\theta^{\star} \) based on<br>$$ p(\theta \mid y) $$ through Loss Expectation Minimization or other loss information.</p>
<p>In a decision problem, many information are unknown called state of nature( unknown parameter ): \( \theta \in \Theta \). History data \( x \in X \) is provided to give us information. The possible action is denoted by \( a \in A \). Specifically, the action \( a \) is an estimation of \( \theta \) in parameter estimation.</p>
<p>Three key pieces of information:</p>
<ol>
<li>Loss function: \( L(\theta, a) \) represents the loss caused by action \(a\) when parameter is \( \theta \).</li>
<li>Prior distribution of \( \theta \): \( \pi(\theta) \).</li>
<li>Likelihood function: \( f(X \mid \theta) \)</li>
</ol>
<p>Example 1: </p>
<ol>
<li>\( \theta \in [0, 1] \): proportion of market.</li>
<li>\( a \in [0, 1]\): estimation.</li>
<li>\( x \): data from market investigation.</li>
</ol>
<p>Loss:<br>$$<br>L(\theta, a) =<br> \begin{cases}<br> \theta - a & \theta \geq a \\<br> 2(a - \theta) & a \geq \theta<br> \end{cases}<br>$$</p>
<p>Model:<br>$$f(x \mid \theta) = C_n^x \theta ^x (1 - \theta)^{(n-x)}$$</p>
<p>Prior:<br>$$ U[0.1, 0.2] $$ </p>
<p>Example 2: </p>
<ol>
<li>\( \theta \in [0,1]\): defective rate.</li>
<li>\((x, n)\): defective samples in all samples.</li>
<li>\(a \in \{A, R\}\): A is acceptance and R is rejection.</li>
</ol>
<p>Loss:<br>$$ L(\theta, A) = 10 \theta $$ $$ L(\theta, R) = const $$</p>
<p>Model:<br>$$f(x \mid \theta) = C_n^x \theta ^x (1 - \theta)^{(n-x)}$$</p>
<p>Prior:<br>$$ U[0, 1] $$ or $$ Beta(0.05, 1) $$</p>
<p>Decision Rule(Frequentist):<br>\( a = \delta(x) \): \( X \to A \)</p>
<p>Risk function:<br>$$ R(\theta, \delta) = E_{\theta}^{x}[ L(\theta , \delta(x))] = \int L(\theta, \delta(x)) f(x \mid \theta) dx $$</p>
<ol>
<li><p>The Bayes risk principle:<br>A decision rule \(\delta_1 \) is preferred to rule \( \delta_2 \) if<br>$$ r(\pi , \delta_1) < r(\pi, \delta_2)$$ where<br>$$ r(\pi, \delta) = E^{\pi}(R(\theta, \delta)) = \int R(\theta, \delta) \pi(\theta) d\theta $$</p>
</li>
<li><p>The minimax principle:<br>A decision rule \( \delta_1 \) is preferred to rule \( \delta_2 \) if<br>$$ sup_\theta R(\theta, \delta_1) < sup_\theta R(\theta, \delta_2)$$ </p>
</li>
</ol>
<p>A decision rule \(p_1\) is said to be R-Better than \(p_2\) if \(R(\theta, p_1) \leq R(\theta, p_2)\) for all \(\theta \in \Theta\), with strict inequality for some \( \theta \). A decision rule is admissible if there exists no R-Better decision rule. </p>
<p>Expected Loss:<br>$$ E_{p(\theta)}(L(\theta, a)) = \int L(\theta, a) p(\theta) d\theta$$<br>If \( p(\theta) = \pi(\theta) \), we call it prior expected loss. Contrastly, \( p(\theta) = \pi(\theta \mid x) \) is posterior expected loss.</p>
<p>The Bayesian(Posterior) expected loss:<br>\( \int L(\theta, a) \pi(\theta \mid x) d\theta \)<br>Choose an action \(a \in A \) by minimizing the expected loss. Such an action is called a Bayes action. </p>
<h2 id="Bayesian-Regression"><a href="#Bayesian-Regression" class="headerlink" title="Bayesian Regression"></a>Bayesian Regression</h2><h3 id="Learning"><a href="#Learning" class="headerlink" title="Learning"></a>Learning</h3><p>$$y = f(x) + noise = w^Tx + noise$$ $$noise \sim N(0, \sigma^2)$$<br>There are many common approaches to learn the parameter weight \(w\), such as MSE, which will not be described here.<br>Instead we will talk about it from the perspective of Bayesian Statistics.<br>Now we obtain the model or the likelihood \( p(y \mid w) = N(w^Tx, \sigma^2)\) and we assume the prior probability to be<br>\(p(w) = N(0, \Sigma_w)\). Hence the posterior probability is \( N(\mu_1, \sigma_1) \).<br>$$p(y \mid w) = N(X^Tw, \sigma^2 I)$$<br>$$p(w \mid y) \sim exp(-(y-X^Tw)^T(y-X^Tw) - 0.5 w^T \Sigma^{-1} w) $$<br>$$\sim exp(-0.5(w-\bar w)^T(\frac{1}{\sigma^2} XX^T + \Sigma_w^{-1})(w-\bar w))$$<br>$$\bar w = \sigma^{-2}(\frac{1}{\sigma^2} XX^T + \Sigma_w^{-1})^{-1}Xy = \sigma^{-2}A^{-1}Xy $$<br>$$ A = (\frac{1}{\sigma^2}XX^T + \Sigma_w^{-1})$$<br>If the prior is Laplacian, then this is LASSO Regression.<br>If the prior is Gaussian, then this is Ridge Regression.</p>
<h3 id="Inference"><a href="#Inference" class="headerlink" title="Inference"></a>Inference</h3><p>$$p(y^{\star}\mid y) = \int p(y^{\star} \mid w)p(w\mid y)dw$$<br>$$ = \int x^{\star T}w p(w \mid y)dw$$<br>$$ = N(\frac{x^{\star T} A^{-1} X y}{\sigma^2}, x^{\star T}A^{-1}x^{\star})$$</p>
<h3 id="Kernel-Trick"><a href="#Kernel-Trick" class="headerlink" title="Kernel Trick"></a>Kernel Trick</h3><p>To make our model adaptable to broader cases, kernel trick is usually introduced.<br>$$k(x, x) = \phi(x) A^{-1} \phi(x)$$<br>$$p(y^{\star}\mid y) = N(\frac{1}{\sigma^2}\Phi(x^{\star})^T A^{-1}\Phi y, \Phi(x^{\star})^T A^{-1}\Phi(x^{\star}) )$$</p>
<h2 id="Gaussian-Process"><a href="#Gaussian-Process" class="headerlink" title="Gaussian Process"></a>Gaussian Process</h2><p>Assumption: \(f(x)\) is a random process.<br>For any given \(x\), \(f(x)\) is a random variable.<br>Assumption(GP): \(f(x)\) is a Gaussian Process: let \((x_1, x_2, …, x_n)\) be any n points in the input<br>space; for any finite n, \((f(x_1), f(x_2), …, f(x_n))\) follows a multivariate Gaussian Distribution.<br>$$E(f(x)) = m(x) $$<br>$$k(x,x’) = E((f(x)-m(x))(f(x’)-m(x’))) = cov(f(x), f(x’))$$<br>\(k\) is the covariance function of the GP \(f(x)\)</p>
<p>Assume that $$w \sim N(0, I)$$<br>If $$f(x) = w^Tx$$ then $$f(x) \sim GP(0, x^Tx’)$$<br>If $$f(x) = w^T\Phi(x)$$ then $$f(x) \sim GP(0, \Phi(x)^T\Phi(x’))$$</p>
<p>Training Set:<br>$$(X, Y)$$<br>Testing Set:<br>$$(X^{\star}, Y^{\star})$$<br>$$f \sim (0, k(x, x’))$$</p>
<p>The joint distribution of the \(y\) and \(y^{\star}\) can be described as<br>$$\left[<br> \begin{matrix}<br> y\\<br> y^{\star}<br> \end{matrix}<br> \right]<br> \sim<br> N(0,<br> \left[<br> \begin{matrix}<br> k(x,x) & k(x, x^{\star})\\<br> k(x^{\star}, x) & k(x^{\star}, x^{\star})<br> \end{matrix}<br> \right]<br> )$$</p>
<p>$$p(y^{\star} \mid y) = \frac{p(y, y^{\star})}{p(y)}$$<br>where the<br>$$p(y) = N(0, k(y,y))$$<br>We can obtain the \(p(y^{\star}\mid y)\).<br>$$p(y^{\star}\mid y) = N(k(y^{\star}, y)k^{-1}(y,y)y, k(y^{\star}, y^{\star})- k(y,y^{\star})k^{-1}(y,y)k^{T}(y,y^{\star}))$$<br>There are three typical kinds of kernel function. And the parameters in kernel function plays an important role in<br>successfully applying the GP to practical problem. So be careful to pick those parameters.</p>
<h3 id="Acknowledgement"><a href="#Acknowledgement" class="headerlink" title="Acknowledgement"></a>Acknowledgement</h3><p>Thank <a href="http://math.sjtu.edu.cn/faculty/show.php?id=122" target="_blank" rel="noopener">Prof. Li</a> for his amazing lecture. </p>
</div>
<footer class="article-footer">
<a data-url="https://sunyasheng.github.io/2018/04/21/Byesian-Summary/" data-id="cju13wdlc00048ys6u4vdz5ho" class="article-share-link">Share</a>
</footer>
</div>
</article>
<article id="post-Torch-Tutorial" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-meta">
<a href="/2018/03/18/Torch-Tutorial/" class="article-date">
<time datetime="2018-03-18T15:17:21.000Z" itemprop="datePublished">2018-03-18</time>
</a>
</div>
<div class="article-inner">
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2018/03/18/Torch-Tutorial/">Torch_Tutorial</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<h2 id="Define-Your-Own-Module"><a href="#Define-Your-Own-Module" class="headerlink" title="Define Your Own Module"></a>Define Your Own Module</h2><figure class="highlight lua"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment">-- 使用自己定义的module,参考 http://blog.csdn.net/happyer88/article/details/52884586</span></span><br><span class="line"><span class="comment">-- 自定义dropout的写法,参考torch官方文档 http://torch.ch/docs/developer-docs.html#_</span></span><br><span class="line"><span class="built_in">require</span> <span class="string">'nn'</span></span><br><span class="line"></span><br><span class="line"><span class="keyword">local</span> NewClass, Parent = torch.class(<span 
class="string">'nn.NewClass'</span>, <span class="string">'nn.Module'</span>)</span><br><span class="line"></span><br><span class="line"><span class="function"><span class="keyword">function</span> <span class="title">NewClass:__init</span><span class="params">()</span></span></span><br><span class="line"> Parent.__init(self)</span><br><span class="line"> <span class="keyword">end</span></span><br><span class="line"></span><br><span class="line"><span class="function"><span class="keyword">function</span> <span class="title">NewClass:updateOutput</span><span class="params">(input)</span></span></span><br><span class="line"> self.<span class="built_in">output</span>:resizeAs(<span class="built_in">input</span>):copy(<span class="built_in">input</span>)</span><br><span class="line"> <span class="keyword">return</span> self.<span class="built_in">output</span></span><br><span class="line"><span class="keyword">end</span></span><br><span class="line"></span><br><span class="line"><span class="function"><span class="keyword">function</span> <span class="title">NewClass:updateGradInput</span><span class="params">(input, gradOutput)</span></span></span><br><span class="line"> self.gradInput:resizeAs(gradOutput):copy(gradOutput)</span><br><span class="line"> <span class="keyword">return</span> self.gradInput</span><br><span class="line"><span class="keyword">end</span></span><br><span class="line"></span><br><span class="line"></span><br><span class="line"><span class="comment">-- parameters</span></span><br><span class="line"><span class="keyword">local</span> precision = <span class="number">1e-5</span></span><br><span class="line"><span class="keyword">local</span> jac = nn.Jacobian</span><br><span class="line"></span><br><span class="line"><span class="comment">-- define inputs and module</span></span><br><span class="line"><span class="keyword">local</span> ini = <span class="built_in">math</span>.<span class="built_in">random</span>(<span 
class="number">10</span>,<span class="number">20</span>)</span><br><span class="line"><span class="keyword">local</span> percentage = <span class="number">0.5</span></span><br><span class="line"><span class="keyword">local</span> <span class="built_in">input</span> = torch.Tensor(ini):zero()</span><br><span class="line"><span class="keyword">local</span> module = nn.NewClass()</span><br><span class="line"></span><br><span class="line"><span class="comment">-- test backprop, with Jacobian</span></span><br><span class="line"><span class="keyword">local</span> err = jac.testJacobian(module,<span class="built_in">input</span>)</span><br><span class="line"><span class="built_in">print</span>(<span class="string">'==> error: '</span> .. err)</span><br><span class="line"><span class="keyword">if</span> err<precision <span class="keyword">then</span></span><br><span class="line"> <span class="built_in">print</span>(<span class="string">'==> module OK'</span>)</span><br><span class="line"><span class="keyword">else</span></span><br><span class="line"> <span class="built_in">print</span>(<span class="string">'==> error too large, incorrect implementation'</span>)</span><br><span class="line"><span class="keyword">end</span></span><br></pre></td></tr></table></figure>
<h2 id="Simple-Test-Demo"><a href="#Simple-Test-Demo" class="headerlink" title="Simple Test Demo"></a>Simple Test Demo</h2><figure class="highlight lua"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br><span class="line">54</span><br><span class="line">55</span><br><span class="line">56</span><br><span 
class="line">57</span><br><span class="line">58</span><br><span class="line">59</span><br><span class="line">60</span><br><span class="line">61</span><br><span class="line">62</span><br><span class="line">63</span><br><span class="line">64</span><br><span class="line">65</span><br><span class="line">66</span><br><span class="line">67</span><br><span class="line">68</span><br><span class="line">69</span><br><span class="line">70</span><br><span class="line">71</span><br><span class="line">72</span><br><span class="line">73</span><br><span class="line">74</span><br><span class="line">75</span><br><span class="line">76</span><br><span class="line">77</span><br><span class="line">78</span><br><span class="line">79</span><br><span class="line">80</span><br><span class="line">81</span><br><span class="line">82</span><br><span class="line">83</span><br><span class="line">84</span><br><span class="line">85</span><br><span class="line">86</span><br><span class="line">87</span><br><span class="line">88</span><br><span class="line">89</span><br><span class="line">90</span><br><span class="line">91</span><br><span class="line">92</span><br><span class="line">93</span><br><span class="line">94</span><br></pre></td><td class="code"><pre><span class="line"><span class="built_in">require</span> <span class="string">'nn'</span>;</span><br><span class="line"><span class="keyword">local</span> matio = <span class="built_in">require</span> <span class="string">'matio'</span></span><br><span class="line"><span class="comment">-- Model Construction</span></span><br><span class="line">nhidden = <span class="number">20</span></span><br><span class="line">ninput = <span class="number">310</span></span><br><span class="line">noutput = <span class="number">3</span></span><br><span class="line"></span><br><span class="line">net = nn.Sequential()</span><br><span class="line">net:add(nn.Linear(ninput, nhidden))</span><br><span class="line">net:add(nn.Linear(nhidden, noutput))</span><br><span 
class="line"></span><br><span class="line">criterion = nn.CrossEntropyCriterion()</span><br><span class="line"></span><br><span class="line"><span class="comment">-- Data Preparation</span></span><br><span class="line">train_input = matio.<span class="built_in">load</span>(<span class="string">'hw1/train_test/train_data.mat'</span>)</span><br><span class="line">train_label = matio.<span class="built_in">load</span>(<span class="string">'hw1/train_test/train_label.mat'</span>)</span><br><span class="line">test_input = matio.<span class="built_in">load</span>(<span class="string">'hw1/train_test/test_data.mat'</span>)</span><br><span class="line">test_label = matio.<span class="built_in">load</span>(<span class="string">'hw1/train_test/test_label.mat'</span>)</span><br><span class="line">train_label.train_label = train_label.train_label + <span class="number">2</span></span><br><span class="line">test_label.test_label = test_label.test_label + <span class="number">2</span></span><br><span class="line"></span><br><span class="line">trainset = {}</span><br><span class="line">trainset.data = train_input.train_data</span><br><span class="line">trainset.label = train_label.train_label</span><br><span class="line"><span class="built_in">setmetatable</span>(trainset, </span><br><span class="line"> {<span class="built_in">__index</span> = <span class="function"><span class="keyword">function</span><span class="params">(t, i)</span></span> </span><br><span class="line"> <span class="keyword">return</span> {t.data[i], t.label[i]} </span><br><span class="line"> <span class="keyword">end</span>}</span><br><span class="line">);</span><br><span class="line">trainset.data = trainset.data:double() <span class="comment">-- convert the data from a ByteTensor to a DoubleTensor.</span></span><br><span class="line"></span><br><span class="line"><span class="function"><span class="keyword">function</span> <span class="title">trainset:size</span><span class="params">()</span></span> 
</span><br><span class="line"> <span class="keyword">return</span> self.data:size(<span class="number">1</span>) </span><br><span class="line"><span class="keyword">end</span></span><br><span class="line"></span><br><span class="line"><span class="comment">-- train feature normalization</span></span><br><span class="line">mean = {}</span><br><span class="line">stdv = {}</span><br><span class="line"><span class="keyword">for</span> i = <span class="number">1</span>,trainset.data:size(<span class="number">2</span>) <span class="keyword">do</span></span><br><span class="line"> mean[i] = trainset.data[{{},{i}}]:mean()</span><br><span class="line"><span class="comment">-- print('mean of feature' .. i .. ':'.. mean[i])</span></span><br><span class="line"> trainset.data[{{},{i}}]:add(-mean[i])</span><br><span class="line"> </span><br><span class="line"> stdv[i] = trainset.data[{{},{i}}]:std()</span><br><span class="line"><span class="comment">-- print('deviation of feature' .. i .. ':' .. stdv[i])</span></span><br><span class="line"> trainset.data[{{},{i}}]:div(stdv[i])</span><br><span class="line"><span class="keyword">end</span></span><br><span class="line"><span class="comment">-- training settings</span></span><br><span class="line">trainer = nn.StochasticGradient(net, criterion)</span><br><span class="line">trainer.learningRate = <span class="number">0.005</span></span><br><span class="line">trainer.maxIteration = <span class="number">10</span> <span class="comment">-- just do 10 epochs of training.</span></span><br><span class="line"></span><br><span class="line"><span class="comment">-- training</span></span><br><span class="line">trainer:train(trainset)</span><br><span class="line"></span><br><span class="line"><span class="comment">-- evaluation of model</span></span><br><span class="line">testset = {}</span><br><span class="line">testset.data = test_input.test_data</span><br><span class="line">testset.label = test_label.test_label</span><br><span 
class="line"><span class="built_in">setmetatable</span>(testset, </span><br><span class="line"> {<span class="built_in">__index</span> = <span class="function"><span class="keyword">function</span><span class="params">(t, i)</span></span> </span><br><span class="line"> <span class="keyword">return</span> {t.data[i], t.label[i]} </span><br><span class="line"> <span class="keyword">end</span>}</span><br><span class="line">);</span><br><span class="line">testset.data = testset.data:double() <span class="comment">-- convert the data from a ByteTensor to a DoubleTensor.</span></span><br><span class="line"></span><br><span class="line"><span class="function"><span class="keyword">function</span> <span class="title">testset:size</span><span class="params">()</span></span> </span><br><span class="line"> <span class="keyword">return</span> self.data:size(<span class="number">1</span>) </span><br><span class="line"><span class="keyword">end</span></span><br><span class="line"></span><br><span class="line"><span class="comment">-- test feature normalization</span></span><br><span class="line">mean = {}</span><br><span class="line">stdv = {}</span><br><span class="line"><span class="keyword">for</span> i = <span class="number">1</span>,testset.data:size(<span class="number">2</span>) <span class="keyword">do</span></span><br><span class="line"> mean[i] = testset.data[{{},{i}}]:mean()</span><br><span class="line"><span class="comment">-- print('mean of feature' .. i .. ':'.. mean[i])</span></span><br><span class="line"> testset.data[{{},{i}}]:add(-mean[i])</span><br><span class="line"> </span><br><span class="line"> stdv[i] = testset.data[{{},{i}}]:std()</span><br><span class="line"><span class="comment">-- print('deviation of feature' .. i .. ':' .. 
stdv[i])</span></span><br><span class="line"> testset.data[{{},{i}}]:div(stdv[i])</span><br><span class="line"><span class="keyword">end</span></span><br><span class="line"></span><br><span class="line">correct = <span class="number">0</span></span><br><span class="line"><span class="keyword">for</span> i=<span class="number">1</span>,testset:size() <span class="keyword">do</span></span><br><span class="line"> <span class="keyword">local</span> groundtruth = testset.label[i]</span><br><span class="line"> <span class="keyword">local</span> prediction = net:forward(testset.data[i])</span><br><span class="line"> <span class="keyword">local</span> confidences, indices = torch.<span class="built_in">sort</span>(prediction, <span class="literal">true</span>)</span><br><span class="line"> <span class="keyword">if</span> groundtruth[<span class="number">1</span>] == indices[<span class="number">1</span>] <span class="keyword">then</span></span><br><span class="line"> correct = correct + <span class="number">1</span></span><br><span class="line"> <span class="keyword">end</span></span><br><span class="line"><span class="keyword">end</span></span><br><span class="line"></span><br><span class="line"><span class="built_in">print</span>(correct, <span class="number">100</span>*correct/testset:size() .. <span class="string">' % '</span>)</span><br></pre></td></tr></table></figure>
</div>
<footer class="article-footer">
<a data-url="https://sunyasheng.github.io/2018/03/18/Torch-Tutorial/" data-id="cju13wdmi000i8ys624nx9yko" class="article-share-link">Share</a>
</footer>
</div>
</article>
<article id="post-GAN-Workflow" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-meta">
<a href="/2018/03/04/GAN-Workflow/" class="article-date">
<time datetime="2018-03-04T12:12:16.000Z" itemprop="datePublished">2018-03-04</time>
</a>
</div>
<div class="article-inner">
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2018/03/04/GAN-Workflow/">GAN Workflow</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<h2 id="GAN-Workflow"><a href="#GAN-Workflow" class="headerlink" title="GAN Workflow"></a>GAN Workflow</h2><p>The whole workflow of GAN including data processing, model constructing and other details is recorded here based on <a href="https://github.com/soumith/dcgan.torch" target="_blank" rel="noopener">https://github.com/soumith/dcgan.torch</a>. Here we take the LSUN(Large-Scale Scene Understanding Challenge) as an example.</p>
<ol>
<li>Download the dataset through this script <a href="https://github.com/fyu/lsun/blob/master/download.py" target="_blank" rel="noopener">https://github.com/fyu/lsun/blob/master/download.py</a>. The dataset is stored as a lmdb(Lightning Memory-Mapped Database) database file. Lmdb database(folder named of <u>bedroom_train_lmdb</u>) contains <u>data.mdb</u> and <u>lock.mdb</u>.</li>
<li>Generate an index file to record all the keys in lmdb database. <u>bedroom_train_lmdb_hashes_chartensor.t7</u></li>
<li>Implement the data:get_batches() function. This work is accomplished through the cooperation of two files, <u>data.lua</u> and <u>donkey_lsun.lua</u>. In <u>data.lua</u>, multiple threads are built to speed up data reading. In <u>donkey_lsun.lua</u>, reading data from the lmdb database and image-cropping behaviors are defined. </li>
<li>Model building and training are done in <u>main.lua</u>. The pretrained models are stored in <u>checkpoints/train_netG.t7</u> and <u>checkpoints/train_netD.t7</u>.</li>
<li>Generate new samples using <u>generate.lua</u>. Vector Arithmetic is implemented in <u>arithmetic.lua</u>.</li>
</ol>
<p>Note that if the training images are not database files, steps 2 and 3 are supposed to be replaced with the code in <u>donkey_folder.lua</u> and <u>dataset.lua</u>.</p>
</div>
<footer class="article-footer">
<a data-url="https://sunyasheng.github.io/2018/03/04/GAN-Workflow/" data-id="cju13wdlw000a8ys6tzqclvwb" class="article-share-link">Share</a>
</footer>
</div>
</article>
<article id="post-Frequently-Used-Script" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-meta">
<a href="/2018/02/04/Frequently-Used-Script/" class="article-date">
<time datetime="2018-02-04T03:07:48.000Z" itemprop="datePublished">2018-02-04</time>
</a>
</div>
<div class="article-inner">
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2018/02/04/Frequently-Used-Script/">Frequently Used Script</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<h2 id="Download-through-python3"><a href="#Download-through-python3" class="headerlink" title="Download through python3"></a>Download through python3</h2><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">from</span> urllib <span class="keyword">import</span> request</span><br><span class="line">base_url = <span class="string">"http://ilab.usc.edu/neo2/dataset/tower/training/Neovision2-Training-Tower-"</span></span><br><span class="line"><span class="function"><span class="keyword">def</span> <span class="title">download</span><span class="params">(base_url)</span>:</span></span><br><span class="line"> <span class="keyword">for</span> i <span class="keyword">in</span> range(<span class="number">1</span>,<span class="number">51</span>):</span><br><span class="line"> url = base_url + <span class="string">"%03d"</span>%i + <span class="string">'.csv'</span></span><br><span class="line"> <span class="keyword">with</span> request.urlopen(url) <span class="keyword">as</span> web:</span><br><span class="line"> <span class="keyword">with</span> open(<span class="string">"Neovision2-Training-Tower-"</span>+<span class="string">"%03d"</span>%i + <span class="string">'.csv'</span>, <span class="string">'wb'</span>) <span class="keyword">as</span> outfile:</span><br><span class="line"> outfile.write(web.read())</span><br></pre></td></tr></table></figure>
</div>
<footer class="article-footer">
<a data-url="https://sunyasheng.github.io/2018/02/04/Frequently-Used-Script/" data-id="cju13wdlv00098ys60iwzcw5n" class="article-share-link">Share</a>
</footer>
</div>
</article>
<article id="post-Frequently-Used-Commands-in-Windows" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-meta">
<a href="/2018/01/16/Frequently-Used-Commands-in-Windows/" class="article-date">
<time datetime="2018-01-16T05:30:02.000Z" itemprop="datePublished">2018-01-16</time>
</a>
</div>
<div class="article-inner">
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2018/01/16/Frequently-Used-Commands-in-Windows/">Frequently Used Commands in Windows</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<p>Check the using status of GPU<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">set PATH=C:\Program Files\NVIDIA Corporation\NVSMI;%PATH%</span><br><span class="line">nvidia-smi</span><br></pre></td></tr></table></figure></p>
<p>Use of Package Manager choco<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">choco search "package_name"</span><br><span class="line">choco install "package_name"</span><br></pre></td></tr></table></figure></p>
<p>pip install with another source<br><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">pip3 install --pre mxnet-cu80 -i https://pypi.douban.com/simple</span><br></pre></td></tr></table></figure></p>
<p>Check the package in python<br><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">import</span> pip</span><br><span class="line"><span class="keyword">for</span> pkg <span class="keyword">in</span> [<span class="string">'mxnet'</span>, <span class="string">'mxnet-cu75'</span>, <span class="string">'mxnet-cu80'</span>]:</span><br><span class="line"> pip.main([<span class="string">'show'</span>, pkg])</span><br></pre></td></tr></table></figure></p>
<p>See the current directory and change to a new directory<br><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">import</span> os</span><br><span class="line">os.getcwd() <span class="comment"># 查看当前工作目录</span></span><br><span class="line">os.chdir(<span class="string">"C:\\python36\\lib\\site-packages"</span>)</span><br></pre></td></tr></table></figure></p>
<p>See all the devices working in computer<br><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">from</span> tensorflow.python.client <span class="keyword">import</span> device_lib <span class="keyword">as</span> _device_lib</span><br><span class="line">local_device_protos = _device_lib.list_local_devices()</span><br></pre></td></tr></table></figure></p>
<p>See the current time<br><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">import</span> time</span><br><span class="line">start = time.clock()</span><br><span class="line"><span class="comment"># start = time()</span></span><br></pre></td></tr></table></figure></p>
</div>
<footer class="article-footer">
<a data-url="https://sunyasheng.github.io/2018/01/16/Frequently-Used-Commands-in-Windows/" data-id="cju13wdlt00088ys69xpdtcce" class="article-share-link">Share</a>
</footer>
</div>
</article>
<article id="post-Papers-relevant-to-Video-Prediction" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-meta">
<a href="/2018/01/16/Papers-relevant-to-Video-Prediction/" class="article-date">
<time datetime="2018-01-16T05:17:24.000Z" itemprop="datePublished">2018-01-16</time>
</a>
</div>
<div class="article-inner">
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2018/01/16/Papers-relevant-to-Video-Prediction/">Papers relevant to Video Prediction</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<h2 id="Unsupervised-Learning-for-Physical-Interaction-through-Video-Prediction"><a href="#Unsupervised-Learning-for-Physical-Interaction-through-Video-Prediction" class="headerlink" title="Unsupervised Learning for Physical Interaction through Video Prediction"></a>Unsupervised Learning for Physical Interaction through Video Prediction</h2><ul>
<li>Develop a predictive model which merges appearance information from previous frames with motion predicted by the model.</li>
<li>This paper proposes three models, including DNA, CDNA and STP. </li>
<li>The construction of this network is pretty classical.</li>
</ul>
<h2 id="Deep-Multi-Scale-Video-Prediction-Beyond-Mean-Square-Error"><a href="#Deep-Multi-Scale-Video-Prediction-Beyond-Mean-Square-Error" class="headerlink" title="Deep Multi-Scale Video Prediction Beyond Mean Square Error"></a>Deep Multi-Scale Video Prediction Beyond Mean Square Error</h2><ul>
<li>Introduce a network to predict the difference between real $Y_{k}$ and $u(Y_{k-1})$ from $X_{k}$ and a coarse guess of $Y_{k}$</li>
<li>Usually, “ Using $l_{2}$ loss, and to a lesser extent $l_{1}$, produces blurry predictions, increasingly worse when predicting further in the future. “</li>
<li>“Convolutions only account for short-range dependencies, limited by the size of their kernels.” But pooling will lead to loss of resolution. There are two methods to tackle this problem. One is using many convolution layers without pooling, and the other is a “skip” connection to skip the pooling-unpooling pairs and preserve the high-frequency information.</li>
</ul>
<h2 id="Decomposing-Motion-and-Content-for-Natural-Video-Sequence-Prediction"><a href="#Decomposing-Motion-and-Content-for-Natural-Video-Sequence-Prediction" class="headerlink" title="Decomposing Motion and Content for Natural Video Sequence Prediction"></a>Decomposing Motion and Content for Natural Video Sequence Prediction</h2><p>This paper proposes MCNet, in which the video is decomposed into motion and content parts. Many of the latest techniques are introduced in this model.<br> <img src="content/images/2018/1/MCNet_Framework.png" alt="Fig.1. Forward Prediction" style="width: 600px;"> </p>
<h2 id="Unsupervised-Learning-of-Disentangled-Representation-from-Video"><a href="#Unsupervised-Learning-of-Disentangled-Representation-from-Video" class="headerlink" title="Unsupervised Learning of Disentangled Representation from Video"></a>Unsupervised Learning of Disentangled Representation from Video</h2><p>This paper proposes a new model, DRNET, that learns disentangled image representations from video. This approach factorizes each frame into a stationary part and a temporally varying component. </p>
<ul>
<li><p>Basically, this framework extracts the content (stationary) and pose (dynamic) parts explicitly.</p>
</li>
<li><p>The core idea is to explore an efficient way to represent content and pose respectively, which amounts to training an encoder and a decoder. </p>
</li>
<li><p>Generating future frames by recurrently predicting the pose vector $h_{p}$, as is shown below.</p>
<p><img src="content/images/2018/1/forward_prediction.png" alt="Fig.1. Forward Prediction" style="width: 800px;"> </p>
</li>
<li><p>The adversarial loss is introduced to exploit the fact that the objects do not typically change within a video, but they do between different videos.</p>
<p><img src="content/images/2018/1/samevideo.png" alt="Fig.1. Ensure Same Video" style="width: 600px;"> </p>
</li>
<li><p>Similarity loss, reconstruction loss and adversarial loss are introduced to train the pose encoder and content encoder.</p>
<p><img src="content/images/2018/1/encoder_decoder.png" alt="Fig.1. Encoder and Decoder Loss" style="width: 600px;"> </p>
</li>
</ul>
<h2 id="Unsupervised-Learning-of-Video-Representations-using-LSTMs"><a href="#Unsupervised-Learning-of-Video-Representations-using-LSTMs" class="headerlink" title="Unsupervised Learning of Video Representations using LSTMs"></a>Unsupervised Learning of Video Representations using LSTMs</h2><ul>
<li>This paper gives us an intuition about the LSTM autoencoder model.</li>
</ul>
</div>
<footer class="article-footer">
<a data-url="https://sunyasheng.github.io/2018/01/16/Papers-relevant-to-Video-Prediction/" data-id="cju13wdlz000c8ys6w1eolttx" class="article-share-link">Share</a>
</footer>
</div>
</article>
<article id="post-Details-of-Neural-Network" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-meta">
<a href="/2018/01/16/Details-of-Neural-Network/" class="article-date">
<time datetime="2018-01-16T05:00:06.000Z" itemprop="datePublished">2018-01-16</time>
</a>
</div>
<div class="article-inner">
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2018/01/16/Details-of-Neural-Network/">Details of Deep Learning</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<h2 id="About-the-Batch-Normalization"><a href="#About-the-Batch-Normalization" class="headerlink" title="About the Batch Normalization"></a>About the Batch Normalization</h2><p>Usually, BN is omitted in the last layer in semi-supervised learning. </p>
<h2 id="Embeddings"><a href="#Embeddings" class="headerlink" title="Embeddings"></a>Embeddings</h2><p>Embedding is introduced to find a meaningful representation or encoding of vectors based on the context information. The core idea is that the embedding is affected by the final softmax probability, since the input of the network is a vector and the output of the network is the set of vectors near this vector. Note that usually the number of negative samples is larger than that of positive samples, so sampling part of the negative outputs to speed up the training process is necessary.</p>
<h2 id="CNN-LSTM"><a href="#CNN-LSTM" class="headerlink" title="CNN LSTM"></a>CNN LSTM</h2><p>Actually, CNN LSTM is a natural product when it comes to the video processing. CNN LSTM simply replaces the multiplication with convolution in LSTM. Remember that what LSTM does is just building two items $c_{t}$ and $h_{t}$ to form four gates for the information control. This extension is pretty natural.</p>
<h2 id="Spatial-Transformer-Networks"><a href="#Spatial-Transformer-Networks" class="headerlink" title="Spatial Transformer Networks"></a>Spatial Transformer Networks</h2><p>Give networks the ability to actively spatially transform feature maps, conditioned on the feature map itself.</p>
<h2 id="Laplacian-Pyramid-Reconstruction-and-Refinement-for-Semantic-Segmentation"><a href="#Laplacian-Pyramid-Reconstruction-and-Refinement-for-Semantic-Segmentation" class="headerlink" title="Laplacian Pyramid Reconstruction and Refinement for Semantic Segmentation"></a>Laplacian Pyramid Reconstruction and Refinement for Semantic Segmentation</h2><p>Laterally, Laplacian Reconstruction is defined on the basis of Gaussian Reconstruction. Higher layer includes the lower frequency information and lower layer includes the higher frequency information in Laplacian Reconstruction. Adding up all the layers leads to the original image.</p>
<h2 id="Power-of-GAN-Loss"><a href="#Power-of-GAN-Loss" class="headerlink" title="Power of GAN Loss"></a>Power of GAN Loss</h2><p><img src="content/images/2018/1/powerofganloss.png" alt="Fig.1. Power of GAN" style="width: 400px;"><br>I think this may be the power of the discriminator in GAN. The discriminator forces the generated images to<br>be similar to the samples from the original distribution.</p>
</div>
<footer class="article-footer">
<a data-url="https://sunyasheng.github.io/2018/01/16/Details-of-Neural-Network/" data-id="cju13wdld00058ys675uxzqj0" class="article-share-link">Share</a>
</footer>
</div>
</article>
<article id="post-Framework-of-Unsupervised-Learning" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-meta">
<a href="/2018/01/16/Framework-of-Unsupervised-Learning/" class="article-date">
<time datetime="2018-01-16T04:53:46.000Z" itemprop="datePublished">2018-01-16</time>
</a>
</div>
<div class="article-inner">
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2018/01/16/Framework-of-Unsupervised-Learning/">Framework of Unsupervised Learning</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<h2 id="Unsupervised-Learning"><a href="#Unsupervised-Learning" class="headerlink" title="Unsupervised Learning"></a>Unsupervised Learning</h2><ul>
<li>Data: Just Data, No Labels!</li>
<li>Goal: Learn Some Underlying Hidden Structure of the Data</li>
<li>Examples: Clustering, Dimensionality Reduction, Feature Learning, Density Estimation, etc. </li>
</ul>
<h3 id="Common-Used-Approch"><a href="#Common-Used-Approch" class="headerlink" title="Commonly Used Approaches"></a>Commonly Used Approaches</h3><ul>
<li>K-means</li>
<li>PCA</li>
<li>Autoencoder</li>
<li>Density Estimation</li>
</ul>
<h3 id="Generative-Model"><a href="#Generative-Model" class="headerlink" title="Generative Model"></a>Generative Model</h3><h4 id="Explicitly-Learn-the-Probablity-Distribution-Basically-Tractable-Density-Function"><a href="#Explicitly-Learn-the-Probablity-Distribution-Basically-Tractable-Density-Function" class="headerlink" title="Explicitly Learn the Probability Distribution, Basically Tractable Density Function"></a>Explicitly Learn the Probability Distribution, Basically Tractable Density Function</h4><ul>
<li>Pixel RNN</li>
<li>Pixel CNN</li>
<li>VAE (Variational Autoencoders)<ul>
<li>Background: Autoencoder<ul>
<li>Applications: Learn to Initialize a Supervised Model</li>
</ul>
</li>
<li>Methodology<ul>
<li>Encoder and Decoder Are Defined to Be Probabilistic</li>
<li>Variational Autoencoder Optimizes the Lower Bound of the Probability</li>
<li>Cons: Blurring</li>
</ul>
</li>
</ul>
</li>
</ul>
<h4 id="GAN——Give-up-on-Explicitly-Modeling-Density-and-Just-Want-Ability-to-Sample"><a href="#GAN——Give-up-on-Explicitly-Modeling-Density-and-Just-Want-Ability-to-Sample" class="headerlink" title="GAN——Give up on Explicitly Modeling Density, and Just Want Ability to Sample"></a>GAN——Give up on Explicitly Modeling Density, and Just Want Ability to Sample</h4><ul>
<li>Learn to Generate from Training Distribution through 2-player Game</li>
<li>Researching Area: Make Training Process of GAN Stable</li>
</ul>
</div>
<footer class="article-footer">
<a data-url="https://sunyasheng.github.io/2018/01/16/Framework-of-Unsupervised-Learning/" data-id="cju13wdlg00078ys62zp5wftw" class="article-share-link">Share</a>
</footer>
</div>
</article>
<article id="post-Representation" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-meta">
<a href="/2018/01/16/Representation/" class="article-date">
<time datetime="2018-01-16T04:42:52.000Z" itemprop="datePublished">2018-01-16</time>
</a>
</div>
<div class="article-inner">
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2018/01/16/Representation/">Reading Notes of "Toward Learning a Compositional Visual Representation"</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<p>Naturally, knowledge representation is compositional. Can we make the most of compositionality to better predict or describe?</p>
<h3 id="State-of-Art"><a href="#State-of-Art" class="headerlink" title="State of Art"></a>State of Art</h3><p>Build hierarchical feature representation based on statistical models.</p>
<h3 id="Compostionality-in-CNN"><a href="#Compostionality-in-CNN" class="headerlink" title="Compositionality in CNN"></a>Compositionality in CNN</h3><ul>
<li><p>CNN is powerful in extracting representations for various tasks, such as image segmentation and object detection. Can CNN learn to disentangle the natural parts that combine to make full images?</p>
</li>
<li><p>Expectation: CNN will have the same activation in the region of airliner regardless of what objects appear adjacent to the airliner.</p>
</li>
<li><p>Following the expectation, moving the coffee mug closer to the airliner should not shift activations in the region of the airliner.<br><img src="https://www.vicarious.com/wp-content/uploads/2017/10/airplane_coffee.gif" width="500"></p>
</li>
</ul>
<h3 id="Teaching-Compositional-to-CNNs"><a href="#Teaching-Compositional-to-CNNs" class="headerlink" title="Teaching Compositionality to CNNs"></a>Teaching Compositionality to CNNs</h3><p>Two feature maps: one obtained from masking the input, and another derived from applying a mask in the feature space. Therefore, the compositionality cost can be computed from the two feature maps and the projected mask. The whole framework is shown below.</p>
<p><img src="https://www.vicarious.com/wp-content/uploads/2017/10/compositionality3.png" width="500"></p>
<h3 id="Evaluate-the-Compositionality-of-CNN"><a href="#Evaluate-the-Compositionality-of-CNN" class="headerlink" title="Evaluate the Compositionality of CNN"></a>Evaluate the Compositionality of CNN</h3><p>Backtrace a classification decision to a heat-map in the input image through a visualization technique like guided backpropagation. The heatmap shows what was most important for the classification decision in the input image. This technique can classify an object without depending on irrelevant context.<br><img src="https://www.vicarious.com/wp-content/uploads/2017/10/image2017-7-20-20-18-15.png" width="800"></p>
<h3 id="Core-Idea"><a href="#Core-Idea" class="headerlink" title="Core Idea"></a>Core Idea</h3><p>If you want to achieve something, just optimize it.</p>
<h2 id="Reference"><a href="#Reference" class="headerlink" title="Reference"></a>Reference</h2><p><a href="https://www.vicarious.com/2017/10/20/toward-learning-a-compositional-visual-representation/" target="_blank" rel="noopener">https://www.vicarious.com/2017/10/20/toward-learning-a-compositional-visual-representation/</a></p>
<p>Jost Tobias Springenberg, Alexey Dosovitskiy, Thomas Brox, and Martin Riedmiller. “Striving for simplicity: The all convolutional net.” International Conference on Learning Representations-WS. 2015.</p>
</div>
<footer class="article-footer">
<a data-url="https://sunyasheng.github.io/2018/01/16/Representation/" data-id="cju13wdm5000g8ys6uf6k408c" class="article-share-link">Share</a>
</footer>
</div>
</article>
<article id="post-Fast-Fourier-transform" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-meta">
<a href="/2017/12/12/Fast-Fourier-transform/" class="article-date">
<time datetime="2017-12-12T07:55:33.000Z" itemprop="datePublished">2017-12-12</time>
</a>
</div>
<div class="article-inner">
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2017/12/12/Fast-Fourier-transform/">Fast Fourier transform</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<h3 id="Theory-and-Code-of-FFT"><a href="#Theory-and-Code-of-FFT" class="headerlink" title="Theory and Code of FFT"></a>Theory and Code of FFT</h3><p>FFT(Fast Fourier transform) is first developed to speed the DFT(Discrete Fourier transform). Below is my matlab code. To be brief, it is similar to an divide and conquer algorithm. The complexity of FFT is O(NlogN).<br><figure class="highlight matlab"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br></pre></td><td class="code"><pre><span class="line"><span class="function"><span class="keyword">function</span> <span class="params">[ X ]</span> = <span class="title">FFT</span><span class="params">( x, N )</span></span></span><br><span class="line"> wn = <span class="built_in">exp</span>(<span class="built_in">i</span>*<span class="number">2</span>*<span class="built_in">pi</span>/N);</span><br><span class="line"> X = <span class="number">0</span>:N<span class="number">-1</span>;</span><br><span class="line"> <span class="keyword">if</span>(N==<span class="number">1</span>)</span><br><span class="line"> X(<span class="number">1</span>)=x(<span class="number">1</span>);</span><br><span class="line"> <span class="keyword">return</span> ;</span><br><span class="line"> <span class="keyword">end</span></span><br><span class="line"> y0 = <span class="number">0</span>:N/<span class="number">2</span><span class="number">-1</span>; y1 = <span class="number">0</span>:N/<span 
class="number">2</span><span class="number">-1</span>;</span><br><span class="line"> <span class="keyword">for</span> k = <span class="number">0</span>:N/<span class="number">2</span><span class="number">-1</span></span><br><span class="line"> y0(k+<span class="number">1</span>) = x(k+<span class="number">1</span>) + x(k+<span class="number">1</span>+N/<span class="number">2</span>);</span><br><span class="line"> y1(k+<span class="number">1</span>) = (x(k+<span class="number">1</span>) - x(k+<span class="number">1</span>+N/<span class="number">2</span>))*wn^(k);</span><br><span class="line"> <span class="keyword">end</span></span><br><span class="line"> Y0 = FFT(y0, N/<span class="number">2</span>); Y1 = FFT(y1, N/<span class="number">2</span>);</span><br><span class="line"> <span class="keyword">for</span> k = <span class="number">0</span>:N/<span class="number">2</span><span class="number">-1</span></span><br><span class="line"> X(<span class="number">2</span>*k+<span class="number">1</span>) = Y0(k+<span class="number">1</span>);</span><br><span class="line"> X(<span class="number">2</span>*k+<span class="number">1</span>+<span class="number">1</span>) = Y1(k+<span class="number">1</span>);</span><br><span class="line"> <span class="keyword">end</span></span><br><span class="line"><span class="keyword">end</span></span><br></pre></td></tr></table></figure></p>
<p>Usually, the FFT is applied to image processing, where 2-dimensional FFT is used more frequently. Here is the 2-dimensional FFT code.<br><figure class="highlight matlab"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br></pre></td><td class="code"><pre><span class="line"><span class="function"><span class="keyword">function</span> <span class="params">[ img ]</span> = <span class="title">FFT2</span><span class="params">( img, M, N )</span></span></span><br><span class="line"> <span class="keyword">for</span> <span class="built_in">i</span> = <span class="number">1</span>:M</span><br><span class="line"> img(<span class="built_in">i</span>,:) = FFT(img(<span class="built_in">i</span>,:), N);</span><br><span class="line"> <span class="keyword">end</span></span><br><span class="line"> <span class="keyword">for</span> <span class="built_in">j</span> = <span class="number">1</span>:N</span><br><span class="line"> img(:,<span class="built_in">j</span>) = FFT(img(:,<span class="built_in">j</span>),M);</span><br><span 
class="line"> <span class="keyword">end</span></span><br><span class="line"><span class="keyword">end</span></span><br><span class="line">``` </span><br><span class="line"></span><br><span class="line">After operations in frequency domain, transformation from frequency domain to time domain is necessary. This is often realized by Inverse Fast Fourier transformation(IFFT). The difference between FFT and IFFT</span><br><span class="line">is just the choice of base <span class="function"><span class="keyword">function</span> <span class="title">in</span> <span class="title">frequency</span> <span class="title">domain</span>. <span class="title">Below</span> <span class="title">is</span> <span class="title">the</span> <span class="title">FFT</span> <span class="title">and</span> 2-<span class="title">dimensional</span> <span class="title">FFT</span> <span class="title">code</span>.</span></span><br><span class="line">```Matlab</span><br><span class="line"><span class="function"><span class="keyword">function</span> <span class="params">[ X ]</span> = <span class="title">IFFT</span><span class="params">( x, N )</span></span></span><br><span class="line"> wn = <span class="built_in">exp</span>(-<span class="built_in">i</span>*<span class="number">2</span>*<span class="built_in">pi</span>/N);</span><br><span class="line"> X = <span class="number">0</span>:N<span class="number">-1</span>;</span><br><span class="line"> <span class="keyword">if</span>(N==<span class="number">1</span>)</span><br><span class="line"> X(<span class="number">1</span>)=x(<span class="number">1</span>);</span><br><span class="line"> <span class="keyword">return</span> ;</span><br><span class="line"> <span class="keyword">end</span></span><br><span class="line"> y0 = <span class="number">0</span>:N/<span class="number">2</span><span class="number">-1</span>; y1 = <span class="number">0</span>:N/<span class="number">2</span><span class="number">-1</span>;</span><br><span class="line"> <span 
class="keyword">for</span> k = <span class="number">0</span>:N/<span class="number">2</span><span class="number">-1</span></span><br><span class="line"> y0(k+<span class="number">1</span>) = x(k+<span class="number">1</span>) + x(k+<span class="number">1</span>+N/<span class="number">2</span>);</span><br><span class="line"> y1(k+<span class="number">1</span>) = (x(k+<span class="number">1</span>) - x(k+<span class="number">1</span>+N/<span class="number">2</span>))*wn^(k);</span><br><span class="line"> <span class="keyword">end</span></span><br><span class="line"> Y0 = IFFT(y0, N/<span class="number">2</span>); Y1 = IFFT(y1, N/<span class="number">2</span>);</span><br><span class="line"> <span class="keyword">for</span> k = <span class="number">0</span>:N/<span class="number">2</span><span class="number">-1</span></span><br><span class="line"> X(<span class="number">2</span>*k+<span class="number">1</span>) = Y0(k+<span class="number">1</span>);</span><br><span class="line"> X(<span class="number">2</span>*k+<span class="number">1</span>+<span class="number">1</span>) = Y1(k+<span class="number">1</span>);</span><br><span class="line"> <span class="keyword">end</span></span><br><span class="line"><span class="keyword">end</span></span><br></pre></td></tr></table></figure></p>
<figure class="highlight matlab"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><span class="line"><span class="function"><span class="keyword">function</span> <span class="params">[ img ]</span> = <span class="title">IFFT2</span><span class="params">( img, M, N )</span></span></span><br><span class="line"> <span class="keyword">for</span> <span class="built_in">i</span> = <span class="number">1</span>:M</span><br><span class="line"> img(<span class="built_in">i</span>,:) = IFFT(img(<span class="built_in">i</span>,:), N);</span><br><span class="line"> <span class="keyword">end</span></span><br><span class="line"> <span class="keyword">for</span> <span class="built_in">j</span> = <span class="number">1</span>:N</span><br><span class="line"> img(:,<span class="built_in">j</span>) = IFFT(img(:,<span class="built_in">j</span>),M);</span><br><span class="line"> <span class="keyword">end</span></span><br><span class="line"><span class="keyword">end</span></span><br></pre></td></tr></table></figure>
<h3 id="Application-of-FFT-in-Imageprocessing"><a href="#Application-of-FFT-in-Imageprocessing" class="headerlink" title="Application of FFT in Imageprocessing"></a>Application of FFT in Imageprocessing</h3><p>Below is the code I’ve written to do imageprocessing and test my FFT.<br><figure class="highlight matlab"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment">%% Proprocessing</span></span><br><span class="line">clear; clc; close all;</span><br><span class="line">img = imread(<span class="string">'aobama.png'</span>);</span><br><span class="line">img = rgb2gray(img);</span><br><span class="line">img = im2double(img);</span><br><span class="line">I = imresize(img, [<span class="number">256</span>, <span class="number">256</span>]);</span><br><span class="line">figure;</span><br><span class="line">imshow(I); title(<span class="string">'Original Picture'</span>);</span><br><span class="line"><span class="comment">%% My FFT2</span></span><br><span class="line">figure;</span><br><span class="line">imshow(<span class="built_in">log</span>(<span class="built_in">abs</span>(fftshift(FFT2(I,<span class="number">256</span>,<span class="number">256</span>)))+<span class="number">1</span>)); title(<span class="string">'My FFT'</span>);</span><br><span class="line"><span class="comment">%% My IFFT2</span></span><br><span class="line">figure; title(<span class="string">'My 
IFFT'</span>);</span><br><span class="line">FFT_I = FFT2(I,<span class="number">256</span>,<span class="number">256</span>)/<span class="number">256.0</span>/<span class="number">256.0</span>;</span><br><span class="line"><span class="comment">%FFT_I(1:120,1:120) = 0; FFT_I(256-120+1:256,256-120+1:256:256) = 0; %high pass</span></span><br><span class="line">FFT_I(<span class="number">5</span>:<span class="number">256</span><span class="number">-5</span>,<span class="number">5</span>:<span class="number">256</span><span class="number">-5</span>) = <span class="number">0</span>; <span class="comment">%low pass</span></span><br><span class="line">imshow(<span class="built_in">abs</span>(IFFT2(FFT_I,<span class="number">256</span>,<span class="number">256</span>)));title(<span class="string">'My Filter and IFFT'</span>);</span><br></pre></td></tr></table></figure></p>
<p><img src="content/images/2017/12/di1.jpg" alt="Fig.1. Original Picture" style="width: 400px;"><br><img src="content/images/2017/12/di2.jpg" alt="Fig.2. After Low Pass Processing" style="width: 400px;"><br>After low pass, the average color of the image is kept but the details become unclear due to loss of high frequency information.</p>
<p><img src="content/images/2017/12/gao1.jpg" alt="Fig.3. Original Picture" style="width: 400px;"><br><img src="content/images/2017/12/gao2.jpg" alt="Fig.4. After High Pass Processing" style="width: 400px;"><br>After high pass, the details of the image such as edges become clear but the average color changes obviously.</p>
</div>
<footer class="article-footer">
<a data-url="https://sunyasheng.github.io/2017/12/12/Fast-Fourier-transform/" data-id="cju13wdlf00068ys6gb2ejhrh" class="article-share-link">Share</a>
</footer>
</div>
</article>
<article id="post-Actor-Critic-And-Deep-Deterministic-Policy-Gradient" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-meta">
<a href="/2017/12/11/Actor-Critic-And-Deep-Deterministic-Policy-Gradient/" class="article-date">
<time datetime="2017-12-11T02:32:52.000Z" itemprop="datePublished">2017-12-11</time>
</a>
</div>
<div class="article-inner">
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2017/12/11/Actor-Critic-And-Deep-Deterministic-Policy-Gradient/">Actor Critic and Deep Deterministic Policy Gradient</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<p>There is only a minor difference between Deep Q Neural Network (DQN) and Policy Gradient, so could we combine these two algorithms? The answer is Yes. That is exactly Actor-Critic. However, it may be difficult for Actor-Critic to learn something due to the complexity of its mechanism and its quick updating at every time step. Fortunately, Google DeepMind proposes an elegant strategy, the famous deep deterministic policy gradient (DDPG), to solve this problem. Here I will briefly compare DQN and Policy Gradient and introduce DDPG.</p>
<h3 id="Comparsion-of-DQN-and-Policy-Gradient"><a href="#Comparsion-of-DQN-and-Policy-Gradient" class="headerlink" title="Comparison of DQN and Policy Gradient"></a>Comparison of DQN and Policy Gradient</h3><p>The difference between DQN and Policy Gradient is described in Fig. 1. The reward is used to compute the output value v. In DQN, computing value v requires one time step, but it needs one episode to compute value v in Policy Gradient. This is why DQN updates faster than Policy Gradient. But could we use the value v computed through DQN and update each iteration in Policy Gradient? Here comes the Actor Critic. Briefly, DQN functions like a critic who evaluates (state s, action a) with value v and Policy Gradient functions like an actor who takes action a in state s. Therefore, DQN can tell Policy Gradient the value v at each step while Policy Gradient can tell DQN which action a is supposed to be taken at state s. Everything matches perfectly at this time.</p>
<p><img src="content/images/2017/12/DQN_PG_Comparation.jpeg" alt="Fig.1. Comparasion of DQN and Policy" style="width: 600px;"></p>
<h3 id="Actor-Critic-And-Deep-Deterministic-Policy-Gradient"><a href="#Actor-Critic-And-Deep-Deterministic-Policy-Gradient" class="headerlink" title="Actor Critic And Deep Deterministic Policy Gradient"></a>Actor Critic And Deep Deterministic Policy Gradient</h3>
</div>
<footer class="article-footer">
<a data-url="https://sunyasheng.github.io/2017/12/11/Actor-Critic-And-Deep-Deterministic-Policy-Gradient/" data-id="cju13wdl800028ys6vmmx8voa" class="article-share-link">Share</a>
</footer>
</div>
</article>
<article id="post-Deep-Q-Neural-Network" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-meta">
<a href="/2017/12/10/Deep-Q-Neural-Network/" class="article-date">
<time datetime="2017-12-10T01:25:08.000Z" itemprop="datePublished">2017-12-10</time>
</a>
</div>
<div class="article-inner">
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2017/12/10/Deep-Q-Neural-Network/">Deep Q Neural Network and Policy Gradient</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<p>Q learning is off-policy learning, indicating that it is able to learn from previous experience usually stored in its memory library or to learn from others’ experience. The core idea of Q learning, and of reinforcement learning in general, is learning from (state, action, reward). This process simulates a kid exploring the world and gradually learning in which way to obtain the highest reward. </p>
<h3 id="Q-Learning"><a href="#Q-Learning" class="headerlink" title="Q Learning"></a>Q Learning</h3><p>To be brief, our aim is to determine an optimal action given a specified state. In Q learning, the action which has the highest value is selected as its decision at this state. Therefore, the problem is transformed into obtaining a Q table, whose column index is action and row index is state, storing the value Q(s, a) of an action in a specified state. When we make a decision in state s, we go through all the values in the row recording state s and choose the action with the maximum value.<br>Below is the procedure of Q learning to learn a Q table.</p>
<p><img src="content/images/2017/12/Q-learning.png" alt="Fig.1. Q learning" style="width: 600px;"></p>
<h3 id="Deep-Q-Neural-Network"><a href="#Deep-Q-Neural-Network" class="headerlink" title="Deep Q Neural Network"></a>Deep Q Neural Network</h3><p>Learning Q table is impractical when state space is huge. Hence, the neural network is introduced to replace the Q table. Two traditional NN are built to do this job. In first one, input state s and action a to NN and its value is computed. In second one, input state and the values of all actions in this state is computed and outputted. Actually, there is no essential difference between these two representations. The output of the neural network stands for a voting of an action.<br>Furthermore, two techniques, experience replay and fixed Q-targets, make deep Q neural network powerful.</p>
<h4 id="Experience-Replay"><a href="#Experience-Replay" class="headerlink" title="Experience Replay"></a>Experience Replay</h4><p>Experience replay is a common technique used to improve the efficiency. Instead of training the network using only the current data, experience replay also trains the network using its previous experience.</p>
<h4 id="Fixed-Q-targets"><a href="#Fixed-Q-targets" class="headerlink" title="Fixed Q-targets"></a>Fixed Q-targets</h4><p>There are two networks in fixed Q-targets: an old network, with parameters from a few steps ago, to give the estimation of Q, and a new network, with the newest parameters, to give the reality of Q. </p>
<h3 id="Policy-Gradient"><a href="#Policy-Gradient" class="headerlink" title="Policy Gradient"></a>Policy Gradient</h3><p>In Policy Gradient, the output of the NN is an action. An advantage of this strategy is that it is powerful when the action is a continuous value. DQN can only tackle the situation where the action is a discrete value because the output of DQN is the value of a specific action. Since only the value of this specific action is backpropagated, the action is fixed and can only indicate discrete values such as turning left or right. However, in Policy Gradient the action is a continuous output directly from the neural network while the reward is used as a coefficient to affect backpropagation. In this sense, the action can usually be interpreted as a continuous value such as velocity or temperature. The procedure of policy gradient is shown below.</p>
<p><img src="content/images/2017/12/Policy Gradient.png" alt="Fig.2. Policy Gradient" style="width: 500px;"></p>
<h4 id="Training-data-for-policy-gradient"><a href="#Training-data-for-policy-gradient" class="headerlink" title="Training data for policy gradient"></a>Training data for policy gradient</h4><p>The training data for policy gradient is a series of (s, a, r), indicating (state, action, reward) in one episode. The s is the input and the a indicates the output. The reward is only a coefficient to influence backpropagation.</p>
</div>
<footer class="article-footer">
<a data-url="https://sunyasheng.github.io/2017/12/10/Deep-Q-Neural-Network/" data-id="cju13wdla00038ys6zdaytw9s" class="article-share-link">Share</a>
</footer>
</div>
</article>
<article id="post-An-Elegant-Way-to-Cover-Line-Segments-with-Grids" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-meta">
<a href="/2017/12/08/An-Elegant-Way-to-Cover-Line-Segments-with-Grids/" class="article-date">
<time datetime="2017-12-08T03:32:38.000Z" itemprop="datePublished">2017-12-08</time>
</a>
</div>
<div class="article-inner">
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2017/12/08/An-Elegant-Way-to-Cover-Line-Segments-with-Grids/">An Elegant Way to Cover Line Segments with Grids</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<p>Sometimes we are required to cover line segments with grids. Here, I will introduce a simple trick to build the grids elegantly. The whole procedure can be divided into two steps as is shown below.</p>
<h3 id="Clip-a-Line-Segment-with-Boundaries-of-Grid"><a href="#Clip-a-Line-Segment-with-Boundaries-of-Grid" class="headerlink" title="Clip a Line Segment with Boundaries of Grid"></a>Clip a Line Segment with Boundaries of Grid</h3><p>Below is the code for clipping a line segment with grids. The idea is pretty straightforward. A line can be described by the formula in Fig. 1 parameterized by t. So we can represent a cutting point by t when we cut a line segment with the boundary of grids. The procedure in code consists of three steps. Firstly, we cut a line with the boundaries of grids parallel to the x axis and insert the t into a set. And similarly, we cut a line with the boundaries of grids parallel to the y axis and also insert the t into a set. Finally, we compute the coordinates of the clipping points according to Fig.1 and put them into a vector. </p>
<p><img src="content/images/2017/12/line_representation.jpeg" alt="Fig.1. Parameter description of a line" style="width: 150px;"><br><figure class="highlight c"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br></pre></td><td class="code"><pre><span class="line"><span class="built_in">set</span><<span class="keyword">double</span>> t;</span><br><span class="line">t.insert(<span class="number">0</span>);t.insert(<span class="number">1</span>);<span class="comment">//insert start point and end point of a line segment</span></span><br><span class="line"></span><br><span class="line"><span class="keyword">for</span>(<span class="keyword">int</span> i = min(x1, x2)/GRID_LENGTH); i < max(x1, x2)/GRID_LENGTH; i++){<span class="comment">//clip a line segment with boundary of grids</span></span><br><span class="line"> <span class="keyword">if</span>(!(x1=<i*GRID_LENGTH&&i*GRID_LENGTH<x2))<span class="keyword">continue</span>;</span><br><span class="line"> t.insert((i*GRID_LENGTH)/(x2-x1));</span><br><span class="line">}</span><br><span class="line"></span><br><span class="line"><span class="keyword">for</span>(<span class="keyword">int</span> j = min(y1, y2)/GRID_LENGTH); j < max(y1, y2)/GRID_LENGTH; j++){<span class="comment">//clip a line segment with boundary of grids</span></span><br><span class="line"> <span 
class="keyword">if</span>(!(y1=<j*GRID_LENGTH&&i*GRID_LENGTH<y2))<span class="keyword">continue</span>;</span><br><span class="line"> t.insert((j*GRID_LENGTH)/(y2-y1));</span><br><span class="line">}</span><br><span class="line"></span><br><span class="line"><span class="built_in">set</span><<span class="keyword">double</span>>::iterator it; <span class="keyword">double</span> last_it;</span><br><span class="line"><span class="built_in">vector</span><pair<<span class="keyword">double</span>,<span class="keyword">double</span>> > segment_points;</span><br><span class="line"><span class="keyword">for</span>(it = t.begin(); it != t.end(); it++){<span class="comment">//insert clipping points to a vector</span></span><br><span class="line"> <span class="keyword">if</span>(it!=t.begin()&&<span class="built_in">abs</span>(*it-last_it)<eps)<span class="keyword">continue</span>;</span><br><span class="line"> last_it = *it;</span><br><span class="line"> segment_points.push_back(make_pair(x1+*it*(x2-x1), y1+*it*(y2-y1)));</span><br><span class="line">}</span><br></pre></td></tr></table></figure></p>
<h3 id="Put-Line-Segment-to-Its-Corresponding-Grid"><a href="#Put-Line-Segment-to-Its-Corresponding-Grid" class="headerlink" title="Put Line Segment to Its Corresponding Grid"></a>Put Line Segment to Its Corresponding Grid</h3><p>After line segment clipping, the endpoints of a line are guaranteed to lie in the grid. Without loss of generality, we represent a grid with its lower left corner coordinate. Therefore, we could quickly determine which grid the current line is located in by its midpoint.</p>
<pre><code class="C"><span class="built_in">set</span><pair<<span class="keyword">int</span>, <span class="keyword">int</span>> >grids;
grids.insert(make_pair(<span class="keyword">int</span>(midpoint_x/GRID_LENGTH) - midpoint_x<<span class="number">0</span>?<span class="number">1</span>:<span class="number">0</span>, <span class="keyword">int</span>(midpoint_y/GRID_LENGTH) - midpoint_y<<span class="number">0</span>?<span class="number">1</span>:<span class="number">0</span>))
</code></pre>
</div>
<footer class="article-footer">
<a data-url="https://sunyasheng.github.io/2017/12/08/An-Elegant-Way-to-Cover-Line-Segments-with-Grids/" data-id="cju13wdlx000b8ys6zj5g0s71" class="article-share-link">Share</a>
</footer>
</div>
</article>
<article id="post-3-Dimensional-Reconstruction-from-2-Dimensional-Slicing" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-meta">
<a href="/2017/12/06/3-Dimensional-Reconstruction-from-2-Dimensional-Slicing/" class="article-date">
<time datetime="2017-12-06T05:54:48.000Z" itemprop="datePublished">2017-12-06</time>
</a>
</div>
<div class="article-inner">
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2017/12/06/3-Dimensional-Reconstruction-from-2-Dimensional-Slicing/">3D Model Repairing -- Widen the Thin Wall</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<h2 id="Widen-the-thin-wall"><a href="#Widen-the-thin-wall" class="headerlink" title="Widen the thin wall"></a>Widen the thin wall</h2><p>The workflow consists of two parts. Considering the efficiency of the algorithm, rasterization is first executed for later detection. Then, by enumerating all the grids, the offset is applied to the contours one by one.</p>
<h3 id="Code-for-Rasterization"><a href="#Code-for-Rasterization" class="headerlink" title="Code for Rasterization"></a>Code for Rasterization</h3><p>In this part, all the contours are simply cut by grids.<br><figure class="highlight c++"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">for</span> (<span class="keyword">int</span> i = <span class="number">0</span>; i < ptr_slice_->GetNumPieces(); i++) {</span><br><span class="line"> <span class="built_in">std</span>::<span class="built_in">vector</span><<span class="built_in">std</span>::<span class="built_in">vector</span><<span class="built_in">std</span>::pair<<span class="keyword">double</span>, <span class="keyword">double</span>> > > contours; contours.clear();</span><br><span class="line"> New2Origin layerlinenew2origin;</span><br><span class="line"> <span class="keyword">for</span> (<span class="keyword">int</span> j = <span class="number">0</span>; j < 
pieces_[i].size(); j++) {</span><br><span class="line"> <span class="built_in">std</span>::<span class="built_in">vector</span><<span class="built_in">std</span>::pair<<span class="keyword">double</span>, <span class="keyword">double</span>> > contour; contour.clear();</span><br><span class="line"> <span class="keyword">for</span> (<span class="keyword">int</span> k = <span class="number">0</span>; k < pieces_[i][j].size(); k++) {</span><br><span class="line"> <span class="keyword">double</span> x1 = pieces_[i][j][k].first[<span class="number">0</span>]; <span class="keyword">double</span> y1 = pieces_[i][j][k].first[<span class="number">1</span>];</span><br><span class="line"> <span class="keyword">double</span> x2 = pieces_[i][j][k].second[<span class="number">0</span>]; <span class="keyword">double</span> y2 = pieces_[i][j][k].second[<span class="number">1</span>];</span><br><span class="line"> <span class="built_in">std</span>::<span class="built_in">set</span><<span class="keyword">double</span>> godis; godis.clear();</span><br><span class="line"> godis.insert(<span class="number">0</span>); godis.insert(<span class="number">1</span>);</span><br><span class="line"> <span class="keyword">for</span> (<span class="keyword">int</span> ii = <span class="keyword">int</span>( min(x1, x2)/MIN_DIS); ii < <span class="keyword">int</span>(max(x1, x2) / MIN_DIS); ii++) {</span><br><span class="line"> <span class="keyword">if</span> (!(x1 < ii*MIN_DIS&&ii*MIN_DIS<x2))<span class="keyword">continue</span>;</span><br><span class="line"> godis.insert((ii*MIN_DIS - x1) / (x2 - x1));</span><br><span class="line"> }</span><br><span class="line"> <span class="keyword">for</span> (<span class="keyword">int</span> jj = <span class="keyword">int</span>( min(y1, y2)/MIN_DIS); jj < <span class="keyword">int</span>(max(y1, y2) / MIN_DIS); jj++) {</span><br><span class="line"> <span class="keyword">if</span> (!(y1<jj*MIN_DIS&&jj*MIN_DIS<y2))<span 
class="keyword">continue</span>;</span><br><span class="line"> godis.insert((jj*MIN_DIS - y1) / (y2 - y1));</span><br><span class="line"> }</span><br><span class="line"> <span class="built_in">std</span>::<span class="built_in">set</span><<span class="keyword">double</span>>::iterator it;</span><br><span class="line"> <span class="keyword">double</span> last_it;</span><br><span class="line"> <span class="keyword">for</span> (it = godis.begin(); it != godis.end(); ++it) {</span><br><span class="line"> <span class="keyword">if</span> (it != godis.begin() && <span class="built_in">abs</span>(*it - last_it)<eps)<span class="keyword">continue</span>;</span><br><span class="line"> last_it = *it;</span><br><span class="line"> layerlinenew2origin.layermap[j][contour.size()] = k;</span><br><span class="line"> contour.push_back(<span class="built_in">std</span>::make_pair(x1 + *it*(x2 - x1), y1 + *it*(y2 - y1)));</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"> contours.push_back(contour);</span><br><span class="line"> }</span><br><span class="line"> lines_new2origin.push_back(layerlinenew2origin);</span><br><span class="line"> layers.push_back(contours);</span><br><span class="line"> }</span><br></pre></td></tr></table></figure></p>
<p>After cutting, new generated contours are stored in its corresponding grids.<br><figure class="highlight c++"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">for</span> (<span class="keyword">int</span> j = <span class="number">0</span>; j < layers[i].size(); j++) {</span><br><span class="line"> IsInner.push_back(!JudgeLoopDir(layers[i][j]));</span><br><span class="line"> <span class="keyword">for</span> (<span class="keyword">int</span> k = <span class="number">0</span>; k < layers[i][j].size(); k++) {</span><br><span class="line"> layer_offdis.dis[j].push_back(<span class="number">0</span>);</span><br><span class="line"> <span class="keyword">double</span> x1 = layers[i][j][k].first, x2 = layers[i][j][(k + <span class="number">1</span>) % layers[i][j].size()].first;</span><br><span class="line"> <span class="keyword">double</span> y1 = layers[i][j][k].second, y2 = layers[i][j][(k + <span class="number">1</span>) % layers[i][j].size()].second;</span><br><span class="line"> <span class="keyword">double</span> x = <span class="number">0.5</span>*(x1 + x2), y = <span class="number">0.5</span>*(y1 + y2);</span><br><span class="line"> <span class="built_in">std</span>::pair<<span class="keyword">int</span>, <span class="keyword">int</span>> g = point2grid(<span class="built_in">std</span>::make_pair(x, y));</span><br><span class="line"> <span class="built_in">std</span>::<span class="built_in">vector</span><<span class="keyword">int</span>> a; a.push_back(g.first); a.push_back(g.second); a.push_back(j); 
a.push_back(layer_grids[g].grids[j].size());</span><br><span class="line"> lineincontour[a] = k;</span><br><span class="line"> layer_grids[g].grids[j].push_back(<span class="built_in">std</span>::make_pair(<span class="built_in">std</span>::make_pair(x1, y1), <span class="built_in">std</span>::make_pair(x2, y2)));</span><br><span class="line"> }</span><br><span class="line">}</span><br></pre></td></tr></table></figure></p>
<h3 id="Offset-in-each-grids"><a href="#Offset-in-each-grids" class="headerlink" title="Offset in each grids"></a>Offset in each grids</h3><p>Code for calculating offseting distance required for each line in a contour.<br><figure class="highlight c++"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br><span 
class="line">54</span><br><span class="line">55</span><br><span class="line">56</span><br><span class="line">57</span><br><span class="line">58</span><br><span class="line">59</span><br><span class="line">60</span><br><span class="line">61</span><br><span class="line">62</span><br><span class="line">63</span><br><span class="line">64</span><br><span class="line">65</span><br><span class="line">66</span><br><span class="line">67</span><br><span class="line">68</span><br><span class="line">69</span><br><span class="line">70</span><br><span class="line">71</span><br><span class="line">72</span><br><span class="line">73</span><br><span class="line">74</span><br><span class="line">75</span><br><span class="line">76</span><br><span class="line">77</span><br><span class="line">78</span><br><span class="line">79</span><br><span class="line">80</span><br><span class="line">81</span><br><span class="line">82</span><br><span class="line">83</span><br><span class="line">84</span><br><span class="line">85</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">for</span> (it1 = layer_grids.begin(); it1 != layer_grids.end(); it1++) {</span><br><span class="line"> <span class="keyword">int</span> x = it1->first.first, y = it1->first.second;</span><br><span class="line"> <span class="keyword">for</span> (<span class="keyword">int</span> k = <span class="number">0</span>; k < <span class="number">9</span>; k++) {</span><br><span class="line"> <span class="keyword">int</span> next_x = x + dir[k][<span class="number">0</span>], next_y = y + dir[k][<span class="number">1</span>];</span><br><span class="line"> <span class="keyword">if</span> (vis.find(<span class="built_in">std</span>::make_pair(<span class="built_in">std</span>::make_pair(x, y), <span class="built_in">std</span>::make_pair(next_x, next_y))) != vis.end())<span class="keyword">continue</span>;</span><br><span class="line"> vis.insert(<span class="built_in">std</span>::make_pair(<span 
class="built_in">std</span>::make_pair(next_x, next_y), <span class="built_in">std</span>::make_pair(x, y))); vis.insert(<span class="built_in">std</span>::make_pair(<span class="built_in">std</span>::make_pair(next_x, next_y), <span class="built_in">std</span>::make_pair(x, y)));</span><br><span class="line"> it2 = layer_grids.find(<span class="built_in">std</span>::make_pair(next_x, next_y));</span><br><span class="line"> <span class="keyword">if</span> (it2 == layer_grids.end())<span class="keyword">continue</span>;</span><br><span class="line"> <span class="keyword">for</span> (<span class="keyword">int</span> j1 = <span class="number">0</span>; j1 < layers[i].size(); j1++) <span class="keyword">if</span> (it1->second.grids[j1].size()) {</span><br><span class="line"> <span class="keyword">for</span> (<span class="keyword">int</span> j2 = <span class="number">0</span>; j2 < layers[i].size(); j2++)<span class="keyword">if</span> (it2->second.grids[j2].size()) {</span><br><span class="line"> <span class="keyword">if</span> (IsInner[j1] && IsInner[j2] && j1 != j2) {</span><br><span class="line"> <span class="keyword">for</span> (<span class="keyword">int</span> k1 = <span class="number">0</span>; k1 < it1->second.grids[j1].size(); k1++) {</span><br><span class="line"> <span class="keyword">for</span> (<span class="keyword">int</span> k2 = <span class="number">0</span>; k2 < it2->second.grids[j2].size(); k2++) {</span><br><span class="line"> pa p[<span class="number">3</span>][<span class="number">3</span>], optimal_dir, offdir[<span class="number">3</span>];</span><br><span class="line"> p[<span class="number">1</span>][<span class="number">1</span>] = it1->second.grids[j1][k1].first, p[<span class="number">1</span>][<span class="number">2</span>] = it1->second.grids[j1][k1].second;</span><br><span class="line"> p[<span class="number">2</span>][<span class="number">1</span>] = it2->second.grids[j2][k2].first, p[<span class="number">2</span>][<span 
class="number">2</span>] = it2->second.grids[j2][k2].second;</span><br><span class="line"> optimal_dir = furthestdir(p);</span><br><span class="line"> <span class="keyword">double</span> line_dis = <span class="built_in">sqrt</span>(dot(optimal_dir, optimal_dir));</span><br><span class="line"> <span class="comment">//line_dis = sqrt((p[1][1].first + p[1][2].first - p[2][1].first - p[2][2].first) * (p[1][1].first + p[1][2].first - p[2][1].first - p[2][2].first)* 0.25 + (p[1][1].second + p[1][2].second - p[2][1].second - p[2][2].second) *(p[1][1].second + p[1][2].second - p[2][1].second - p[2][2].second)*0.25);</span></span><br><span class="line"> <span class="keyword">double</span> move = (MIN_DIS - line_dis);</span><br><span class="line"> <span class="keyword">if</span> (move < <span class="number">0</span>)<span class="keyword">continue</span>;</span><br><span class="line"> <span class="built_in">std</span>::<span class="built_in">vector</span><<span class="keyword">int</span>> a; a.push_back(x); a.push_back(y); a.push_back(j1); a.push_back(k1);</span><br><span class="line"> <span class="built_in">std</span>::<span class="built_in">vector</span><<span class="keyword">int</span>> b; b.push_back(next_x); b.push_back(next_y); b.push_back(j2); b.push_back(k2);</span><br><span class="line"> layer_offdis.dis[j1][lineincontour[a]] = max(move*<span class="number">0.5</span>, layer_offdis.dis[j1][lineincontour[a]]);</span><br><span class="line"> layer_offdis.dis[j2][lineincontour[b]] = max(move*<span class="number">0.5</span>, layer_offdis.dis[j2][lineincontour[b]]);</span><br><span class="line"> <span class="comment">//if (dot(p1, optimal_dir) > 0 && dot(p1, optimal_dir) < 0)</span></span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"> <span class="keyword">else</span> <span class="keyword">if</span> (IsInner[j1] && !IsInner[j2] && j1 != j2) {</span><br><span class="line"> <span 
class="keyword">for</span> (<span class="keyword">int</span> k1 = <span class="number">0</span>; k1 < it1->second.grids[j1].size(); k1++) {</span><br><span class="line"> <span class="keyword">for</span> (<span class="keyword">int</span> k2 = <span class="number">0</span>; k2 < it2->second.grids[j2].size(); k2++) {</span><br><span class="line"> pa p[<span class="number">3</span>][<span class="number">3</span>], optimal_dir, offdir[<span class="number">3</span>];</span><br><span class="line"> p[<span class="number">1</span>][<span class="number">1</span>] = it1->second.grids[j1][k1].first, p[<span class="number">1</span>][<span class="number">2</span>] = it1->second.grids[j1][k1].second;</span><br><span class="line"> p[<span class="number">2</span>][<span class="number">1</span>] = it2->second.grids[j2][k2].first, p[<span class="number">2</span>][<span class="number">2</span>] = it2->second.grids[j2][k2].second;</span><br><span class="line"> optimal_dir = furthestdir(p);</span><br><span class="line"> <span class="keyword">double</span> line_dis = <span class="built_in">sqrt</span>(dot(optimal_dir, optimal_dir));</span><br><span class="line"> <span class="keyword">double</span> move = (MIN_DIS - line_dis);</span><br><span class="line"> <span class="keyword">if</span> (move < <span class="number">0</span>)<span class="keyword">continue</span>;</span><br><span class="line"> <span class="built_in">std</span>::<span class="built_in">vector</span><<span class="keyword">int</span>> a; a.push_back(x); a.push_back(y); a.push_back(j1); a.push_back(k1);</span><br><span class="line"> <span class="built_in">std</span>::<span class="built_in">vector</span><<span class="keyword">int</span>> b; b.push_back(next_x); b.push_back(next_y); b.push_back(j2); b.push_back(k2);</span><br><span class="line"> layer_offdis.dis[j1][lineincontour[a]] = max(move, layer_offdis.dis[j1][lineincontour[a]]);</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"> 
}</span><br><span class="line"> <span class="keyword">else</span> <span class="keyword">if</span> (!IsInner[j1] && IsInner[j2] && j1 != j2) {</span><br><span class="line"> <span class="keyword">for</span> (<span class="keyword">int</span> k1 = <span class="number">0</span>; k1 < it1->second.grids[j1].size(); k1++) {</span><br><span class="line"> <span class="keyword">for</span> (<span class="keyword">int</span> k2 = <span class="number">0</span>; k2 < it2->second.grids[j2].size(); k2++) {</span><br><span class="line"> pa p[<span class="number">3</span>][<span class="number">3</span>], optimal_dir, offdir[<span class="number">3</span>];</span><br><span class="line"> p[<span class="number">1</span>][<span class="number">1</span>] = it1->second.grids[j1][k1].first, p[<span class="number">1</span>][<span class="number">2</span>] = it1->second.grids[j1][k1].second;</span><br><span class="line"> p[<span class="number">2</span>][<span class="number">1</span>] = it2->second.grids[j2][k2].first, p[<span class="number">2</span>][<span class="number">2</span>] = it2->second.grids[j2][k2].second;</span><br><span class="line"> optimal_dir = furthestdir(p);</span><br><span class="line"> <span class="keyword">double</span> line_dis = <span class="built_in">sqrt</span>(dot(optimal_dir, optimal_dir));</span><br><span class="line"> <span class="keyword">double</span> move = (MIN_DIS - line_dis);</span><br><span class="line"> <span class="keyword">if</span> (move < <span class="number">0</span>)<span class="keyword">continue</span>;</span><br><span class="line"> <span class="built_in">std</span>::<span class="built_in">vector</span><<span class="keyword">int</span>> a; a.push_back(x); a.push_back(y); a.push_back(j1); a.push_back(k1);</span><br><span class="line"> <span class="built_in">std</span>::<span class="built_in">vector</span><<span class="keyword">int</span>> b; b.push_back(next_x); b.push_back(next_y); b.push_back(j2); b.push_back(k2);</span><br><span class="line"> 
layer_offdis.dis[j2][lineincontour[b]] = max(move, layer_offdis.dis[j2][lineincontour[b]]);</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"> <span class="keyword">else</span> <span class="keyword">if</span> (!IsInner[j1] && !IsInner[j2] && j1 == j2) {</span><br><span class="line"> <span class="keyword">for</span> (<span class="keyword">int</span> k1 = <span class="number">0</span>; k1 < it1->second.grids[j1].size(); k1++) {</span><br><span class="line"> <span class="keyword">for</span> (<span class="keyword">int</span> k2 = <span class="number">0</span>; k2 < it2->second.grids[j2].size(); k2++) <span class="keyword">if</span> (k1 != k2) {</span><br><span class="line"> pa p[<span class="number">3</span>][<span class="number">3</span>], optimal_dir, offdir[<span class="number">3</span>];</span><br><span class="line"> p[<span class="number">1</span>][<span class="number">1</span>] = it1->second.grids[j1][k1].first, p[<span class="number">1</span>][<span class="number">2</span>] = it1->second.grids[j1][k1].second;</span><br><span class="line"> p[<span class="number">2</span>][<span class="number">1</span>] = it2->second.grids[j2][k2].first, p[<span class="number">2</span>][<span class="number">2</span>] = it2->second.grids[j2][k2].second;</span><br><span class="line"> optimal_dir = furthestdir(p);</span><br><span class="line"> <span class="keyword">double</span> line_dis = <span class="built_in">sqrt</span>(dot(optimal_dir, optimal_dir));</span><br><span class="line"> <span class="keyword">double</span> move = (MIN_DIS - line_dis);</span><br><span class="line"> <span class="keyword">if</span> (move < <span class="number">0</span>)<span class="keyword">continue</span>;</span><br><span class="line"> pa p1, p2; p1.first = p[<span class="number">1</span>][<span class="number">1</span>].first - p[<span class="number">1</span>][<span class="number">2</span>].first; p1.second = p[<span 
class="number">1</span>][<span class="number">1</span>].second - p[<span class="number">1</span>][<span class="number">2</span>].second;</span><br><span class="line"> p2.first = p[<span class="number">2</span>][<span class="number">1</span>].first - p[<span class="number">2</span>][<span class="number">2</span>].first; p2.second = p[<span class="number">2</span>][<span class="number">1</span>].second - p[<span class="number">2</span>][<span class="number">2</span>].second;</span><br><span class="line"> <span class="keyword">if</span> (dot(p1, p2) > <span class="number">0</span>)<span class="keyword">continue</span>;</span><br><span class="line"> <span class="built_in">std</span>::<span class="built_in">vector</span><<span class="keyword">int</span>> a; a.push_back(x); a.push_back(y); a.push_back(j1); a.push_back(k1);</span><br><span class="line"> <span class="built_in">std</span>::<span class="built_in">vector</span><<span class="keyword">int</span>> b; b.push_back(next_x); b.push_back(next_y); b.push_back(j2); b.push_back(k2);</span><br><span class="line"> layer_offdis.dis[j2][lineincontour[b]] = max(move, layer_offdis.dis[j2][lineincontour[b]]);</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"> offdis.push_back(layer_offdis);</span><br></pre></td></tr></table></figure></p>
<p>Below is the code to offset each contour.<br><figure class="highlight c++"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">void</span> RenderingWidget::LayerOffset(LayerOffDis layer_offdis, <span class="keyword">int</span> layernum) {</span><br><span class="line"> <span class="built_in">std</span>::<span class="built_in">vector</span><<span class="keyword">bool</span>> 
IsInner;</span><br><span class="line"> <span class="keyword">for</span> (<span class="keyword">int</span> j = <span class="number">0</span>; j < layers[layernum].size(); j++) {</span><br><span class="line"> IsInner.push_back(!JudgeLoopDir(layers[layernum][j]));</span><br><span class="line"> }</span><br><span class="line"> Paths final_clippers, subs, tmp_res, tmp_res1, res;</span><br><span class="line"> <span class="keyword">for</span> (<span class="keyword">int</span> i = <span class="number">0</span>; i < layers[layernum].size(); i++) <span class="keyword">if</span> (IsInner[i]) {</span><br><span class="line"> Path sub, clipper; ClipperOffset offset; Clipper difference; Paths clippers;</span><br><span class="line"> <span class="keyword">double</span> max_d = <span class="number">0</span>;</span><br><span class="line"> <span class="keyword">for</span> (<span class="keyword">int</span> j = <span class="number">0</span>; j < layers[layernum][i].size(); j++) {</span><br><span class="line"> <span class="keyword">double</span> d = layer_offdis.dis[i][j];</span><br><span class="line"> ClipperLib::cInt x = ScaleNumber*layers[layernum][i][j].first;</span><br><span class="line"> ClipperLib::cInt y = ScaleNumber*layers[layernum][i][j].second;</span><br><span class="line"> max_d = max(max_d, d);</span><br><span class="line"> sub << IntPoint(x, y);</span><br><span class="line"> <span class="keyword">if</span> (max_d>eps)clipper << IntPoint(x, y);</span><br><span class="line"></span><br><span class="line"> <span class="keyword">if</span> (<span class="built_in">fabs</span>(d) < eps&&max_d>eps) {</span><br><span class="line"> offset.AddPath(clipper, jtMiter, etOpenButt);</span><br><span class="line"> offset.Execute(clippers, max_d*ScaleNumber); ClipperLib::CleanPolygons(clippers);</span><br><span class="line"> difference.AddPaths(clippers, ptClip, TRUE);</span><br><span class="line"> max_d = <span class="number">0</span>;</span><br><span class="line"> 
clipper.clear();</span><br><span class="line"> <span class="keyword">continue</span>;</span><br><span class="line"> }</span><br><span class="line"> }</span><br><span class="line"> <span class="keyword">if</span> (clipper.size()) {</span><br><span class="line"> offset.AddPath(clipper, jtMiter, etOpenButt);</span><br><span class="line"> offset.Execute(clippers, max_d*ScaleNumber); ClipperLib::CleanPolygons(clippers);</span><br><span class="line"> difference.AddPaths(clippers, ptClip, TRUE);</span><br><span class="line"> clipper.clear();</span><br><span class="line"> }</span><br><span class="line"> <span class="keyword">if</span> (sub.size())subs.push_back(sub);<span class="comment">/* DrawPaths(subs, layernum);*/</span></span><br><span class="line"> difference.AddPath(sub, ptSubject, TRUE);</span><br><span class="line"> difference.Execute(ctDifference, tmp_res, pftNonZero, pftNonZero); ClipperLib::CleanPolygons(tmp_res);</span><br><span class="line"> tmp_res1.clear(); <span class="keyword">for</span> (<span class="keyword">int</span> j = <span class="number">0</span>; j < tmp_res.size(); j++)<span class="keyword">if</span> (<span class="built_in">fabs</span>(Area(tmp_res[j]))*<span class="number">1.0</span> / ScaleNumber / ScaleNumber >= MIN_DIS*MIN_DIS*<span class="number">0.2</span>)tmp_res1.push_back(tmp_res[j]);</span><br><span class="line"> <span class="keyword">for</span> (<span class="keyword">int</span> j = <span class="number">0</span>; j < tmp_res1.size(); j++)<span class="keyword">if</span> (tmp_res1[j].size())res.push_back(tmp_res1[j]);</span><br><span class="line"> }</span><br><span class="line"> <span class="keyword">for</span> (<span class="keyword">int</span> i = <span class="number">0</span>; i < layers[layernum].size(); i++) <span class="keyword">if</span> (!IsInner[i]) {</span><br><span class="line"> Path sub, clipper; ClipperOffset offset; Clipper difference; Paths clippers;</span><br><span class="line"> <span class="keyword">for</span> (<span 
class="keyword">int</span> j = <span class="number">0</span>; j < layers[layernum][i].size(); j++) {</span><br><span class="line"> ClipperLib::cInt x = ScaleNumber*layers[layernum][i][j].first;</span><br><span class="line"> ClipperLib::cInt y = ScaleNumber*layers[layernum][i][j].second;</span><br><span class="line"> sub << IntPoint(x, y);</span><br><span class="line"> }</span><br><span class="line"> res.push_back(sub);</span><br><span class="line"> }</span><br><span class="line"> res_path.push_back(res);</span><br><span class="line">}</span><br></pre></td></tr></table></figure></p>
<p><img src="content/images/2017/12/offset_result.png" alt="Fig.1. Offset Result" style="width: 400px;"></p>
</div>
<footer class="article-footer">
<a data-url="https://sunyasheng.github.io/2017/12/06/3-Dimensional-Reconstruction-from-2-Dimensional-Slicing/" data-id="cju13wdl000008ys6209cd04a" class="article-share-link">Share</a>
</footer>
</div>
</article>
<article id="post-Particle-Swarm-Optimization" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-meta">
<a href="/2017/12/05/Particle-Swarm-Optimization/" class="article-date">
<time datetime="2017-12-05T10:31:30.000Z" itemprop="datePublished">2017-12-05</time>
</a>
</div>
<div class="article-inner">
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2017/12/05/Particle-Swarm-Optimization/">Particle Swarm Optimization</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<h3 id="Introduction-of-Particle-Swarm-Optimization"><a href="#Introduction-of-Particle-Swarm-Optimization" class="headerlink" title="Introduction of Particle Swarm Optimization"></a>Introduction of Particle Swarm Optimization</h3><p>Particle Swarm Optimization (PSO) is a computational method that finds a solution by iteratively improving a candidate solution. It is broadly applied to problems whose exact solutions are impossible to compute, specifically nonlinear optimization problems.</p>
<p>PSO was first introduced to simulate social behavior but eventually became an optimization paradigm for performing optimization in complex situations. PSO has the following advantages: 1) it makes few assumptions and is capable of searching for solutions in a very large state space; 2) it does not require the problem to be differentiable, unlike other optimization paradigms such as gradient descent. However, it does not guarantee that an optimal solution is found.</p>
<h3 id="Standard-Swarm-Optimization-Procedure"><a href="#Standard-Swarm-Optimization-Procedure" class="headerlink" title="Standard Swarm Optimization Procedure"></a>Standard Swarm Optimization Procedure</h3><p>Simulating the food seeking process of flock, PSO fisrtly initialize many particles representing solutions and update those particles iteratively according to the PSO updating rule shown in Fig.1. p1 indicates the best solution in histroy for each particle while p2 indicates best solution of all particles. Therefore, updating is divided into two stages, the velocity reflecting the inertia towards better solution is computed and then this velocity is added to the previous position of each particle in second stage. a, b, c, d, r1, and r2 are all coefficients affecting this updating process.<br><img src="content/images/2017/12/PSO_updating_rule.png" alt="Fig.1. PSO updating Rule" style="width: 500px;"></p>
<h3 id="Finding-a-solution-satisfying-multiple-constraints"><a href="#Finding-a-solution-satisfying-multiple-constraints" class="headerlink" title="Finding a solution satisfying multiple constraints"></a>Finding a solution satisfying multiple constraints</h3><p>However, for each particle a variety of constraints need to be satisfied in practice, leading to an optimization problem under certain conditions. Below is an example code I wrote to optimize a nonliear function with complex constrains. In this example code, the action I took is ignoring those illegal solutions after updating. At the same time, I record number of times that illegal solutions happend as an reference for parameter tuning.</p>
<figure class="highlight matlab"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br><span class="line">54</span><br><span class="line">55</span><br><span class="line">56</span><br><span class="line">57</span><br><span class="line">58</span><br><span class="line">59</span><br><span class="line">60</span><br><span 
class="line">61</span><br><span class="line">62</span><br><span class="line">63</span><br><span class="line">64</span><br><span class="line">65</span><br><span class="line">66</span><br><span class="line">67</span><br><span class="line">68</span><br><span class="line">69</span><br><span class="line">70</span><br><span class="line">71</span><br><span class="line">72</span><br><span class="line">73</span><br><span class="line">74</span><br><span class="line">75</span><br><span class="line">76</span><br><span class="line">77</span><br><span class="line">78</span><br><span class="line">79</span><br><span class="line">80</span><br><span class="line">81</span><br><span class="line">82</span><br><span class="line">83</span><br><span class="line">84</span><br><span class="line">85</span><br><span class="line">86</span><br><span class="line">87</span><br><span class="line">88</span><br><span class="line">89</span><br><span class="line">90</span><br><span class="line">91</span><br><span class="line">92</span><br><span class="line">93</span><br><span class="line">94</span><br><span class="line">95</span><br><span class="line">96</span><br><span class="line">97</span><br><span class="line">98</span><br><span class="line">99</span><br><span class="line">100</span><br><span class="line">101</span><br><span class="line">102</span><br><span class="line">103</span><br><span class="line">104</span><br><span class="line">105</span><br><span class="line">106</span><br><span class="line">107</span><br><span class="line">108</span><br><span class="line">109</span><br><span class="line">110</span><br><span class="line">111</span><br><span class="line">112</span><br><span class="line">113</span><br><span class="line">114</span><br><span class="line">115</span><br><span class="line">116</span><br><span class="line">117</span><br><span class="line">118</span><br><span class="line">119</span><br><span class="line">120</span><br><span class="line">121</span><br><span 
class="line">122</span><br></pre></td><td class="code"><pre><span class="line"><span class="function"><span class="keyword">function</span> <span class="title">out</span> = <span class="title">PSO</span><span class="params">(num, tree)</span></span></span><br><span class="line"><span class="comment">% profile on</span></span><br><span class="line"> tree = init(tree);</span><br><span class="line"> kappa = <span class="number">1</span>;</span><br><span class="line"> phi1 = <span class="number">2.05</span>;</span><br><span class="line"> phi2 = <span class="number">2.05</span>;</span><br><span class="line"> phi = phi1 + phi2;</span><br><span class="line"> chi = <span class="number">2</span>*kappa/<span class="built_in">abs</span>(<span class="number">2</span>-phi-<span class="built_in">sqrt</span>(phi^<span class="number">2</span><span class="number">-4</span>*phi));</span><br><span class="line"> problem.w = chi;</span><br><span class="line"> problem.wdamp = <span class="number">1</span>;</span><br><span class="line"> problem.c1 = chi*phi1;</span><br><span class="line"> problem.c2 = chi*phi2;</span><br><span class="line"> problem.nparticles = <span class="number">200</span>;</span><br><span class="line"> problem.niterations = <span class="number">2400</span>;</span><br><span class="line"> problem.localminimum = <span class="number">0</span>;</span><br><span class="line"> problem.avoidancerate = <span class="number">0</span>;</span><br><span class="line"></span><br><span class="line"> problem.forest.diameterlimitation.max = <span class="number">2</span>;</span><br><span class="line"> problem.forest.diameterlimitation.min = <span class="number">0.75</span>;</span><br><span class="line"> problem.forest.limitation = num; <span class="comment">%limitation for every branch</span></span><br><span class="line"> problem.forest.heightlimitation.min = min(num(:,<span class="number">1</span>));</span><br><span class="line"> problem.forest.heightlimitation.max = max(num(:,<span 
class="number">2</span>));</span><br><span class="line"> problem.forest.structure = tree; <span class="comment">%%definition of structure</span></span><br><span class="line"> num_size = <span class="built_in">size</span>(num);</span><br><span class="line"> problem.statesize = [<span class="number">3</span>, num_size(<span class="number">1</span>)];</span><br><span class="line"> problem.statenormalize = <span class="built_in">ones</span>(problem.statesize);</span><br><span class="line"> </span><br><span class="line"> temp.state = [];</span><br><span class="line"> temp.velocity = [];</span><br><span class="line"> temp.volume = [];</span><br><span class="line"> temp.pbest.state = [];</span><br><span class="line"> temp.pbest.volume = [];</span><br><span class="line"></span><br><span class="line"> gbest.volume = <span class="built_in">inf</span>;</span><br><span class="line"> particles = <span class="built_in">repmat</span>(temp, problem.nparticles, <span class="number">1</span>);</span><br><span class="line"> problem.minivolume = <span class="built_in">zeros</span>(<span class="number">1</span>, problem.niterations);</span><br><span class="line"> problem.fail = <span class="built_in">zeros</span>(<span class="number">1</span>, problem.niterations);</span><br><span class="line"></span><br><span class="line"> tmp.state = [];</span><br><span class="line"> problem.stateiterations = <span class="built_in">repmat</span>(tmp,problem.niterations,<span class="number">1</span>);</span><br><span class="line"></span><br><span class="line"> <span class="keyword">for</span> <span class="built_in">i</span> =<span class="number">1</span>:problem.nparticles</span><br><span class="line"> <span class="keyword">while</span>(<span class="number">1</span>)</span><br><span class="line"> particles(<span class="built_in">i</span>).state = generateParticle(problem.forest.limitation, problem.forest.structure, problem.statesize, problem.forest.diameterlimitation);</span><br><span class="line"> 
<span class="keyword">if</span> (checknull(particles(<span class="built_in">i</span>).state) ==<span class="number">0</span>) || (diametercheck(particles(<span class="built_in">i</span>).state, problem.forest.structure,problem.forest.diameterlimitation) == <span class="number">0</span>)...</span><br><span class="line"> ||(checkheight(particles(<span class="built_in">i</span>).state, problem.forest.structure,problem.forest.limitation) ==<span class="number">0</span>)||(anglecheck(particles(<span class="built_in">i</span>).state, problem.forest.structure) == <span class="number">0</span>)</span><br><span class="line"> <span class="keyword">continue</span>;</span><br><span class="line"> <span class="keyword">end</span></span><br><span class="line"> <span class="keyword">break</span>;</span><br><span class="line"> <span class="keyword">end</span></span><br><span class="line"> particles(<span class="built_in">i</span>).velocity = <span class="built_in">zeros</span>(problem.statesize);</span><br><span class="line"> particles(<span class="built_in">i</span>).volume = calcVolume(particles(<span class="built_in">i</span>).state, problem.forest.structure);</span><br><span class="line"> particles(<span class="built_in">i</span>).pbest.state = particles(<span class="built_in">i</span>).state;</span><br><span class="line"> particles(<span class="built_in">i</span>).pbest.volume = particles(<span class="built_in">i</span>).volume;</span><br><span class="line"> <span class="keyword">if</span> particles(<span class="built_in">i</span>).volume < gbest.volume</span><br><span class="line"> gbest.volume = particles(<span class="built_in">i</span>).volume;</span><br><span class="line"> gbest.state = particles(<span class="built_in">i</span>).state;</span><br><span class="line"> <span class="keyword">end</span></span><br><span class="line"> <span class="keyword">end</span></span><br><span class="line"> </span><br><span class="line"> <span class="keyword">for</span> it = <span 
class="number">1</span>:problem.niterations</span><br><span class="line"> problem.L = <span class="number">2</span>*(<span class="number">1</span>-it*<span class="number">1.0</span>/problem.niterations);</span><br><span class="line"> <span class="keyword">if</span> it< <span class="number">0.75</span>*problem.niterations </span><br><span class="line"> problem.avoidancerate = <span class="built_in">rand</span>(<span class="number">1</span>);</span><br><span class="line"> <span class="keyword">else</span></span><br><span class="line"> problem.avoidancerate = <span class="number">1.0</span>;</span><br><span class="line"> <span class="keyword">end</span></span><br><span class="line"> </span><br><span class="line"> <span class="keyword">for</span> p = <span class="number">1</span>:problem.nparticles*problem.avoidancerate</span><br><span class="line"> particles(p).velocity = problem.w*particles(p).velocity ...</span><br><span class="line"> + problem.c1*<span class="built_in">rand</span>(problem.statesize).*(particles(p).pbest.state - particles(p).state) ...</span><br><span class="line"> + problem.c2*<span class="built_in">rand</span>(problem.statesize).*(gbest.state - particles(p).state); </span><br><span class="line"> particles(p).state = particles(p).state + particles(p).velocity.*problem.statenormalize*<span class="number">0.15</span>;</span><br><span class="line"> <span class="keyword">if</span> (diametercheck(particles(p).state, problem.forest.structure,problem.forest.diameterlimitation) == <span class="number">0</span>)...</span><br><span class="line"> ||(checkheight(particles(p).state, problem.forest.structure,problem.forest.limitation) ==<span class="number">0</span>)||(anglecheck(particles(p).state, problem.forest.structure) == <span class="number">0</span>)</span><br><span class="line"><span class="comment">% particles(p).state = particles(p).pbest.state;</span></span><br><span class="line"> problem.fail(it) = problem.fail(it) + <span 
class="number">1</span>;</span><br><span class="line"> <span class="keyword">continue</span>;</span><br><span class="line"> <span class="keyword">end</span></span><br><span class="line"> particles(p).volume = calcVolume(particles(p).state, problem.forest.structure);</span><br><span class="line"> <span class="keyword">if</span> particles(p).volume < particles(p).pbest.volume</span><br><span class="line"> particles(p).pbest.volume = particles(p).volume;</span><br><span class="line"> particles(p).pbest.state = particles(p).state;</span><br><span class="line"> <span class="keyword">if</span> particles(p).pbest.volume < gbest.volume</span><br><span class="line"> gbest.volume = particles(p).pbest.volume;</span><br><span class="line"> gbest.state = particles(p).pbest.state;</span><br><span class="line"> <span class="keyword">end</span></span><br><span class="line"> <span class="keyword">end</span></span><br><span class="line"> <span class="keyword">end</span></span><br><span class="line"> </span><br><span class="line"> <span class="keyword">for</span> p = <span class="built_in">ceil</span>(problem.nparticles*problem.avoidancerate):problem.nparticles</span><br><span class="line"> particles(p).velocity = problem.w*particles(p).velocity ...</span><br><span class="line"> - problem.L*(<span class="built_in">rand</span>(problem.statesize).*(particles(p).pbest.state - particles(p).state) ...</span><br><span class="line"> + <span class="built_in">rand</span>(problem.statesize).*(gbest.state - particles(p).state)); </span><br><span class="line"> particles(p).state = particles(p).state + particles(p).velocity.*problem.statenormalize*<span class="number">0.15</span>;</span><br><span class="line"> </span><br><span class="line"> <span class="keyword">if</span> (diametercheck(particles(p).state, problem.forest.structure,problem.forest.diameterlimitation) == <span class="number">0</span>)...</span><br><span class="line"> ||(checkheight(particles(p).state, 
problem.forest.structure,problem.forest.limitation) ==<span class="number">0</span>)||(anglecheck(particles(p).state, problem.forest.structure) == <span class="number">0</span>)</span><br><span class="line"> problem.fail(it) = problem.fail(it) + <span class="number">1</span>;</span><br><span class="line"> <span class="keyword">continue</span>;</span><br><span class="line"> <span class="keyword">end</span></span><br><span class="line"> particles(p).volume = calcVolume(particles(p).state, problem.forest.structure); </span><br><span class="line"> <span class="keyword">if</span> particles(p).volume < particles(p).pbest.volume</span><br><span class="line"> particles(p).pbest.volume = particles(p).volume;</span><br><span class="line"> particles(p).pbest.state = particles(p).state;</span><br><span class="line"> <span class="keyword">if</span> particles(p).pbest.volume < gbest.volume</span><br><span class="line"> gbest.volume = particles(p).pbest.volume;</span><br><span class="line"> gbest.state = particles(p).pbest.state;</span><br><span class="line"> <span class="keyword">end</span></span><br><span class="line"> <span class="keyword">end</span></span><br><span class="line"> <span class="keyword">end</span></span><br><span class="line"> problem.w = problem.w*problem.wdamp;</span><br><span class="line"> problem.minivolume(it) = gbest.volume;</span><br><span class="line"> problem.stateiterations(it).state = <span class="built_in">zeros</span>(problem.statesize);</span><br><span class="line"> problem.stateiterations(it).state = gbest.state;</span><br><span class="line"> <span class="keyword">end</span></span><br><span class="line"> save(<span class="string">'problem'</span>)</span><br><span class="line"> savefig(<span class="string">'VolumeChange'</span>);</span><br><span class="line"> savedata(problem.stateiterations, problem.forest.structure);</span><br><span class="line"> save (<span class="string">'problem.minivolume.mat'</span>);</span><br><span class="line"> 
showvolumes(problem.minivolume);</span><br><span class="line"><span class="comment">% profile viewer</span></span><br></pre></td></tr></table></figure>
<h3 id="Local-Minimum-Avoidance"><a href="#Local-Minimum-Avoidance" class="headerlink" title="Local Minimum Avoidance"></a>Local Minimum Avoidance</h3><p>As an heuristic strategy, PSO also tends to falls into local minimum. Here we provide a simple trick to handle this problem. Particles are divided to two groups according to a random generated number, avoidance rate, in each iteration. One group executes the traditional updating rule to move toward the discovered better solution while another group explores potential area that may be missed through moving away from the discovered solution. In this manner, particles are capable of exploring larger space instead of trapping into local minimum and hence PSO performs slightly better.</p>
<h3 id="Reference"><a href="#Reference" class="headerlink" title="Reference"></a>Reference</h3><p>Particle Swarm Optimization. (2017, November 15). In Wikipedia, the free encyclopedia. Retrieved November 15, 2017, from <a href="http://en.wikipedia.org/wiki/Particle_swarm_optimization" target="_blank" rel="noopener">http://en.wikipedia.org/wiki/Particle_swarm_optimization</a><br>Trelea, I. C. (2003). The particle swarm optimization algorithm: convergence analysis and parameter selection. Elsevier North-Holland, Inc.</p>
</div>
<footer class="article-footer">
<a data-url="https://sunyasheng.github.io/2017/12/05/Particle-Swarm-Optimization/" data-id="cju13wdm0000d8ys6il4wiln3" class="article-share-link">Share</a>
</footer>
</div>
</article>
<article id="post-Experience-in-Tian-Chi-Big-Data-Competetion" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-meta">
<a href="/2017/12/02/Experience-in-Tian-Chi-Big-Data-Competetion/" class="article-date">
<time datetime="2017-12-02T14:30:42.000Z" itemprop="datePublished">2017-12-02</time>
</a>
</div>
<div class="article-inner">
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2017/12/02/Experience-in-Tian-Chi-Big-Data-Competetion/">Experience in Tian Chi Big Data Competetion</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<p>This is my first time participating in a data competition. Unfortunately, I obtained quite a low score in this competition. The problem and my solution are posted here.</p>
<h3 id="Problem-Statement"><a href="#Problem-Statement" class="headerlink" title="Problem Statement"></a>Problem Statement</h3><p><a href="content/images/2017/12/Problem.pdf">Problem</a></p>
<p>In this problem, we are required to identify in which shop the transaction happened given the relevant information. </p>
<h3 id="My-Solution"><a href="#My-Solution" class="headerlink" title="My Solution"></a>My Solution</h3><p>I took this problem as a binary classification problem. Therefore, I trained a xgboost classifier for every shop. Then I used the trained classifiers to compute logistic probability of the corresponding shop and selected the shop which has maximum probability as our prediction result.</p>
<p>Below is my python code for feature selection written on jupyter notebook.<br><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br><span class="line">54</span><br><span class="line">55</span><br><span class="line">56</span><br><span class="line">57</span><br><span 
class="line">58</span><br><span class="line">59</span><br><span class="line">60</span><br><span class="line">61</span><br><span class="line">62</span><br><span class="line">63</span><br><span class="line">64</span><br><span class="line">65</span><br><span class="line">66</span><br><span class="line">67</span><br><span class="line">68</span><br><span class="line">69</span><br><span class="line">70</span><br><span class="line">71</span><br><span class="line">72</span><br><span class="line">73</span><br><span class="line">74</span><br><span class="line">75</span><br><span class="line">76</span><br><span class="line">77</span><br><span class="line">78</span><br><span class="line">79</span><br><span class="line">80</span><br><span class="line">81</span><br><span class="line">82</span><br><span class="line">83</span><br><span class="line">84</span><br><span class="line">85</span><br><span class="line">86</span><br><span class="line">87</span><br><span class="line">88</span><br><span class="line">89</span><br><span class="line">90</span><br><span class="line">91</span><br><span class="line">92</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">import</span> pandas <span class="keyword">as</span> pd</span><br><span class="line"><span class="keyword">import</span> numpy <span class="keyword">as</span> np</span><br><span class="line"><span class="keyword">import</span> matplotlib.pyplot <span class="keyword">as</span> plt</span><br><span class="line"><span class="keyword">from</span> collections <span class="keyword">import</span> Counter</span><br><span class="line"><span class="keyword">from</span> IPython.display <span class="keyword">import</span> Image</span><br><span class="line"><span class="keyword">from</span> sklearn.cross_validation <span class="keyword">import</span> train_test_split</span><br><span class="line"><span class="keyword">from</span> sklearn.metrics <span class="keyword">import</span> log_loss, 
accuracy_score</span><br><span class="line"><span class="keyword">from</span> sklearn.ensemble <span class="keyword">import</span> AdaBoostClassifier</span><br><span class="line"><span class="keyword">from</span> sklearn.ensemble <span class="keyword">import</span> GradientBoostingClassifier</span><br><span class="line"><span class="keyword">from</span> datetime <span class="keyword">import</span> datetime</span><br><span class="line">%matplotlib inline</span><br><span class="line"></span><br><span class="line"><span class="comment">##reading the data</span></span><br><span class="line">shop_info = pd.read_csv(<span class="string">'ccf_first_round_shop_info.csv'</span>)</span><br><span class="line">train_data = pd.read_csv(<span class="string">'ccf_first_round_user_shop_behavior.csv'</span>)</span><br><span class="line"></span><br><span class="line"><span class="comment">##convert the string to int for later use</span></span><br><span class="line"><span class="keyword">if</span> shop_info[<span class="string">"shop_id"</span>].dtype == <span class="string">'object'</span>:</span><br><span class="line"> shop_info[<span class="string">"shop_id"</span>] = shop_info.shop_id.str.replace(<span class="string">'s_'</span>, <span class="string">''</span>).astype(np.int64)</span><br><span class="line"><span class="keyword">if</span> shop_info[<span class="string">"category_id"</span>].dtype == <span class="string">'object'</span>:</span><br><span class="line"> shop_info[<span class="string">"category_id"</span>] = shop_info.category_id.str.replace(<span class="string">'c_'</span>, <span class="string">''</span>).astype(np.int64)</span><br><span class="line"><span class="keyword">if</span> shop_info[<span class="string">"mall_id"</span>].dtype == <span class="string">'object'</span>:</span><br><span class="line"> shop_info[<span class="string">"mall_id"</span>] = shop_info.mall_id.str.replace(<span class="string">'m_'</span>, <span 
class="string">''</span>).astype(np.int64)</span><br><span class="line"></span><br><span class="line"><span class="comment">##Add mall information to training data</span></span><br><span class="line"><span class="keyword">if</span> train_data[<span class="string">"shop_id"</span>].dtype == <span class="string">'object'</span>:</span><br><span class="line"> train_data[<span class="string">"shop_id"</span>] = train_data.shop_id.str.replace(<span class="string">'s_'</span>, <span class="string">''</span>).astype(np.int64)</span><br><span class="line"><span class="keyword">if</span> train_data[<span class="string">"user_id"</span>].dtype == <span class="string">'object'</span>:</span><br><span class="line"> train_data[<span class="string">"user_id"</span>] = train_data.user_id.str.replace(<span class="string">'u_'</span>, <span class="string">''</span>).astype(np.int64)</span><br><span class="line"></span><br><span class="line">shop_mall = shop_info.loc[:,[<span class="string">'shop_id'</span>,<span class="string">'mall_id'</span>]]</span><br><span class="line"><span class="keyword">if</span> <span class="keyword">not</span> <span class="string">'mall_id'</span> <span class="keyword">in</span> train_data:</span><br><span class="line"> train_data = train_data.merge(shop_mall, on = <span class="string">'shop_id'</span>,how = <span class="string">'outer'</span>)<span class="comment">#, suffixes=('_1', '_2'))</span></span><br><span class="line"><span class="keyword">if</span> <span class="string">'wifi_infos'</span> <span class="keyword">in</span> train_data:</span><br><span class="line"> train_data = train_data.drop(<span class="string">'wifi_infos'</span>,<span class="number">1</span>)</span><br><span class="line"></span><br><span class="line"><span class="comment">##convert transaction time to a number as feature</span></span><br><span class="line">train_data.time_stamp = train_data.time_stamp.astype(str)</span><br><span class="line">time = []</span><br><span 
class="line"><span class="keyword">for</span> index,row <span class="keyword">in</span> train_data.iterrows():</span><br><span class="line"> tmp = row[<span class="string">'time_stamp'</span>]</span><br><span class="line"> a = int(tmp[<span class="number">11</span>]); b = int(tmp[<span class="number">12</span>]); c = int(tmp[<span class="number">14</span>]); d = int(tmp[<span class="number">15</span>])</span><br><span class="line"> time.append(((a*<span class="number">10</span>+b)*<span class="number">60</span>+(c*<span class="number">10</span>+d))/<span class="number">10</span>)</span><br><span class="line">train_data[<span class="string">'time'</span>] = pd.Series(time, index = train_data.index)</span><br><span class="line"><span class="keyword">if</span> <span class="string">'time_stamp'</span> <span class="keyword">in</span> train_data:</span><br><span class="line"> train_data = train_data.drop(<span class="string">'time_stamp'</span>,<span class="number">1</span>)</span><br><span class="line">cols = train_data.columns.tolist()</span><br><span class="line">col = []</span><br><span class="line">col.append(cols[<span class="number">0</span>]);col.append(cols[<span class="number">2</span>]);col.append(cols[<span class="number">3</span>]);col.append(cols[<span class="number">4</span>]);col.append(cols[<span class="number">5</span>]);col.append(cols[<span class="number">1</span>]);</span><br><span class="line">train_data.append(col)</span><br><span class="line">train_data = train_data[col]</span><br><span class="line"></span><br><span class="line"><span class="comment">##sort wifi_infos based on their strength and build a dictionary to map string to int for later use</span></span><br><span class="line">b = []</span><br><span class="line"><span class="keyword">for</span> index,row <span class="keyword">in</span> train_data.iterrows():</span><br><span class="line"> a = row[<span class="string">"wifi_infos"</span>].split(<span 
class="string">';'</span>);</span><br><span class="line"> a = sorted(a, key = <span class="keyword">lambda</span> d:int(d.split(<span class="string">'|'</span>)[<span class="number">1</span>]),reverse = <span class="keyword">True</span>)</span><br><span class="line"> b.append(a)</span><br><span class="line"> </span><br><span class="line">wifi_infos = np.zeros((len(b),<span class="number">20</span>));</span><br><span class="line"><span class="keyword">for</span> i, row <span class="keyword">in</span> enumerate(b):</span><br><span class="line"> <span class="keyword">for</span> j, column <span class="keyword">in</span> enumerate(row):</span><br><span class="line"> wifi_infos[i][j] = train_bissid2dict[column.split(<span class="string">'|'</span>)[<span class="number">0</span>]]</span><br><span class="line"></span><br><span class="line"><span class="comment">##map shop_id, mall_id and user_id to class and build a dictionary</span></span><br><span class="line">data_without_wifi = train_data.values</span><br><span class="line">shop_id2class = {}</span><br><span class="line">class2shop_id = {}</span><br><span class="line">cnt_shop = <span class="number">1</span>;</span><br><span class="line">user_id2class = {}</span><br><span class="line">cnt_user_id = <span class="number">1</span>;</span><br><span class="line">mall_id2class = {}</span><br><span class="line">cnt_mall_id = <span class="number">1</span>;</span><br><span class="line"><span class="keyword">for</span> i <span class="keyword">in</span> range(len(data_without_wifi)):</span><br><span class="line"> <span class="keyword">if</span> <span class="keyword">not</span> data_without_wifi[i][<span class="number">5</span>] <span class="keyword">in</span> shop_id2class:</span><br><span class="line"> shop_id2class[data_without_wifi[i][<span class="number">5</span>]] = cnt_shop</span><br><span class="line"> class2shop_id[cnt_shop] = data_without_wifi[i][<span class="number">5</span>]</span><br><span class="line"> cnt_shop += 
<span class="number">1</span></span><br><span class="line"> data_without_wifi[i][<span class="number">5</span>] = shop_id2class[data_without_wifi[i][<span class="number">5</span>]]</span><br><span class="line"> <span class="keyword">if</span> <span class="keyword">not</span> data_without_wifi[i][<span class="number">3</span>] <span class="keyword">in</span> mall_id2class:</span><br><span class="line"> mall_id2class[data_without_wifi[i][<span class="number">3</span>]] = cnt_mall_id</span><br><span class="line"> cnt_mall_id += <span class="number">1</span></span><br><span class="line"> data_without_wifi[i][<span class="number">3</span>] = mall_id2class[data_without_wifi[i][<span class="number">3</span>]]</span><br><span class="line"> <span class="keyword">if</span> <span class="keyword">not</span> data_without_wifi[i][<span class="number">0</span>] <span class="keyword">in</span> user_id2class:</span><br><span class="line"> user_id2class[data_without_wifi[i][<span class="number">0</span>]] = cnt_user_id</span><br><span class="line"> cnt_user_id += <span class="number">1</span></span><br><span class="line"> data_without_wifi[i][<span class="number">0</span>] = user_id2class[data_without_wifi[i][<span class="number">0</span>]]</span><br><span class="line"></span><br><span class="line"><span class="comment">##Add the wifi information and save the data</span></span><br><span class="line">data = np.hstack((data_without_wifi[:,:<span class="number">5</span>],wifi_infos,data_without_wifi[:,<span class="number">5</span>].reshape(<span class="number">-1</span>,<span class="number">1</span>)))</span><br><span class="line">data[data==<span class="number">0</span>]=np.nan</span><br><span class="line">np.savetxt(<span class="string">'data.csv'</span>,data, delimiter=<span class="string">","</span>)</span><br></pre></td></tr></table></figure></p>
<p>To sum up, the features I selected include longitude, latitude, mall id, transaction time, and wifi infos sorted from highest to lowest based on their strength.</p>
<p>Classifier trainning is described below.<br><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">import</span> numpy <span class="keyword">as</span> np</span><br><span class="line"><span class="keyword">import</span> matplotlib.pyplot <span class="keyword">as</span> plt</span><br><span class="line"><span class="keyword">from</span> sklearn.model_selection <span class="keyword">import</span> train_test_split</span><br><span class="line"><span class="keyword">from</span> numpy <span class="keyword">import</span> genfromtxt</span><br><span class="line"><span class="keyword">import</span> xgboost <span class="keyword">as</span> xgb</span><br><span class="line"></span><br><span class="line">data = genfromtxt(<span class="string">'data.csv'</span>,delimiter=<span class="string">','</span>)</span><br><span class="line"><span class="keyword">for</span> shop_id <span class="keyword">in</span> range(<span class="number">1</span>,<span class="number">8478</span>):</span><br><span 
class="line"> data_x = data[:,:<span class="number">-1</span>]; <span class="comment">#data_y = data[:data_scale*20000,-1].astype(int)</span></span><br><span class="line"> data_y = (data[:,<span class="number">-1</span>]==shop_id).astype(int);</span><br><span class="line"> X_train, X_test, y_train, y_test = train_test_split(data_x, data_y, test_size=<span class="number">0.33</span>, random_state=<span class="number">42</span>)</span><br><span class="line"></span><br><span class="line"> dtrain = xgb.DMatrix(X_train, label=y_train)</span><br><span class="line"> dtest = xgb.DMatrix(X_test, label=y_test)</span><br><span class="line"></span><br><span class="line"> param = {<span class="string">'max_depth'</span>: <span class="number">2</span>, <span class="string">'eta'</span>: <span class="number">1</span>, <span class="string">'silent'</span>: <span class="number">1</span>, <span class="string">'objective'</span>: <span class="string">'binary:logistic'</span>}</span><br><span class="line"> param[<span class="string">'nthread'</span>] = <span class="number">4</span></span><br><span class="line"> <span class="comment"># param['eval_metric'] = 'auc'</span></span><br><span class="line"></span><br><span class="line"> evallist = [(dtest, <span class="string">'eval'</span>), (dtrain, <span class="string">'train'</span>)]</span><br><span class="line"></span><br><span class="line"> num_round = <span class="number">5</span></span><br><span class="line"> bst = xgb.train(param, dtrain, num_round)</span><br><span class="line"> <span class="keyword">if</span> shop_id%<span class="number">100</span> == <span class="number">0</span>: </span><br><span class="line"> pred = (bst.predict(dtest)><span class="number">0.5</span>).astype(int)</span><br><span class="line"> error_rate = np.sum(pred != y_test)*<span class="number">1.0</span> / y_test.shape[<span class="number">0</span>]</span><br><span class="line"> print(str(shop_id)+<span class="string">' test error using logistic = 
{}'</span>.format(error_rate))</span><br><span class="line"></span><br><span class="line"> bst.save_model(str(shop_id)+<span class="string">'.model'</span>)</span><br></pre></td></tr></table></figure></p>
<p>Arrange the evaluation data to certain form for later use in prediction<br><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br></pre></td><td class="code"><pre><span class="line">eval_data = pd.read_csv(<span class="string">'evaluation_public.csv'</span>)</span><br><span class="line"><span class="keyword">if</span> eval_data[<span class="string">"user_id"</span>].dtype == <span class="string">'object'</span>:</span><br><span class="line"> eval_data[<span class="string">"user_id"</span>] = eval_data.user_id.str.replace(<span class="string">'u_'</span>, <span 
class="string">''</span>).astype(np.int64)</span><br><span class="line"><span class="keyword">if</span> eval_data[<span class="string">"mall_id"</span>].dtype == <span class="string">'object'</span>:</span><br><span class="line"> eval_data[<span class="string">"mall_id"</span>] = eval_data.mall_id.str.replace(<span class="string">'m_'</span>, <span class="string">''</span>).astype(np.int64)</span><br><span class="line"></span><br><span class="line">b = []</span><br><span class="line"><span class="keyword">for</span> index,row <span class="keyword">in</span> eval_data.iterrows():</span><br><span class="line"> a = row[<span class="string">"wifi_infos"</span>].split(<span class="string">';'</span>);</span><br><span class="line"> a = sorted(a, key = <span class="keyword">lambda</span> d:int(d.split(<span class="string">'|'</span>)[<span class="number">1</span>]),reverse = <span class="keyword">True</span>)</span><br><span class="line"> b.append(a)</span><br><span class="line"></span><br><span class="line">wifi_infos = np.zeros((len(b),<span class="number">20</span>));</span><br><span class="line"><span class="keyword">for</span> i, row <span class="keyword">in</span> enumerate(b):</span><br><span class="line"> <span class="keyword">for</span> j, column <span class="keyword">in</span> enumerate(row):</span><br><span class="line"> <span class="keyword">if</span> column.split(<span class="string">'|'</span>)[<span class="number">0</span>] <span class="keyword">in</span> train_bissid2dict:</span><br><span class="line"> wifi_infos[i][j] = train_bissid2dict[column.split(<span class="string">'|'</span>)[<span class="number">0</span>]]</span><br><span class="line"> <span class="keyword">else</span>:</span><br><span class="line"> wifi_infos[i][j] = <span class="number">0</span></span><br><span class="line"></span><br><span class="line"><span class="keyword">if</span> <span class="string">'time_stamp'</span> <span class="keyword">in</span> eval_data:</span><br><span 
class="line"> time = []</span><br><span class="line"> <span class="keyword">for</span> index,row <span class="keyword">in</span> eval_data.iterrows():</span><br><span class="line"> tmp = row[<span class="string">'time_stamp'</span>]</span><br><span class="line"> a = int(tmp[<span class="number">11</span>]); b = int(tmp[<span class="number">12</span>]); c = int(tmp[<span class="number">14</span>]); d = int(tmp[<span class="number">15</span>])</span><br><span class="line"> time.append(((a*<span class="number">10</span>+b)*<span class="number">60</span>+(c*<span class="number">10</span>+d))/<span class="number">10</span>)</span><br><span class="line"> eval_data[<span class="string">'time'</span>] = pd.Series(time, index = eval_data.index)</span><br><span class="line"><span class="keyword">if</span> <span class="string">'time_stamp'</span> <span class="keyword">in</span> eval_data:</span><br><span class="line"> eval_data = eval_data.drop(<span class="string">'time_stamp'</span>,<span class="number">1</span>)</span><br><span class="line"><span class="keyword">if</span> <span class="string">'wifi_infos'</span> <span class="keyword">in</span> eval_data:</span><br><span class="line"> eval_data = eval_data.drop(<span class="string">'wifi_infos'</span>,<span class="number">1</span>)</span><br><span class="line"></span><br><span class="line">data_final = eval_data.values</span><br><span class="line"><span class="keyword">for</span> i <span class="keyword">in</span> range(len(data_final)):</span><br><span class="line"> data_final[i][<span class="number">2</span>] = mall_id2class[data_final[i][<span class="number">2</span>]]</span><br><span class="line"> <span class="keyword">if</span> data_final[i][<span class="number">1</span>] <span class="keyword">in</span> user_id2class:</span><br><span class="line"> data_final[i][<span class="number">1</span>] = user_id2class[data_final[i][<span class="number">1</span>]] </span><br><span class="line"> <span 
class="keyword">else</span>:</span><br><span class="line"> data_final[i][<span class="number">1</span>] = <span class="number">0</span></span><br><span class="line"></span><br><span class="line">data_done = np.hstack((data_final[:,:<span class="number">2</span>],data_final[:,<span class="number">3</span>:<span class="number">5</span>],data_final[:,<span class="number">2</span>].reshape(<span class="number">-1</span>,<span class="number">1</span>),data_final[:,<span class="number">5</span>].reshape(<span class="number">-1</span>,<span class="number">1</span>),wifi_infos))</span><br><span class="line">data_done[data_done == <span class="number">0</span>] = np.nan</span><br><span class="line">np.savetxt(<span class="string">'final_data.csv'</span>,data_done, delimiter=<span class="string">","</span>)</span><br></pre></td></tr></table></figure></p>
<p>Prediction is described below. It is really funny that I run this code on PI supercomputer for more than 40 hours. After I have been waiting for 20 hours, I tell myself that always program in future in parallel manner. At this time, I realize how important the distributive machine learning is.<br><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">import</span> numpy <span class="keyword">as</span> np</span><br><span class="line"><span class="keyword">import</span> matplotlib.pyplot <span class="keyword">as</span> plt</span><br><span class="line"><span class="keyword">from</span> sklearn.model_selection <span class="keyword">import</span> train_test_split</span><br><span class="line"><span class="keyword">from</span> numpy <span class="keyword">import</span> genfromtxt</span><br><span class="line"><span class="keyword">import</span> xgboost <span class="keyword">as</span> xgb</span><br><span class="line"></span><br><span class="line">data = genfromtxt(<span class="string">'final_data.csv'</span>,delimiter=<span class="string">','</span>)</span><br><span class="line">res = np.zeros((len(data),<span class="number">2</span>))</span><br><span 
class="line">res[:,<span class="number">0</span>] = data[:,<span class="number">0</span>]</span><br><span class="line"></span><br><span class="line">bst = {}</span><br><span class="line"><span class="keyword">for</span> i <span class="keyword">in</span> range(<span class="number">1</span>,<span class="number">8478</span>):</span><br><span class="line"> bst[i] = xgb.Booster();</span><br><span class="line"> bst[i].load_model(str(i)+<span class="string">'.model'</span>); </span><br><span class="line"><span class="keyword">for</span> i <span class="keyword">in</span> range(len(data)):</span><br><span class="line"> evaluation = xgb.DMatrix(data[i,<span class="number">1</span>:]);</span><br><span class="line"> pred = np.zeros((<span class="number">8478</span>,<span class="number">1</span>));</span><br><span class="line"> <span class="keyword">for</span> j <span class="keyword">in</span> range(<span class="number">1</span>,<span class="number">8478</span>):</span><br><span class="line"> pred[j] = bst[j].predict(evaluation);</span><br><span class="line"> hash_shop_id = np.argmax(pred);</span><br><span class="line"> res[i,<span class="number">1</span>] = hash_shop_id;</span><br><span class="line"></span><br><span class="line">np.savetxt(<span class="string">'res.csv'</span>, res, delimiter=<span class="string">","</span>)</span><br></pre></td></tr></table></figure></p>
<p>Arrange the prediction results into the required format in a Jupyter notebook</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">from</span> numpy <span class="keyword">import</span> genfromtxt</span><br><span class="line">res_data = genfromtxt(<span class="string">'res.csv'</span>,delimiter=<span class="string">','</span>)</span><br><span class="line">res_data = np.array(res_data).astype(int)</span><br><span class="line"><span class="keyword">for</span> i <span class="keyword">in</span> range(len(res_data)):</span><br><span class="line"> res_data[i,<span class="number">1</span>] = class2shop_id[res_data[i,<span class="number">1</span>]]</span><br><span class="line">np.savetxt(<span class="string">'result.csv'</span>,res_data,fmt=<span class="string">'%d'</span>,delimiter=<span class="string">','</span>)</span><br><span class="line"></span><br><span class="line">replacements = {<span class="string">','</span>:<span class="string">',s_'</span>}</span><br><span class="line"><span class="keyword">with</span> open(<span class="string">'result.csv'</span>) <span class="keyword">as</span> infile, open(<span class="string">'result1.csv'</span>, <span class="string">'w'</span>) <span class="keyword">as</span> outfile:</span><br><span class="line"> <span class="keyword">for</span> line <span class="keyword">in</span> infile:</span><br><span class="line"> <span class="keyword">for</span> src, target <span class="keyword">in</span> replacements.items():</span><br><span class="line"> line = line.replace(src, target)</span><br><span class="line"> 
outfile.write(line)</span><br></pre></td></tr></table></figure>
<h3 id="Some-Mistakes-I-Have-Made"><a href="#Some-Mistakes-I-Have-Made" class="headerlink" title="Some Mistakes I Have Made"></a>Some Mistakes I Have Made</h3><ul>
<li>Bias is significant in training because the number of negative data items is much larger than the number of positive ones.</li>
<li>I only used the sorted wifi_infos as a feature without normalizing their strength as another feature, leading to a loss of information.</li>
<li>I forgot to use cross-validation to tune my hyperparameters.</li>
</ul>
<!-- ### Final Accuracy
![Results](content/images/2017/12/Result.jpg)
-->
</div>
<footer class="article-footer">
<a data-url="https://sunyasheng.github.io/2017/12/02/Experience-in-Tian-Chi-Big-Data-Competetion/" data-id="cju13wdm3000f8ys65bota8jc" class="article-share-link">Share</a>
</footer>
</div>
</article>
<article id="post-PI超级计算使用常用shell命令" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-meta">
<a href="/2017/12/01/PI超级计算使用常用shell命令/" class="article-date">
<time datetime="2017-12-01T06:38:20.000Z" itemprop="datePublished">2017-12-01</time>
</a>
</div>
<div class="article-inner">
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2017/12/01/PI超级计算使用常用shell命令/">Cheatsheet for Frequently Used Shell Commands</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<p>Here are some frequently used shell commands for running programs on a remote supercomputer.</p>
<h3 id="SSH-to-remote-supercomputer"><a href="#SSH-to-remote-supercomputer" class="headerlink" title="SSH to remote supercomputer"></a>SSH to remote supercomputer</h3><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">ssh -p [portnumber] [usrname]@[ipaddress]</span><br></pre></td></tr></table></figure>
<h3 id="Use-python-in-supercomputer"><a href="#Use-python-in-supercomputer" class="headerlink" title="Use python in supercomputer"></a>Use python in supercomputer</h3><ul>
<li><p>Load the Python module</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">module load ~/python27-gcc/bin/activate</span><br></pre></td></tr></table></figure>
</li>
<li><p>Activate the Python environment</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">source ~/python27-gcc/bin/activate</span><br></pre></td></tr></table></figure>
</li>
</ul>
<h3 id="Transfer-files-between-local-computer-and-remote-computer"><a href="#Transfer-files-between-local-computer-and-remote-computer" class="headerlink" title="Transfer files between local computer and remote computer"></a>Transfer files between local computer and remote computer</h3><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">scp [filepath in local computer] [usrname]@[ipaddress]:[filepath in remote computer]</span><br></pre></td></tr></table></figure>
<h3 id="Run-shell-file-in-supercomputer"><a href="#Run-shell-file-in-supercomputer" class="headerlink" title="Run shell file in supercomputer"></a>Run shell file in supercomputer</h3><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">sbatch [shell file]</span><br></pre></td></tr></table></figure>
<h3 id="Execute-a-task"><a href="#Execute-a-task" class="headerlink" title="Execute a task"></a>Execute a task</h3><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">sbatch -p cpu [slurm file]</span><br></pre></td></tr></table></figure>
<h3 id="Cancel-a-task"><a href="#Cancel-a-task" class="headerlink" title="Cancel a task"></a>Cancel a task</h3><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">scancel [jobnumber]</span><br></pre></td></tr></table></figure>
<h3 id="Show-our-tasks-running-on-supercomputer"><a href="#Show-our-tasks-running-on-supercomputer" class="headerlink" title="Show our tasks running on supercomputer"></a>Show our tasks running on supercomputer</h3><figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">squeue -u `whoami`</span><br></pre></td></tr></table></figure>
</div>
<footer class="article-footer">
<a data-url="https://sunyasheng.github.io/2017/12/01/PI超级计算使用常用shell命令/" data-id="cju13wdm2000e8ys6znkzrogq" class="article-share-link">Share</a>
</footer>
</div>
</article>