\chapter{Adaptive basis function models}
\section{AdaBoost}
\subsection{Representation}
\begin{equation}
y=\text{sign}(f(\vec{x}))=\text{sign}\left(\sum\limits_{m=1}^M \alpha_m G_m(\vec{x})\right)
\end{equation}
where the $G_m(\vec{x})$ are weak (base) classifiers and the $\alpha_m$ are their weights.
\subsection{Evaluation}
\begin{equation} \nonumber
L(y,f(\vec{x}))=\exp[-yf(\vec{x})], \text{ i.e., the exponential loss function}
\end{equation}
At stage $m$, the new weak classifier and its coefficient are chosen greedily:
\begin{equation}
(\alpha_m,G_m(\vec{x}))= \arg\min_{\alpha,G} \sum_{i=1}^N \exp{[-y_i(f_{m-1}(\vec{x}_i)+\alpha G(\vec{x}_i))]}
\end{equation}
Define $\bar{w}_{mi}=\exp[-y_i f_{m-1}(\vec{x}_i)]$, which is constant w.r.t. $\alpha$ and $G$. Then
\begin{equation}
(\alpha_m,G_m(\vec{x}))= \arg\min_{\alpha,G} \sum_{i=1}^N \bar{w}_{mi} \exp(-y_i \alpha G(\vec{x}_i))
\end{equation}
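Splitting the sum over correctly and incorrectly classified points makes the minimization explicit (a standard two-step derivation, sketched here):
\begin{eqnarray*}
\sum_{i=1}^N \bar{w}_{mi}\exp(-y_i \alpha G(\vec{x}_i)) &=& e^{-\alpha}\sum_{y_i=G(\vec{x}_i)} \bar{w}_{mi} + e^{\alpha}\sum_{y_i\neq G(\vec{x}_i)} \bar{w}_{mi} \\
&=& (e^{\alpha}-e^{-\alpha})\sum_{i=1}^N \bar{w}_{mi}\mathbb{I}(G(\vec{x}_i)\neq y_i) + e^{-\alpha}\sum_{i=1}^N \bar{w}_{mi}
\end{eqnarray*}
For any fixed $\alpha>0$, the optimal $G_m$ therefore minimizes the weighted error; with the weights normalized to sum to one, setting the derivative w.r.t. $\alpha$ to zero yields the closed form $\alpha_m=\frac{1}{2}\log\frac{1-e_m}{e_m}$ used in step (c) of the algorithm below.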
\subsection{Optimization}
\subsubsection{Input}
\begin{eqnarray*}
& \mathcal{D}=\{(\vec{x}_1,y_1),(\vec{x}_2,y_2),\dots,(\vec{x}_N,y_N)\} \\
& \quad \text{where } \vec{x}_i \in \mathbb{R}^D,\ y_i \in \{-1,+1\} \\
& \text{A family of weak classifiers } \{G_1,G_2,\dots,G_M\}
\end{eqnarray*}
\subsubsection{Output}
Final classifier: $G(\vec{x})$
\subsubsection{Algorithm}
\begin{enumerate}
\item Initialize the weight distribution of the training data (for $m=1$):
\begin{equation}\nonumber
\mathcal{D}_1=(w_{11},w_{12},\cdots,w_{1N})=\left(\frac{1}{N},\frac{1}{N},\cdots,\frac{1}{N}\right)
\end{equation}
\item Iterate over $m=1,2,\dotsc,M$
\subitem (a) Use the training data with the current weight distribution $\mathcal{D}_m$ to obtain a weak classifier $G_m(\vec{x})$
\subitem (b) Compute the weighted error rate of $G_m(\vec{x})$ over the training data \\
\begin{equation}
e_m=P(G_m(\vec{x}_i)\neq y_i)=\sum_{i=1}^N {w_{mi}\mathbb{I}(G_m(\vec{x}_i) \neq y_i)}
\end{equation}
\subitem (c) Compute the coefficient of classifier $G_m(\vec{x})$
\begin{equation}
\alpha_m = \frac{1}{2}\log{\frac{1-e_m}{e_m}}
\end{equation}
\subitem (d) Update the weight distribution of the training data
\begin{equation}
w_{m+1,i}=\frac{w_{mi}}{Z_m}\exp(-\alpha_m y_i G_m(\vec{x}_i))
\end{equation}
where $Z_m$ is the normalization constant that makes the updated weights sum to one
\begin{equation}
Z_m=\sum_{i=1}^N w_{mi}\exp(-\alpha_m y_i G_m(\vec{x}_i))
\end{equation}
\item Ensemble the $M$ weak classifiers into the final classifier (a code sketch of the full procedure is given after this algorithm)
\begin{equation}
G(\vec{x})=\text{sign}\left(f(\vec{x})\right)=\text{sign}\left[\sum_{m=1}^M \alpha_m G_m(\vec{x})\right]
\end{equation}
\end{enumerate}
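To make the loop concrete, the following is a minimal Python/NumPy sketch of the algorithm above, using single-feature threshold stumps as the weak classifiers $G_m$. The stump search, the early stop at $e_m \geq 0.5$, and all function names are illustrative assumptions of this sketch, not part of the algorithm as stated.
\begin{verbatim}
import numpy as np

def fit_stump(X, y, w):
    # Exhaustively pick the (feature, threshold, polarity) stump
    # minimizing the weighted error e_m = sum_i w_i * I(G(x_i) != y_i).
    best_err, best = np.inf, None
    for d in range(X.shape[1]):
        for t in np.unique(X[:, d]):
            for s in (+1, -1):
                pred = np.where(X[:, d] <= t, s, -s)
                err = w @ (pred != y)
                if err < best_err:
                    best_err, best = err, (d, t, s)
    return best_err, best

def stump_predict(stump, X):
    d, t, s = stump
    return np.where(X[:, d] <= t, s, -s)

def adaboost_fit(X, y, M=20):
    N = len(y)
    w = np.full(N, 1.0 / N)                    # step 1: uniform D_1
    model = []
    for m in range(M):                         # step 2
        e_m, stump = fit_stump(X, y, w)        # (a), (b)
        if e_m <= 0 or e_m >= 0.5:             # perfect fit, or no better
            break                              # than chance: stop early
        alpha = 0.5 * np.log((1 - e_m) / e_m)  # (c)
        w *= np.exp(-alpha * y * stump_predict(stump, X))
        w /= w.sum()                           # (d): divide by Z_m
        model.append((alpha, stump))
    return model

def adaboost_predict(model, X):
    # step 3: G(x) = sign(sum_m alpha_m * G_m(x))
    f = sum(a * stump_predict(s, X) for a, s in model)
    return np.sign(f)
\end{verbatim}
The early stop mirrors step (c): at $e_m \geq 1/2$ the coefficient $\alpha_m$ would be non-positive, so the weak learner no longer helps.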
\subsection{The upper bound of the training error of AdaBoost}
\begin{theorem}
The training error of the final classifier $G$ is bounded above as
\begin{equation}
\frac{1}{N} \sum_{i=1}^N \mathbb{I}(G(\vec{x}_i)\neq y_i) \leq \frac{1}{N} \sum_{i=1}^N \exp(-y_i f(\vec{x}_i))=\prod_{m=1}^M Z_m
\end{equation}
Note: the following identity is useful in proving this theorem
\begin{equation}
w_{mi}\exp(-\alpha_m y_i G_m(\vec{x}_i))=Z_m w_{m+1,i}
\end{equation}
\end{theorem}
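A sketch of the standard argument: the inequality holds pointwise, since $G(\vec{x}_i)\neq y_i$ implies $y_i f(\vec{x}_i)\leq 0$ and hence $\exp(-y_i f(\vec{x}_i))\geq 1 = \mathbb{I}(G(\vec{x}_i)\neq y_i)$. The equality follows by unrolling the weight update: with $w_{1i}=1/N$,
\begin{eqnarray*}
\frac{1}{N}\sum_{i=1}^N \exp(-y_i f(\vec{x}_i)) &=& \sum_{i=1}^N w_{1i}\prod_{m=1}^M \exp(-\alpha_m y_i G_m(\vec{x}_i)) \\
&=& Z_1 \sum_{i=1}^N w_{2i}\prod_{m=2}^M \exp(-\alpha_m y_i G_m(\vec{x}_i)) = \cdots = \prod_{m=1}^M Z_m
\end{eqnarray*}
Moreover, $Z_m = e^{-\alpha_m}(1-e_m)+e^{\alpha_m}e_m = 2\sqrt{e_m(1-e_m)} < 1$ whenever $e_m < 1/2$, so the training error decays exponentially in $M$.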