From 4a0f63af88e72d11daa64701901bcf8ff974400e Mon Sep 17 00:00:00 2001 From: Dominik Probst Date: Thu, 13 Jun 2024 12:46:40 +0200 Subject: [PATCH] exercise-classification: Fixed a wrong midpoint used in the calculation of Information Gain and Gini Index --- exercise/4-Classification.tex | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/exercise/4-Classification.tex b/exercise/4-Classification.tex index 549a87c..9936f65 100644 --- a/exercise/4-Classification.tex +++ b/exercise/4-Classification.tex @@ -144,7 +144,7 @@ \subsection*{Task 1: Information Gain} & = 0 \\ \end{alignat*} - \item \textbf{Split point $25,5$:} + \item \textbf{Split point $25,0$:} \begin{alignat*}{2} \text{Info}\OfAttribute{Age}(D) & = \sum_{j=1}^v \frac{|D\OfAttribute{Age$,j$}|}{|D\OfAttribute{Age}|} \text{Info}(D_{A\OfAttribute{Age$,j$}}) \\ @@ -159,7 +159,7 @@ \subsection*{Task 1: Information Gain} \end{enumerate} - Therefore, the Information Gain for the attribute \textit{Age} is $0,817$ (if we split at $25,5$). + Therefore, the Information Gain for the attribute \textit{Age} is $0,817$ (if we split at $25,0$). \item \textbf{Attribute \textit{Major}:} @@ -599,10 +599,10 @@ \subsection*{Task 2: Gini Index} & = 0,5 - 0,5 \\ & = 0 \\ \end{alignat*} - \item \textbf{Split point $25,5$:} + \item \textbf{Split point $25,0$:} \begin{alignat*}{2} - \text{Gini}\OfAttribute{Age}(D) & = \frac{|D\OfSpecificValue{Age}{$\leq$}{25,5}|}{|D\OfAttribute{Age}|} \text{Gini}(D\OfSpecificValue{Age}{$\leq$}{25,5}) + \frac{|D\OfSpecificValue{Age}{$>$}{25,5}|}{|D\OfAttribute{Age}|} \text{Gini}(D\OfSpecificValue{Age}{$>$}{25,5}) \\ + \text{Gini}\OfAttribute{Age}(D) & = \frac{|D\OfSpecificValue{Age}{$\leq$}{25,0}|}{|D\OfAttribute{Age}|} \text{Gini}(D\OfSpecificValue{Age}{$\leq$}{25,0}) + \frac{|D\OfSpecificValue{Age}{$>$}{25,0}|}{|D\OfAttribute{Age}|} \text{Gini}(D\OfSpecificValue{Age}{$>$}{25,0}) \\ & = \frac{3}{6} \left(1 - \left(\frac{2}{3}\right)^2 - \left(\frac{1}{3}\right)^2\right) + \frac{3}{6} \left(1 - \left(\frac{1}{3}\right)^2 - \left(\frac{2}{3}\right)^2\right) \\ & = \frac{3}{6} \cdot 0,4444 + \frac{3}{6} \cdot 0,4444 \\ & = 0,4444 \\ @@ -612,7 +612,7 @@ \subsection*{Task 2: Gini Index} \end{alignat*} \end{enumerate} - The best split point is $25,5$ since its Gini Index is the lowest ($0,4444$) and therefore the reduction of impurity ($0,0556$) is the highest. + The best split point is $25,0$ since its Gini Index is the lowest ($0,4444$) and therefore the reduction of impurity ($0,0556$) is the highest. \item \textbf{Attribute \textit{Major}:}