update
authorMart Lubbers <mart@martlubbers.net>
Mon, 12 Sep 2016 18:30:19 +0000 (20:30 +0200)
committerMart Lubbers <mart@martlubbers.net>
Mon, 12 Sep 2016 18:30:19 +0000 (20:30 +0200)
week3/eml.tex

index 5e13726..781f2fe 100644 (file)
@@ -2,7 +2,6 @@
 
 \usepackage{amssymb}
 \usepackage{booktabs}
-\usepackage{float}
 \usepackage{geometry}
 
 \title{Exercise: Machine Learning}
 \begin{document}
 \maketitle
 \subsection*{Chapter 5: Machine Learning}
-\begin{table}[H]
+Table~\ref{t2} shows that there is some difference in classification when
+choosing different parameter sets. The results show that adding the focus word
+provides very important information, since the percentage of correctly
+classified instances increases considerably. However, knowing only the focus word gives exceptionally low performance.
+
+Knowing the next words or the previous words gives some improvement, but not a
+substantial one.
+
+Table~\ref{t1} shows that the choice of method does not make a big difference.
+
+Using ten-fold cross validation decreases the percentage.
+
+\begin{table}
        \centering
        \begin{tabular}{lll}
                \toprule
                J48 (10FCF) & $96.4352\%$ & $36.5122\%$\\
                \bottomrule
        \end{tabular}
-       \caption{Results for \texttt{P1D} and \texttt{FD}}
+       \caption{Results for \texttt{P1D} and \texttt{FD}\label{t1}}
 \end{table}
 
-\begin{table}[H]
+\begin{table}
        \centering
        \begin{tabular}{lllllll}
                \toprule
                \texttt{P2D} & \texttt{P1D} & \texttt{N2D} & \texttt{N1D} & \texttt{FW} & Correctly classified\\
                \midrule
-               \checkmark{}    & \checkmark{}  & \checkmark{}  & \checkmark{}  & \checkmark{}  & $98.3225\%$\\
-               \checkmark{}    & \checkmark{}  & \checkmark{}  & \checkmark{}  &       & $97.3905\%$\\
-               \checkmark{}    & \checkmark{}  &       & \checkmark{}  & \checkmark{}  & $98.5555\%$\\
-               \checkmark{}    & \checkmark{}  &       & \checkmark{}  &       & $97.507\%$\\
-               \checkmark{}    & \checkmark{}  &       &       & \checkmark{}  & $98.0429\%$\\
-               \checkmark{}    & \checkmark{}  &       &       &       & $95.5732\%$\\
-
-                       & \checkmark{}  & \checkmark{}  & \checkmark{}  & \checkmark{}  & $\%$\\
-                       & \checkmark{}  & \checkmark{}  & \checkmark{}  &       & $\%$\\
-                       & \checkmark{}  &       & \checkmark{}  & \checkmark{}  & $\%$\\
-                       & \checkmark{}  &       & \checkmark{}  &       & $\%$\\
-                       & \checkmark{}  &       &       & \checkmark{}  & $98.2992\%$\\
-                       & \checkmark{}  &       &       &       & $96.6449\%$\\
-
-                       &       & \checkmark{}  & \checkmark{}  & \checkmark{}  & $\%$\\
-                       &       & \checkmark{}  & \checkmark{}  &       & $\%$\\
-                       &       &       & \checkmark{}  & \checkmark{}  & $\%$\\
-                       &       &       & \checkmark{}  &       & $\%$\\
-                       &       &       &       & \checkmark{}  & $88.4436\%$\\
-
+               \checkmark{} & \checkmark{} & \checkmark{} & \checkmark{} & \checkmark{} & $98.3225\%$\\
+               \checkmark{} & \checkmark{} & \checkmark{} & \checkmark{} & & $97.3905\%$\\
+               \checkmark{} & \checkmark{} & & \checkmark{} & \checkmark{} & $98.5555\%$\\
+               \checkmark{} & \checkmark{} & & \checkmark{} & & $97.507\%$\\
+               \checkmark{} & \checkmark{} & & & \checkmark{} & $98.0429\%$\\
+               \checkmark{} & \checkmark{} & & & & $95.5732\%$\\
+               
+               & \checkmark{} & \checkmark{} & \checkmark{} & \checkmark{} & $98.6486\%$\\
+               & \checkmark{} & \checkmark{} & \checkmark{} & & $97.5769\%$\\
+               & \checkmark{} & & \checkmark{} & \checkmark{} & $98.5555\%$\\
+               & \checkmark{} & & \checkmark{} & & $97.973\%$\\
+               & \checkmark{} & & & \checkmark{} & $98.2992\%$\\
+               & \checkmark{} & & & & $96.6449\%$\\
+               
+               & & \checkmark{} & \checkmark{} & \checkmark{} & $91.8919\%$\\
+               & & \checkmark{} & \checkmark{} & & $85.5079\%$\\
+               & & & \checkmark{} & \checkmark{} & $92.579\%$\\
+               & & & \checkmark{} & & $85.2516\%$\\
+               & & & & \checkmark{} & $88.4436\%$\\
                \bottomrule
        \end{tabular}
-       \caption{NaiveBayes}
+       \caption{NaiveBayes on all sensible combinations\label{t2}}
 \end{table}
 
 \subsection*{Chapter 6: Exercises}
                the preceding $n$ states. However, it is claimed that the algorithm
                takes into account the whole sequence. Explain in your own words (at
                most $100$) how the probability is influenced by the rest of the
-               sequence, i.e.\ both the positions more than n back and the following
+               sequence, i.e.\ both the positions more than $n$ back and the following
                positions.}
 
        \item\emph{Explain in your own words (at most 50) how the EM algorithm
                works. I don't mean the mathematics, but the underlying concept.}
+
 \end{itemize}
 \end{document}