update
authorMart Lubbers <mart@martlubbers.net>
Mon, 12 Sep 2016 18:30:19 +0000 (20:30 +0200)
committerMart Lubbers <mart@martlubbers.net>
Mon, 12 Sep 2016 18:30:19 +0000 (20:30 +0200)
week3/eml.tex

index 5e13726..781f2fe 100644 (file)
@@ -2,7 +2,6 @@
 
 \usepackage{amssymb}
 \usepackage{booktabs}
-\usepackage{float}
 \usepackage{geometry}
 
 \title{Exercise: Machine Learning}
 \begin{document}
 \maketitle
 \subsection*{Chapter 5: Machine Learning}
-\begin{table}[H]
+Table~\ref{t2} shows that there is some difference in classification when
+choosing different parameter sets. The results show that adding the focus word
+provides very important information, since the percentage of correctly
+classified instances increases considerably. However, knowing only the focus word gives exceptionally low performance.
+
+Knowing the next words or the previous words gives some improvement, but not a
+substantial one.
+
+Table~\ref{t1} shows that the choice of method does not make a big difference.
+
+Using ten-fold cross validation decreases the percentage.
+
+\begin{table}
        \centering
        \begin{tabular}{lll}
                \toprule
                J48 (10FCF) & $96.4352\%$ & $36.5122\%$\\
                \bottomrule
        \end{tabular}
-       \caption{Results for \texttt{P1D} and \texttt{FD}}
+       \caption{Results for \texttt{P1D} and \texttt{FD}\label{t1}}
 \end{table}
 
-\begin{table}[H]
+\begin{table}
        \centering
        \begin{tabular}{lllllll}
                \toprule
                \texttt{P2D} & \texttt{P1D} & \texttt{N2D} & \texttt{N1D} & \texttt{FW} & Correctly classified\\
                \midrule
-               \checkmark{}    & \checkmark{}  & \checkmark{}  & \checkmark{}  & \checkmark{}  & $98.3225\%$\\
-               \checkmark{}    & \checkmark{}  & \checkmark{}  & \checkmark{}  &       & $97.3905\%$\\
-               \checkmark{}    & \checkmark{}  &       & \checkmark{}  & \checkmark{}  & $98.5555\%$\\
-               \checkmark{}    & \checkmark{}  &       & \checkmark{}  &       & $97.507\%$\\
-               \checkmark{}    & \checkmark{}  &       &       & \checkmark{}  & $98.0429\%$\\
-               \checkmark{}    & \checkmark{}  &       &       &       & $95.5732\%$\\
-
-                       & \checkmark{}  & \checkmark{}  & \checkmark{}  & \checkmark{}  & $\%$\\
-                       & \checkmark{}  & \checkmark{}  & \checkmark{}  &       & $\%$\\
-                       & \checkmark{}  &       & \checkmark{}  & \checkmark{}  & $\%$\\
-                       & \checkmark{}  &       & \checkmark{}  &       & $\%$\\
-                       & \checkmark{}  &       &       & \checkmark{}  & $98.2992\%$\\
-                       & \checkmark{}  &       &       &       & $96.6449\%$\\
-
-                       &       & \checkmark{}  & \checkmark{}  & \checkmark{}  & $\%$\\
-                       &       & \checkmark{}  & \checkmark{}  &       & $\%$\\
-                       &       &       & \checkmark{}  & \checkmark{}  & $\%$\\
-                       &       &       & \checkmark{}  &       & $\%$\\
-                       &       &       &       & \checkmark{}  & $88.4436\%$\\
-
+               \checkmark{} & \checkmark{} & \checkmark{} & \checkmark{} & \checkmark{} & $98.3225\%$\\
+               \checkmark{} & \checkmark{} & \checkmark{} & \checkmark{} & & $97.3905\%$\\
+               \checkmark{} & \checkmark{} & & \checkmark{} & \checkmark{} & $98.5555\%$\\
+               \checkmark{} & \checkmark{} & & \checkmark{} & & $97.507\%$\\
+               \checkmark{} & \checkmark{} & & & \checkmark{} & $98.0429\%$\\
+               \checkmark{} & \checkmark{} & & & & $95.5732\%$\\
+               
+               & \checkmark{} & \checkmark{} & \checkmark{} & \checkmark{} & $98.6486\%$\\
+               & \checkmark{} & \checkmark{} & \checkmark{} & & $97.5769\%$\\
+               & \checkmark{} & & \checkmark{} & \checkmark{} & $98.5555\%$\\
+               & \checkmark{} & & \checkmark{} & & $97.973\%$\\
+               & \checkmark{} & & & \checkmark{} & $98.2992\%$\\
+               & \checkmark{} & & & & $96.6449\%$\\
+               
+               & & \checkmark{} & \checkmark{} & \checkmark{} & $91.8919\%$\\
+               & & \checkmark{} & \checkmark{} & & $85.5079\%$\\
+               & & & \checkmark{} & \checkmark{} & $92.579\%$\\
+               & & & \checkmark{} & & $85.2516\%$\\
+               & & & & \checkmark{} & $88.4436\%$\\
                \bottomrule
        \end{tabular}
-       \caption{NaiveBayes}
+       \caption{NaiveBayes on all sensible combinations\label{t2}}
 \end{table}
 
 \subsection*{Chapter 6: Exercises}
                the preceding $n$ states. However, it is claimed that the algorithm
                takes into account the whole sequence. Explain in your own words (at
                most $100$) how the probability is influenced by the rest of the
-               sequence, i.e.\ both the positions more than n back and the following
+               sequence, i.e.\ both the positions more than $n$ back and the following
                positions.}
 
        \item\emph{Explain in your own words (at most 50) how the EM algorithm
                works. I don't mean the mathematics, but the underlying concept.}
+
 \end{itemize}
 \end{document}