\documentclass[a4paper]{article}

\usepackage{amssymb}
\usepackage{booktabs}
\usepackage{geometry}

\title{Exercise: Machine Learning}
\author{Mart Lubbers}
\date{\today}

\begin{document}
\maketitle
\subsection*{Chapter 5: Machine Learning}
Table~\ref{t2} shows that the choice of feature set makes a clear difference
in classification accuracy. Adding the focus word is particularly valuable,
since the percentage of correctly classified instances rises considerably
whenever it is included. However, the focus word on its own gives markedly
lower performance.

Adding the preceding or the following words gives some further improvement,
but only a modest one.

Table~\ref{t1} shows that the choice of classification method makes little
difference.

Using ten-fold cross-validation slightly decreases the percentage of
correctly classified instances.

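
The metric names in the tables (``Correctly classified'', ``Root relative
squared error'') match Weka's evaluation output. Assuming the experiments
were indeed run in Weka, the following minimal Java sketch shows how the
cross-validated figures in Table~\ref{t1} could be obtained; the data file
name \texttt{disambiguation.arff} is a hypothetical placeholder.

\begin{verbatim}
import java.util.Random;
import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.classifiers.bayes.NaiveBayes;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class RunExperiment {
    public static void main(String[] args) throws Exception {
        // Hypothetical data set; the class attribute is assumed to be last.
        Instances data = new DataSource("disambiguation.arff").getDataSet();
        data.setClassIndex(data.numAttributes() - 1);

        for (Classifier cls : new Classifier[]{new NaiveBayes(), new J48()}) {
            // Ten-fold cross-validation, as in the (10-fold CV) rows.
            Evaluation eval = new Evaluation(data);
            eval.crossValidateModel(cls, data, 10, new Random(1));
            System.out.printf("%s: %.4f%% correct, RRSE %.4f%%%n",
                    cls.getClass().getSimpleName(),
                    eval.pctCorrect(),
                    eval.rootRelativeSquaredError());
        }
    }
}
\end{verbatim}

If the rows without cross-validation were measured on the training data, they
would correspond to calling \texttt{buildClassifier} and then
\texttt{evaluateModel} on the same instances instead.
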
\begin{table}
\centering
\begin{tabular}{lll}
\toprule
Method & Correctly classified & Root relative squared error\\
\midrule
NaiveBayes & $96.6449\%$ & $35.7222\%$\\
NaiveBayes (10-fold CV) & $96.4352\%$ & $37.1926\%$\\
J48 & $96.6449\%$ & $34.9136\%$\\
J48 (10-fold CV) & $96.4352\%$ & $36.5122\%$\\
\bottomrule
\end{tabular}
\caption{Results for \texttt{P1D} and \texttt{FD}\label{t1}}
\end{table}

\begin{table}
\centering
\begin{tabular}{llllll}
\toprule
\texttt{P2D} & \texttt{P1D} & \texttt{N2D} & \texttt{N1D} & \texttt{FW} & Correctly classified\\
\midrule
\checkmark{} & \checkmark{} & \checkmark{} & \checkmark{} & \checkmark{} & $98.3225\%$\\
\checkmark{} & \checkmark{} & \checkmark{} & \checkmark{} & & $97.3905\%$\\
\checkmark{} & \checkmark{} & & \checkmark{} & \checkmark{} & $98.5555\%$\\
\checkmark{} & \checkmark{} & & \checkmark{} & & $97.507\%$\\
\checkmark{} & \checkmark{} & & & \checkmark{} & $98.0429\%$\\
\checkmark{} & \checkmark{} & & & & $95.5732\%$\\
\addlinespace
& \checkmark{} & \checkmark{} & \checkmark{} & \checkmark{} & $98.6486\%$\\
& \checkmark{} & \checkmark{} & \checkmark{} & & $97.5769\%$\\
& \checkmark{} & & \checkmark{} & \checkmark{} & $98.5555\%$\\
& \checkmark{} & & \checkmark{} & & $97.973\%$\\
& \checkmark{} & & & \checkmark{} & $98.2992\%$\\
& \checkmark{} & & & & $96.6449\%$\\
\addlinespace
& & \checkmark{} & \checkmark{} & \checkmark{} & $91.8919\%$\\
& & \checkmark{} & \checkmark{} & & $85.5079\%$\\
& & & \checkmark{} & \checkmark{} & $92.579\%$\\
& & & \checkmark{} & & $85.2516\%$\\
& & & & \checkmark{} & $88.4436\%$\\
\bottomrule
\end{tabular}
\caption{NaiveBayes on all sensible combinations\label{t2}}
\end{table}

\subsection*{Chapter 6: Exercises}
\begin{itemize}
\item\emph{If we look at the Viterbi algorithm, we see that the
probability of a state at a given position is calculated on the basis
of the preceding $n$ states. However, it is claimed that the algorithm
takes into account the whole sequence. Explain in your own words (at
most $100$) how the probability is influenced by the rest of the
sequence, i.e.\ both the positions more than $n$ back and the following
positions.} A generic Viterbi sketch illustrating this point follows
the list.

\item\emph{Explain in your own words (at most 50) how the EM algorithm
works. I don't mean the mathematics, but the underlying concept.}

\end{itemize}
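
As an illustration for the first question, the sketch below is a generic
first-order Viterbi decoder in Java; the start, transition and emission
tables are hypothetical inputs, not taken from the exercise. Each step only
combines the best score of the previous position with one transition and one
emission, but that best score already summarises the whole prefix, and the
path itself is only fixed after the last position by following the
back-pointers, so every local choice ends up depending on the rest of the
sequence as well.

\begin{verbatim}
/* Generic first-order Viterbi decoding over hypothetical model tables. */
public class ViterbiSketch {
    static int[] viterbi(double[] start, double[][] trans,
                         double[][] emit, int[] obs) {
        int states = start.length, len = obs.length;
        // score[t][s]: best log-probability of any path ending in
        // state s at position t (summarises the whole prefix).
        double[][] score = new double[len][states];
        int[][] back = new int[len][states];
        for (int s = 0; s < states; s++)
            score[0][s] = Math.log(start[s]) + Math.log(emit[s][obs[0]]);
        for (int t = 1; t < len; t++)
            for (int s = 0; s < states; s++) {
                double best = Double.NEGATIVE_INFINITY;
                int arg = 0;
                for (int p = 0; p < states; p++) {
                    double cand = score[t - 1][p] + Math.log(trans[p][s]);
                    if (cand > best) { best = cand; arg = p; }
                }
                score[t][s] = best + Math.log(emit[s][obs[t]]);
                back[t][s] = arg;
            }
        // Only after the last position is the path fixed: pick the best
        // final state and follow the back-pointers, so later positions
        // influence the states chosen for earlier ones.
        int[] path = new int[len];
        for (int s = 1; s < states; s++)
            if (score[len - 1][s] > score[len - 1][path[len - 1]])
                path[len - 1] = s;
        for (int t = len - 1; t > 0; t--)
            path[t - 1] = back[t][path[t]];
        return path;
    }
}
\end{verbatim}
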
\end{document}