final version
[ker2014-2.git] / report / ass2-1.tex
index 4ae080f..41573b2 100644 (file)
 \chapter{Probabilistic representation and reasoning (and burglars)}
-\section{Bayesian network and the conditional probability tables}
+\section{Formal description}
+In our representation of the model we introduced a \textit{Noisy OR} to
+represent the causal independence of \textit{Burglar} and \textit{Earthquake}
+on \textit{Alarm}. The representation of the network is displayed in
+Figure~\ref{bnetwork21}.
+
 \begin{figure}[H]
-       \caption{Bayesian network, visual representation}
+       \caption{Bayesian network alarmsystem}
+       \label{bnetwork21}
        \centering
        \includegraphics[scale=0.5]{d1.eps}
 \end{figure}
 
-We introduced a \textit{Noisy OR} to represent the causal independence of
-\textit{Burglar} and \textit{Earthquake} on Alarm. Probabilities for the causes
-of the alarm are calculated using days, in practice this means that the
-smallest discrete time interval is one day. The calculation for the probability
-of a burglar is then calculated with the following formula(taking leap years
-into account and assuming a standard gregorian calendar).
+Days were chosen as the unit of time to model the story. The probability of
+a \textit{Burglar} event happening on a given day is then (assuming a
+Gregorian calendar and taking leap days into account):
 $$\frac{1}{365 + 0.25 - 0.01 - 0.0025}=\frac{1}{365.2425}$$
 
-This gives the following probability distributions\\
-\begin{tabular}{|l|ll|}
-       \hline
-       & \multicolumn{2}{c|}{Earthquake}\\
-       \hline
-       T & $0.0027$ & $0.9972$ \\
-       F & $0.9973$ & $0.0027$\\
-       \hline
-\end{tabular}
-%
-\begin{tabular}{|l|ll|}
-       \hline
-       & \multicolumn{2}{c|}{Burglar}\\
-       \hline
-       T & $0.0027$ & $0.9973$ \\
-       F & $0.9973$ & $0.0027$\\
-       \hline
-\end{tabular}
+The resultant probability distributions can be found in Table~\ref{probdist},
+in order to avoid an unclear graph.
+
+\begin{table}[H]
+       \caption{Probability distributions of the alarm network}
+       \label{probdist}
+       \begin{tabular}{|l|l|}
+               \hline
+               & Earthquake\\
+               \hline
+               T & $0.0027$\\
+               F & $0.9973$\\
+               \hline
+       \end{tabular}
+       %
+       \begin{tabular}{|l|l|}
+               \hline
+               & Burglar\\
+               \hline
+               T & $0.0027$\\
+               F & $0.9973$\\
+               \hline
+       \end{tabular}
+       
+       \begin{tabular}{|l|ll|}
+               \hline
+               & \multicolumn{2}{c|}{$I_1$}\\
+               Earthquake & T & F\\
+               \hline
+               T & $0.2$ & $0.8$\\
+               F & $0$ & $1$\\
+               \hline
+       \end{tabular}
+       \begin{tabular}{|l|ll|}
+               \hline
+               & \multicolumn{2}{c|}{$I_2$}\\
+               Burglar & T & F\\
+               \hline
+               T & $0.95$ & $0.05$\\
+               F & $0$ & $1$\\
+               \hline
+       \end{tabular}
+       \begin{tabular}{|ll|ll|}
+               \hline
+               && \multicolumn{2}{c|}{Alarm}\\
+               $I_1$ & $I_2$ & T & F\\
+               \hline
+               T & T & $1$ & $0$\\
+               T & F & $1$ & $0$\\
+               F & T & $1$ & $0$\\
+               F & F & $0$ & $1$\\
+               \hline
+       \end{tabular}
+       
+       \begin{tabular}{|l|ll|}
+               \hline
+               & \multicolumn{2}{c|}{Watson}\\
+               Alarm & T & F\\
+               \hline
+               T & $0.8$ & $0.2$\\
+               F & $0.4$ & $0.6$\\
+               \hline
+       \end{tabular} 
+       \begin{tabular}{|l|ll|}
+               \hline
+               & \multicolumn{2}{c|}{Gibbons}\\
+               Alarm & T & F\\
+               \hline
+               T & $0.99$ & $0.01$\\
+               F & $0.04$ & $0.96$\\
+               \hline
+       \end{tabular}
+       \begin{tabular}{|l|ll|}
+               \hline
+               & \multicolumn{2}{c|}{Radio}\\
+               Earthquake & T & F\\
+               \hline
+               T & $0.9998$ & $0.0002$\\
+               F & $0.0002$ & $0.9998$\\
+               \hline
+       \end{tabular}
+\end{table}
+
+\textit{If there is a burglar present (which could happen once every ten
+years), the alarm is known to go off 95\% of the time.} We modelled this by
+setting the value for Burglar True and $I_2$ True to 0.95.\\
+\textit{There’s a 40\% chance that Watson is joking and the alarm is in fact
+off.} This is modelled by setting the value for Watson True and Alarm False to
+0.4. As Holmes expects Watson to call 80\% of the time when the alarm goes
+off, the value for Alarm True and Watson True is set to 0.8. Because the rows
+have to sum to 1, the other values are easily calculated.\\
+\textit{She may not have heard the alarm in 1\% of the cases and is thought to
+erroneously report an alarm when it is in fact off in 4\% of the cases.} We
+modelled this by assuming that when Mrs.~Gibbons hears the alarm, she calls
+Holmes, meaning that the value for Gibbons False and Alarm True is 0.01. As
+she reports an alarm when it is in fact off in 4\% of the cases, the value for
+Gibbons True and Alarm False is 0.04.
+
+\section{Implementation}
+We implemented the distributions in \textit{AILog}, see Listing~\ref{alarm.ail}.
+
+\begin{listing}[H]
+       \caption{Alarm.ail}
+       \label{alarm.ail}
+       \inputminted[linenos,fontsize=\footnotesize]{prolog}{./src/alarm.ail}
+\end{listing}
+
+\section{Queries}
+Now that we have modelled the story with the corresponding probabilities, we
+can have AILog calculate some other probabilities given some observations.
+Below we list some probabilities and the associated AILog output.\\
+The chance that a burglary happens given that Watson calls is greater than the
+chance that a burglary happens without this observation, as can be observed
+from the difference between a and b. This makes sense as Watson calls rightly
+80\% of the time. So when Holmes receives a call from Watson, the chance that
+the alarm goes off increases.\\
+When we compare b to c, the same mechanism holds. There are more observations
+that give evidence for a burglary as both Watson and Gibbons have called in the
+case of c.\\
+When you take a look at the last case, d, you see that the probability has
+decreased compared to c. This can be explained by an observation that is added
+on top of the observations of c: the radio. The variable Radio means that the
+newscast reports that there was an earthquake. As that is also a reason why
+the alarm could go off, but has nothing to do with a burglary, it decreases the
+probability of a burglary.
+% Maybe we could calculate the probability that Watson and Gibbons both call erroneously? I think we lack the information for that, don't we?
+\begin{enumerate}[a)]
+       \item $P(\text{Burglary})=
+               0.002737757092501968$
+       \item $P(\text{Burglary}|\text{Watson called})=
+               0.005321803679438259$
+       \item $P(\text{Burglary}|\text{Watson called},\text{Gibbons called})=
+               0.11180941544755249$
+       \item $P(\text{Burglary}|\text{Watson called},\text{Gibbons called}
+               , \text{Radio})=0.01179672476662423$
+\end{enumerate}
+
+\begin{listing}[H]
+       \begin{minted}[fontsize=\footnotesize]{prolog}
+AILog: predict burglar.
+Answer: P(burglar|Obs)=0.002737757092501968.
+  [ok,more,explanations,worlds,help]: ok.
+
+AILog: observe watson.
+Answer: P(watson|Obs)=0.4012587986186947.
+  [ok,more,explanations,worlds,help]: ok.
+
+AILog: predict burglar.
+Answer: P(burglar|Obs)=[0.005321803679438259,0.005321953115441623].
+  [ok,more,explanations,worlds,help]: ok.
+
+AILog: observe gibbons.
+Answer: P(gibbons|Obs)=[0.04596053565368094,0.045962328885721306].
+  [ok,more,explanations,worlds,help]: ok.
+
+AILog: predict burglar.
+Answer: P(burglar|Obs)=[0.11180941544755249,0.1118516494624678].
+  [ok,more,explanations,worlds,help]: ok.
+
+AILog: observe radio.
+Answer: P(radio|Obs)=[0.02582105837443645,0.025915745316785182].
+  [ok,more,explanations,worlds,help]: ok.
+
+AILog: predict burglar.
+Answer: P(burglar|Obs)=[0.01179672476662423,0.015584580594335082].
+  [ok,more,explanations,worlds,help]: ok.
+       \end{minted}
+\end{listing}
 
-\begin{tabular}{|l|ll|}
+\section{Comparison with manual calculation}
+Querying the \textit{Alarm} variable gives the following answer:
+\begin{minted}{prolog}
+       AILog: predict alarm.
+       Answer: P(alarm|Obs)=0.0031469965467367292.
+                           
+         [ok,more,explanations,worlds,help]: ok.
+\end{minted}
+
+Using the formula for causal independence with a logical OR:\\
+$P(Alarm|C_1, C_2) = P(i_1|C_1)+P(i_2|C_2)(1-P(i_1|C_1))$ we can calculate the
+probability of the \textit{Alarm} variable using variable elimination. This
+results in the following answer:\\
+$P(Alarm|earthquake, burglar) =
+P(i_1|earthquake)+P(i_2|burglar)(1-P(i_1|earthquake)) =
+0.2\cdot0.0027+0.95\cdot0.0027\cdot(1-0.2\cdot0.0027)=0.00314699654673673$
+
+When you compare the output of AILog with that of the variable elimination, you
+see that they are exactly the same. The method with which AILog calculates the
+probability is almost the same, but that is mainly because we did not use any
+techniques that are not available in AILog. If we had done the same task with
+a Bayesian network and Bayes' rule, we would have used a different method.
+
+\newpage
+\section{Burglary problem with extended information}
+Extending the story with multiple houses and constraints about who wants to
+work with whom results in the following AILog representation:
+\inputminted[linenos,fontsize=\footnotesize]{prolog}{./src/burglary.ail}
+
+The following requirements on the sets of colleagues limit the number of
+possible groups to four:
+ \begin{itemize}
+       \item needs(joe, [])
+       \item needs(william, [])
+       \item needs(jack, [joe])
+       \item needs(averall, [jack, william])
+ \end{itemize}
+
+The only way that those people will burgle is when those constraints are satisfied and at least two people are working. So the only possible combinations are:
+\begin{enumerate}
+       \item Joe and Jack
+       \item Joe and William
+       \item Joe, William and Jack
+       \item Joe, William, Jack and Averall.
+\end{enumerate}
+
+We implemented the extended story using a three-layered model.\\
+\textit{Each day a burglar decides whether he wants to work or not, and on
+        average this happens only 5 days a week}
+       This means that every burglar has the same initial working probability: 5/7 (see the first 5 lines of code).\\
+Then we implemented the constraints on the colleagues by telling AILog that a burglary can happen when at least one of our combinations is working (see lines 8 to 11).\\
+\textit{Finally, if they decide to burgle, then they will burgle 3 houses a night.} The third layer consists of implementing the chance that, out of the 10,000 houses in which Joe, William, Jack and Averall are the only burglars, Holmes' house is burgled as one of the three (see lines 14 to 19). This results in the following probability for a burglary at Holmes' house:
+
+$P(burglary)\cdot(
+       P(\text{first house Holmes'})+
+       P(\text{second house Holmes'})+
+       P(\text{third house Holmes'}))=\\
+0.655976676\cdot\left(
+       \frac{1}{10000}+
+       \frac{9999}{10000}\cdot\frac{1}{9999}+
+       \frac{9999}{10000}\cdot\frac{9998}{9999}\cdot\frac{1}{9998}\right)
+       \approx 0.000196793$
+
+\section{Bayesian networks}
+A Bayesian network representation of the extended story is possible, but could
+become very large because of the great number of houses and burglars. However,
+we found a way to create a network that is very compact, see
+Figure~\ref{bnnetworkhouses} and the corresponding probability tables. The
+network is created in the same way as our implementation of the extended
+story. Every burglar has the same starting probabilities, which are merged
+together into whether a burglary happens. Then the probability of a burglary
+at Holmes' house, given that a burglary takes place, is calculated.
+
+\begin{figure}[H]
+       \caption{Bayesian network of burglars and houses}
+       \label{bnnetworkhouses}
+       \centering
+       \includegraphics[scale=0.5]{d2.eps}
+\end{figure}
+
+\begin{tabular}{|l|l|}
        \hline
-       & \multicolumn{2}{c|}{$I_1$}\\
-       Earthquake & T & F\\
+       Joe &\\
        \hline
-       T & $0.2$ & $0.08$\\
-       F & $0$ & $1$\\
+       T & $\nicefrac{5}{7}$\\
+       F & $\nicefrac{2}{7}$\\
        \hline
 \end{tabular}
-\begin{tabular}{|l|ll|}
+\begin{tabular}{|l|l|}
        \hline
-       & \multicolumn{2}{c|}{$I_2$}\\
-       Burglar & T & F\\
+       William &\\
        \hline
-       T & $0.95$ & $0.05$\\
-       F & $0$ & $1$\\
+       T & $\nicefrac{5}{7}$\\
+       F & $\nicefrac{2}{7}$\\
        \hline
 \end{tabular}
-\begin{tabular}{|ll|ll|}
+\begin{tabular}{|l|l|}
        \hline
-       && \multicolumn{2}{c|}{Burglar}\\
-       i1 & i2 & T & F\\
+       Jack & \\
        \hline
-       T & T & $1$ & $0$\\
-       T & F & $1$ & $0$\\
-       F & T & $1$ & $0$\\
-       F & F & $0$ & $1$\\
+       T & $\nicefrac{5}{7}$\\
+       F & $\nicefrac{2}{7}$\\
        \hline
 \end{tabular}
-
-\begin{tabular}{|l|ll|}
+\begin{tabular}{|l|l|}
        \hline
-       & \multicolumn{2}{c|}{Watson}\\
-       Alarm & T & F\\
+       Averall & \\
        \hline
-       T & $0.8$ & $0.2$\\
-       F & $0.4$ & $0.6$\\
+       T & $\nicefrac{5}{7}$\\
+       F & $\nicefrac{2}{7}$\\
        \hline
 \end{tabular}
-\begin{tabular}{|l|ll|}
+
+\begin{tabular}{|llll|ll|}
        \hline
-       & \multicolumn{2}{c|}{Gibbons}\\
-       Alarm & T & F\\
+       & & & & Burglary &\\
+       Joe & William & Jack & Averall & T & F\\
        \hline
-       T & $0.99$ & $0.01$\\
-       F & $0.04$ & $0.96$\\
+       F& F& F& F & $0$ & $1$\\
+       F& F& F& T & $0$ & $1$\\
+       F& F& T& F & $0$ & $1$\\
+       F& F& T& T & $0$ & $1$\\
+       F& T& F& F & $0$ & $1$\\
+       F& T& F& T & $0$ & $1$\\
+       F& T& T& F & $0$ & $1$\\
+       F& T& T& T & $0$ & $1$\\
+       T& F& F& F & $0$ & $1$\\
+       T& F& F& T & $0$ & $1$\\
+       T& F& T& F & $1$ & $0$\\
+       T& F& T& T & $0$ & $1$\\
+       T& T& F& F & $1$ & $0$\\
+       T& T& F& T & $0$ & $1$\\
+       T& T& T& F & $1$ & $0$\\
+       T& T& T& T & $1$ & $0$\\
        \hline
 \end{tabular}
-\begin{tabular}{|l|ll|}
+\begin{tabular}{|lll|}
        \hline
-       & \multicolumn{2}{c|}{Radio}\\
-       Earthquake & T & F\\
+       & Holmes &\\
+       Burglary & T & F\\
        \hline
-       T & $0.9998$ & $0.0002$\\
-       F & $0.0002$ & $0.9998$\\
+       T & $0.000153$ & $0.999847$\\
+       F & $0$ & $1$\\
        \hline
 \end{tabular}
+
+\section{Additional Questions}
+We wouldn't change any aspect of the assignment. It is nice that the assignment slowly increases in difficulty because of the extension of the story. We estimate that we spent about 20 hours each on the assignment.
+