From: Mart Lubbers Date: Wed, 23 Jul 2014 19:21:45 +0000 (+0200) Subject: thesis methods update X-Git-Url: https://git.martlubbers.net/?a=commitdiff_plain;h=45543217bf9ef48445ba1cd95311d4842c512708;p=bsc-thesis1415.git thesis methods update --- diff --git a/thesis/Makefile b/thesis/Makefile index fe6db97..886cea5 100644 --- a/thesis/Makefile +++ b/thesis/Makefile @@ -4,11 +4,10 @@ dots: bash ./dots/compileall thesis: - latex thesis.tex - latex thesis.tex + pdflatex thesis.tex + pdflatex thesis.tex # bibtex thesis.aux - latex thesis.tex - dvipdfm thesis.dvi + pdflatex thesis.tex clean: rm -vf *.aux *.bbl *.blg *.dvi *.log *.out *.pdf *.toc diff --git a/thesis/compileall.sh b/thesis/compileall.sh index 4e688bc..29ed071 100644 --- a/thesis/compileall.sh +++ b/thesis/compileall.sh @@ -1,5 +1,5 @@ #!/bin/bash for f in dots/*.dot do - dot -Tps "$f" > "$f.ps" + dot -odots/$(basename -s ".dot" "$f").png -Tpng "$f" done diff --git a/thesis/dots/.gitignore b/thesis/dots/.gitignore index 11eb07c..68a2009 100644 --- a/thesis/dots/.gitignore +++ b/thesis/dots/.gitignore @@ -1 +1,2 @@ -*.ps +*.eps +*.png diff --git a/thesis/dots/graph1.dot b/thesis/dots/graph1.dot index 4a29bb3..fa01618 100644 --- a/thesis/dots/graph1.dot +++ b/thesis/dots/graph1.dot @@ -1,7 +1,8 @@ digraph finite_state_machine { + graph [ dpi = 300 ]; node [shape = doublecircle]; 2 node [shape = circle]; 0 1 0 -> 1 [label = "a"]; 1 -> 2 [label = "b"]; - 0 -> 2 [label = "c"];j + 1 -> 2 [label = "c"]; } diff --git a/thesis/dots/graph2.dot b/thesis/dots/graph2.dot new file mode 100644 index 0000000..2f7c606 --- /dev/null +++ b/thesis/dots/graph2.dot @@ -0,0 +1,16 @@ +digraph finite_state_machine { + graph [ dpi = 300 ]; + rankdir = "LR" + node [shape = doublecircle]; 5 + node [shape = circle]; 0 1 2 3 4 6 7 8 9 + 0 -> 1 [label = "what"]; + 1 -> 2 [label = "space"]; + 2 -> 3 [label = "hyphen"]; + 3 -> 4 [label = "space"]; + 4 -> 5 [label = "when"]; + 4 -> 6 [label = "when"]; + 6 -> 7 [label = "space"]; + 7 -> 8 
[label = "hyphen"]; + 8 -> 9 [label = "space"]; + 9 -> 5 [label = "where"]; +} diff --git a/thesis/introduction.tex b/thesis/introduction.tex index c6c3419..ea46330 100644 --- a/thesis/introduction.tex +++ b/thesis/introduction.tex @@ -13,7 +13,7 @@ of the data input is done by hand and takes a lot of time to type in. \section{Research question} The main research question is: \textit{How can we make an adaptive, autonomous and programmable data mining program that can be set up by a non IT -professional which is able to transform raw data into structured data.}\\ +professional (NIP) which is able to transform raw data into structured data.}\\ The practical goal and aim of the project is to make a crawler(web or other document types) that can autonomously gather information after it has been diff --git a/thesis/methods.tex b/thesis/methods.tex index 583114a..29ea2b8 100644 --- a/thesis/methods.tex +++ b/thesis/methods.tex @@ -2,7 +2,36 @@ Directed acyclic graphs(DAG) and finite state automatas(FSA) have a lot in common concerning pattern recognition and information extraction. By feeding words into an algorithm a DAG can be generated so that it matches certain -patters present in the given words. +patterns present in the given words. Figure~\ref{fig:mg1} for example shows an +FSA that matches on the words \textit{ab} and \textit{ac}. +\begin{figure}[H] + \centering + \caption{Example DAG/FSA} + \label{fig:mg1} + \includegraphics[width=15mm]{./dots/graph1.png} +\end{figure} + +With this FSA we can test if a word fits the constraints the FSA +describes. And with a little adaptation we can extract dynamic information from +semi-structured data.\\ + +\section{NIP input} + +\section{Back to DAGs and FSAs} +Nodes in this data structure can be single letters but also bigger +constructions. The example in Figure~\ref{fig:mg2} describes different +separator patterns for event data with its three components: what, when, where. 
+In this example the nodes with the labels \textit{what, when, where} can also +be complete subgraphs. In this way data on a larger scale +\begin{figure}[H] + \centering + \caption{Example event data} + \label{fig:mg2} + \includegraphics[width=\linewidth]{./dots/graph2.png} +\end{figure} + + \section{Algorithm} +Hello Wordl diff --git a/thesis/thesis.pdf b/thesis/thesis.pdf index d4cf3b5..2d7a68f 100644 Binary files a/thesis/thesis.pdf and b/thesis/thesis.pdf differ diff --git a/thesis/thesis.tex b/thesis/thesis.tex index d5de86d..a7a5ae7 100644 --- a/thesis/thesis.tex +++ b/thesis/thesis.tex @@ -2,6 +2,7 @@ \usepackage{lipsum} \usepackage{graphicx} +\usepackage{float} \author{Mart Lubbers\\s4109053} \title{Non IT congurable adaptive data mining solution used in transforming raw data to structured data} @@ -10,8 +11,8 @@ Radboud University Nijmegen\\ \vspace{15mm} \begin{tabular}{cp{5em}c} - Franc Grootjen && Alessandro Paula\\ - RU && Hyperleap + Franc Grootjen && Alessandro Paula\\ + RU && Hyperleap \end{tabular} }