From: Mart Lubbers Date: Fri, 13 Feb 2015 11:21:02 +0000 (+0100) Subject: update X-Git-Url: https://git.martlubbers.net/?a=commitdiff_plain;h=b77fbe1c15ab2762abdde8c1bf9b7d90f318ae50;p=bsc-thesis1415.git update --- diff --git a/thesis2/3.methods.tex b/thesis2/3.methods.tex index 2bc95bb..5729b79 100644 --- a/thesis2/3.methods.tex +++ b/thesis2/3.methods.tex @@ -217,19 +217,29 @@ added. \end{figure} \subsection{Appliance on extraction of patterns} -The text data in combination with the user markings are not just plain text and -thus we can not create a DAWG out of them without a few adaptations to the -structure. +The text data in combination with the user markings cannot be converted +automatically to a DAWG using the algorithm we described. This is because the +user markings are not necessarily a single character or word. User markings are +basically one or more characters. When we add a user marking we insert a +character that is not in the alphabet and later on we change the marking to a +kind of subgraph. When this is applied, nondeterminism can be +added to the graph. Nondeterminism means that a single node has +multiple edges with the same transition; in practice this means that a word can +be present in the graph via multiple paths. This is shown in +Figure~\ref{nddawg} with the following words: \texttt{ab<1>c}, \texttt{a<1>bc}. +In this graph the word \texttt{abdc} will be accepted and the user pattern +\texttt{<1>} will be filled with the word \texttt{d}. However, if we try the +word \texttt{abdddbc} both paths can be chosen. In the first case the user +pattern \texttt{<1>} will be filled with \texttt{dddb} and in the second case +with \texttt{bddd}. In such a case we need to choose the most suitable +path. 
-Adaption for our data - -Non determinism +\begin{figure}[H] + \centering + \includegraphics[width=\linewidth]{nddawg.eps} + \caption{Example of nondeterminism} + \label{nddawg} +\end{figure} -How to get best match +\subsection{How to choose a path} -The algorithm for minimizing DAWGs can not be applied directly on the user -generated pattern. This is mainly because there is no conclusive information -about the contents of the pattern and therefore non deterministic choices have -to be made. Because of this there can be multiple matches and the best match -has to be determined. There are several possible criteria for determining the -best match. The method we use is ... TODO diff --git a/thesis2/Makefile b/thesis2/Makefile index 647c756..bbf24e0 100644 --- a/thesis2/Makefile +++ b/thesis2/Makefile @@ -9,6 +9,7 @@ thesis: dot -Teps appoverview.dot > appoverview.eps dot -Teps backend.dot > backend.eps dot -Teps nodelistexample.dot > nodelistexample.eps + dot -Teps nddawg.dot > nddawg.eps latex -shell-escape thesis.tex > log.txt bibtex thesis.aux >> log.txt latex -shell-escape thesis.tex >> log.txt diff --git a/thesis2/nddawg.dot b/thesis2/nddawg.dot new file mode 100644 index 0000000..1a52518 --- /dev/null +++ b/thesis2/nddawg.dot @@ -0,0 +1,13 @@ +digraph { + rankdir=LR; + n0 [style=invis] + q4 [shape=doublecircle] + n0 -> q0 + q0 -> q1 [label="a"] + q1 -> q2 [label="b"] + q2 -> q3 [label="<1>"] + q3 -> q4 [label="c"] + q1 -> q5 [label="<1>"] + q5 -> q6 [label="b"] + q6 -> q4 [label="c"] +} diff --git a/thesis2/nddawg.eps b/thesis2/nddawg.eps new file mode 100644 index 0000000..0600bb2 --- /dev/null +++ b/thesis2/nddawg.eps @@ -0,0 +1,444 @@ +%!PS-Adobe-3.0 EPSF-3.0 +%%Creator: graphviz version 2.38.0 (20140413.2041) +%%Title: %3 +%%Pages: 1 +%%BoundingBox: 36 36 634 134 +%%EndComments +save +%%BeginProlog +/DotDict 200 dict def +DotDict begin + +/setupLatin1 { +mark +/EncodingVector 256 array def + EncodingVector 0 + +ISOLatin1Encoding 0 255 getinterval putinterval 
+EncodingVector 45 /hyphen put + +% Set up ISO Latin 1 character encoding +/starnetISO { + dup dup findfont dup length dict begin + { 1 index /FID ne { def }{ pop pop } ifelse + } forall + /Encoding EncodingVector def + currentdict end definefont +} def +/Times-Roman starnetISO def +/Times-Italic starnetISO def +/Times-Bold starnetISO def +/Times-BoldItalic starnetISO def +/Helvetica starnetISO def +/Helvetica-Oblique starnetISO def +/Helvetica-Bold starnetISO def +/Helvetica-BoldOblique starnetISO def +/Courier starnetISO def +/Courier-Oblique starnetISO def +/Courier-Bold starnetISO def +/Courier-BoldOblique starnetISO def +cleartomark +} bind def + +%%BeginResource: procset graphviz 0 0 +/coord-font-family /Times-Roman def +/default-font-family /Times-Roman def +/coordfont coord-font-family findfont 8 scalefont def + +/InvScaleFactor 1.0 def +/set_scale { + dup 1 exch div /InvScaleFactor exch def + scale +} bind def + +% styles +/solid { [] 0 setdash } bind def +/dashed { [9 InvScaleFactor mul dup ] 0 setdash } bind def +/dotted { [1 InvScaleFactor mul 6 InvScaleFactor mul] 0 setdash } bind def +/invis {/fill {newpath} def /stroke {newpath} def /show {pop newpath} def} bind def +/bold { 2 setlinewidth } bind def +/filled { } bind def +/unfilled { } bind def +/rounded { } bind def +/diagonals { } bind def +/tapered { } bind def + +% hooks for setting color +/nodecolor { sethsbcolor } bind def +/edgecolor { sethsbcolor } bind def +/graphcolor { sethsbcolor } bind def +/nopcolor {pop pop pop} bind def + +/beginpage { % i j npages + /npages exch def + /j exch def + /i exch def + /str 10 string def + npages 1 gt { + gsave + coordfont setfont + 0 0 moveto + (\() show i str cvs show (,) show j str cvs show (\)) show + grestore + } if +} bind def + +/set_font { + findfont exch + scalefont setfont +} def + +% draw text fitted to its expected width +/alignedtext { % width text + /text exch def + /width exch def + gsave + width 0 gt { + [] 0 setdash + text stringwidth pop 
width exch sub text length div 0 text ashow + } if + grestore +} def + +/boxprim { % xcorner ycorner xsize ysize + 4 2 roll + moveto + 2 copy + exch 0 rlineto + 0 exch rlineto + pop neg 0 rlineto + closepath +} bind def + +/ellipse_path { + /ry exch def + /rx exch def + /y exch def + /x exch def + matrix currentmatrix + newpath + x y translate + rx ry scale + 0 0 1 0 360 arc + setmatrix +} bind def + +/endpage { showpage } bind def +/showpage { } def + +/layercolorseq + [ % layer color sequence - darkest to lightest + [0 0 0] + [.2 .8 .8] + [.4 .8 .8] + [.6 .8 .8] + [.8 .8 .8] + ] +def + +/layerlen layercolorseq length def + +/setlayer {/maxlayer exch def /curlayer exch def + layercolorseq curlayer 1 sub layerlen mod get + aload pop sethsbcolor + /nodecolor {nopcolor} def + /edgecolor {nopcolor} def + /graphcolor {nopcolor} def +} bind def + +/onlayer { curlayer ne {invis} if } def + +/onlayers { + /myupper exch def + /mylower exch def + curlayer mylower lt + curlayer myupper gt + or + {invis} if +} def + +/curlayer 0 def + +%%EndResource +%%EndProlog +%%BeginSetup +14 default-font-family set_font +1 setmiterlimit +% /arrowlength 10 def +% /arrowwidth 5 def + +% make sure pdfmark is harmless for PS-interpreters other than Distiller +/pdfmark where {pop} {userdict /pdfmark /cleartomark load put} ifelse +% make '<<' and '>>' safe on PS Level 1 devices +/languagelevel where {pop languagelevel}{1} ifelse +2 lt { + userdict (<<) cvn ([) cvn load put + userdict (>>) cvn ([) cvn load put +} if + +%%EndSetup +setupLatin1 +%%Page: 1 1 +%%PageBoundingBox: 36 36 634 134 +%%PageOrientation: Portrait +0 0 1 beginpage +gsave +36 36 598 98 boxprim clip newpath +1 1 set_scale 0 rotate 40 40 translate +% n0 +% q0 +gsave +1 setlinewidth +0 0 0 nodecolor +118 47 27 18 ellipse_path stroke +0 0 0 nodecolor +14 /Times-Roman set_font +108.5 43.3 moveto 19 (q0) alignedtext +grestore +% n0->q0 +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 54.22 47 moveto +62.55 47 71.91 47 80.82 47 
curveto +stroke +0 0 0 edgecolor +newpath 80.97 50.5 moveto +90.97 47 lineto +80.97 43.5 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 80.97 50.5 moveto +90.97 47 lineto +80.97 43.5 lineto +closepath stroke +grestore +% q4 +gsave +1 setlinewidth +0 0 0 nodecolor +563.75 47 22.96 22.96 ellipse_path stroke +1 setlinewidth +0 0 0 nodecolor +563.75 47 27 27 ellipse_path stroke +0 0 0 nodecolor +14 /Times-Roman set_font +554.25 43.3 moveto 19 (q4) alignedtext +grestore +% q1 +gsave +1 setlinewidth +0 0 0 nodecolor +217 47 27 18 ellipse_path stroke +0 0 0 nodecolor +14 /Times-Roman set_font +207.5 43.3 moveto 19 (q1) alignedtext +grestore +% q0->q1 +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 145.25 47 moveto +155.82 47 168.18 47 179.6 47 curveto +stroke +0 0 0 edgecolor +newpath 179.73 50.5 moveto +189.73 47 lineto +179.73 43.5 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 179.73 50.5 moveto +189.73 47 lineto +179.73 43.5 lineto +closepath stroke +0 0 0 edgecolor +14 /Times-Roman set_font +163 50.8 moveto 9 (a) alignedtext +grestore +% q2 +gsave +1 setlinewidth +0 0 0 nodecolor +341 72 27 18 ellipse_path stroke +0 0 0 nodecolor +14 /Times-Roman set_font +331.5 68.3 moveto 19 (q2) alignedtext +grestore +% q1->q2 +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 242.98 52.12 moveto +260.91 55.79 285.31 60.79 305.15 64.86 curveto +stroke +0 0 0 edgecolor +newpath 304.55 68.31 moveto +315.05 66.89 lineto +305.96 61.45 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 304.55 68.31 moveto +315.05 66.89 lineto +305.96 61.45 lineto +closepath stroke +0 0 0 edgecolor +14 /Times-Roman set_font +274 65.8 moveto 10 (b) alignedtext +grestore +% q5 +gsave +1 setlinewidth +0 0 0 nodecolor +341 18 27 18 ellipse_path stroke +0 0 0 nodecolor +14 /Times-Roman set_font +331.5 14.3 moveto 19 (q5) alignedtext +grestore +% q1->q5 +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 242.69 41.13 moveto +260.71 36.85 
285.39 30.98 305.38 26.23 curveto +stroke +0 0 0 edgecolor +newpath 306.43 29.58 moveto +315.35 23.86 lineto +304.81 22.77 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 306.43 29.58 moveto +315.35 23.86 lineto +304.81 22.77 lineto +closepath stroke +0 0 0 edgecolor +14 /Times-Roman set_font +262 38.8 moveto 34 (<1>) alignedtext +grestore +% q3 +gsave +1 setlinewidth +0 0 0 nodecolor +465 72 27 18 ellipse_path stroke +0 0 0 nodecolor +14 /Times-Roman set_font +455.5 68.3 moveto 19 (q3) alignedtext +grestore +% q2->q3 +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 368.17 72 moveto +385.54 72 408.57 72 427.7 72 curveto +stroke +0 0 0 edgecolor +newpath 427.87 75.5 moveto +437.87 72 lineto +427.87 68.5 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 427.87 75.5 moveto +437.87 72 lineto +427.87 68.5 lineto +closepath stroke +0 0 0 edgecolor +14 /Times-Roman set_font +386 75.8 moveto 34 (<1>) alignedtext +grestore +% q3->q4 +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 490.41 65.69 moveto +501.7 62.78 515.31 59.26 527.66 56.07 curveto +stroke +0 0 0 edgecolor +newpath 528.86 59.37 moveto +537.67 53.48 lineto +527.11 52.59 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 528.86 59.37 moveto +537.67 53.48 lineto +527.11 52.59 lineto +closepath stroke +0 0 0 edgecolor +14 /Times-Roman set_font +510 63.8 moveto 9 (c) alignedtext +grestore +% q6 +gsave +1 setlinewidth +0 0 0 nodecolor +465 18 27 18 ellipse_path stroke +0 0 0 nodecolor +14 /Times-Roman set_font +455.5 14.3 moveto 19 (q6) alignedtext +grestore +% q5->q6 +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 368.17 18 moveto +385.54 18 408.57 18 427.7 18 curveto +stroke +0 0 0 edgecolor +newpath 427.87 21.5 moveto +437.87 18 lineto +427.87 14.5 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 427.87 21.5 moveto +437.87 18 lineto +427.87 14.5 lineto +closepath stroke +0 0 0 edgecolor +14 /Times-Roman set_font +398 21.8 
moveto 10 (b) alignedtext +grestore +% q6->q4 +gsave +1 setlinewidth +0 0 0 edgecolor +newpath 489.92 25.17 moveto +501.4 28.61 515.37 32.8 527.99 36.58 curveto +stroke +0 0 0 edgecolor +newpath 527.17 39.99 moveto +537.76 39.51 lineto +529.18 33.29 lineto +closepath fill +1 setlinewidth +solid +0 0 0 edgecolor +newpath 527.17 39.99 moveto +537.76 39.51 lineto +529.18 33.29 lineto +closepath stroke +0 0 0 edgecolor +14 /Times-Roman set_font +510 36.8 moveto 9 (c) alignedtext +grestore +endpage +showpage +grestore +%%PageTrailer +%%EndPage: 1 +%%Trailer +end +restore +%%EOF