From: Mart Lubbers Date: Mon, 22 Jun 2015 09:37:04 +0000 (+0200) Subject: presentation update and defense start X-Git-Url: https://git.martlubbers.net/?a=commitdiff_plain;h=04a107cac3d2f828d596edba3ebc0dc137876b6d;p=bsc-thesis1415.git presentation update and defense start --- diff --git a/defense/defense.txt b/defense/defense.txt new file mode 100644 index 0000000..c912289 --- /dev/null +++ b/defense/defense.txt @@ -0,0 +1,36 @@ +Intro: + Hyperleap + infotainment + Relieve programmer fixing crawlers + System to generate crawler specification + Frontend useable for non programmers + +Frontend: + Runs in browser + Runs from apache and python + +Backend: + Converts the user patterns from frontend to nodelists. + Nodelists are merged into DAWG minimization to generate patterns(graphs). + The crawler reads the patterns and crawls the site. + Crawler results are sent via an XML/XSD stream to the original backend. + +Results: + Few RSS + Much RSS misuse + +Future: + Extend to HTML (program to convert HTML to RSS) + Reuse interface + Low level matching can increase + +Questions: +- Why is user interface easy to use + Direct feedback + Familiar interface with buttons and textboxes + +- Why did you choose RSS + We had to limit scope + RSS is very consistent in underlying structure + But RSS doesn't have any structure in itself but underlying because + they are generated + diff --git a/pres/pres.tex b/pres/pres.tex index 1af0985..93abf6f 100644 --- a/pres/pres.tex +++ b/pres/pres.tex @@ -96,6 +96,9 @@ \begin{frame} \frametitle{Frontend} + \begin{itemize}[<+->] + \item Useable by non programmers + \end{itemize} \begin{figure}[H] \only<2>{\includegraphics[width=\linewidth]{frontendfront}} \only<3>{\includegraphics[width=\linewidth]{crawlerpattern}} @@ -116,6 +119,7 @@ \begin{itemize}[<+->] \item Based on Daciuk et al. \item User patterns become nodes/subgraphs + \item Incremental construction \end{itemize} \end{frame}