update
authorMart Lubbers <mart@martlubbers.net>
Wed, 8 Oct 2014 18:45:44 +0000 (20:45 +0200)
committerMart Lubbers <mart@martlubbers.net>
Wed, 8 Oct 2014 18:45:44 +0000 (20:45 +0200)
program/everything/contextmenu_o.js [deleted file]
program/everything/crawler.db
program/everything/crawler.py
program/everything/index.py
program/everything/input_app.py
program/everything/new.html.t
program/everything/scripts.js [new file with mode: 0644]
thesis2/Makefile [new file with mode: 0644]
thesis2/abstract.tex [new file with mode: 0644]
thesis2/fac.py [new file with mode: 0644]
thesis2/thesis.tex [new file with mode: 0644]

diff --git a/program/everything/contextmenu_o.js b/program/everything/contextmenu_o.js
deleted file mode 100644 (file)
index b7b6d1f..0000000
+++ /dev/null
@@ -1,120 +0,0 @@
-var selection;
-var mouse_x = 0;
-var mouse_y = 0;
-var mouse_left = false;
-var mouse_right = false;
-if (document.addEventListener != undefined) document.addEventListener('mousemove', mouseMove, true);
-else if (document.layers) document.captureEvents(Event.MOUSEDOWN | Event.MOUSEUP);
-//document.onmousemove = mouseMove;
-//document.oncontextmenu = RightMouseDown;
-document.onmousedown = mouseDown;
-document.onmouseup = mouseUp;
-
-function mouseMove(a) {
-//    mouse_x = document.all ? event.clientX + document.body.scrollLeft : document.layers ? a.x + window.pageXOffset : a.clientX + window.pageXOffset;
-//    mouse_y = document.all ? event.clientY + document.body.scrollTop : document.layers ? a.y + window.pageYOffset : a.clientY + window.pageYOffset
-}
-
-//function RightMouseDown() {
-//    mouse_right = true;
-//    return false
-//}
-
-function mouseUp(e) {
-    var curselection = window.getSelection().getRangeAt(0);
-    if (curselection.endOffset - curselection.startOffset > 0)
-    selection = curselection;
-    if (e.which == 3) mouse_right = false
-}
-
-function mouseDown(e) {
-    if (e.which == 3) {
-        mouse_right = true;
-//        document.getElementById("contextmenu").style.left = mouse_x + "px";
-//        document.getElementById("contextmenu").style.top = mouse_y + "px";
-//        document.getElementById("contextmenu").style.visibility = "visible"
-    }
-}
-
-
-function stylizeHighlightedString(range, col) 
-{
-    var selectionContents = range.extractContents();
-    var span = document.createElement("span");
-    span.appendChild(selectionContents);
-    span.setAttribute("class","uiWebviewHighlight");
-    span.style.backgroundColor = col;
-    span.style.color = "white";
-    range.insertNode(span);
-}
-
-function f_wann_tijd() {
-    stylizeHighlightedString(selection, "red")
-}
-
-function f_wann_dat() {
-    stylizeHighlightedString(selection, "darkred")
-}
-
-function f_wat() {
-    stylizeHighlightedString(selection, "green")
-}
-
-function f_waar() {
-    stylizeHighlightedString(selection, "blue")
-}
-
-function f_clear() {
-    parnt = selection.commonAncestorContainer.parentNode;
-    stopp = selection.endContainer.parentNode;
-    starp = selection.startContainer.parentNode;
-    console.log(selection);
-    if(parnt.tagName == "SPAN"){
-        superparent = parnt.parentNode;
-        var text = document.createTextNode(parnt.innerHTML);
-        superparent.insertBefore(text, parnt);
-        superparent.removeChild(parnt);
-        console.log("You selected a colored piece perfectly");
-    }
-    else {
-        if(starp.tagName == "SPAN"){
-            superparent = starp.parentNode;
-            var text = document.createTextNode(starp.innerHTML);
-            superparent.insertBefore(text, starp);
-            superparent.removeChild(starp);
-            console.log("You selected a overlap with span start");
-        }
-    }
-}
-
-function post_all(num) {
-    var xmlhttp = new XMLHttpRequest();
-    if(num == 1){
-        xmlhttp.onreadystatechange=function()
-        {
-            if (xmlhttp.readyState==4)
-            {
-                document.write(xmlhttp.responseText);
-            }
-        }
-    } else {
-        xmlhttp.onreadystatechange=function()
-        {
-            if (xmlhttp.readyState==4)
-            {
-                field = document.getElementById("preview_field");
-                field.innerHTML = xmlhttp.responseText;
-            }
-        }
-    }
-    var params = "content="+encodeURIComponent(document.getElementById("content-table").innerHTML);
-    params += "&name="+encodeURIComponent(document.getElementById("rssname").innerHTML);
-    var indatarray = document.getElementsByClassName('indat')
-    for (var i = 0; i<indatarray.length; i++) {
-        params += "&" + indatarray[i].name + "=" + indatarray[i].value;
-    }
-    xmlhttp.open("POST", num == 1 ? "hyper.py" : "preview.py", true);
-    xmlhttp.setRequestHeader("Content-type", "application/x-www-form-urlencoded");
-    xmlhttp.setRequestHeader("Content-length", params.length); 
-    xmlhttp.send(params);
-}
index 380729a..b36ee29 100644 (file)
@@ -1,96 +1,76 @@
 (dp0
-S'test'
+S'Example_offline'
 p1
 (dp2
-S'website'
+S'name'
 p3
-S't'
+S'Example_offline'
 p4
-sS'name'
-p5
-g1
 sS'url'
-p6
+p5
 S'localhost/py/paradiso.rss.xml'
-p7
-sS'dloc'
-p8
-S'ut'
-p9
-sS'venue'
-p10
-S'Para'
-p11
+p6
 sS'content'
-p12
-(lp13
-(lp14
+p7
+(lp8
+(lp9
 S'zaterdag 31 mei 2014  - Lentekabinet Festival Afterparty   -   Locatie: Tolhuistuin (zaal)'
-p15
+p10
 aS''
+p11
+aa(lp12
+S'<span class="uiWebviewHighlight" style="color: white; background-color: rgb(139, 0, 0);">vrijdag 4 juli 2014</span> <span class="uiWebviewHighlight" style="color: white; background-color: red;">20:30</span> - <span class="uiWebviewHighlight" style="color: white; background-color: green;">The Crimson Projekct</span>   -   Locatie: <span class="uiWebviewHighlight" style="color: white; background-color: blue;">Tolhuistuin (zaal)</span>'
+p13
+aS'Muziek rond King Crimson'
+p14
+aa(lp15
+S'dinsdag 10 juni 2014 20:30 - Het Ultieme Natuurkunde Feestje \xe2\x80\x93 keynote Amanda Gefter'
 p16
+ag11
 aa(lp17
-S'<span class="uiWebviewHighlight" style="color: white; background-color: rgb(139, 0, 0);">vrijdag 4 juli 2014</span> <span class="uiWebviewHighlight" style="color: white; background-color: red;">20:30</span> - <span class="uiWebviewHighlight" style="color: white; background-color: green;">The Crimson Projekct</span>   -   Locatie: <span class="uiWebviewHighlight" style="color: white; background-color: blue;">Tolhuistuin (zaal)</span>'
+S'dinsdag 12 augustus 2014 21:00 - Kevin Drew - Locatie: Bitterzoet'
 p18
-aS'Muziek rond King Crimson'
+aS'mede-oprichter Broken Social Scene solo'
 p19
 aa(lp20
-S'dinsdag 10 juni 2014 20:30 - Het Ultieme Natuurkunde Feestje \xe2\x80\x93 keynote Amanda Gefter'
+S'<span class="uiWebviewHighlight" style="color: white; background-color: rgb(139, 0, 0);">vrijdag 4 juli 2014</span> <span class="uiWebviewHighlight" style="color: white; background-color: red;">22:00</span> - <span class="uiWebviewHighlight" style="color: white; background-color: green;">Palenke Soultribe</span>'
 p21
-ag16
-aa(lp22
-S'dinsdag 12 augustus 2014 21:00 - Kevin Drew - Locatie: Bitterzoet'
-p23
-aS'mede-oprichter Broken Social Scene solo'
+aS'Electronische muziek en Afro-Colombiaanse ritmes'
+p22
+aa(lp23
+S'maandag 3 november 2014 20:15 - Eefje de Visser: Waterwereldsteden - Locatie: Het Concertgebouw'
 p24
+ag11
 aa(lp25
-S'vrijdag 4 juli 2014 22:00 - Palenke Soultribe'
+S'zaterdag 27 september 2014 20:30 - A Great Big World - Locatie: Tolhuistuin (zaal)'
 p26
-aS'Electronische muziek en Afro-Colombiaanse ritmes'
+aS'Hitschrijvers uit New York'
 p27
 aa(lp28
-S'maandag 3 november 2014 20:15 - Eefje de Visser: Waterwereldsteden - Locatie: Het Concertgebouw'
-p29
-ag16
-aa(lp30
-S'zaterdag 27 september 2014 20:30 - A Great Big World - Locatie: Tolhuistuin (zaal)'
-p31
-aS'Hitschrijvers uit New York'
-p32
-aa(lp33
 S'zaterdag 7 juni 2014 23:00 - Benefietavond Marokkaanse Boot'
-p34
+p29
 aS'Van Amsterdam naar Tanger'
-p35
-aa(lp36
+p30
+aa(lp31
 S'donderdag 13 november 2014 19:30 - Wouter Hamel'
-p37
+p32
 aS'Sprankelende jazzy pop'
-p38
-aa(lp39
+p33
+aa(lp34
 S'vrijdag 13 juni 2014 00:00 - Legends'
-p40
-ag16
+p35
+ag11
 aasS'headers'
-p41
-(lp42
-S'Title'
-p43
-aS'Summary'
-p44
-asS'summarydawg'
-p45
-(lp46
+p36
+(lp37
+sS'summarydawg'
+p38
+(lp39
 sS'titledawg'
-p47
-(lp48
+p40
+(lp41
 S'\x01 \x02 - \x03   -   Locatie: \x04'
-p49
-asS'freq'
-p50
-S'1w'
-p51
-sS'adress'
-p52
-g4
-ss.
\ No newline at end of file
+p42
+aS'\x01 \x02 - \x03'
+p43
+ass.
\ No newline at end of file
index 39f2e9b..ae12bef 100644 (file)
@@ -148,8 +148,6 @@ class Crawler():
         matchs = filter(lambda x: x is not None, matchs)
 
         # Title urls
-        for u in URL_REG.findall(title):
-            results['url'] += list(filter(None, u))
         results['url'] = URL_REG.findall(title)
 
         # Title best match
@@ -159,14 +157,19 @@ class Crawler():
             results['title'] = list(reversed(matches))[0].groupdict()
 
         # Summary urls
-        for u in URL_REG.findall(summary):
-            results['url'] += list(filter(None, u))
+        results['url'] += URL_REG.findall(summary)
+
         # Summary best match
         results['summary'] = {}
         if matchs:
             matches = sorted(matchs, key=lambda x: len(x.groups()))
             results['summary'] = list(reversed(matches))[0].groupdict()
         outputdct = dict(results['summary'].items() + results['title'].items())
+        outputdct['urls'] = [x[0] for x in results['url'] if filter(None, x)]
+        if not outputdct['urls']:
+            del(outputdct['urls'])
+        else:
+            outputdct['urls'] = ', '.join(outputdct['urls'])
         return {re.sub('\d', '', k): v for k, v in outputdct.iteritems()}
 
     def has_to_run(self, interval, last_run, now):
index 146fa67..287730c 100644 (file)
@@ -45,7 +45,7 @@ def crawler_edit(req, args, apok):
         cr = crawler.Crawler()
         with open('/var/www/py/new.html.t', 'r') as f:
             data = f.read()
-        html_feed = '<table id="content-table" border="1" id="htab">'
+        html_feed = '<table id="content-table" border="1">'
         html_feed += '<tr><th>Title</th><th>Summary</th></tr><!--line-->'
         for i in cr.entries[args['name']]['content']:
             html_feed += ('<tr><td id="cel">{}</td><!--cel--><td id="cel">{}</'
@@ -69,17 +69,37 @@ def crawler_test(req, args, apok):
     req.log_error('handler')
     req.content_type = 'text/html'
     req.send_http_header()
-    cr = crawler.Crawler(init=True)
-    if 'name' not in args or str(args['name']) not in cr.entries:
-        req.write('Name not in the crawler...<br/>')
-    else:
-        data = cr.full_run(args['name'])
-        for entry, dc in data:
-            req.write(u'<br/><b>entry</b>: {title}, {summary}<br/>'.
-                      format(**entry).encode('utf-8'))
-            for k, v in dc.iteritems():
-                req.write(u'<b>{}</b>: {}<br/>'.format(k, v).encode('utf-8'))
-    req.write('<br/><br/><a href="index.py">Go back</a>')
+    try:
+        cr = crawler.Crawler(init=True)
+        if 'name' not in args or str(args['name']) not in cr.entries:
+            req.write('Name not in the crawler...<br/>')
+        else:
+            data = cr.full_run(args['name'])
+            req.write('<b>Results: </b>{}/{} got results<br/>\n'.format(
+                len(filter(None, [x[1] for x in data])), len(data)))
+            for entry, dc in data:
+                req.write(u"""<br/><hr style="background:#000000; border:1; height:2px" />
+<b>Entry: </b>
+<div style="height:2em;width:70em;border:1px solid#ccc;font-family:courier;overflow:auto;">
+    {title}
+</div>
+<div style="height:2em;width:70em;border:1px solid#ccc;font-family:courier;overflow:auto;">
+{summary}
+</div><br/>
+""".format(**entry).encode('ascii', 'xmlcharrefreplace'))
+                req.write('<table>')
+                if dc:
+                    for k, v in dc.iteritems():
+                        req.write(
+                            (u'<tr><td><b>{}</b></td><td><span style="font-fam'
+                             'amily:courier">{}</span></td></tr>').format(
+                                k, v).encode('ascii', 'xmlcharrefreplace'))
+                else:
+                    req.write('<tr><th>NO RESULTS FOUND</th></tr>')
+                req.write('</table>\n')
+        req.write('<br/><br/><a href="index.py">Go back</a>')
+    except Exception, e:
+        req.write(repr(e))
     return apok
 
 
@@ -88,7 +108,7 @@ def feed2html(url, name):
     url = urllib.unquote(url)
     url = url if re.match('https?://', url) else 'http://{}'.format(url)
     feed = feedparser.parse(url)
-    result += '<table id="content-table" border="1" id="htab">'
+    result += '<table id="content-table" border="1">'
     result += '<tr><th>Title</th><th>Summary</th></tr><!--line-->'
     for i in feed.entries[:10]:
         result += ('<tr><td id="cel">{}</td><!--cel--><td id="cel">{}</td><!--'
@@ -140,6 +160,7 @@ def crawler_new(req, args, apok):
         req.write(data.format(**params))
     return apok
 
+
 def crawler_preview(req, args, apok):
     req.log_error('handler')
     req.content_type = 'text/html'
index dc57287..8dd916f 100644 (file)
@@ -162,7 +162,7 @@ def feed2html(req, url, name):
     req.write(
         '\tLoading "{}" as <p id="rssname">{}</p><br />\n'.format(url, name))
     feed = feedparser.parse(url)
-    req.write('\t<table id="content-table" border="1" id="htab">\n')
+    req.write('\t<table id="content-table" border="1">\n')
     req.write('\t\t<tr><th>Title</th><th>Summary</th></tr>\n')
     for i in feed.entries[:10]:
         req.write(('\t\t<tr><td id="cel">{}</td><!--cel--><td id="cel">{}</td>'
index 7ba7602..4f95a77 100644 (file)
@@ -1,7 +1,7 @@
 <html>
 <head>
        <title>HyperFrontend RSS feed input new</title>
-       <script src="contextmenu_o.js"></script>
+       <script src="scripts.js"></script>
 </head>
 <body>
        <table>
diff --git a/program/everything/scripts.js b/program/everything/scripts.js
new file mode 100644 (file)
index 0000000..98c00df
--- /dev/null
@@ -0,0 +1,129 @@
+var selection;
+document.captureEvents(Event.MOUSEUP);
+document.onmouseup = function mouseUp(e) {
+    var curselection = window.getSelection().getRangeAt(0);
+    if (curselection.endOffset - curselection.startOffset > 0) {
+       selection = curselection;
+    }
+}
+
+function stylizeHighlightedString(range, col) 
+{
+    par = range.commonAncestorContainer;
+    correct = 0
+    while(1) {
+        if(par.tagName == 'BODY' || par.tagName == 'TH'){
+            break;
+        }
+        if(par.tagName == 'TR' || par.tagName == 'TD'){
+            correct = 1;
+        }
+        if(correct == 1 && par.tagName == 'TABLE' && par.id == 'content-table'){
+            correct = 2;
+            break
+        }
+        par = par.parentNode;
+    }
+    if(correct == 2 && range_has_span(range) == null){
+        var selectionContents = range.extractContents();
+        var span = document.createElement("span");
+        span.appendChild(selectionContents);
+        span.setAttribute("class","uiWebviewHighlight");
+        span.style.backgroundColor = col;
+        span.style.color = "white";
+        range.insertNode(span);
+    }
+}
+
+function range_has_span(range){
+    par = range.commonAncestorContainer;
+    while(1) {
+        if(par == null){
+            break;
+        }
+        if(par.tagName == "SPAN"){
+            return par;
+        }
+        par = par.parentNode;
+    }
+    parentchilds = range.commonAncestorContainer.childNodes;
+    returnarray = []
+    for(var i = 0; i<parentchilds.length; i++){
+        if(parentchilds[i].nodeName == "SPAN"){
+            console.log("found");
+            console.log(parentchilds[i]);
+            returnarray[returnarray.length] = parentchilds[i];
+        }
+    }
+    if(returnarray.length == 0){
+        return null;
+    } else {
+        return returnarray;
+    }
+}
+
+
+// Highlight the stored selection with a red background.
+// NOTE(review): the name suggests this marks the time ("wanneer: tijd") — confirm.
+function f_wann_tijd() {
+    stylizeHighlightedString(selection, "red")
+}
+
+// Highlight the stored selection with a dark red background.
+// NOTE(review): the name suggests this marks the date ("wanneer: datum") — confirm.
+function f_wann_dat() {
+    stylizeHighlightedString(selection, "darkred")
+}
+
+// Highlight the stored selection with a green background.
+// NOTE(review): the name suggests this marks the "what" (event title) — confirm.
+function f_wat() {
+    stylizeHighlightedString(selection, "green")
+}
+
+// Highlight the stored selection with a blue background.
+// NOTE(review): the name suggests this marks the "where" (location) — confirm.
+function f_waar() {
+    stylizeHighlightedString(selection, "blue")
+}
+
+function f_clear() {
+    span = range_has_span(selection);
+    if(span == null){
+        console.log("You haven't selected anything...");
+    } else {
+        for(var i = 0; i<span.length; i++){
+            cspan = span[i];
+            par = cspan.parentNode;
+            var text = document.createTextNode(cspan.innerHTML);
+            par.insertBefore(text, cspan);
+            par.removeChild(cspan);
+            par.normalize();
+        }
+    }
+    selection.collapse();
+}
+
+function post_all(num) {
+    var xmlhttp = new XMLHttpRequest();
+    if(num == 1){
+        xmlhttp.onreadystatechange=function()
+        {
+            if (xmlhttp.readyState==4)
+            {
+                document.write(xmlhttp.responseText);
+            }
+        }
+    } else {
+        xmlhttp.onreadystatechange=function()
+        {
+            if (xmlhttp.readyState==4)
+            {
+                field = document.getElementById("preview_field");
+                field.innerHTML = xmlhttp.responseText;
+            }
+        }
+    }
+    var params = "content="+encodeURIComponent(document.getElementById("content-table").innerHTML);
+    params += "&name="+encodeURIComponent(document.getElementById("rssname").innerHTML);
+    var indatarray = document.getElementsByClassName('indat')
+    for (var i = 0; i<indatarray.length; i++) {
+        params += "&" + indatarray[i].name + "=" + indatarray[i].value;
+    }
+    xmlhttp.open("POST", num == 1 ? "hyper.py" : "preview.py", true);
+    xmlhttp.setRequestHeader("Content-type", "application/x-www-form-urlencoded");
+    xmlhttp.setRequestHeader("Content-length", params.length); 
+    xmlhttp.send(params);
+}
diff --git a/thesis2/Makefile b/thesis2/Makefile
new file mode 100644 (file)
index 0000000..9dfa35d
--- /dev/null
@@ -0,0 +1,12 @@
+SHELL:=/bin/bash
+
+# None of these targets creates a file of the same name; declare them phony so
+# a stray file called "all", "thesis" or "clean" cannot stop them from running.
+.PHONY: all thesis clean
+
+all: thesis
+
+# pdflatex is run three times; the bibtex step between the runs is disabled.
+thesis:
+       pdflatex thesis.tex
+       pdflatex thesis.tex
+#      bibtex thesis.aux
+       pdflatex thesis.tex
+
+# Remove generated LaTeX artefacts, including the built PDF.
+clean:
+       rm -vf *.aux *.bbl *.blg *.dvi *.log *.out *.pdf *.toc
diff --git a/thesis2/abstract.tex b/thesis2/abstract.tex
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/thesis2/fac.py b/thesis2/fac.py
new file mode 100644 (file)
index 0000000..1226f56
--- /dev/null
@@ -0,0 +1,16 @@
+#!/bin/env python
+# -*- coding: utf-8 -*-
+
+lookup = {0: 1}
+
+def fac(n):
+    if n in lookup:
+        return n
+    else:
+        return n*fac(n-1)
+
+def fac2(n):
+    if n == 0:
+        return 1
+    else:
+        return n*fac(n-1)
diff --git a/thesis2/thesis.tex b/thesis2/thesis.tex
new file mode 100644 (file)
index 0000000..ec5e57b
--- /dev/null
@@ -0,0 +1,75 @@
+\documentclass[hidelinks,a4]{scrbook}
+
+\usepackage{lipsum}    % Dummy text
+\usepackage{graphicx}  % Images
+\usepackage{float}     % Better placement float figures
+\usepackage{listings}  % Source code formatting
+\usepackage{hyperref}  % Hyperlinks
+\usepackage{tikz}      % Sequence diagrams
+\usepackage{pgf-umlsd} %
+\usepgflibrary{arrows} %
+
+% Set listings settings
+\lstset{
+       basicstyle=\scriptsize,
+       breaklines=true,
+       numbers=left,
+       numberstyle=\tiny,
+       tabsize=2
+}
+\lstdefinestyle{custompy}{
+       language=python,
+       keepspaces=true,
+       columns=flexible,
+       showspaces=false
+}
+\lstdefinestyle{customhtml}{
+       language=html
+}
+\lstdefinestyle{customjs}{
+       language=java
+}
+
+% Setup hyperlink formatting
+\hypersetup{
+       pdftitle={Non IT configurable adaptive data mining solution used in transforming raw data to structured data},
+       pdfauthor={Mart Lubbers},
+       pdfsubject={Artificial Intelligence},
+}
+
+% Describe the frontpage
+\author{Mart Lubbers\\s4109053}
+\title{Non IT configurable adaptive data mining solution used in transforming raw
+data to structured data} 
+\subtitle{
+       Bachelor's Thesis in Artificial Intelligence\\
+       Radboud University Nijmegen\\
+       \vspace{15mm}
+       \begin{tabular}{cp{5em}c}
+               Franc Grootjen && Alessandro Paula\\
+               RU && Hyperleap
+       \end{tabular}
+       }
+\date{\today}
+
+\begin{document}
+\maketitle
+\tableofcontents
+\newpage
+
+% Surrogate abstract
+\chapter*{
+       \centering 
+       \begin{normalsize}
+               Abstract
+       \end{normalsize}
+}
+\begin{quotation}
+       \noindent
+       \input{abstract.tex}
+\end{quotation}
+\clearpage
+
+\chapter{Introduction}
+
+\end{document}