+++ /dev/null
-var selection;
-var mouse_x = 0;
-var mouse_y = 0;
-var mouse_left = false;
-var mouse_right = false;
-if (document.addEventListener != undefined) document.addEventListener('mousemove', mouseMove, true);
-else if (document.layers) document.captureEvents(Event.MOUSEDOWN | Event.MOUSEUP);
-//document.onmousemove = mouseMove;
-//document.oncontextmenu = RightMouseDown;
-document.onmousedown = mouseDown;
-document.onmouseup = mouseUp;
-
-function mouseMove(a) {
-// mouse_x = document.all ? event.clientX + document.body.scrollLeft : document.layers ? a.x + window.pageXOffset : a.clientX + window.pageXOffset;
-// mouse_y = document.all ? event.clientY + document.body.scrollTop : document.layers ? a.y + window.pageYOffset : a.clientY + window.pageYOffset
-}
-
-//function RightMouseDown() {
-// mouse_right = true;
-// return false
-//}
-
-function mouseUp(e) {
- var curselection = window.getSelection().getRangeAt(0);
- if (curselection.endOffset - curselection.startOffset > 0)
- selection = curselection;
- if (e.which == 3) mouse_right = false
-}
-
-function mouseDown(e) {
- if (e.which == 3) {
- mouse_right = true;
-// document.getElementById("contextmenu").style.left = mouse_x + "px";
-// document.getElementById("contextmenu").style.top = mouse_y + "px";
-// document.getElementById("contextmenu").style.visibility = "visible"
- }
-}
-
-
-function stylizeHighlightedString(range, col)
-{
- var selectionContents = range.extractContents();
- var span = document.createElement("span");
- span.appendChild(selectionContents);
- span.setAttribute("class","uiWebviewHighlight");
- span.style.backgroundColor = col;
- span.style.color = "white";
- range.insertNode(span);
-}
-
-function f_wann_tijd() {
- stylizeHighlightedString(selection, "red")
-}
-
-function f_wann_dat() {
- stylizeHighlightedString(selection, "darkred")
-}
-
-function f_wat() {
- stylizeHighlightedString(selection, "green")
-}
-
-function f_waar() {
- stylizeHighlightedString(selection, "blue")
-}
-
-function f_clear() {
- parnt = selection.commonAncestorContainer.parentNode;
- stopp = selection.endContainer.parentNode;
- starp = selection.startContainer.parentNode;
- console.log(selection);
- if(parnt.tagName == "SPAN"){
- superparent = parnt.parentNode;
- var text = document.createTextNode(parnt.innerHTML);
- superparent.insertBefore(text, parnt);
- superparent.removeChild(parnt);
- console.log("You selected a colored piece perfectly");
- }
- else {
- if(starp.tagName == "SPAN"){
- superparent = starp.parentNode;
- var text = document.createTextNode(starp.innerHTML);
- superparent.insertBefore(text, starp);
- superparent.removeChild(starp);
- console.log("You selected a overlap with span start");
- }
- }
-}
-
-function post_all(num) {
- var xmlhttp = new XMLHttpRequest();
- if(num == 1){
- xmlhttp.onreadystatechange=function()
- {
- if (xmlhttp.readyState==4)
- {
- document.write(xmlhttp.responseText);
- }
- }
- } else {
- xmlhttp.onreadystatechange=function()
- {
- if (xmlhttp.readyState==4)
- {
- field = document.getElementById("preview_field");
- field.innerHTML = xmlhttp.responseText;
- }
- }
- }
- var params = "content="+encodeURIComponent(document.getElementById("content-table").innerHTML);
- params += "&name="+encodeURIComponent(document.getElementById("rssname").innerHTML);
- var indatarray = document.getElementsByClassName('indat')
- for (var i = 0; i<indatarray.length; i++) {
- params += "&" + indatarray[i].name + "=" + indatarray[i].value;
- }
- xmlhttp.open("POST", num == 1 ? "hyper.py" : "preview.py", true);
- xmlhttp.setRequestHeader("Content-type", "application/x-www-form-urlencoded");
- xmlhttp.setRequestHeader("Content-length", params.length);
- xmlhttp.send(params);
-}
(dp0
-S'test'
+S'Example_offline'
p1
(dp2
-S'website'
+S'name'
p3
-S't'
+S'Example_offline'
p4
-sS'name'
-p5
-g1
sS'url'
-p6
+p5
S'localhost/py/paradiso.rss.xml'
-p7
-sS'dloc'
-p8
-S'ut'
-p9
-sS'venue'
-p10
-S'Para'
-p11
+p6
sS'content'
-p12
-(lp13
-(lp14
+p7
+(lp8
+(lp9
S'zaterdag 31 mei 2014 - Lentekabinet Festival Afterparty - Locatie: Tolhuistuin (zaal)'
-p15
+p10
aS''
+p11
+aa(lp12
+S'<span class="uiWebviewHighlight" style="color: white; background-color: rgb(139, 0, 0);">vrijdag 4 juli 2014</span> <span class="uiWebviewHighlight" style="color: white; background-color: red;">20:30</span> - <span class="uiWebviewHighlight" style="color: white; background-color: green;">The Crimson Projekct</span> - Locatie: <span class="uiWebviewHighlight" style="color: white; background-color: blue;">Tolhuistuin (zaal)</span>'
+p13
+aS'Muziek rond King Crimson'
+p14
+aa(lp15
+S'dinsdag 10 juni 2014 20:30 - Het Ultieme Natuurkunde Feestje \xe2\x80\x93 keynote Amanda Gefter'
p16
+ag11
aa(lp17
-S'<span class="uiWebviewHighlight" style="color: white; background-color: rgb(139, 0, 0);">vrijdag 4 juli 2014</span> <span class="uiWebviewHighlight" style="color: white; background-color: red;">20:30</span> - <span class="uiWebviewHighlight" style="color: white; background-color: green;">The Crimson Projekct</span> - Locatie: <span class="uiWebviewHighlight" style="color: white; background-color: blue;">Tolhuistuin (zaal)</span>'
+S'dinsdag 12 augustus 2014 21:00 - Kevin Drew - Locatie: Bitterzoet'
p18
-aS'Muziek rond King Crimson'
+aS'mede-oprichter Broken Social Scene solo'
p19
aa(lp20
-S'dinsdag 10 juni 2014 20:30 - Het Ultieme Natuurkunde Feestje \xe2\x80\x93 keynote Amanda Gefter'
+S'<span class="uiWebviewHighlight" style="color: white; background-color: rgb(139, 0, 0);">vrijdag 4 juli 2014</span> <span class="uiWebviewHighlight" style="color: white; background-color: red;">22:00</span> - <span class="uiWebviewHighlight" style="color: white; background-color: green;">Palenke Soultribe</span>'
p21
-ag16
-aa(lp22
-S'dinsdag 12 augustus 2014 21:00 - Kevin Drew - Locatie: Bitterzoet'
-p23
-aS'mede-oprichter Broken Social Scene solo'
+aS'Electronische muziek en Afro-Colombiaanse ritmes'
+p22
+aa(lp23
+S'maandag 3 november 2014 20:15 - Eefje de Visser: Waterwereldsteden - Locatie: Het Concertgebouw'
p24
+ag11
aa(lp25
-S'vrijdag 4 juli 2014 22:00 - Palenke Soultribe'
+S'zaterdag 27 september 2014 20:30 - A Great Big World - Locatie: Tolhuistuin (zaal)'
p26
-aS'Electronische muziek en Afro-Colombiaanse ritmes'
+aS'Hitschrijvers uit New York'
p27
aa(lp28
-S'maandag 3 november 2014 20:15 - Eefje de Visser: Waterwereldsteden - Locatie: Het Concertgebouw'
-p29
-ag16
-aa(lp30
-S'zaterdag 27 september 2014 20:30 - A Great Big World - Locatie: Tolhuistuin (zaal)'
-p31
-aS'Hitschrijvers uit New York'
-p32
-aa(lp33
S'zaterdag 7 juni 2014 23:00 - Benefietavond Marokkaanse Boot'
-p34
+p29
aS'Van Amsterdam naar Tanger'
-p35
-aa(lp36
+p30
+aa(lp31
S'donderdag 13 november 2014 19:30 - Wouter Hamel'
-p37
+p32
aS'Sprankelende jazzy pop'
-p38
-aa(lp39
+p33
+aa(lp34
S'vrijdag 13 juni 2014 00:00 - Legends'
-p40
-ag16
+p35
+ag11
aasS'headers'
-p41
-(lp42
-S'Title'
-p43
-aS'Summary'
-p44
-asS'summarydawg'
-p45
-(lp46
+p36
+(lp37
+sS'summarydawg'
+p38
+(lp39
sS'titledawg'
-p47
-(lp48
+p40
+(lp41
S'\x01 \x02 - \x03 - Locatie: \x04'
-p49
-asS'freq'
-p50
-S'1w'
-p51
-sS'adress'
-p52
-g4
-ss.
\ No newline at end of file
+p42
+aS'\x01 \x02 - \x03'
+p43
+ass.
\ No newline at end of file
matchs = filter(lambda x: x is not None, matchs)
# Title urls
- for u in URL_REG.findall(title):
- results['url'] += list(filter(None, u))
results['url'] = URL_REG.findall(title)
# Title best match
results['title'] = list(reversed(matches))[0].groupdict()
# Summary urls
- for u in URL_REG.findall(summary):
- results['url'] += list(filter(None, u))
+ results['url'] += URL_REG.findall(summary)
+
# Summary best match
results['summary'] = {}
if matchs:
matches = sorted(matchs, key=lambda x: len(x.groups()))
results['summary'] = list(reversed(matches))[0].groupdict()
outputdct = dict(results['summary'].items() + results['title'].items())
+ outputdct['urls'] = [x[0] for x in results['url'] if filter(None, x)]
+ if not outputdct['urls']:
+ del(outputdct['urls'])
+ else:
+ outputdct['urls'] = ', '.join(outputdct['urls'])
return {re.sub('\d', '', k): v for k, v in outputdct.iteritems()}
def has_to_run(self, interval, last_run, now):
cr = crawler.Crawler()
with open('/var/www/py/new.html.t', 'r') as f:
data = f.read()
- html_feed = '<table id="content-table" border="1" id="htab">'
+ html_feed = '<table id="content-table" border="1">'
html_feed += '<tr><th>Title</th><th>Summary</th></tr><!--line-->'
for i in cr.entries[args['name']]['content']:
html_feed += ('<tr><td id="cel">{}</td><!--cel--><td id="cel">{}</'
req.log_error('handler')
req.content_type = 'text/html'
req.send_http_header()
- cr = crawler.Crawler(init=True)
- if 'name' not in args or str(args['name']) not in cr.entries:
- req.write('Name not in the crawler...<br/>')
- else:
- data = cr.full_run(args['name'])
- for entry, dc in data:
- req.write(u'<br/><b>entry</b>: {title}, {summary}<br/>'.
- format(**entry).encode('utf-8'))
- for k, v in dc.iteritems():
- req.write(u'<b>{}</b>: {}<br/>'.format(k, v).encode('utf-8'))
- req.write('<br/><br/><a href="index.py">Go back</a>')
+ try:
+ cr = crawler.Crawler(init=True)
+ if 'name' not in args or str(args['name']) not in cr.entries:
+ req.write('Name not in the crawler...<br/>')
+ else:
+ data = cr.full_run(args['name'])
+ req.write('<b>Results: </b>{}/{} got results<br/>\n'.format(
+ len(filter(None, [x[1] for x in data])), len(data)))
+ for entry, dc in data:
+ req.write(u"""<br/><hr style="background:#000000; border:1; height:2px" />
+<b>Entry: </b>
+<div style="height:2em;width:70em;border:1px solid#ccc;font-family:courier;overflow:auto;">
+ {title}
+</div>
+<div style="height:2em;width:70em;border:1px solid#ccc;font-family:courier;overflow:auto;">
+{summary}
+</div><br/>
+""".format(**entry).encode('ascii', 'xmlcharrefreplace'))
+ req.write('<table>')
+ if dc:
+ for k, v in dc.iteritems():
+ req.write(
+ (u'<tr><td><b>{}</b></td><td><span style="font-fam'
+ 'amily:courier">{}</span></td></tr>').format(
+ k, v).encode('ascii', 'xmlcharrefreplace'))
+ else:
+ req.write('<tr><th>NO RESULTS FOUND</th></tr>')
+ req.write('</table>\n')
+ req.write('<br/><br/><a href="index.py">Go back</a>')
+ except Exception, e:
+ req.write(repr(e))
return apok
url = urllib.unquote(url)
url = url if re.match('https?://', url) else 'http://{}'.format(url)
feed = feedparser.parse(url)
- result += '<table id="content-table" border="1" id="htab">'
+ result += '<table id="content-table" border="1">'
result += '<tr><th>Title</th><th>Summary</th></tr><!--line-->'
for i in feed.entries[:10]:
result += ('<tr><td id="cel">{}</td><!--cel--><td id="cel">{}</td><!--'
req.write(data.format(**params))
return apok
+
def crawler_preview(req, args, apok):
req.log_error('handler')
req.content_type = 'text/html'
req.write(
'\tLoading "{}" as <p id="rssname">{}</p><br />\n'.format(url, name))
feed = feedparser.parse(url)
- req.write('\t<table id="content-table" border="1" id="htab">\n')
+ req.write('\t<table id="content-table" border="1">\n')
req.write('\t\t<tr><th>Title</th><th>Summary</th></tr>\n')
for i in feed.entries[:10]:
req.write(('\t\t<tr><td id="cel">{}</td><!--cel--><td id="cel">{}</td>'
<html>
<head>
<title>HyperFrontend RSS feed input new</title>
- <script src="contextmenu_o.js"></script>
+ <script src="scripts.js"></script>
</head>
<body>
<table>
--- /dev/null
+var selection;
+document.captureEvents(Event.MOUSEUP);
+document.onmouseup = function mouseUp(e) {
+ var curselection = window.getSelection().getRangeAt(0);
+ if (curselection.endOffset - curselection.startOffset > 0) {
+ selection = curselection;
+ }
+}
+
+function stylizeHighlightedString(range, col)
+{
+ par = range.commonAncestorContainer;
+ correct = 0
+ while(1) {
+ if(par.tagName == 'BODY' || par.tagName == 'TH'){
+ break;
+ }
+ if(par.tagName == 'TR' || par.tagName == 'TD'){
+ correct = 1;
+ }
+ if(correct == 1 && par.tagName == 'TABLE' && par.id == 'content-table'){
+ correct = 2;
+ break
+ }
+ par = par.parentNode;
+ }
+ if(correct == 2 && range_has_span(range) == null){
+ var selectionContents = range.extractContents();
+ var span = document.createElement("span");
+ span.appendChild(selectionContents);
+ span.setAttribute("class","uiWebviewHighlight");
+ span.style.backgroundColor = col;
+ span.style.color = "white";
+ range.insertNode(span);
+ }
+}
+
+function range_has_span(range){
+ par = range.commonAncestorContainer;
+ while(1) {
+ if(par == null){
+ break;
+ }
+ if(par.tagName == "SPAN"){
+ return par;
+ }
+ par = par.parentNode;
+ }
+ parentchilds = range.commonAncestorContainer.childNodes;
+ returnarray = []
+ for(var i = 0; i<parentchilds.length; i++){
+ if(parentchilds[i].nodeName == "SPAN"){
+ console.log("found");
+ console.log(parentchilds[i]);
+ returnarray[returnarray.length] = parentchilds[i];
+ }
+ }
+ if(returnarray.length == 0){
+ return null;
+ } else {
+ return returnarray;
+ }
+}
+
+
+function f_wann_tijd() {
+ stylizeHighlightedString(selection, "red")
+}
+
+function f_wann_dat() {
+ stylizeHighlightedString(selection, "darkred")
+}
+
+function f_wat() {
+ stylizeHighlightedString(selection, "green")
+}
+
+function f_waar() {
+ stylizeHighlightedString(selection, "blue")
+}
+
+function f_clear() {
+ span = range_has_span(selection);
+ if(span == null){
+ console.log("You haven't selected anything...");
+ } else {
+ for(var i = 0; i<span.length; i++){
+ cspan = span[i];
+ par = cspan.parentNode;
+ var text = document.createTextNode(cspan.innerHTML);
+ par.insertBefore(text, cspan);
+ par.removeChild(cspan);
+ par.normalize();
+ }
+ }
+ selection.collapse();
+}
+
+function post_all(num) {
+ var xmlhttp = new XMLHttpRequest();
+ if(num == 1){
+ xmlhttp.onreadystatechange=function()
+ {
+ if (xmlhttp.readyState==4)
+ {
+ document.write(xmlhttp.responseText);
+ }
+ }
+ } else {
+ xmlhttp.onreadystatechange=function()
+ {
+ if (xmlhttp.readyState==4)
+ {
+ field = document.getElementById("preview_field");
+ field.innerHTML = xmlhttp.responseText;
+ }
+ }
+ }
+ var params = "content="+encodeURIComponent(document.getElementById("content-table").innerHTML);
+ params += "&name="+encodeURIComponent(document.getElementById("rssname").innerHTML);
+ var indatarray = document.getElementsByClassName('indat')
+ for (var i = 0; i<indatarray.length; i++) {
+ params += "&" + indatarray[i].name + "=" + indatarray[i].value;
+ }
+ xmlhttp.open("POST", num == 1 ? "hyper.py" : "preview.py", true);
+ xmlhttp.setRequestHeader("Content-type", "application/x-www-form-urlencoded");
+ xmlhttp.setRequestHeader("Content-length", params.length);
+ xmlhttp.send(params);
+}
--- /dev/null
+# Build rules for thesis.tex.
+SHELL:=/bin/bash
+
+# None of these targets creates a file of the same name; declaring them
+# phony keeps a stray file called "clean" or "thesis" from disabling a rule.
+.PHONY: all thesis clean
+
+all: thesis
+
+# pdflatex is run several times, presumably so that the table of contents
+# and cross-references settle. The bibtex step is currently disabled.
+thesis:
+	pdflatex thesis.tex
+	pdflatex thesis.tex
+#	bibtex thesis.aux
+	pdflatex thesis.tex
+
+# NOTE: the *.pdf glob removes every PDF in this directory, not only the
+# generated thesis.pdf.
+clean:
+	rm -vf *.aux *.bbl *.blg *.dvi *.log *.out *.pdf *.toc
--- /dev/null
+#!/bin/env python
+# -*- coding: utf-8 -*-
+
+lookup = {0: 1}
+
+def fac(n):
+ if n in lookup:
+ return n
+ else:
+ return n*fac(n-1)
+
+def fac2(n):
+ if n == 0:
+ return 1
+ else:
+ return n*fac(n-1)
--- /dev/null
+\documentclass[hidelinks,a4]{scrbook}
+
+\usepackage{lipsum} % Dummy text
+\usepackage{graphicx} % Images
+\usepackage{float} % Better placement float figures
+\usepackage{listings} % Source code formatting
+\usepackage{hyperref} % Hyperlinks
+\usepackage{tikz} % Sequence diagrams
+\usepackage{pgf-umlsd} %
+\usepgflibrary{arrows} %
+
+% Set listings settings
+\lstset{
+ basicstyle=\scriptsize,
+ breaklines=true,
+ numbers=left,
+ numberstyle=\tiny,
+ tabsize=2
+}
+\lstdefinestyle{custompy}{
+ language=python,
+ keepspaces=true,
+ columns=flexible,
+ showspaces=false
+}
+\lstdefinestyle{customhtml}{
+ language=html
+}
+\lstdefinestyle{customjs}{
+ language=java
+}
+
+% Setup hyperlink formatting
+\hypersetup{
+ pdftitle={Non IT configurable adaptive data mining solution used in transforming raw data to structured data},
+ pdfauthor={Mart Lubbers},
+ pdfsubject={Artificial Intelligence},
+}
+
+% Describe the frontpage
+\author{Mart Lubbers\\s4109053}
+\title{Non IT configurable adaptive data mining solution used in transforming raw
+data to structured data}
+\subtitle{
+ Bachelor's Thesis in Artificial Intelligence\\
+ Radboud University Nijmegen\\
+ \vspace{15mm}
+ \begin{tabular}{cp{5em}c}
+ Franc Grootjen && Alessandro Paula\\
+ RU && Hyperleap
+ \end{tabular}
+ }
+\date{\today}
+
+\begin{document}
+\maketitle
+\tableofcontents
+\newpage
+
+% Surrogate abstract
+\chapter*{
+ \centering
+ \begin{normalsize}
+ Abstract
+ \end{normalsize}
+}
+\begin{quotation}
+ \noindent
+ \input{abstract.tex}
+\end{quotation}
+\clearpage
+
+\chapter{Introduction}
+
+\end{document}