From: Mart Lubbers Date: Thu, 14 Aug 2014 11:21:31 +0000 (+0200) Subject: laatste push X-Git-Url: https://git.martlubbers.net/?a=commitdiff_plain;h=9fe96eac2686ef9ec2385f898cccfad853aaf9bd;p=bsc-thesis1415.git laatste push --- diff --git a/planning/final_planning.txt b/planning/final_planning.txt new file mode 100644 index 0000000..7d7194a --- /dev/null +++ b/planning/final_planning.txt @@ -0,0 +1,7 @@ +2014-08-14 - Datumprikker maken voor overleg met franc. +2014-08-21 - +2014-08-28 - Programma af +2014-09-04 - Scriptie af? +2014-09-11 - Commentaar verwerken? +2014-09-18 - Scriptie laatste versie +2014-09-25 - ?? Presenteren diff --git a/program/everything/crawler.db b/program/everything/crawler.db index 3b7579c..3f80ccb 100644 --- a/program/everything/crawler.db +++ b/program/everything/crawler.db @@ -21,143 +21,143 @@ p9 DAWGNode p10 (dp11 -S'final' +S'children' p12 -I00 -sS'number' -p13 -NsS'children' -p14 -(dp15 +(dp13 S'\x01' -p16 +p14 (ipydawg DAWGNode -p17 -(dp18 +p15 +(dp16 g12 -I00 -sg13 -Nsg14 -(dp19 +(dp17 S',' -p20 +p18 (ipydawg DAWGNode -p21 -(dp22 +p19 +(dp20 g12 -I00 -sg13 -Nsg14 -(dp23 +(dp21 S' ' -p24 +p22 (ipydawg DAWGNode -p25 -(dp26 +p23 +(dp24 g12 -I00 -sg13 -Nsg14 -(dp27 +(dp25 S'\x02' -p28 +p26 (ipydawg DAWGNode -p29 -(dp30 +p27 +(dp28 g12 -I00 -sg13 -Nsg14 -(dp31 -g24 +(dp29 +g22 (ipydawg DAWGNode -p32 -(dp33 +p30 +(dp31 g12 -I00 -sg13 -Nsg14 -(dp34 +(dp32 S'u' -p35 +p33 (ipydawg DAWGNode -p36 -(dp37 +p34 +(dp35 g12 -I00 -sg13 -Nsg14 -(dp38 -g35 +(dp36 +g33 (ipydawg DAWGNode -p39 -(dp40 +p37 +(dp38 g12 -I00 -sg13 -Nsg14 -(dp41 +(dp39 S'r' -p42 +p40 (ipydawg DAWGNode -p43 -(dp44 +p41 +(dp42 g12 -I00 -sg13 -Nsg14 -(dp45 -g24 +(dp43 +g22 (ipydawg DAWGNode -p46 -(dp47 +p44 +(dp45 g12 -I00 -sg13 -Nsg14 -(dp48 +(dp46 S'-' -p49 +p47 (ipydawg DAWGNode -p50 -(dp51 +p48 +(dp49 g12 -I00 -sg13 -Nsg14 -(dp52 -g24 +(dp50 +g22 (ipydawg DAWGNode -p53 -(dp54 +p51 +(dp52 g12 -I00 -sg13 -Nsg14 -(dp55 +(dp53 S'\x03' -p56 +p54 (ipydawg DAWGNode -p57 -(dp58 +p55 +(dp56 g12 +(dp57 +sS'final' +p58 I01 -sg13 -Nsg14 -(dp59 -sbssbssbssbssbssbssbssbssbssbssbssbssbsS'_numbers_valid' +sS'number' +p59 +Nsbssg58 +I00 +sg59 +Nsbssg58 +I00 +sg59 +Nsbssg58 +I00 +sg59 +Nsbssg58 +I00 +sg59 +Nsbssg58 +I00 +sg59 +Nsbssg58 +I00 +sg59 +Nsbssg58 +I00 +sg59 +Nsbssg58 +I00 +sg59 +Nsbssg58 +I00 +sg59 +Nsbssg58 +I00 +sg59 +Nsbssg58 +I00 +sg59 +Nsbssg58 +I00 +sg59 +NsbsS'_numbers_valid' p60 I00 sS'register' @@ -174,27 +174,27 @@ S'\x01, \x02 uur - \x03' p67 sbsS'dloc' p68 -S'test' +S'Grote zaal' p69 sS'venue' p70 -S'De doelen' +S'De Doelen' p71 sS'content' p72 (lp73 (lp74 -S'za 16 aug, 20.15 uur - Elvis & More 2014 - Ren\xc3\xa9 Shuman & Angel-Eye' +S'za 16 aug, 20.15 uur - Elvis & More 2014 - Ren\xc3\xa9 Shuman & Angel-Eye' p75 aS'Ode aan 60 jaar rock-and-roll door Ren\xc3\xa9 Shuman & Angel-Eye
In 1954 werd de song That\xe2\x80\x99s allright mama door Elvis Presley, The King of Rock and Roll, opgenomen en uitgebracht. Het duo Ren\xc3\xa9 Shuman & Angel-Eye houdt zijn muziek in ere met hun theatershows, tv-specials en opnamen. Ze trekken met hun showband volle zalen met hun Rock and Roll-vertolkingen. En jaarlijks geeft het duo een bijzonder concert op 16 augustus, de sterfdag van Elvis.

Ren\xc3\xa9: \xe2\x80\x98Dit jaar zal ons Elvis & More-concert plaatsvinden in de Doelen, de grootste klassieke concertzaal van Nederland met een prachtige akoestiek. We willen onze fans trakteren op een energieke en mooie show met subliem geluid en comfortabele zitplaatsen. Een hele avond genieten van 60 jaar Rock and Roll-geschiedenis!\xe2\x80\x99

' p76 aa(lp77 -S'vr 05 sep, 20.00 uur - Night of Ziryab - Amsterdams Andalusisch Orkest i.s.m. Orchestre Temsamani (Tetuan)' +S'vr 05 sep, 20.00 uur - Night of Ziryab - Amsterdams Andalusisch Orkest i.s.m. Orchestre Temsamani (Tetuan)' p78 aS'' p79 aa(lp80 -S'vr 12 sep, 13.00 uur - Rotterdam Philharmonic Gergiev Festival - Locatietheater | Niemandsland (I)' +S'vr 12 sep, 13.00 uur - Rotterdam Philharmonic Gergiev Festival - Locatietheater | Niemandsland (I)' p81 aS'Rode draad door het Gergiev Festival vormt de muziektheatervoorstelling \xe2\x80\x98Niemandsland\xe2\x80\x99 van regisseur Serge van Veggel. Een veteraan haalt herinneringen op aan zijn oorlogsverleden, dat in donkere kelders opeens weer heel dichtbij lijkt te komen. Hij symboliseert de \xe2\x80\x98Onbekende Soldaat\xe2\x80\x99 in een collage van originele dagboekfragmenten en gedichten, liedjes of soms absurde oorlogsverhalen. Muziek komt van het theatrale Ragazze Kwartet: gaandeweg zal de sublieme eenheid van het klassieke strijkkwartet door de toenemende chaos steeds meer verbrokkelen.

\xe2\x80\x98...hoe je met weinig middelen schitterende sc\xc3\xa8nes kunt bouwen\xe2\x80\x99 - NRC Handelsblad over het locatietheater van regisseur Serge van Veggel.

Deze voorstelling wordt georganiseerd in samenwerking met het muziektheatergezelschap OPERA2DAY.

Locatietheater met als startpunt de Eduard Flipse Zaal. De voorstelling bestaat uit twee gedeelten. Het tweede gedeelte is niet goed te bezoeken voor hen die slecht ter been zijn of voor rolstoelgebruikers.

' p82 @@ -252,11 +252,11 @@ DAWGNode p111 (dp112 g12 -I00 -sg13 -Nsg14 (dp113 -sbsg60 +sg58 +I00 +sg59 +Nsbsg60 I00 sg61 g62 @@ -267,533 +267,377 @@ sg66 g79 sbsS'freq' p117 -S'1w' +S'1d' p118 sS'adress' p119 -S'amsterdam' +S'Straat 1 Eindhoven' p120 ssS'paradiso' p121 (dp122 -S'website' +S'name' p123 -S'www.paradiso.nl' -p124 -sS'name' -p125 g121 sS'titledawg' -p126 +p124 (ipydawg DAWG -p127 -(dp128 +p125 +(dp126 g9 (ipydawg DAWGNode -p129 -(dp130 -g13 -Nsg14 -(dp131 +p127 +(dp128 +g59 +Nsg12 +(dp129 S'\x01' -p132 +p130 (ipydawg DAWGNode -p133 -(dp134 -g13 -Nsg14 -(dp135 +p131 +(dp132 +g59 +Nsg12 +(dp133 S' ' -p136 +p134 (ipydawg DAWGNode -p137 -(dp138 -g13 -Nsg14 -(dp139 +p135 +(dp136 +g59 +Nsg12 +(dp137 S'\x02' -p140 +p138 (ipydawg DAWGNode -p141 -(dp142 -g13 -Nsg14 -(dp143 -g136 +p139 +(dp140 +g59 +Nsg12 +(dp141 +g134 (ipydawg DAWGNode -p144 -(dp145 -g13 -Nsg14 -(dp146 +p142 +(dp143 +g59 +Nsg12 +(dp144 S'-' -p147 +p145 (ipydawg DAWGNode -p148 -(dp149 -g13 -Nsg14 -(dp150 -g136 +p146 +(dp147 +g59 +Nsg12 +(dp148 +g134 (ipydawg DAWGNode -p151 -(dp152 -g13 -Nsg14 -(dp153 +p149 +(dp150 +g59 +Nsg12 +(dp151 S'\x03' -p154 +p152 +(ipydawg +DAWGNode +p153 +(dp154 +g59 +Nsg12 +(dp155 +g134 (ipydawg DAWGNode -p155 -(dp156 -g13 -Nsg14 +p156 (dp157 -g136 +g59 +Nsg12 +(dp158 +g134 (ipydawg DAWGNode -p158 -(dp159 -g13 -Nsg14 +p159 (dp160 -g136 +g59 +Nsg12 +(dp161 +g134 (ipydawg DAWGNode -p161 -(dp162 -g13 -Nsg14 +p162 (dp163 -g136 +g59 +Nsg12 +(dp164 +g145 (ipydawg DAWGNode -p164 -(dp165 -g13 -Nsg14 +p165 (dp166 -g147 +g59 +Nsg12 +(dp167 +g134 (ipydawg DAWGNode -p167 -(dp168 -g13 -Nsg14 +p168 (dp169 -g136 +g59 +Nsg12 +(dp170 +g134 (ipydawg DAWGNode -p170 -(dp171 -g13 -Nsg14 +p171 (dp172 -g136 +g59 +Nsg12 +(dp173 +g134 (ipydawg DAWGNode -p173 -(dp174 -g13 -Nsg14 +p174 (dp175 -g136 -(ipydawg -DAWGNode -p176 -(dp177 -g13 -Nsg14 -(dp178 +g59 +Nsg12 +(dp176 S'L' -p179 +p177 (ipydawg DAWGNode -p180 -(dp181 -g13 -Nsg14 -(dp182 +p178 +(dp179 +g59 +Nsg12 +(dp180 S'o' -p183 +p181 (ipydawg DAWGNode -p184 -(dp185 -g13 -Nsg14 -(dp186 +p182 +(dp183 +g59 +Nsg12 +(dp184 S'c' -p187 +p185 (ipydawg DAWGNode -p188 -(dp189 -g13 -Nsg14 -(dp190 +p186 +(dp187 +g59 +Nsg12 +(dp188 S'a' -p191 +p189 (ipydawg DAWGNode -p192 -(dp193 -g13 -Nsg14 -(dp194 +p190 +(dp191 +g59 +Nsg12 +(dp192 S't' -p195 +p193 (ipydawg DAWGNode -p196 -(dp197 -g13 -Nsg14 -(dp198 +p194 +(dp195 +g59 +Nsg12 +(dp196 S'i' -p199 +p197 (ipydawg DAWGNode -p200 -(dp201 -g13 -Nsg14 -(dp202 +p198 +(dp199 +g59 +Nsg12 +(dp200 S'e' -p203 +p201 (ipydawg DAWGNode -p204 -(dp205 -g13 -Nsg14 -(dp206 +p202 +(dp203 +g59 +Nsg12 +(dp204 S':' -p207 +p205 (ipydawg DAWGNode -p208 -(dp209 -g13 -Nsg14 -(dp210 -g136 +p206 +(dp207 +g59 +Nsg12 +(dp208 +g134 (ipydawg DAWGNode -p211 -(dp212 -g13 -Nsg14 -(dp213 +p209 +(dp210 +g59 +Nsg12 +(dp211 S'\x04' -p214 +p212 (ipydawg DAWGNode -p215 -(dp216 -g13 -Nsg14 -(dp217 -sg12 +p213 +(dp214 +g59 +Nsg12 +(dp215 +sg58 I01 -sbssg12 -I00 -sbssg12 -I00 -sbssg12 -I00 -sbssg12 -I00 -sbssg12 -I00 -sbssg12 -I00 -sbssg12 -I00 -sbssg12 +sbssg58 I00 -sbssg12 +sbssg58 I00 -sbssg12 +sbssg58 I00 -sbssg12 +sbssg58 I00 -sbssg12 -I00 -sbssg12 -I00 -sbssg12 -I00 -sbssg12 -I00 -sbsg147 -(ipydawg -DAWGNode -p218 -(dp219 -g13 -Nsg14 -(dp220 -g136 -(ipydawg -DAWGNode -p221 -(dp222 -g13 -Nsg14 -(dp223 -g179 -(ipydawg -DAWGNode -p224 -(dp225 -g13 -Nsg14 -(dp226 -g183 -(ipydawg -DAWGNode -p227 -(dp228 -g13 -Nsg14 -(dp229 -g187 -(ipydawg -DAWGNode -p230 -(dp231 -g13 -Nsg14 -(dp232 -g191 -(ipydawg -DAWGNode -p233 -(dp234 -g13 -Nsg14 -(dp235 -g195 -(ipydawg -DAWGNode -p236 -(dp237 -g13 -Nsg14 -(dp238 -g199 -(ipydawg -DAWGNode -p239 -(dp240 -g13 -Nsg14 -(dp241 -g203 -(ipydawg -DAWGNode -p242 -(dp243 -g13 -Nsg14 -(dp244 -g207 -(ipydawg -DAWGNode -p245 -(dp246 -g13 -Nsg14 -(dp247 -g136 -(ipydawg -DAWGNode -p248 -(dp249 -g13 -Nsg14 -(dp250 -g214 -(ipydawg -DAWGNode -p251 -(dp252 -g13 -Nsg14 -(dp253 -sg12 -I01 -sbssg12 +sbssg58 I00 -sbssg12 +sbssg58 I00 -sbssg12 +sbssg58 I00 -sbssg12 +sbssg58 I00 -sbssg12 +sbssg58 I00 -sbssg12 +sbssg58 I00 -sbssg12 +sbssg58 I00 -sbssg12 +sbssg58 I00 -sbssg12 +sbssg58 I00 -sbssg12 +sbssg58 I00 -sbssg12 +sbssg58 I00 -sbssg12 +sbssg58 I00 -sbssg12 +sbssg58 I01 -sbssg12 +sbssg58 I00 -sbssg12 +sbssg58 I00 -sbssg12 +sbssg58 I00 -sbssg12 +sbssg58 I00 -sbssg12 +sbssg58 I00 -sbssg12 +sbssg58 I00 -sbssg12 +sbssg58 I00 sbsg60 I00 sg61 g62 -((lp254 -g164 -ag188 -ag161 -ag192 -ag208 -ag196 -ag170 -ag200 -ag173 -ag204 -ag176 -ag180 -ag167 -ag211 -ag184 -ag215 -atp255 -Rp256 +((lp216 +tp217 +Rp218 sg66 -S'\x01 \x02 - \x03 - Locatie: \x04' -p257 -sbsS'dloc' -p258 -S'Grote Zaal' -p259 -sS'venue' -p260 -S'Paradiso' -p261 -sS'content' -p262 -(lp263 -(lp264 +S'\x01 \x02 - \x03 - Locatie: \x04' +p219 +sbsS'content' +p220 +(lp221 +(lp222 S'zaterdag 31 mei 2014 - Lentekabinet Festival Afterparty - Locatie: Tolhuistuin (zaal)' -p265 +p223 ag79 -aa(lp266 -S'vrijdag 4 juli 2014 20:30 - The Crimson Projekct - Locatie: Tolhuistuin (zaal)' -p267 +aa(lp224 +S'vrijdag 4 juli 2014 20:30 - The Crimson Projekct - Locatie: Tolhuistuin (zaal)' +p225 aS'Muziek rond King Crimson' -p268 -aa(lp269 -S'dinsdag 10 juni 2014 20:30 - Het Ultieme Natuurkunde Feestje \xe2\x80\x93 keynote Amanda Gefter' -p270 +p226 +aa(lp227 +S'dinsdag 10 juni 2014 20:30 - Het Ultieme Natuurkunde Feestje \xe2\x80\x93 keynote Amanda Gefter' +p228 ag79 -aa(lp271 -S'dinsdag 12 augustus 2014 21:00 - Kevin Drew - Locatie: Bitterzoet' -p272 +aa(lp229 +S'dinsdag 12 augustus 2014 21:00 - Kevin Drew - Locatie: Bitterzoet' +p230 aS'mede-oprichter Broken Social Scene solo' -p273 -aa(lp274 -S'vrijdag 4 juli 2014 22:00 - Palenke Soultribe' -p275 +p231 +aa(lp232 +S'vrijdag 4 juli 2014 22:00 - Palenke Soultribe' +p233 aS'Electronische muziek en Afro-Colombiaanse ritmes' -p276 -aa(lp277 +p234 +aa(lp235 S'maandag 3 november 2014 20:15 - Eefje de Visser: Waterwereldsteden - Locatie: Het Concertgebouw' -p278 +p236 ag79 -aa(lp279 +aa(lp237 S'zaterdag 27 september 2014 20:30 - A Great Big World - Locatie: Tolhuistuin (zaal)' -p280 +p238 aS'Hitschrijvers uit New York' -p281 -aa(lp282 +p239 +aa(lp240 S'zaterdag 7 juni 2014 23:00 - Benefietavond Marokkaanse Boot' -p283 +p241 aS'Van Amsterdam naar Tanger' -p284 -aa(lp285 +p242 +aa(lp243 S'donderdag 13 november 2014 19:30 - Wouter Hamel' -p286 +p244 aS'Sprankelende jazzy pop' -p287 -aa(lp288 +p245 +aa(lp246 S'vrijdag 13 juni 2014 00:00 - Legends' -p289 +p247 ag79 aasS'headers' -p290 -(lp291 +p248 +(lp249 S'Title' -p292 +p250 aS'Summary' -p293 +p251 asS'summarydawg' -p294 +p252 (ipydawg DAWG -p295 -(dp296 +p253 +(dp254 g9 (ipydawg DAWGNode -p297 -(dp298 -g13 -Nsg14 -(dp299 -sg12 +p255 +(dp256 +g59 +Nsg12 +(dp257 +sg58 I00 sbsg60 I00 sg61 g62 -((lp300 -tp301 -Rp302 +((lp258 +tp259 +Rp260 sg66 g79 -sbsS'freq' -p303 -S'1w' -p304 -sS'adress' -p305 -S'Amsterdam' -p306 -ss. \ No newline at end of file +sbss. \ No newline at end of file diff --git a/program/everything/crawler.py b/program/everything/crawler.py index 091bfb9..a68491d 100644 --- a/program/everything/crawler.py +++ b/program/everything/crawler.py @@ -4,6 +4,8 @@ import pickle import re import os +import pprint +import sys class Crawler(): @@ -41,38 +43,99 @@ class Crawler(): d_s = self.entries[name]['summarydawg'] r_t, r_s = [], [] for i, w in enumerate(d_t.words()): - w = w.replace('\x01', '(?P.*)'.format(i)) - w = w.replace('\x02', '(?P.*)'.format(i)) - w = w.replace('\x03', '(?P.*)'.format(i)) - w = w.replace('\x04', '(?P.*)'.format(i)) + w = w.replace('\x01', + '(?P.+)'.format(i)) + w = w.replace('\x02', + '(?P.+)'.format(i)) + w = w.replace('\x03', + '(?P.+)'.format(i)) + w = w.replace('\x04', + '(?P.+)'.format(i)) + w = re.sub('\s+', '\\s+', w) r_t.append(w) for i, w in enumerate(d_s.words()): - w = w.replace('\x01', '(?P.*)'.format(i)) - w = w.replace('\x02', '(?P.*)'.format(i)) - w = w.replace('\x03', '(?P.*)'.format(i)) - w = w.replace('\x04', '(?P.*)'.format(i)) + w = w.replace('\x01', + '(?P.+)'.format(i)) + w = w.replace('\x02', + '(?P.+)'.format(i)) + w = w.replace('\x03', + '(?P.+)'.format(i)) + w = w.replace('\x04', + '(?P.+)'.format(i)) + w = re.sub('\s+', '\\s+', w) r_s.append(w) - r_t = '' if not r_t else '({})'.format('|'.join( - reversed(sorted(r_t, key=lambda x: len(x))))) - r_s = '' if not r_s else '({})'.format('|'.join( - reversed(sorted(r_s, key=lambda x: len(x))))) +# r_t = '' if not r_t else '({})'.format('|'.join( +# reversed(sorted(r_t, key=lambda x: len(x))))) +# r_s = '' if not r_s else '({})'.format('|'.join( +# reversed(sorted(r_s, key=lambda x: len(x))))) return r_t, r_s + def to_dot(self, name, out='-'): + out = sys.stdout if out == '-' else open(out, 'w') + try: + q0 = self.entries[name]['titledawg'].q0 + nodenum = 0 + final_nodes = [] + nodes = [] + edges = [] + to_visit = [(0, q0)] + visited = set() + translation = [] + if q0.final: + final_nodes.append(nodenum) + else: + nodes.append(nodenum) + + nodenum += 1 + while to_visit: + current = to_visit.pop() + if not current[0] in visited: + visited.add(current[0]) + for char, child in current[1].children.iteritems(): + matches = [c for c in translation if c[0] == child] + curnum = -1 + if matches: + curnum = matches[-1][1] + else: + translation.append((child, nodenum)) + curnum = nodenum + nodenum += 1 + if child.final: + final_nodes.append(curnum) + else: + nodes.append(curnum) + edges.append((current[0], char, curnum)) + to_visit.append((curnum, child)) + out.write('digraph dawg {\n') + out.write('\tnode [shape = doublecircle]; {}\n'.format( + ' '.join(str(n) for n in final_nodes))) + out.write('\tnode [shape = circle]; {}\n'.format( + ' '.join(str(n) for n in nodes))) + for fr, ch, to in edges: + out.write('\t{} -> {} [label = "{}"];\n'.format(fr, to, ch)) + out.write('}\n') + except: + out.close() + def test_entry(self, name, title, summary): + print '\n', repr(title), repr(summary) r_t, r_s = self.get_regex(name) - print r_t, r_s - rtm = re.search(r_t, title) - print '\ntrying to match: "{}", "{}"'.format(title, summary) - print 'matching to: "{}", "{}"'.format(repr(r_t), repr(r_s)) - if rtm and r_t: - for k, v in [(k, v) for k, v in rtm.groupdict().iteritems() if v]: - print '{}: {}'.format(k, v) + matcht = [re.search(t, title) for t in r_t] + matchs = [re.search(t, summary) for t in r_s] + + matcht = filter(lambda x: x is not None, matcht) + matchs = filter(lambda x: x is not None, matchs) + + if matcht: + pprint.pprint( + [m.groupdict() for m in + reversed(sorted(matcht, key=lambda x: len(x.groups())))][0]) else: print 'no title match' - rsm = re.search(r_s, summary) - if rsm and r_s: - for k, v in [(k, v) for k, v in rsm.groupdict().iteritems() if v]: - print '{}: {}'.format(k, v) + if matchs: + pprint.pprint( + [m.groupdict() for m in + reversed(sorted(matchs, key=lambda x: len(x.groups())))][0]) else: print 'no summary match' @@ -83,12 +146,11 @@ def main(): cr.test_entry('dedoelen', 'vr 5 mei08:00 uur - Abba live', '') cr.test_entry('paradiso', 'donderdag 13 november 2014 19:30 - Wouter Hamel', '') - cr.test_entry('paradiso', - 'zaterdag 27 september 2014 20:30 - A Great Big World - ' - 'Locatie: Tolhuistuin (zaal)', '') - cr.test_entry('paradiso', - 'zaterdag 31 mei 2014 - Lentekabinet Festival Afterparty - ' - 'Locatie: Tolhuistuin (zaal)', '') + cr.test_entry('paradiso', 'zaterdag 27 september 2014 20:30 - A Great Big ' + 'World - Locatie: Tolhuistuin (zaal)', '') + cr.test_entry('paradiso', 'zaterdag 27 september 2014 20:30 - A Great Big ' + 'World - Locatie: Tolhuistuin (zaal)', '') + cr.to_dot('paradiso', 't.dot') if __name__ == '__main__': main() diff --git a/program/everything/crawler.pyc b/program/everything/crawler.pyc index 9ea3de6..fb83e56 100644 Binary files a/program/everything/crawler.pyc and b/program/everything/crawler.pyc differ diff --git a/program/everything/dawg.py b/program/everything/dawg.py deleted file mode 100644 index be0757b..0000000 --- a/program/everything/dawg.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/bin/env python -# -*- coding: utf-8 -*- - -import pydawg - - -def to_dot(filepath, q0): - nodenum = 0 - final_nodes = [] - nodes = [] - edges = [] - to_visit = [(0, q0)] - visited = set() - translation = [] - if q0.final: - final_nodes.append(nodenum) - else: - nodes.append(nodenum) - - nodenum += 1 - while to_visit: - current = to_visit.pop() - if not current[0] in visited: - visited.add(current[0]) - for char, child in current[1].children.iteritems(): - matches = [c for c in translation if c[0] == child] - curnum = -1 - if matches: - curnum = matches[-1][1] - else: - translation.append((child, nodenum)) - curnum = nodenum - nodenum += 1 - if child.final: - final_nodes.append(curnum) - else: - nodes.append(curnum) - edges.append((current[0], char, curnum)) - to_visit.append((curnum, child)) - print 'digraph dawg {' - print '\tnode [shape = doublecircle]; {}'.format( - ' '.join(str(n) for n in final_nodes)) - print '\tnode [shape = circle]; {}'.format( - ' '.join(str(n) for n in nodes)) - for fr, ch, to in edges: - print '\t{} -> {} [label = "{}"];'.format(fr, to, ch) - print '}' - - -d = pydawg.DAWG() - -regs = [ - 'wdag dag maand jaar tijd - wat', - 'dag maand jaar tijd - wat', - 'wdag dag maand jaar tijd - wat', - 'wdag dag maand jaar tijd - wat - Locatie: waar', - 'wdag dag maand jaar tijd - wat - Locatie: waar'] - -#regs = [ -# 'maandag 11 augustus 2014 19:30 - Neutral Milk Hotel', -# 'dinsdag 19 augustus 2014 22:00 - Arkells', -# 'maandag 24 november 2014 20:30 - Fink', -# 'woensdag 19 november 2014 20:00 - Michael Schulte', -# 'zondag 26 oktober 2014 21:00 - The Majority Says - Locatie: Bitterzoet', -# 'maandag 15 september 2014 20:30 - Ani DiFranco', -# 'maandag 13 oktober 2014 20:30 - Tarrus Riley', -# 'maandag 29 december 2014 20:30 - Alain Clark - Locatie: De Duif'] -for w in sorted(set(regs)): - d.add_word(w) - -to_dot('t.dot', d.q0) diff --git a/program/everything/pydawg.pyc b/program/everything/pydawg.pyc index e6507ee..fcf006b 100644 Binary files a/program/everything/pydawg.pyc and b/program/everything/pydawg.pyc differ diff --git a/program/everything/t.dot b/program/everything/t.dot index ffe9df4..ba9f6bd 100644 Binary files a/program/everything/t.dot and b/program/everything/t.dot differ diff --git a/program/everything/uri.txt b/program/everything/uri.txt index ac33311..7dcb5de 100644 --- a/program/everything/uri.txt +++ b/program/everything/uri.txt @@ -3,3 +3,4 @@ http://www.tivoli.nl/rss/agenda/ http://www.stadsschouwburgendevereeniging.nl/_rss/rss.php?type=voorstellingen http://www.dedoelen.nl/_rss/rss.php?type=voorstellingen http://www.parktheater.nl/_rss/rss.php?type=voorstellingen +http://www.ticketunlimited.nl/ProductFeed/rssproductfeed.xml