if (curselection.endOffset - curselection.startOffset > 0)
selection = curselection;
console.log(selection)
- if (e.which == 1) document.getElementById("contextmenu").style.visibility = "hidden";
- else if (e.which == 3) mouse_right = false
+ if (e.which == 3) mouse_right = false
}
function mouseDown(e) {
(dp0
-S'Test1'
+S'Paradiso_test1'
p1
(dp2
S'website'
g1
sS'url'
p6
-S'http://www.paradiso.nl/rss.xml'
+S'localhost/py/paradiso.rss.xml'
p7
-sS'venue'
+sS'dloc'
p8
-S'Paradiso'
+S'test'
p9
-sS'dloc'
+sS'venue'
p10
-S'grote zaal'
+S'p'
p11
-sS'db'
+sS'content'
p12
-(dp13
-S'63662c13105245c8c98a5cc17443268a'
-p14
-(dp15
-S'raw'
+(lp13
+(lp14
+S'zaterdag 31 mei 2014 - Lentekabinet Festival Afterparty - Locatie: Tolhuistuin (zaal)'
+p15
+aS''
p16
-(Vzondag 30 november 2014 21:00 - Catfish & The Bottlemen - Locatie: Bitterzoet
-p17
-V
+aa(lp17
+S'<span class="uiWebviewHighlight" style="color: white; background-color: rgb(139, 0, 0);">vrijdag 4 juli 2014</span> <span class="uiWebviewHighlight" style="color: white; background-color: red;">20:30</span> - <span class="uiWebviewHighlight" style="color: white; background-color: green;">The Crimson Projekct</span> - Locatie: <span class="uiWebviewHighlight" style="color: white; background-color: blue;">Tolhuistuin (zaal)</span>'
p18
-tp19
-sS'results'
-p20
-(dp21
-S'waar2'
-p22
-VBitterzoet
+aS'Muziek rond King Crimson'
+p19
+aa(lp20
+S'dinsdag 10 juni 2014 20:30 - Het Ultieme Natuurkunde Feestje \xe2\x80\x93 keynote Amanda Gefter'
+p21
+ag16
+aa(lp22
+S'dinsdag 12 augustus 2014 21:00 - Kevin Drew - Locatie: Bitterzoet'
p23
-sS'tijd2'
+aS'mede-oprichter Broken Social Scene solo'
p24
-V21:00
-p25
-sS'wat2'
+aa(lp25
+S'<span class="uiWebviewHighlight" style="color: white; background-color: rgb(139, 0, 0);">vrijdag 4 juli 2014</span> <span class="uiWebviewHighlight" style="color: white; background-color: red;">22:00</span> - <span class="uiWebviewHighlight" style="color: white; background-color: green;">Palenke Soultribe</span>'
p26
-VCatfish & The Bottlemen
+aS'Electronische muziek en Afro-Colombiaanse ritmes'
p27
-sS'datum2'
-p28
-Vzondag 30 november 2014
+aa(lp28
+S'maandag 3 november 2014 20:15 - Eefje de Visser: Waterwereldsteden - Locatie: Het Concertgebouw'
p29
-sssS'497afcdf3c8fe95e5b63a7fd3483c88e'
-p30
-(dp31
-g16
-(Vdonderdag 4 december 2014 21:30 - She Keeps Bees
+ag16
+aa(lp30
+S'zaterdag 27 september 2014 20:30 - A Great Big World - Locatie: Tolhuistuin (zaal)'
+p31
+aS'Hitschrijvers uit New York'
p32
-g18
-tp33
-sg20
-(dp34
-S'tijd0'
+aa(lp33
+S'zaterdag 7 juni 2014 23:00 - Benefietavond Marokkaanse Boot'
+p34
+aS'Van Amsterdam naar Tanger'
p35
-V21:30
-p36
-sS'wat0'
+aa(lp36
+S'donderdag 13 november 2014 19:30 - Wouter Hamel'
p37
-VShe Keeps Bees
+aS'Sprankelende jazzy pop'
p38
-sS'datum0'
-p39
-Vdonderdag 4 december 2014
+aa(lp39
+S'vrijdag 13 juni 2014 00:00 - Legends'
p40
-sssS'bc20fb8295d411fc5dd3b2b1f6cb0f4e'
-p41
-(dp42
-g16
-(Vdonderdag 30 oktober 2014 22:00 - Dondergrondse: hosted by The Daily Indie @ Kelder
-p43
-g18
-tp44
-sg20
-(dp45
-g35
-V22:00
-p46
-sg37
-VDondergrondse: hosted by The Daily Indie @ Kelder
-p47
-sg39
-Vdonderdag 30 oktober 2014
-p48
-sssS'1aae9feccc63dfc41b653af9cebe97ed'
-p49
-(dp50
-g16
-(Vzaterdag 22 november 2014 21:00 - Selda feat. Boom Pam - Locatie: Bitterzoet
-p51
-g18
-tp52
-sg20
-(dp53
-g22
-VBitterzoet
-p54
-sg24
-V21:00
-p55
-sg26
-VSelda feat. Boom Pam
-p56
-sg28
-Vzaterdag 22 november 2014
-p57
-sssS'0e90f944f7d591d6e3c47bff6ba40301'
-p58
-(dp59
-g16
-(Vvrijdag 7 november 2014 20:00 - The Mahones
-p60
-g18
-tp61
-sg20
-(dp62
-g35
-V20:00
-p63
-sg37
-VThe Mahones
-p64
-sg39
-Vvrijdag 7 november 2014
-p65
-sssS'73801ebeb1dfc852b8a441d4534bfd37'
-p66
-(dp67
-g16
-(Vmaandag 24 november 2014 21:00 - Twin Forks - Locatie: Bitterzoet
-p68
-g18
-tp69
-sg20
-(dp70
-g22
-VBitterzoet
-p71
-sg24
-V21:00
-p72
-sg26
-VTwin Forks
-p73
-sg28
-Vmaandag 24 november 2014
-p74
-sssS'6ae02cb00a5deb63cb417a870900d3cc'
-p75
-(dp76
-g16
-(Vvrijdag 31 oktober 2014 20:30 - Rocket Cinema: Night of the Living Dead - Locatie: Paradiso Noord, Tolhuistuin (tuin)
-p77
-VZombie Horror In Tolhuistuin
-p78
-tp79
-sg20
-(dp80
-g22
-VParadiso Noord, Tolhuistuin (tuin)
-p81
-sg24
-V20:30
-p82
-sg26
-VRocket Cinema: Night of the Living Dead
-p83
-sg28
-Vvrijdag 31 oktober 2014
-p84
-sssS'fadc5b32d15a55b4bc60f5448a1c5342'
-p85
-(dp86
-g16
-(Vdonderdag 18 december 2014 19:30 - dEUS
-p87
-V\u2018\u2018Selected Songs 1994-2014\u201d
-p88
-tp89
-sg20
-(dp90
-g35
-V19:30
-p91
-sg37
-VdEUS
-p92
-sg39
-Vdonderdag 18 december 2014
-p93
-sssS'60632ff265f05913f6ebe8f6b0bf1995'
-p94
-(dp95
-g16
-(Vdonderdag 9 oktober 2014 23:30 - Dondergrondse: hosted by Sweet Dreams @ Kelder
-p96
-g18
-tp97
-sg20
-(dp98
-g35
-V23:30
-p99
-sg37
-VDondergrondse: hosted by Sweet Dreams @ Kelder
-p100
-sg39
-Vdonderdag 9 oktober 2014
-p101
-sssS'ae8b5e28b321c9caaca5a7b56d892670'
-p102
-(dp103
-g16
-(Vzondag 28 september 2014 11:00 - C4C Affordable Vintage & Fashion Fair - Locatie: Paradiso Noord, Tolhuistuin
-p104
-VVintage Fashion Markt & V.I.P. Shoppen
-p105
-tp106
-sg20
-(dp107
-g22
-VParadiso Noord, Tolhuistuin
-p108
-sg24
-V11:00
-p109
-sg26
-VC4C Affordable Vintage & Fashion Fair
-p110
-sg28
-Vzondag 28 september 2014
-p111
-sssS'c09b1ed0e482c4e27bb7c61fae4d0e15'
-p112
-(dp113
-g16
-(Vvrijdag 16 januari 2015 20:30 - Kraftwerk The Catalogue 12345678 in 3-D - Autobahn (1974)
-p114
-g18
-tp115
-sg20
-(dp116
-g35
-V3-D
-p117
-sg37
-VAutobahn (1974)
-p118
-sg39
-Vvrijdag 16 januari 2015 20:30 - Kraftwerk The Catalogue 12345678 in
-p119
-sssS'91e127c55ea69be373514604cd3f0d54'
-p120
-(dp121
-g16
-(Vzondag 8 maart 2015 20:30 - Thanasis Papakonstantinou & band
-p122
-g18
-tp123
-sg20
-(dp124
-g35
-V20:30
-p125
-sg37
-VThanasis Papakonstantinou & band
-p126
-sg39
-Vzondag 8 maart 2015
-p127
-sssS'492d8f49e92850c4b71345cf78b9cd5c'
-p128
-(dp129
-g16
-(Vzaterdag 15 november 2014 21:00 - Kris Berry & Perquisite afscheidsconcert - Locatie: Bitterzoet
-p130
-g18
-tp131
-sg20
-(dp132
-g22
-VBitterzoet
-p133
-sg24
-V21:00
-p134
-sg26
-VKris Berry & Perquisite afscheidsconcert
-p135
-sg28
-Vzaterdag 15 november 2014
-p136
-sssS'03e21261cc548333f352f3aeefb655ca'
-p137
-(dp138
-g16
-(Vwoensdag 29 oktober 2014 19:00 - Sofia Dragt
-p139
-g18
-tp140
-sg20
-(dp141
-g35
-V19:00
-p142
-sg37
-VSofia Dragt
-p143
-sg39
-Vwoensdag 29 oktober 2014
-p144
-sssS'05acd9f7674136b0497d96d1d7331ed2'
-p145
-(dp146
-g16
-(Vdonderdag 23 oktober 2014 23:30 - Noodlanding!
-p147
-VDansnacht, alternatieve hits
-p148
-tp149
-sg20
-(dp150
-g35
-V23:30
-p151
-sg37
-VNoodlanding!
-p152
-sg39
-Vdonderdag 23 oktober 2014
-p153
-sssS'6e517291b28ec790c33819a477869519'
-p154
-(dp155
-g16
-(Vvrijdag 31 oktober 2014 22:00 - Halloween Hairball - Locatie: Paradiso Noord, Tolhuistuin
-p156
-VThe Freakiest Halloween Special Everrrr...
-p157
-tp158
-sg20
-(dp159
-g22
-VParadiso Noord, Tolhuistuin
-p160
-sg24
-V22:00
-p161
-sg26
-VHalloween Hairball
-p162
-sg28
-Vvrijdag 31 oktober 2014
-p163
-sssS'7b6aeecede53f52256d7ba3278b35679'
-p164
-(dp165
-g16
-(Vzaterdag 4 oktober 2014 20:30 - Fixkes - Locatie: Paradiso Noord, Tolhuistuin
-p166
-g18
-tp167
-sg20
-(dp168
-g22
-VParadiso Noord, Tolhuistuin
-p169
-sg24
-V20:30
-p170
-sg26
-VFixkes
-p171
-sg28
-Vzaterdag 4 oktober 2014
-p172
-sssS'aca18d82bffadc7a8756531c0febec9f'
-p173
-(dp174
-g16
-(Vdonderdag 2 oktober 2014 22:00 - Scoop
-p175
-VParadiso's nieuwe Amsterdamse band-avond
-p176
-tp177
-sg20
-(dp178
-g35
-V22:00
-p179
-sg37
-VScoop
-p180
-sg39
-Vdonderdag 2 oktober 2014
-p181
-sssS'62db0423c9274dc47fdc8713e405cf14'
-p182
-(dp183
-g16
-(Vwoensdag 12 november 2014 20:30 - The Drums - Locatie: Paradiso Noord, Tolhuistuin
-p184
-VEncyclopedia
-p185
-tp186
-sg20
-(dp187
-g22
-VParadiso Noord, Tolhuistuin
-p188
-sg24
-V20:30
-p189
-sg26
-VThe Drums
-p190
-sg28
-Vwoensdag 12 november 2014
-p191
-sssS'c0fbf389b316e3f931dd33fb0ec51a43'
-p192
-(dp193
-g16
-(Vdonderdag 20 november 2014 20:30 - Bombay Bicycle Club
-p194
-VIntelligente indie
-p195
-tp196
-sg20
-(dp197
-g35
-V20:30
-p198
-sg37
-VBombay Bicycle Club
-p199
-sg39
-Vdonderdag 20 november 2014
-p200
-ssssS'last_run'
-p201
-F1410345938.705529
-sS'content'
-p202
-(lp203
-(lp204
-S'<span class="uiWebviewHighlight" style="color: white; background-color: rgb(139, 0, 0);">maandag 24 november 2014</span> <span class="uiWebviewHighlight" style="color: white; background-color: red;">21:00</span> - <span class="uiWebviewHighlight" style="color: white; background-color: green;">Twin Forks </span>- Locatie: <span class="uiWebviewHighlight" style="color: white; background-color: blue;">Bitterzoet</span>'
-p205
-aS''
-p206
-aa(lp207
-S'zaterdag 22 november 2014 21:00 - Selda feat. Boom Pam - Locatie: Bitterzoet'
-p208
-ag206
-aa(lp209
-S'<span class="uiWebviewHighlight" style="color: white; background-color: rgb(139, 0, 0);">donderdag 30 oktober 2014</span> <span class="uiWebviewHighlight" style="color: white; background-color: red;">22:00</span> - <span class="uiWebviewHighlight" style="color: white; background-color: green;">Dondergrondse: hosted by The Daily Indie @ Kelder</span>'
-p210
-ag206
-aa(lp211
-S'<span class="uiWebviewHighlight" style="color: white; background-color: rgb(139, 0, 0);">donderdag 9 oktober 2014</span> <span class="uiWebviewHighlight" style="color: white; background-color: red;">23:30</span> - <span class="uiWebviewHighlight" style="color: white; background-color: green;">Dondergrondse: hosted by Sweet Dreams @ Kelder</span>'
-p212
-ag206
-aa(lp213
-S'donderdag 23 oktober 2014 23:30 - Noodlanding!'
-p214
-aS'Dansnacht, alternatieve hits'
-p215
-aa(lp216
-S'donderdag 2 oktober 2014 22:00 - Scoop'
-p217
-aS"Paradiso's nieuwe Amsterdamse band-avond"
-p218
-aa(lp219
-S'donderdag 18 december 2014 19:30 - dEUS'
-p220
-aS'\xe2\x80\x98\xe2\x80\x98Selected Songs 1994-2014\xe2\x80\x9d'
-p221
-aa(lp222
-S'donderdag 4 december 2014 21:30 - She Keeps Bees'
-p223
-ag206
-aa(lp224
-S'donderdag 20 november 2014 20:30 - Bombay Bicycle Club'
-p225
-aS'Intelligente indie'
-p226
-aa(lp227
-S'<span class="uiWebviewHighlight" style="color: white; background-color: rgb(139, 0, 0);">vrijdag 31 oktober 2014</span> <span class="uiWebviewHighlight" style="color: white; background-color: red;">20:30</span> - <span class="uiWebviewHighlight" style="color: white; background-color: green;">Rocket Cinema: Night of the Living Dead</span> - Locatie: <span class="uiWebviewHighlight" style="color: white; background-color: blue;">Paradiso Noord, Tolhuistuin (tuin)</span>'
-p228
-aS'Zombie Horror In Tolhuistuin'
-p229
+ag16
aasS'headers'
-p230
-(lp231
-S'Title'
-p232
-aS'Summary'
-p233
-asS'summarydawg'
-p234
-(ipydawg
-DAWG
-p235
-(dp236
-S'q0'
-p237
-(ipydawg
-DAWGNode
-p238
-(dp239
-S'children'
-p240
-(dp241
-sS'final'
-p242
-I00
-sS'number'
-p243
-NsbsS'_numbers_valid'
-p244
-I00
-sS'register'
-p245
-c__builtin__
-set
-p246
-((lp247
-tp248
-Rp249
-sS'wp'
-p250
-g206
-sbsS'titledawg'
-p251
-(ipydawg
-DAWG
-p252
-(dp253
-g237
-(ipydawg
-DAWGNode
-p254
-(dp255
-g240
-(dp256
-S'\x01'
-p257
-(ipydawg
-DAWGNode
-p258
-(dp259
-g240
-(dp260
-S' '
-p261
-(ipydawg
-DAWGNode
-p262
-(dp263
-g240
-(dp264
-S'\x02'
-p265
-(ipydawg
-DAWGNode
-p266
-(dp267
-g240
-(dp268
-g261
-(ipydawg
-DAWGNode
-p269
-(dp270
-g240
-(dp271
-S'-'
-p272
-(ipydawg
-DAWGNode
-p273
-(dp274
-g240
-(dp275
-g261
-(ipydawg
-DAWGNode
-p276
-(dp277
-g240
-(dp278
-S'\x03'
-p279
-(ipydawg
-DAWGNode
-p280
-(dp281
-g240
-(dp282
-g261
-(ipydawg
-DAWGNode
-p283
-(dp284
-g240
-(dp285
-g272
-(ipydawg
-DAWGNode
-p286
-(dp287
-g240
-(dp288
-g261
-(ipydawg
-DAWGNode
-p289
-(dp290
-g240
-(dp291
-S'L'
-p292
-(ipydawg
-DAWGNode
-p293
-(dp294
-g240
-(dp295
-S'o'
-p296
-(ipydawg
-DAWGNode
-p297
-(dp298
-g240
-(dp299
-S'c'
-p300
-(ipydawg
-DAWGNode
-p301
-(dp302
-g240
-(dp303
-S'a'
-p304
-(ipydawg
-DAWGNode
-p305
-(dp306
-g240
-(dp307
-S't'
-p308
-(ipydawg
-DAWGNode
-p309
-(dp310
-g240
-(dp311
-S'i'
-p312
-(ipydawg
-DAWGNode
-p313
-(dp314
-g240
-(dp315
-S'e'
-p316
-(ipydawg
-DAWGNode
-p317
-(dp318
-g240
-(dp319
-S':'
-p320
-(ipydawg
-DAWGNode
-p321
-(dp322
-g240
-(dp323
-g261
-(ipydawg
-DAWGNode
-p324
-(dp325
-g240
-(dp326
-S'\x04'
-p327
-(ipydawg
-DAWGNode
-p328
-(dp329
-g240
-(dp330
-sg242
-I01
-sg243
-Nsbssg242
-I00
-sg243
-Nsbssg242
-I00
-sg243
-Nsbssg242
-I00
-sg243
-Nsbssg242
-I00
-sg243
-Nsbssg242
-I00
-sg243
-Nsbssg242
-I00
-sg243
-Nsbssg242
-I00
-sg243
-Nsbssg242
-I00
-sg243
-Nsbssg242
-I00
-sg243
-Nsbssg242
-I00
-sg243
-Nsbssg242
-I00
-sg243
-Nsbssg242
-I00
-sg243
-Nsbsg272
-(ipydawg
-DAWGNode
-p331
-(dp332
-g240
-(dp333
-g261
-(ipydawg
-DAWGNode
-p334
-(dp335
-g240
-(dp336
-g292
-(ipydawg
-DAWGNode
-p337
-(dp338
-g240
-(dp339
-g296
-(ipydawg
-DAWGNode
-p340
-(dp341
-g240
-(dp342
-g300
-(ipydawg
-DAWGNode
-p343
-(dp344
-g240
-(dp345
-g304
-(ipydawg
-DAWGNode
-p346
-(dp347
-g240
-(dp348
-g308
-(ipydawg
-DAWGNode
-p349
-(dp350
-g240
-(dp351
-g312
-(ipydawg
-DAWGNode
-p352
-(dp353
-g240
-(dp354
-g316
-(ipydawg
-DAWGNode
-p355
-(dp356
-g240
-(dp357
-g320
-(ipydawg
-DAWGNode
-p358
-(dp359
-g240
-(dp360
-g261
-(ipydawg
-DAWGNode
-p361
-(dp362
-g240
-(dp363
-g327
-(ipydawg
-DAWGNode
-p364
-(dp365
-g240
-(dp366
-sg242
-I01
-sg243
-Nsbssg242
-I00
-sg243
-Nsbssg242
-I00
-sg243
-Nsbssg242
-I00
-sg243
-Nsbssg242
-I00
-sg243
-Nsbssg242
-I00
-sg243
-Nsbssg242
-I00
-sg243
-Nsbssg242
-I00
-sg243
-Nsbssg242
-I00
-sg243
-Nsbssg242
-I00
-sg243
-Nsbssg242
-I00
-sg243
-Nsbssg242
-I00
-sg243
-Nsbssg242
-I01
-sg243
-Nsbssg242
-I00
-sg243
-Nsbssg242
-I00
-sg243
-Nsbssg242
-I00
-sg243
-Nsbssg242
-I00
-sg243
-Nsbssg242
-I00
-sg243
-Nsbssg242
-I00
-sg243
-Nsbssg242
-I00
-sg243
-Nsbsg244
-I00
-sg245
-g246
-((lp367
-g305
-ag317
-ag309
-ag283
-ag313
-ag286
-ag289
-ag321
-ag293
-ag297
-ag324
-ag328
-ag301
-atp368
-Rp369
-sg250
-S'\x01 \x02 - \x03- Locatie: \x04'
-p370
-sbsS'freq'
-p371
-S'1w'
-p372
-sS'adress'
-p373
-S'test'
-p374
-ssS'paradiso'
-p375
-(dp376
-S'website'
-p377
-S'www.paradiso.nl'
-p378
-sS'name'
-p379
-g375
-sS'url'
-p380
-S'http://www.paradiso.nl/rss.xml'
-p381
-sS'db'
-p382
-(dp383
-S'63662c13105245c8c98a5cc17443268a'
-p384
-(dp385
-S'raw'
-p386
-(Vzondag 30 november 2014 21:00 - Catfish & The Bottlemen - Locatie: Bitterzoet
-p387
-g18
-tp388
-sS'results'
-p389
-(dp390
-S'waar1'
-p391
-VBitterzoet
-p392
-sS'datum1'
-p393
-Vzondag 30 november 2014
-p394
-sS'tijd1'
-p395
-V21:00
-p396
-sS'wat1'
-p397
-VCatfish & The Bottlemen
-p398
-sssS'497afcdf3c8fe95e5b63a7fd3483c88e'
-p399
-(dp400
-g386
-(Vdonderdag 4 december 2014 21:30 - She Keeps Bees
-p401
-g18
-tp402
-sg389
-(dp403
-S'wat0'
-p404
-VShe Keeps Bees
-p405
-sS'tijd0'
-p406
-V21:30
-p407
-sS'datum0'
-p408
-Vdonderdag 4 december 2014
-p409
-sssS'bc20fb8295d411fc5dd3b2b1f6cb0f4e'
-p410
-(dp411
-g386
-(Vdonderdag 30 oktober 2014 22:00 - Dondergrondse: hosted by The Daily Indie @ Kelder
-p412
-g18
-tp413
-sg389
-(dp414
-g404
-VDondergrondse: hosted by The Daily Indie @ Kelder
-p415
-sg406
-V22:00
-p416
-sg408
-Vdonderdag 30 oktober 2014
-p417
-sssS'1aae9feccc63dfc41b653af9cebe97ed'
-p418
-(dp419
-g386
-(Vzaterdag 22 november 2014 21:00 - Selda feat. Boom Pam - Locatie: Bitterzoet
-p420
-g18
-tp421
-sg389
-(dp422
-g391
-VBitterzoet
-p423
-sg393
-Vzaterdag 22 november 2014
-p424
-sg395
-V21:00
-p425
-sg397
-VSelda feat. Boom Pam
-p426
-sssS'03e21261cc548333f352f3aeefb655ca'
-p427
-(dp428
-g386
-(Vwoensdag 29 oktober 2014 19:00 - Sofia Dragt
-p429
-g18
-tp430
-sg389
-(dp431
-g404
-VSofia Dragt
-p432
-sg406
-V19:00
-p433
-sg408
-Vwoensdag 29 oktober 2014
-p434
-sssS'62db0423c9274dc47fdc8713e405cf14'
-p435
-(dp436
-g386
-(Vwoensdag 12 november 2014 20:30 - The Drums - Locatie: Paradiso Noord, Tolhuistuin
-p437
-VEncyclopedia
-p438
-tp439
-sg389
-(dp440
-g393
-Vwoensdag 12 november 2014
-p441
-sg391
-VParadiso Noord, Tolhuistuin
-p442
-sS'wat0'
-p443
-g438
-sg395
-V20:30
-p444
-sg397
-VThe Drums
-p445
-sssS'6ae02cb00a5deb63cb417a870900d3cc'
-p446
-(dp447
-g386
-(Vvrijdag 31 oktober 2014 20:30 - Rocket Cinema: Night of the Living Dead - Locatie: Paradiso Noord, Tolhuistuin (tuin)
-p448
-VZombie Horror In Tolhuistuin
-p449
-tp450
-sg389
-(dp451
-g393
-Vvrijdag 31 oktober 2014
-p452
-sg391
-VParadiso Noord, Tolhuistuin (tuin)
-p453
-sg443
-g449
-sg395
-V20:30
-p454
-sg397
-VRocket Cinema: Night of the Living Dead
-p455
-sssS'fadc5b32d15a55b4bc60f5448a1c5342'
-p456
-(dp457
-g386
-(Vdonderdag 18 december 2014 19:30 - dEUS
-p458
-V\u2018\u2018Selected Songs 1994-2014\u201d
-p459
-tp460
-sg389
-(dp461
-g406
-V19:30
-p462
-sg443
-VdEUS
-p463
-sg408
-Vdonderdag 18 december 2014
-p464
-sssS'60632ff265f05913f6ebe8f6b0bf1995'
-p465
-(dp466
-g386
-(Vdonderdag 9 oktober 2014 23:30 - Dondergrondse: hosted by Sweet Dreams @ Kelder
-p467
-g18
-tp468
-sg389
-(dp469
-g404
-VDondergrondse: hosted by Sweet Dreams @ Kelder
-p470
-sg406
-V23:30
-p471
-sg408
-Vdonderdag 9 oktober 2014
-p472
-sssS'ae8b5e28b321c9caaca5a7b56d892670'
-p473
-(dp474
-g386
-(Vzondag 28 september 2014 11:00 - C4C Affordable Vintage & Fashion Fair - Locatie: Paradiso Noord, Tolhuistuin
-p475
-VVintage Fashion Markt & V.I.P. Shoppen
-p476
-tp477
-sg389
-(dp478
-g393
-Vzondag 28 september 2014
-p479
-sg391
-VParadiso Noord, Tolhuistuin
-p480
-sg443
-g476
-sg395
-V11:00
-p481
-sg397
-VC4C Affordable Vintage & Fashion Fair
-p482
-sssS'c09b1ed0e482c4e27bb7c61fae4d0e15'
-p483
-(dp484
-g386
-(Vvrijdag 16 januari 2015 20:30 - Kraftwerk The Catalogue 12345678 in 3-D - Autobahn (1974)
-p485
-g18
-tp486
-sg389
-(dp487
-g404
-VAutobahn (1974)
-p488
-sg406
-V3-D
-p489
-sg408
-Vvrijdag 16 januari 2015 20:30 - Kraftwerk The Catalogue 12345678 in
-p490
-sssS'91e127c55ea69be373514604cd3f0d54'
-p491
-(dp492
-g386
-(Vzondag 8 maart 2015 20:30 - Thanasis Papakonstantinou & band
-p493
-g18
-tp494
-sg389
-(dp495
-g404
-VThanasis Papakonstantinou & band
-p496
-sg406
-V20:30
-p497
-sg408
-Vzondag 8 maart 2015
-p498
-sssS'492d8f49e92850c4b71345cf78b9cd5c'
-p499
-(dp500
-g386
-(Vzaterdag 15 november 2014 21:00 - Kris Berry & Perquisite afscheidsconcert - Locatie: Bitterzoet
-p501
-g18
-tp502
-sg389
-(dp503
-g391
-VBitterzoet
-p504
-sg393
-Vzaterdag 15 november 2014
-p505
-sg395
-V21:00
-p506
-sg397
-VKris Berry & Perquisite afscheidsconcert
-p507
-sssS'73801ebeb1dfc852b8a441d4534bfd37'
-p508
-(dp509
-g386
-(Vmaandag 24 november 2014 21:00 - Twin Forks - Locatie: Bitterzoet
-p510
-g18
-tp511
-sg389
-(dp512
-g391
-VBitterzoet
-p513
-sg393
-Vmaandag 24 november 2014
-p514
-sg395
-V21:00
-p515
-sg397
-VTwin Forks
-p516
-sssS'05acd9f7674136b0497d96d1d7331ed2'
-p517
-(dp518
-g386
-(Vdonderdag 23 oktober 2014 23:30 - Noodlanding!
-p519
-VDansnacht, alternatieve hits
-p520
-tp521
-sg389
-(dp522
-g406
-V23:30
-p523
-sg443
-VNoodlanding!
-p524
-sg408
-Vdonderdag 23 oktober 2014
-p525
-sssS'6e517291b28ec790c33819a477869519'
-p526
-(dp527
-g386
-(Vvrijdag 31 oktober 2014 22:00 - Halloween Hairball - Locatie: Paradiso Noord, Tolhuistuin
-p528
-VThe Freakiest Halloween Special Everrrr...
-p529
-tp530
-sg389
-(dp531
-g393
-Vvrijdag 31 oktober 2014
-p532
-sg391
-VParadiso Noord, Tolhuistuin
-p533
-sg443
-g529
-sg395
-V22:00
-p534
-sg397
-VHalloween Hairball
-p535
-sssS'7b6aeecede53f52256d7ba3278b35679'
-p536
-(dp537
-g386
-(Vzaterdag 4 oktober 2014 20:30 - Fixkes - Locatie: Paradiso Noord, Tolhuistuin
-p538
-g18
-tp539
-sg389
-(dp540
-g391
-VParadiso Noord, Tolhuistuin
-p541
-sg393
-Vzaterdag 4 oktober 2014
-p542
-sg395
-V20:30
-p543
-sg397
-VFixkes
-p544
-sssS'aca18d82bffadc7a8756531c0febec9f'
-p545
-(dp546
-g386
-(Vdonderdag 2 oktober 2014 22:00 - Scoop
-p547
-VParadiso's nieuwe Amsterdamse band-avond
-p548
-tp549
-sg389
-(dp550
-g406
-V22:00
-p551
-sg443
-VScoop
-p552
-sg408
-Vdonderdag 2 oktober 2014
-p553
-sssS'0e90f944f7d591d6e3c47bff6ba40301'
-p554
-(dp555
-g386
-(Vvrijdag 7 november 2014 20:00 - The Mahones
-p556
-g18
-tp557
-sg389
-(dp558
-g404
-VThe Mahones
-p559
-sg406
-V20:00
-p560
-sg408
-Vvrijdag 7 november 2014
-p561
-sssS'c0fbf389b316e3f931dd33fb0ec51a43'
-p562
-(dp563
-g386
-(Vdonderdag 20 november 2014 20:30 - Bombay Bicycle Club
-p564
-VIntelligente indie
-p565
-tp566
-sg389
-(dp567
-g406
-V20:30
-p568
-sg443
-VBombay Bicycle Club
-p569
-sg408
-Vdonderdag 20 november 2014
-p570
-ssssS'dloc'
-p571
-S'nee'
-p572
-sS'venue'
-p573
-S'Paradiso'
-p574
-sS'last_run'
-p575
-F1410338734.431591
-sS'content'
-p576
-(lp577
-(lp578
-S'<span class="uiWebviewHighlight" style="color: white; background-color: rgb(139, 0, 0);">zaterdag 22 november 2014</span> <span class="uiWebviewHighlight" style="color: white; background-color: red;">21:00</span> - <span class="uiWebviewHighlight" style="color: white; background-color: green;">Selda feat. Boom Pam</span> - Locatie: <span class="uiWebviewHighlight" style="color: white; background-color: blue;">Bitterzoet</span>'
-p579
-ag206
-aa(lp580
-S'<span class="uiWebviewHighlight" style="color: white; background-color: rgb(139, 0, 0);">donderdag 30 oktober 2014</span> <span class="uiWebviewHighlight" style="color: white; background-color: red;">22:00</span> - <span class="uiWebviewHighlight" style="color: white; background-color: green;">Dondergrondse: hosted by The Daily Indie @ Kelder</span>'
-p581
-ag206
-aa(lp582
-S'<span class="uiWebviewHighlight" style="color: white; background-color: rgb(139, 0, 0);">donderdag 9 oktober 2014</span> <span class="uiWebviewHighlight" style="color: white; background-color: red;">23:30</span> - <span class="uiWebviewHighlight" style="color: white; background-color: green;">Dondergrondse: hosted by Sweet Dreams @ Kelder</span>'
-p583
-ag206
-aa(lp584
-S'<span class="uiWebviewHighlight" style="color: white; background-color: rgb(139, 0, 0);">donderdag 23 oktober 2014</span> <span class="uiWebviewHighlight" style="color: white; background-color: red;">23:30</span> - <span class="uiWebviewHighlight" style="color: white; background-color: green;">Noodlanding!</span>'
-p585
-aS'<span class="uiWebviewHighlight" style="color: white; background-color: green;">Dansnacht, alternatieve hits</span>'
-p586
-aa(lp587
-S'<span class="uiWebviewHighlight" style="color: white; background-color: rgb(139, 0, 0);">donderdag 2 oktober 2014</span> <span class="uiWebviewHighlight" style="color: white; background-color: red;">22:00</span> - <span class="uiWebviewHighlight" style="color: white; background-color: green;">Scoop</span>'
-p588
-aS"Paradiso's nieuwe Amsterdamse band-avond"
-p589
-aa(lp590
-S'donderdag 18 december 2014 19:30 - dEUS'
-p591
-aS'\xe2\x80\x98\xe2\x80\x98Selected Songs 1994-2014\xe2\x80\x9d'
-p592
-aa(lp593
-S'donderdag 4 december 2014 21:30 - She Keeps Bees'
-p594
-ag206
-aa(lp595
-S'donderdag 20 november 2014 20:30 - Bombay Bicycle Club'
-p596
-aS'Intelligente indie'
-p597
-aa(lp598
-S'vrijdag 31 oktober 2014 20:30 - Rocket Cinema: Night of the Living Dead - Locatie: Paradiso Noord, Tolhuistuin (tuin)'
-p599
-aS'Zombie Horror In Tolhuistuin'
-p600
-aa(lp601
-S'vrijdag 31 oktober 2014 22:00 - Halloween Hairball - Locatie: Paradiso Noord, Tolhuistuin'
-p602
-aS'The Freakiest Halloween Special Everrrr...'
-p603
-aasS'headers'
-p604
-(lp605
+p41
+(lp42
S'Title'
-p606
+p43
aS'Summary'
-p607
+p44
asS'summarydawg'
-p608
-(ipydawg
-DAWG
-p609
-(dp610
-g237
-(ipydawg
-DAWGNode
-p611
-(dp612
-g242
-I00
-sg243
-Nsg240
-(dp613
-S'\x03'
-p614
-(ipydawg
-DAWGNode
-p615
-(dp616
-g242
-I01
-sg243
-Nsg240
-(dp617
-sbssbsg244
-I00
-sg245
-g246
-((lp618
-tp619
-Rp620
-sg250
-g614
-sbsS'titledawg'
-p621
-(ipydawg
-DAWG
-p622
-(dp623
-g237
-(ipydawg
-DAWGNode
-p624
-(dp625
-g242
-I00
-sg243
-Nsg240
-(dp626
-S'\x01'
-p627
-(ipydawg
-DAWGNode
-p628
-(dp629
-g242
-I00
-sg243
-Nsg240
-(dp630
-S' '
-p631
-(ipydawg
-DAWGNode
-p632
-(dp633
-g242
-I00
-sg243
-Nsg240
-(dp634
-S'\x02'
-p635
-(ipydawg
-DAWGNode
-p636
-(dp637
-g242
-I00
-sg243
-Nsg240
-(dp638
-g631
-(ipydawg
-DAWGNode
-p639
-(dp640
-g242
-I00
-sg243
-Nsg240
-(dp641
-S'-'
-p642
-(ipydawg
-DAWGNode
-p643
-(dp644
-g242
-I00
-sg243
-Nsg240
-(dp645
-g631
-(ipydawg
-DAWGNode
-p646
-(dp647
-g242
-I00
-sg243
-Nsg240
-(dp648
-g614
-(ipydawg
-DAWGNode
-p649
-(dp650
-g242
-I01
-sg243
-Nsg240
-(dp651
-g631
-(ipydawg
-DAWGNode
-p652
-(dp653
-g242
-I00
-sg243
-Nsg240
-(dp654
-g631
-(ipydawg
-DAWGNode
-p655
-(dp656
-g242
-I00
-sg243
-Nsg240
-(dp657
-g631
-(ipydawg
-DAWGNode
-p658
-(dp659
-g242
-I00
-sg243
-Nsg240
-(dp660
-g642
-(ipydawg
-DAWGNode
-p661
-(dp662
-g242
-I00
-sg243
-Nsg240
-(dp663
-g631
-(ipydawg
-DAWGNode
-p664
-(dp665
-g242
-I00
-sg243
-Nsg240
-(dp666
-g631
-(ipydawg
-DAWGNode
-p667
-(dp668
-g242
-I00
-sg243
-Nsg240
-(dp669
-g631
-(ipydawg
-DAWGNode
-p670
-(dp671
-g242
-I00
-sg243
-Nsg240
-(dp672
-S'L'
-p673
-(ipydawg
-DAWGNode
-p674
-(dp675
-g242
-I00
-sg243
-Nsg240
-(dp676
-S'o'
-p677
-(ipydawg
-DAWGNode
-p678
-(dp679
-g242
-I00
-sg243
-Nsg240
-(dp680
-S'c'
-p681
-(ipydawg
-DAWGNode
-p682
-(dp683
-g242
-I00
-sg243
-Nsg240
-(dp684
-S'a'
-p685
-(ipydawg
-DAWGNode
-p686
-(dp687
-g242
-I00
-sg243
-Nsg240
-(dp688
-S't'
-p689
-(ipydawg
-DAWGNode
-p690
-(dp691
-g242
-I00
-sg243
-Nsg240
-(dp692
-S'i'
-p693
-(ipydawg
-DAWGNode
-p694
-(dp695
-g242
-I00
-sg243
-Nsg240
-(dp696
-S'e'
-p697
-(ipydawg
-DAWGNode
-p698
-(dp699
-g242
-I00
-sg243
-Nsg240
-(dp700
-S':'
-p701
-(ipydawg
-DAWGNode
-p702
-(dp703
-g242
-I00
-sg243
-Nsg240
-(dp704
-g631
-(ipydawg
-DAWGNode
-p705
-(dp706
-g242
-I00
-sg243
-Nsg240
-(dp707
-S'\x04'
-p708
-(ipydawg
-DAWGNode
-p709
-(dp710
-g242
-I01
-sg243
-Nsg240
-(dp711
-sbssbssbssbssbssbssbssbssbssbssbssbssbssbssbssbssbssbssbssbssbssbssbssbssbsg244
-I00
-sg245
-g246
-((lp712
-tp713
-Rp714
-sg250
+p45
+(lp46
+sS'titledawg'
+p47
+(lp48
S'\x01 \x02 - \x03 - Locatie: \x04'
-p715
-sbsS'freq'
-p716
+p49
+aS'\x01 \x02 - \x03'
+p50
+asS'freq'
+p51
S'1w'
-p717
+p52
sS'adress'
-p718
-S'amsterdam'
-p719
+p53
+S'adres'
+p54
ss.
\ No newline at end of file
import re
import sys
import time
+import pydawg
URL_REG = re.compile(
class Crawler():
- def __init__(self, dbfile='./crawler.db'):
+ def __init__(self, dbfile='/var/www/py/crawler.db', init=False):
if not os.path.exists(dbfile):
self.entries = {}
else:
with open(dbfile, 'rb') as f:
self.entries = pickle.loads(f.read())
+ if init:
+ for k, v in self.entries.iteritems():
+ if 'titledawg' in v and 'summarydawg' in v:
+ v['titledawg_t'] = pydawg.DAWG()
+ for t in sorted(set(v['titledawg'])):
+ v['titledawg_t'].add_word(t)
+ v['summarydawg_t'] = pydawg.DAWG()
+ for t in sorted(set(v['summarydawg'])):
+ v['summarydawg_t'].add_word(t)
def list_names(self):
- return str(self.entries.keys())
+ return self.entries.keys()
def add_entry(self, d):
if d['name'] in self.entries:
- print 'content already present... skipping'
+ raise Exception('That name is already present')
else:
self.entries[d['name']] = d
- for e in self.entries:
- print e
- def write(self, path='./crawler.db'):
+ def write(self, path='/var/www/py/crawler.db'):
+ entries2 = {kk: {k: v for k, v in vv.iteritems()
+ if k not in ['summarydawg_t', 'titledawg_t']}
+ for kk, vv in self.entries.iteritems()}
if os.path.exists(path):
os.rename(path, '{}.bak'.format(path))
try:
with open(path, 'wb') as f:
- f.write(pickle.dumps(self.entries))
+ f.write(pickle.dumps(entries2))
except Exception, e:
- print 'something went wrong writing: {}'.format(e)
- print 'restoring backup'
+ # print 'something went wrong writing: {}'.format(e)
+ # print 'restoring backup'
+ raise e
os.rename('{}.bak'.format(path), path)
finally:
if os.path.exists('{}.bak'.format(path)):
os.remove('{}.bak'.format(path))
def get_regex(self, name):
- d_t = self.entries[name]['titledawg']
- d_s = self.entries[name]['summarydawg']
+ d_t = self.entries[name]['titledawg_t']
+ d_s = self.entries[name]['summarydawg_t']
r_t, r_s = [], []
for i, w in enumerate(d_t.words()):
w = reduce(lambda x, y: x.replace(y[0], y[1].format(i)), REPL, w)
'results': results,
'raw': (i['title'], i['summary'])
}
- print edict['db'][hashvalue]['raw']
- print edict['db'][hashvalue]['results']
- print hashvalue
+ # print edict['db'][hashvalue]['raw']
+ # print edict['db'][hashvalue]['results']
+ # print hashvalue
raw_input('Press enter for the next one')
-def main():
- if len(sys.argv) == 5 and sys.argv[1] == 'test':
+def main(argv):
+ if len(argv) == 5 and argv[1] == 'test':
+ cr = Crawler(init=True)
+ print cr.test_entry(*argv[2:])
+ elif len(argv) == 3 and argv[1] == 'del':
cr = Crawler()
- print cr.test_entry(*sys.argv[2:])
- elif len(sys.argv) == 3 and sys.argv[1] == 'del':
- cr = Crawler()
- if sys.argv[2] in cr.entries:
- del(cr.entries[sys.argv[2]])
+ if argv[2] in cr.entries:
+ del(cr.entries[argv[2]])
print 'Succesfull'
cr.write()
else:
- print '{} not in the entries'.format(sys.argv[2])
- elif len(sys.argv) == 3 and sys.argv[1] == 'export':
+ print '{} not in the entries'.format(argv[2])
+ elif len(argv) == 3 and argv[1] == 'export':
cr = Crawler()
for k, v in cr.entries.iteritems():
print k, '----'
for kk, vv in sorted(v.iteritems()):
print kk, ':', vv
- elif len(sys.argv) == 5 and sys.argv[1] == 'edit':
+ elif len(argv) == 5 and argv[1] == 'edit':
cr = Crawler()
- name, key, value = sys.argv[2:]
+ name, key, value = argv[2:]
cr.entries[name][key] = value
cr.write()
- elif len(sys.argv) >= 2 and sys.argv[1] == 'run':
- args = sys.argv[2:]
+ elif len(argv) >= 2 and argv[1] == 'run':
+ args = argv[2:]
force = True if '-f' in args else False
cr = Crawler()
to_run = []
else:
print 'Skipping because last run was within interval'
cr.write()
- elif len(sys.argv) == 2 and sys.argv[1] == 'list':
+ elif len(argv) == 2 and argv[1] == 'list':
cr = Crawler()
- print cr.list_names()
+ print str(cr.list_names())
else:
print ('Usage:\n'
'\t{0} del crawlername\n'
'\t{0} export FILE\n'
'\t{0} list\n'
'\t{0} run -f {{item1 item2 ...|all}}\n'
- '\t{0} test crawlername title summary\n').format(sys.argv[0])
+ '\t{0} test crawlername title summary\n').format(argv[0])
if __name__ == '__main__':
- main()
+ main(sys.argv)
+++ /dev/null
-#!/bin/env python
-# -*- coding: utf-8 -*-
-
-import ast
-import logging
-import re
-import pydawg
-import crawler
-
-
-def structure_data(d):
- re_hdr = re.compile('<th>(?P<h>.*?)</th>', flags=re.MULTILINE | re.DOTALL)
- re_row = re.compile('<tr>(?P<row>.*)</tr>', flags=re.MULTILINE | re.DOTALL)
- re_dualcel = re.compile('<td id="cel">(?P<c>.*?)</td><!--cel-->',
- flags=re.MULTILINE | re.DOTALL)
- con = d['content']
- d['content'] = []
- d['headers'] = []
- for line in con.split('\n\t\t'):
- if not line:
- continue
- row = re_row.search(line)
- row = row.group('row')
- for header in re_hdr.finditer(row):
- d['headers'].append(header.group('h'))
- d['content'].append([])
- for cell in re_dualcel.finditer(row):
- d['content'][-1].append(cell.group('c'))
-
-
-def parse_line(line):
- re_spa = re.compile('(?P<b><span.*?background-color:\s*(?P<c>.*?);.*?>)(?P'
- '<content>.*?)(?P<e></span>)')
- results = []
- for column in line:
- results.append([])
- markings = list(re_spa.finditer(column))
- if markings:
- results[-1].append(markings)
- return results
-
-
-def create_nodes(d):
- color_dict = {
- 'rgb(139, 0, 0)': '\x01', # datum
- 'red': '\x02', # tijd
- 'green': '\x03', # wat
- 'blue': '\x04' # wanneer
- }
- line_w_match = []
- d['content'] = d['content'][1:]
- for i, m in enumerate(d['matchdata']):
- if filter(None, m):
- line_w_match.append((d['content'][i], m))
- nodelists = {'Title': [], 'Summary': []}
- for (title_l, summary_l), (title_m, summary_m) in line_w_match:
- # Title
- if title_m:
- title = title_m[0]
- matches = reversed(sorted(title, key=lambda x: x.end('e')))
- for match in matches:
- title_l = title_l[:match.start('e')] + title_l[match.end('e'):]
- title_l = title_l[:match.start('content')] +\
- color_dict[match.group('c').strip()] +\
- title_l[match.end('content'):]
- title_l = title_l[:match.start('b')] + title_l[match.end('b'):]
- nodelists['Title'].append(title_l)
- # Summary
- if summary_m:
- summary = summary_m[0]
- matches = reversed(sorted(summary, key=lambda x: x.end('e')))
- for match in matches:
- summary_l = summary_l[:match.start('e')] +\
- summary_l[match.end('e'):]
- summary_l = summary_l[:match.start('content')] +\
- color_dict[match.group('c').strip()] +\
- summary_l[match.end('content'):]
- summary_l = summary_l[:match.start('b')] +\
- summary_l[match.end('b'):]
- nodelists['Summary'].append(summary_l)
- return nodelists
-
-
-def to_dot(q0):
- nodenum = 0
- final_nodes = []
- nodes = []
- edges = []
- to_visit = [(0, q0)]
- visited = set()
- translation = []
- if q0.final:
- final_nodes.append(nodenum)
- else:
- nodes.append(nodenum)
-
- nodenum += 1
- while to_visit:
- current = to_visit.pop()
- if not current[0] in visited:
- visited.add(current[0])
- for char, child in current[1].children.iteritems():
- matches = [c for c in translation if c[0] == child]
- curnum = -1
- if matches:
- curnum = matches[-1][1]
- else:
- translation.append((child, nodenum))
- curnum = nodenum
- nodenum += 1
- if child.final:
- final_nodes.append(curnum)
- else:
- nodes.append(curnum)
- edges.append((current[0], char, curnum))
- to_visit.append((curnum, child))
- print 'digraph dawg {'
- print '\tnode [shape = doublecircle]; {}'.format(
- ' '.join(str(n) for n in final_nodes))
- print '\tnode [shape = circle]; {}'.format(
- ' '.join(str(n) for n in nodes))
- for fr, ch, to in edges:
- print '\t{} -> {} [label = "{}"];'.format(fr, to, ch)
- print '}'
-
-
-def main():
- with open('./output_data/raw_out.txt', 'r') as data:
- logging.info('raw data loaded, going to parse data')
- d = data.readline()
- d = re.sub('\)\]}$', '}',
- re.sub('\)\],', ',',
- re.sub('\[Field\(\'.*?\', ', '', d)))
- d = ast.literal_eval(d)
- logging.info('raw data parsed, going to structure data')
- structure_data(d)
- logging.info('data structured, parsed headers: {}'.format(d['headers']))
- logging.info('lines: {}'.format(len(d['content'])))
- d['matchdata'] = []
- for line in filter(None, d['content']):
- d['matchdata'].append(parse_line(line))
- nodelists = create_nodes(d)
- titledawg = pydawg.DAWG()
- for n in sorted(set(nodelists['Title'])):
- titledawg.add_word(n)
- summarydawg = pydawg.DAWG()
- for n in sorted(set(nodelists['Summary'])):
- summarydawg.add_word(n)
- raw_input('Going to write to crawler and finish up ok?\n')
- crawl = crawler.Crawler()
- d['titledawg'] = titledawg
- d['summarydawg'] = summarydawg
- del(d['matchdata'])
- crawl.add_entry(d)
- crawl.write()
-
-
-if __name__ == '__main__':
- logging.basicConfig(level=logging.WARNING)
- main()
--- /dev/null
+#!/bin/env python
+# -*- coding: utf-8 -*-
+
+import crawler
+
+
+def index(req, args, apok):
+ req.log_error('handler')
+ req.content_type = 'text/html'
+ req.send_http_header()
+ with open('/var/www/py/main.html.t', 'r') as f:
+ data = f.read()
+ cr = crawler.Crawler('/var/www/py/crawler.db')
+ ns = cr.list_names()
+ params = {
+ 'active_crawlers':
+ '\n'.join('<a href="./crawler_edit.py?url={0}">{0}</a><br>'.
+ format(a) for a in ns),
+ 'active_crawlers_dropdown':
+ '\n'.join('<option value={0}>{0}</option>'.format(a) for a in ns)
+ }
+ req.write(data.format(**params))
+ return apok
+
+
+def crawler_edit(req, args, apok):
+ return apok
+
+
+def crawler_test(req, args, apok):
+ return apok
from mod_python import apache, util
import feedparser
+import index
+import crawler
import re
import urllib
-import os
def req_pre_pos(req):
+ # Handles a submission: passes the request's form fields to data_main()
+ # and answers with a small HTML page showing the value it returned.
req.content_type = 'text/html'
req.send_http_header()
args = util.FieldStorage(req)
+ listing = data_main(args)
req.write(
'<html>\n<head>\n'
'\t<title>VER: 0.01 - HyperFrontend RSS feed POSTREQUEST</title>'
'</head>\n<body>\n'
'\tThanks submitting: <br />\n'
- '\t<a href="index.html">Enter new rss feed</a>\n<pre>\n'
- '{}\n</pre>\n</body>\n</html>'.format(args))
- os.chdir('/var/www/py/files')
- with open('raw_out.txt', 'w') as f:
- f.write(str(args))
+ '\t<a href="index.py">Go back...</a>\n<pre>\n'
+ 'Current crawlers: {}\n</pre>\n</body>\n</html>'.format(listing))
+
+
+def structure_data(d):
+    """Split the submitted table markup into header names and cell rows.
+
+    d['content'] is read as a string of table rows separated by '\\n\\t\\t'.
+    It is replaced in place by a list with one entry per <tr> row, each
+    entry being the list of that row's '<td id="cel">' cell contents.
+    d['headers'] is set to the list of <th> texts found.
+
+    A header-only row therefore contributes an empty list to d['content'].
+    Fragments that contain no <tr>...</tr> pair are skipped.
+    """
+    re_hdr = re.compile('<th>(?P<h>.*?)</th>', flags=re.MULTILINE | re.DOTALL)
+    re_row = re.compile('<tr>(?P<row>.*)</tr>', flags=re.MULTILINE | re.DOTALL)
+    re_dualcel = re.compile('<td id="cel">(?P<c>.*?)</td><!--cel-->',
+                            flags=re.MULTILINE | re.DOTALL)
+    con = d['content']
+    d['content'] = []
+    d['headers'] = []
+    for line in con.split('\n\t\t'):
+        if not line:
+            continue
+        row = re_row.search(line)
+        if row is None:
+            # Not a table row (e.g. surrounding markup): nothing to extract,
+            # and calling .group() on None would raise AttributeError.
+            continue
+        row = row.group('row')
+        for header in re_hdr.finditer(row):
+            d['headers'].append(header.group('h'))
+        d['content'].append([])
+        for cell in re_dualcel.finditer(row):
+            d['content'][-1].append(cell.group('c'))
+
+
+# Find the highlight spans in every column of one table row.
+#
+# line: iterable of column strings (HTML fragments).
+# Returns one list per column, in order: an empty list when the column has
+# no highlight span, otherwise a one-element list holding the list of regex
+# match objects found in that column.
+def parse_line(line):
+ # Named groups: b = opening <span ...> tag, c = its background-color
+ # value, content = the text inside the span, e = the closing </span> tag.
+ re_spa = re.compile('(?P<b><span.*?background-color:\s*(?P<c>.*?);.*?>)(?P'
+ '<content>.*?)(?P<e></span>)')
+ results = []
+ for column in line:
+ results.append([])
+ markings = list(re_spa.finditer(column))
+ if markings:
+ results[-1].append(markings)
+ return results
+
+
+# Reduce highlighted rows to pattern strings in which every highlight span
+# is replaced by a one-character marker for its colour.
+#
+# d: dict with 'content' (rows of [title, summary] strings; the first row
+# is dropped here) and 'matchdata' (parse_line() results).
+# Returns {'Title': [...], 'Summary': [...]} with the rewritten strings.
+# Side effect: d['content'] is replaced by d['content'][1:].
+# NOTE(review): pairing d['content'][i] with d['matchdata'][i] assumes both
+# lists line up once the first row is dropped - confirm against the caller.
+# NOTE(review): a background colour missing from color_dict raises KeyError.
+def create_nodes(d):
+ color_dict = {
+ 'rgb(139, 0, 0)': '\x01', # date
+ 'red': '\x02', # time
+ 'green': '\x03', # what
+ 'blue': '\x04' # "wanneer" (when) in the original; NOTE(review): the sample data highlights the location in blue - confirm
+ }
+ line_w_match = []
+ d['content'] = d['content'][1:]
+ for i, m in enumerate(d['matchdata']):
+ if filter(None, m):
+ line_w_match.append((d['content'][i], m))
+ nodelists = {'Title': [], 'Summary': []}
+ # Each row is unpacked as exactly two columns: title and summary.
+ for (title_l, summary_l), (title_m, summary_m) in line_w_match:
+ # Title
+ if title_m:
+ title = title_m[0]
+ # Work from the last span towards the first so that the offsets of
+ # the spans still to be processed stay valid while the string shrinks.
+ matches = reversed(sorted(title, key=lambda x: x.end('e')))
+ for match in matches:
+ # Drop the closing tag, swap the span text for its colour marker,
+ # then drop the opening tag.
+ title_l = title_l[:match.start('e')] + title_l[match.end('e'):]
+ title_l = title_l[:match.start('content')] +\
+ color_dict[match.group('c').strip()] +\
+ title_l[match.end('content'):]
+ title_l = title_l[:match.start('b')] + title_l[match.end('b'):]
+ nodelists['Title'].append(title_l)
+ # Summary
+ if summary_m:
+ summary = summary_m[0]
+ # Same right-to-left rewrite as for the title.
+ matches = reversed(sorted(summary, key=lambda x: x.end('e')))
+ for match in matches:
+ summary_l = summary_l[:match.start('e')] +\
+ summary_l[match.end('e'):]
+ summary_l = summary_l[:match.start('content')] +\
+ color_dict[match.group('c').strip()] +\
+ summary_l[match.end('content'):]
+ summary_l = summary_l[:match.start('b')] +\
+ summary_l[match.end('b'):]
+ nodelists['Summary'].append(summary_l)
+ return nodelists
+
+
+# Print a DOT "digraph dawg" description of the automaton reachable from q0.
+#
+# q0: start node; every node is read through its .final flag and its
+# .children mapping (edge label -> child node).
+# q0 gets number 0 and children are numbered in order of discovery. Nodes
+# whose .final is true are listed as doublecircle, the others as circle.
+# Output goes to stdout via print; nothing is returned.
+def to_dot(q0):
+ nodenum = 0
+ final_nodes = []
+ nodes = []
+ edges = []
+ to_visit = [(0, q0)]
+ visited = set()
+ # (node, number) pairs for every child that has been numbered so far
+ translation = []
+ if q0.final:
+ final_nodes.append(nodenum)
+ else:
+ nodes.append(nodenum)
+
+ nodenum += 1
+ while to_visit:
+ # pop() takes from the end, so to_visit is used as a stack
+ current = to_visit.pop()
+ if not current[0] in visited:
+ visited.add(current[0])
+ for char, child in current[1].children.iteritems():
+ # Reuse the number of a child that was already seen
+ matches = [c for c in translation if c[0] == child]
+ curnum = -1
+ if matches:
+ curnum = matches[-1][1]
+ else:
+ translation.append((child, nodenum))
+ curnum = nodenum
+ nodenum += 1
+ if child.final:
+ final_nodes.append(curnum)
+ else:
+ nodes.append(curnum)
+ edges.append((current[0], char, curnum))
+ to_visit.append((curnum, child))
+ print 'digraph dawg {'
+ print '\tnode [shape = doublecircle]; {}'.format(
+ ' '.join(str(n) for n in final_nodes))
+ print '\tnode [shape = circle]; {}'.format(
+ ' '.join(str(n) for n in nodes))
+ for fr, ch, to in edges:
+ print '\t{} -> {} [label = "{}"];'.format(fr, to, ch)
+ print '}'
+
+
+# Turn submitted form data into a crawler entry and store it.
+#
+# d: mapping of submitted fields; it must contain 'content', which
+# structure_data() consumes.
+# Returns whatever crawl.list_names() yields after the entry is written.
+def data_main(d):
+ # Work on a plain dict whose values are all converted with str()
+ d = {k: str(v) for k, v in dict(d).iteritems()}
+ structure_data(d)
+ d['matchdata'] = []
+ # Empty rows are skipped, so matchdata has one entry per non-empty row
+ for line in filter(None, d['content']):
+ d['matchdata'].append(parse_line(line))
+ nodelists = create_nodes(d)
+ # The pattern string lists are stored under the '...dawg' keys as-is
+ d['titledawg'] = nodelists['Title']
+ d['summarydawg'] = nodelists['Summary']
+ # matchdata is removed before the entry is handed to the crawler
+ del(d['matchdata'])
+ crawl = crawler.Crawler()
+ crawl.add_entry(d)
+ crawl.write()
+ return crawl.list_names()
def req_pre(req, args):
req.write(
'\tLoading "{}" as <p id="rssname">{}</p><br />\n'.format(url, name))
feed = feedparser.parse(url)
-# channel = feed.feed
req.write('\t<table id="content-table" border="1" id="htab">\n')
req.write('\t\t<tr><th>Title</th><th>Summary</th></tr>\n')
for i in feed.entries[:10]:
def handler(req):
- if req.method == "POST":
- req_pre_pos(req)
+ # Requests are routed on the last path component of the URI: the three
+ # page names below are served by the index module, anything else falls
+ # through to the feed-submission flow.
+ if req.uri.split('/')[-1] == 'index.py':
+ return index.index(req, util.FieldStorage(req), apache.OK)
+ elif req.uri.split('/')[-1] == 'crawler_test.py':
+ return index.crawler_test(req, util.FieldStorage(req), apache.OK)
+ elif req.uri.split('/')[-1] == 'crawler_edit.py':
+ return index.crawler_edit(req, util.FieldStorage(req), apache.OK)
else:
- args = util.FieldStorage(req)
- req_pre(req, args)
- if 'url' not in args and 'name' not in args:
- req.write('Something went wrong, empty fields?<br />')
- req.write('<a href="index.html">back</a>')
+ if req.method == "POST":
+ req_pre_pos(req)
else:
- feed2html(req, args['url'], args['name'])
- req_post(req)
- return apache.OK
+ args = util.FieldStorage(req)
+ req_pre(req, args)
+ # Both fields are required: the else branch indexes args['url'] and
+ # args['name'], so a request missing either one must be rejected here
+ # instead of failing with KeyError.
+ if 'url' not in args or 'name' not in args:
+ req.write('Something went wrong, empty fields?<br />')
+ req.write('<a href="index.html">back</a>')
+ else:
+ feed2html(req, args['url'], args['name'])
+ req_post(req)
+ return apache.OK
+# Remove everything currently deployed under /var/www/py.
sudo rm -rv /var/www/py/*
-sudo cp -v ./input_app.py /var/www/py
-sudo cp -v ./webdata/*.{xml,html,js} /var/www/py/
-sudo mkdir /var/www/py/files
+# Copy the files in the current directory into the web directory (no -r,
+# so sub-directories are not copied).
+sudo cp -v * /var/www/py/
+# Give the tree to user mart and group www-data, with full access for owner
+# and group and none for others.
sudo chown -vR mart:www-data /var/www/py
sudo chmod -vR 770 /var/www/py
-ln -s /var/www/py/files/ ./output_data
--- /dev/null
+<html>
+<head>
+ <title>Crawler control center</title>
+</head>
+<body>
+ <table border=1>
+ <tr>
+ <td>Inspect/edit crawler</td>
+ <td>Add new crawler</td>
+ <td>Test crawler</td>
+ </tr>
+ <tr>
+ <td>
+ {active_crawlers}
+ </td>
+ <td>
+ <form method="get" action="./input_app.py">
+ <table>
+ <tr><td>RSS URL:</td><td><input type="text" name="url" value="localhost/py/paradiso.rss.xml"></td></tr>
+ <tr><td>RSS Name:</td><td><input type="text" name="name"></td></tr>
+ <tr><td><input type="submit" value="Submit"></td></tr>
+ </table>
+ </form>
+ </td>
+ <td>
+ <br />
+ <form method="get" action="./crawler_test.py">
+ <table>
+ <tr><td>
+ <select name="name">
+ {active_crawlers_dropdown}
+ </select>
+ </td></tr>
+ <tr><td>Title:</td><td><input type="text" name="title"></td></tr>
+ <tr><td>Summary:</td><td><input type="text" name="summary"></td></tr>
+ <tr><td><input type="submit" value="Submit"></td></tr>
+ </table>
+ </form>
+ </td>
+ </tr>
+ </table>
+</body>
+</html>
+++ /dev/null
-/var/www/py/files/
\ No newline at end of file
+++ /dev/null
-meer containers
-minimale eisen rss feed
-benadrukken waarom rss
+++ /dev/null
-http://www.paradiso.nl/rss.xml
-http://www.tivoli.nl/rss/agenda/
-http://www.stadsschouwburgendevereeniging.nl/_rss/rss.php?type=voorstellingen
-http://www.dedoelen.nl/_rss/rss.php?type=voorstellingen
-http://www.parktheater.nl/_rss/rss.php?type=voorstellingen
-http://www.ticketunlimited.nl/ProductFeed/rssproductfeed.xml
-podiuminfo.nl
+++ /dev/null
-<html>
- <head>
- </head>
- <body>
- <form method="get" action="./input_app.py">
- <table>
- <tr><td><p>RSS URL: </td><td><input type="text" name="url"
- value="localhost/py/paradiso.rss.xml"></td></tr>
- <tr><td>RSS Name: </td><td><input type="text" name="name"></td></tr>
- <tr><td><input type="submit" value="Submit"</p>
- </table>
- </form>
- </body>
-</html>