update
author Mart Lubbers <mart@martlubbers.net>
Wed, 24 Sep 2014 17:32:12 +0000 (19:32 +0200)
committer Mart Lubbers <mart@martlubbers.net>
Wed, 24 Sep 2014 17:32:12 +0000 (19:32 +0200)
program/everything/crawler.db
program/everything/crawler.py
program/everything/index.py
program/everything/main.html.t

index e9287f8..380729a 100644 (file)
@@ -1,10 +1,10 @@
 (dp0
-S'Paradiso_test1'
+S'test'
 p1
 (dp2
 S'website'
 p3
-S'www.test.nl'
+S't'
 p4
 sS'name'
 p5
@@ -15,11 +15,11 @@ S'localhost/py/paradiso.rss.xml'
 p7
 sS'dloc'
 p8
-S'test'
+S'ut'
 p9
 sS'venue'
 p10
-S'p'
+S'Para'
 p11
 sS'content'
 p12
@@ -44,7 +44,7 @@ p23
 aS'mede-oprichter Broken Social Scene solo'
 p24
 aa(lp25
-S'<span class="uiWebviewHighlight" style="color: white; background-color: rgb(139, 0, 0);">vrijdag 4 juli 2014</span> <span class="uiWebviewHighlight" style="color: white; background-color: red;">22:00</span> - <span class="uiWebviewHighlight" style="color: white; background-color: green;">Palenke Soultribe</span>'
+S'vrijdag 4 juli 2014 22:00 - Palenke Soultribe'
 p26
 aS'Electronische muziek en Afro-Colombiaanse ritmes'
 p27
@@ -86,14 +86,11 @@ p47
 (lp48
 S'\x01 \x02 - \x03   -   Locatie: \x04'
 p49
-aS'\x01 \x02 - \x03'
-p50
 asS'freq'
-p51
+p50
 S'1w'
-p52
+p51
 sS'adress'
-p53
-S'adres'
-p54
+p52
+g4
 ss.
\ No newline at end of file
index a377d5b..52c3f2c 100644 (file)
@@ -54,22 +54,28 @@ class Crawler():
             self.entries[d['name']] = d
 
     def write(self, path='/var/www/py/crawler.db'):
+        status = ''
         entries2 = {kk: {k: v for k, v in vv.iteritems()
                     if k not in ['summarydawg_t', 'titledawg_t']}
                     for kk, vv in self.entries.iteritems()}
         if os.path.exists(path):
             os.rename(path, '{}.bak'.format(path))
+            status += 'Old crawler file found, created backup<br />'
         try:
             with open(path, 'wb') as f:
                 f.write(pickle.dumps(entries2))
+            status += 'Crawler written succesfully<br />'
         except Exception, e:
             #  print 'something went wrong writing: {}'.format(e)
             #  print 'restoring backup'
-            raise e
+            status += 'Something went wrong: {}<br />'.format(e)
             os.rename('{}.bak'.format(path), path)
+            status += 'Writing failed, restored backup<br />'
         finally:
             if os.path.exists('{}.bak'.format(path)):
                 os.remove('{}.bak'.format(path))
+                status += 'Backup file removed<br />'
+        return status
 
     def get_regex(self, name):
         d_t = self.entries[name]['titledawg_t']
@@ -162,7 +168,8 @@ class Crawler():
         if matchs:
             matches = sorted(matchs, key=lambda x: len(x.groups()))
             results['summary'] = list(reversed(matches))[0].groupdict()
-        return dict(results['summary'].items() + results['title'].items())
+        outputdct = dict(results['summary'].items() + results['title'].items())
+        return {re.sub('\d', '', k): v for k, v in outputdct.iteritems()}
 
     def has_to_run(self, interval, last_run, now):
         time_wait = sum(
index e331645..5143402 100644 (file)
@@ -13,9 +13,6 @@ def index(req, args, apok):
     cr = crawler.Crawler('/var/www/py/crawler.db')
     ns = cr.list_names()
     params = {
-        'active_crawlers':
-            '\n'.join('<a href="./crawler_edit.py?url={0}">{0}</a><br>'.
-                      format(a) for a in ns),
         'active_crawlers_dropdown':
             '\n'.join('<option value={0}>{0}</option>'.format(a) for a in ns)
     }
@@ -24,8 +21,47 @@ def index(req, args, apok):
 
 
 def crawler_edit(req, args, apok):
+    args['name'] = args.get('name', '')
+    req.log_error('handler')
+    req.content_type = 'text/html'
+    req.send_http_header()
+    if args['action'] == 'remove':
+        req.write('Remove {}<br />'.format(args['name']))
+        cr = crawler.Crawler()
+        status = ''
+        try:
+            del(cr.entries[args['name']])
+            status = 'Succes...<br />'
+            status += cr.write()
+        except KeyError:
+            status = 'Name not in the crawler<br />'
+        except Exception, e:
+            status = 'Other exception thrown: {}<br />'.format(e)
+        req.write(status)
+    elif args['action'] == 'edit':
+        req.write('Edit {}\n'.format(args['name']))
+    else:
+        req.write('Unknown editing action: {}'.format(args['action']))
+    req.write('<a href="index.py">Go back</a>')
     return apok
 
 
 def crawler_test(req, args, apok):
+    req.log_error('handler')
+    req.content_type = 'text/html'
+    req.send_http_header()
+    cr = crawler.Crawler(init=True)
+    if 'name' not in args or str(args['name']) not in cr.entries:
+        req.write('Name not in the crawler...<br/>')
+    else:
+        args['summary'] = args.get('summary', '')
+        args['title'] = args.get('title', '')
+        respons = cr.test_entry(str(args['name']), str(args['title']),
+                                str(args['summary']))
+        req.write('<b>Title:</b> {}<br/><b>Summary: </b>{}<br/>'.format(
+            str(args['title']), str(args['summary'])))
+        req.write('<br/><b>Results:</b><br/>')
+        req.write('<br/>'.join('{}: {}'.format(k, v)
+                  for k, v in sorted(respons.iteritems())))
+    req.write('<br/><br/><a href="index.py">Go back</a>')
     return apok
index 4a7be42..6606818 100644 (file)
@@ -5,12 +5,27 @@
 <body>
        <table border=1>
                <tr>
-                       <td>Inspect/edit crawler</td>
+                       <td>Edit/Remove crawler</td>
                        <td>Add new crawler</td>
                        <td>Test crawler</td>
                <tr>
                        <td>
-                               {active_crawlers}
+                               <form method="get" action="./crawler_edit.py">
+                                       <table>
+                                               <tr><td>
+                                                       <select name="name">
+                                                               {active_crawlers_dropdown}
+                                                       </select>
+                                               </td></tr><tr><td>
+                                                       <select name="action">
+                                                               <option value="remove">Remove</option>
+                                                               <option value="edit">Edit</option>
+                                                       </select>
+                                               </tr></td><tr><td>
+                                                       <input type="submit" value="Submit">
+                                               </td></tr>
+                                       </table>
+                               </form>
                        </td>
                        <td>
                                <form method="get" action="./input_app.py">