eia
author Mart Lubbers <mart@martlubbers.net>
Mon, 26 May 2014 20:50:54 +0000 (22:50 +0200)
committer Mart Lubbers <mart@martlubbers.net>
Mon, 26 May 2014 20:50:54 +0000 (22:50 +0200)
program/hypconvert/hypconvert.py

index cf67dfd..34548ff 100644 (file)
@@ -8,7 +8,7 @@ import re
 re_hdr = re.compile('<th>(?P<h>.*?)</th>')
 re_row = re.compile('<tr>(?P<row>.*?)</tr>')
 re_cel = re.compile('<td>(?P<c>.*?)</td>')
-re_spa = re.compile('(?P<b><span.*background-color:(?P<c>.*?);.*>).*?(?P<e></span>)')
+re_spa = re.compile('(?P<b><span.*?background-color:(?P<c>.*?);.*?>)(?P<content>.*?)(?P<e></span>)')
 
 
 def structure_data(d):
@@ -26,10 +26,14 @@ def structure_data(d):
 
 def parse_line(line):
     logging.debug('parsing line: {}'.format(line))
+    results = []
     for column in line:
         logging.debug('parsing column: {}'.format(column))
         markings = list(re_spa.finditer(column))
-        print markings
+        if markings:
+            results.append(markings)
+            logging.debug('found {} spans in the column'.format(len(markings)))
+    logging.debug('found {} columns with markings'.format(len(results)))
 
 
 def main():
@@ -52,5 +56,5 @@ def main():
 
 
 if __name__ == '__main__':
-    logging.basicConfig(level=logging.INFO)
+    logging.basicConfig(level=logging.DEBUG)
     main()