# Patterns for picking apart a simple HTML table. All quantifiers are
# non-greedy so that each match stops at the first closing tag.
re_hdr = re.compile(r'<th>(?P<h>.*?)</th>')
re_row = re.compile(r'<tr>(?P<row>.*?)</tr>')
re_cel = re.compile(r'<td>(?P<c>.*?)</td>')
# A <span> carrying a background-color style. Groups: ``b`` is the opening
# tag, ``c`` the color value, ``content`` the text inside the span and ``e``
# the closing tag. Every ``.*`` is non-greedy; a greedy version would merge
# several spans in the same cell into a single match.
re_spa = re.compile(
    r'(?P<b><span.*?background-color:(?P<c>.*?);.*?>)'
    r'(?P<content>.*?)'
    r'(?P<e></span>)'
)
def structure_data(d):
def parse_line(line):
    """Collect the background-colored ``<span>`` markings of one row.

    Args:
        line: Iterable of column strings. Presumably the ``<td>`` cell
            contents of a single table row -- confirm against the caller.

    Returns:
        A list with one entry per column that contained at least one
        marking. Each entry is the list of ``re_spa`` match objects found
        in that column, in order of appearance. Columns without markings
        are omitted, so the result is empty when nothing matched.
    """
    logging.debug('parsing line: {}'.format(line))
    results = []
    for column in line:
        logging.debug('parsing column: {}'.format(column))
        # Materialize the iterator so it can be tested for emptiness and
        # counted for the debug message below.
        markings = list(re_spa.finditer(column))
        if markings:
            results.append(markings)
            logging.debug('found {} spans in the column'.format(len(markings)))
    logging.debug('found {} columns with markings'.format(len(results)))
    # Hand the collected matches back; without this the work is discarded.
    return results
def main():
if __name__ == '__main__':
    # Configure the root logger exactly once, before main() runs.
    # logging.basicConfig() does nothing when the root logger already has
    # handlers, so an earlier INFO-level call would silently win and hide
    # this module's logging.debug() output.
    logging.basicConfig(level=logging.DEBUG)
    main()