2 # -*- coding: utf-8 -*-
4 from mod_python
import apache
, util
# NOTE(review): interior of a mod_python POST-handling view — the enclosing
# `def` (original lines 5-13) is outside this chunk. The source lines are
# word-wrapped mid-statement and the leading integers are the original
# file's line numbers fused into the text by extraction.
14 req
.log_error('handler')
15 req
.content_type
= 'text/html'
16 req
.send_http_header()
# Parse the submitted form fields and hand them to data_main(), which
# returns a crawler-name listing and the crawler object (see orig. l.154).
17 args
= util
.FieldStorage(req
)
18 listing
, crawl
= data_main(args
)
# The bare string literals below (orig. lines 21-25) are arguments to
# req.write(...) calls whose opening lines (19-20, 22) are missing from
# this chunk — confirm against the full file.
21 '\t<title>VER: 0.01 - HyperFrontend RSS feed POSTREQUEST</title>'
23 '\tThanks submitting: <br />\n'
24 '\t<a href="index.py">Go back...</a>\n<pre>\n'
25 'Current crawlers: {}\n</pre>\n</body>\n</html>'.format(listing
))
# Parse an HTML table dump back into header names and per-row cell text,
# appending into d['headers'] and d['content'].
# NOTE(review): original lines 33-35 (which bind `con` — presumably the
# raw HTML out of `d` — and likely initialise d['headers']/d['content'])
# and line 38 (presumably `continue` for the guard on line 37) are
# missing from this chunk; confirm against the full file.
28 def structure_data(d
):
# Compiled patterns: <th> header text, <tr> row payload, and the
# <td id="cel">…</td><!--cel--> cells emitted by feed2html (orig. l.166).
29 re_hdr
= re
.compile('<th>(?P<h>.*?)</th>', flags
=re
.MULTILINE | re
.DOTALL
)
30 re_row
= re
.compile('<tr>(?P<row>.*)</tr>', flags
=re
.MULTILINE | re
.DOTALL
)
31 re_dualcel
= re
.compile('<td id="cel">(?P<c>.*?)</td><!--cel-->',
32 flags
=re
.MULTILINE | re
.DOTALL
)
# Logical table rows are delimited by <!--line--> marker comments; rows
# without an <!--cel--> marker carry no cell data and are skipped.
36 for line
in con
.split('<!--line-->'):
37 if not line
or '<!--cel-->' not in line
:
# Extract the <tr>…</tr> payload, then collect any header names and the
# row's cell contents (one fresh list per row in d['content']).
39 row
= re_row
.search(line
)
40 row
= row
.group('row')
41 for header
in re_hdr
.finditer(row
):
42 d
['headers'].append(header
.group('h'))
43 d
['content'].append([])
44 for cell
in re_dualcel
.finditer(row
):
45 d
['content'][-1].append(cell
.group('c'))
# NOTE(review): fragment of a line-parsing helper — its `def` (orig.
# lines 46-48) and the loop that binds `column` and `results` (orig.
# 51-53, 55) are missing from this chunk. Pattern groups: b = the opening
# <span …> tag, c = its background-color value, content = the highlighted
# text, e = the closing </span>. NOTE(review): '\s' in a non-raw string —
# works, but should be a raw string r'…' by convention.
49 re_spa
= re
.compile('(?P<b><span.*?background-color:\s*(?P<c>.*?);.*?>)(?P'
50 '<content>.*?)(?P<e></span>)')
# Collect every highlight-span match found in this column and stash the
# list on the current row of `results`.
54 markings
= list(re_spa
.finditer(column
))
56 results
[-1].append(markings
)
# NOTE(review): fragment of a node-list builder — the enclosing `def`
# (orig. lines 57-60) plus several interior lines (64, 67, 71-73, 75,
# 82-83, 86 — presumably `line_w_match = []`, `for match in matches:`
# loop headers, and guards) are missing from this chunk.
# Map 'rgb(r, g, b)' strings (decoded from '#rrggbb' hex colours in
# buttons.BUTTONPAIRS) to 1-based chr() codes, so a highlight colour can
# be replaced by a single marker character.
61 color_dict
= {'rgb({}, {}, {})'.format(
62 int(x
[1:3], 16), int(x
[3:5], 16), int(x
[5:], 16)): chr(i
)
63 for i
, (x
, _
) in enumerate(buttons
.BUTTONPAIRS
, 1)}
65 # d['content'] = d['content'][1:]
# Pair each content row with its span-match data.
66 for i
, m
in enumerate(d
['matchdata']):
68 line_w_match
.append((d
['content'][i
], m
))
69 nodelists
= {'Title': [], 'Summary': []}
70 for (title_l
, summary_l
), (title_m
, summary_m
) in line_w_match
:
# Process matches right-to-left (sorted by end offset, reversed) so that
# earlier match offsets stay valid while the string is spliced.
74 matches
= reversed(sorted(title
, key
=lambda x
: x
.end('e')))
# Per match: drop the closing </span> (group e), replace the highlighted
# text (group content) with its one-char colour code, then drop the
# opening tag (group b).
76 title_l
= title_l
[:match
.start('e')] + title_l
[match
.end('e'):]
77 title_l
= title_l
[:match
.start('content')] +\
78 color_dict
[match
.group('c').strip()] +\
79 title_l
[match
.end('content'):]
80 title_l
= title_l
[:match
.start('b')] + title_l
[match
.end('b'):]
81 nodelists
['Title'].append(title_l
)
# Same splice sequence for the summary column.
84 summary
= summary_m
[0]
85 matches
= reversed(sorted(summary
, key
=lambda x
: x
.end('e')))
87 summary_l
= summary_l
[:match
.start('e')] +\
88 summary_l
[match
.end('e'):]
89 summary_l
= summary_l
[:match
.start('content')] +\
90 color_dict
[match
.group('c').strip()] +\
91 summary_l
[match
.end('content'):]
92 summary_l
= summary_l
[:match
.start('b')] +\
93 summary_l
[match
.end('b'):]
94 nodelists
['Summary'].append(summary_l
)
# NOTE(review): fragment of a DAWG/trie -> Graphviz DOT dumper — the
# enclosing `def` (orig. lines 95-106) and the branches around lines
# 108, 110-112, 118-128 (node-numbering / accepting-state logic) are
# missing from this chunk. Python 2 only: `print` statements and
# dict.iteritems() below.
107 final_nodes
.append(nodenum
)
109 nodes
.append(nodenum
)
# Iterative traversal with an explicit stack of (node-number, node)
# pairs; `translation` maps already-seen child nodes to their numbers.
113 current
= to_visit
.pop()
114 if not current
[0] in visited
:
115 visited
.add(current
[0])
116 for char
, child
in current
[1].children
.iteritems():
117 matches
= [c
for c
in translation
if c
[0] == child
]
120 curnum
= matches
[-1][1]
122 translation
.append((child
, nodenum
))
126 final_nodes
.append(curnum
)
# Record the labelled edge and queue the child for traversal.
129 edges
.append((current
[0], char
, curnum
))
130 to_visit
.append((curnum
, child
))
# Emit Graphviz DOT: accepting nodes as doublecircles, the rest as
# circles, then one labelled edge per (from, char, to) triple.
131 print 'digraph dawg {'
132 print '\tnode [shape = doublecircle]; {}'.format(
133 ' '.join(str(n
) for n
in final_nodes
))
134 print '\tnode [shape = circle]; {}'.format(
135 ' '.join(str(n
) for n
in nodes
))
136 for fr
, ch
, to
in edges
:
137 print '\t{} -> {} [label = "{}"];'.format(fr
, to
, ch
)
# NOTE(review): interior of data_main(args) — the `def` (orig. lines
# 138-141) and lines 143-144, 150, 152-153 (presumably the
# structure_data(d) call, matchdata initialisation, and crawler set-up)
# are missing from this chunk.
# Copy the FieldStorage into a plain dict of str values, dropping the
# 'write' key (a mod_python callable, not form data). Python 2
# iteritems().
142 d
= {k
: str(v
) for k
, v
in dict(d
).iteritems() if k
!= 'write'}
# Run the span parser over each non-empty content row, then build the
# Title/Summary node lists used as DAWG input.
145 for line
in filter(None, d
['content']):
146 d
['matchdata'].append(parse_line(line
))
147 nodelists
= create_nodes(d
)
148 d
['titledawg'] = nodelists
['Title']
149 d
['summarydawg'] = nodelists
['Summary']
151 crawl
= crawler
.Crawler()
# Return the crawler-name listing plus the crawler itself (consumed at
# orig. line 18 and in the 'preview.py' branch of the dispatcher).
154 return crawl
.list_names(), crawl
# Fetch an RSS/Atom feed and write its first 10 entries into the request
# as a two-column (Title / Summary) HTML table.
# NOTE(review): original line 160 — presumably the `req.write(` opening
# for the 'Loading …' string on line 161 — is missing from this chunk.
157 def feed2html(req
, url
, name
):
# URL arrives percent-encoded from the form; default the scheme to
# http:// when none is given.
158 url
= urllib
.unquote(url
)
159 url
= url
if re
.match('https?://', url
) else 'http://{}'.format(url
)
161 '\tLoading "{}" as <p id="rssname">{}</p><br />\n'.format(url
, name
))
162 feed
= feedparser
.parse(url
)
163 req
.write('\t<table id="content-table" border="1">\n')
164 req
.write('\t\t<tr><th>Title</th><th>Summary</th></tr>\n')
# First 10 entries only; non-ASCII characters become XML character
# references. The <!--cel--> markers are what structure_data() later
# splits on when this table is posted back.
165 for i
in feed
.entries
[:10]:
166 req
.write(('\t\t<tr><td id="cel">{}</td><!--cel--><td id="cel">{}</td>'
167 '<!--cel--></tr>\n').format(
168 i
['title'].encode('ascii', 'xmlcharrefreplace'),
169 i
['summary'].encode('ascii', 'xmlcharrefreplace')))
170 req
.write('\t</table>\n<br />')
# NOTE(review): interior of the mod_python entry point — the `def`
# (orig. lines 171-173, presumably `def handler(req):`) and lines 184,
# 187, 193-195 are missing from this chunk. Dispatch is on the last
# segment of the request URI; each view gets a fresh FieldStorage and
# apache.OK. NOTE(review): req.uri.split('/')[-1] is recomputed in every
# branch — could be hoisted into a local when the full file is edited.
174 if req
.uri
.split('/')[-1] == 'index.py':
175 return index
.index(req
, util
.FieldStorage(req
), apache
.OK
)
176 elif req
.uri
.split('/')[-1] == 'crawler_test.py':
177 return index
.crawler_test(req
, util
.FieldStorage(req
), apache
.OK
)
178 elif req
.uri
.split('/')[-1] == 'crawler_edit.py':
179 return index
.crawler_edit(req
, util
.FieldStorage(req
), apache
.OK
)
180 elif req
.uri
.split('/')[-1] == 'crawler_new.py':
181 return index
.crawler_new(req
, util
.FieldStorage(req
), apache
.OK
)
182 elif req
.uri
.split('/')[-1] == 'crawler_xml.py':
183 return index
.crawler_xml(req
, util
.FieldStorage(req
), apache
.OK
)
# POST-only routes; the body of the 'hyper.py' case (orig. line 187) is
# missing from this chunk.
185 if req
.method
== "POST":
186 if req
.uri
.split('/')[-1] == 'hyper.py':
188 elif req
.uri
.split('/')[-1] == 'preview.py':
189 args
= util
.FieldStorage(req
)
190 cr
= crawler
.Crawler()
191 listing
, crawl
= data_main(args
)
192 returncode
= index
.crawler_test(req
, args
, apache
.OK
)
# Fallback for unrecognised URIs/methods (tail of the handler; lines
# 193-195 missing before this).
196 req
.write('Unknown case')