From: Mart Lubbers Date: Mon, 1 Sep 2014 15:09:21 +0000 (+0200) Subject: test X-Git-Url: https://git.martlubbers.net/?a=commitdiff_plain;h=db1f6fa86c20ffbcaf42706af84a951bf47fe3a1;p=bsc-thesis1415.git test --- diff --git a/program/everything/crawler.py b/program/everything/crawler.py index a0029b0..a6c6ab0 100644 --- a/program/everything/crawler.py +++ b/program/everything/crawler.py @@ -9,7 +9,9 @@ import sys URL_REG = re.compile( - ur'(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?\xab\xbb\u201c\u201d\u2018\u2019]))') + ur'(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<' + ur'>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+' + ur'\)))*\)|[^\s`!()\[\]{};:\'".,<>?\xab\xbb\u201c\u201d\u2018\u2019]))') class Crawler(): @@ -153,16 +155,17 @@ class Crawler(): def main(): - cr = Crawler() - cr.test_entry('dedoelen', 'vr 5 mei, 08:00 uur - Abba live', '') - cr.test_entry('dedoelen', 'vr 5 mei08:00 uur - Abba live', '') - cr.test_entry('paradiso', - 'donderdag 13 november 2014 19:30 - Wouter Hamel', '') - cr.test_entry('paradiso', 'zaterdag 27 september 2014 20:30 - A Great Big ' - 'World - Locatie: Tolhuistuin (zaal)', '') - cr.test_entry('paradiso', 'zaterdag 27 september 2014 20:30 - A Great Big ' - 'World - Locatie: Tolhuistuin (zaal)', '') - cr.to_dot('paradiso', 't.dot') + if len(sys.argv) == 5 and sys.argv[1] == 'test': + print 'Crawler: {}\nTitle: {}\nSummary: {}'.format(*sys.argv[2:]) + cr = Crawler() + cr.test_entry(*sys.argv[2:]) + elif len(sys.argv) == 3 and sys.argv[1] == 'run': + print 'Crawler: {}'.format(sys.argv[2]) + cr = Crawler() + else: + print '{} test crawlername title summary'.format(sys.argv[0]) + print '{} run crawlername'.format(sys.argv[0]) + if __name__ == '__main__': main()