From: Mart Lubbers Date: Thu, 10 Jul 2014 11:58:53 +0000 (+0200) Subject: laatste X-Git-Url: https://git.martlubbers.net/?a=commitdiff_plain;h=b5f156fd4c8139db25150ba83206ac39eaa6d4ce;p=bsc-thesis1415.git laatste --- diff --git a/program/regexex/fsm.py b/program/regexex/fsm.py index 9b2c2b1..4d06244 100644 --- a/program/regexex/fsm.py +++ b/program/regexex/fsm.py @@ -131,21 +131,57 @@ class fsm(): nodes = [a for a in self.subgraphs['none'][0]] connections = self.subgraphs['none'][1] groups = [] + # While there are still unexplored nodes in the none group while nodes: visited = set() strings = [] + # Walk the nodes and discover the different possible paths self.walk(nodes[0][0], connections, [], strings, visited) + # Remove the walked paths for v in nodes[:]: if v[0] in visited: del(nodes[nodes.index(v)]) groups.append((visited, strings)) + # For all groups try to merge paths for i, group in enumerate(groups): - paths = [[r for r in rr if r[1][0] != '#'] for rr in group[1]] - if filter(None, paths): - self.process_multipath(paths) + paths = [[r for r in rr] for rr in group[1]] + pathswo = [[r for r in rr if r[1][0] != '#'] for rr in group[1]] + # If there are paths of length greater than 0 + if filter(None, pathswo): + # Find equality + result = self.process_multipath(paths) + # Remove the obsolete nodes + if result: + # Magic removal process + print result + print group + def process_multipath(self, path): - pass + """Looks for possible merge candidates + + Required arguments: + path - list of possible paths + """ + obj = None + equal = True + # For all paths check if they are all the same + for p in path: + s = ''.join(i[1] for i in p) + if not obj: + obj = s + elif obj != s: + equal = False + break + # If they are all the same return the first entry + if equal: + return path[0] + # If they are not all the same try to find a regex that is matching + # them all + else: + # Magic + pass + def walk(self, current, conn, currents=[], strings=[], vis=set()): """Walk the 
graph and keep track of vis conn diff --git a/thesis/methods.tex b/thesis/methods.tex index 20b7ae8..7fdf8df 100644 --- a/thesis/methods.tex +++ b/thesis/methods.tex @@ -1,4 +1,9 @@ \section{Regular expressions, finite state machines and automata} +Regular expressions provide a way to match text and extract groups by defining +patterns. Every regular expression can be converted to a finite state machine +without losing any information. \section{Algorithm} +\subsection{Within group merging of nodes} +\subsection{Between group merging of nodes}