From: Mart Lubbers Date: Thu, 10 Jul 2014 07:19:15 +0000 (+0200) Subject: another weekly update X-Git-Url: https://git.martlubbers.net/?a=commitdiff_plain;h=08f94b2500b6f509376e4a41c5e2c319ebb8ca35;p=bsc-thesis1415.git another weekly update --- diff --git a/program/regexex/fsm.py b/program/regexex/fsm.py index c3b6357..9b2c2b1 100644 --- a/program/regexex/fsm.py +++ b/program/regexex/fsm.py @@ -113,11 +113,62 @@ class fsm(): # Connect to the end point self.add_connection(last, -2) + def create_internal(self, force=False): + """Create internal subgraph structure + + Keyword arguments: + force -- flag to force a completely new structure + """ + if force or not self.subgraphs: + subgraphs = {a: ([], []) for a in self.markings.keys() + ['none']} + self.travers(-1, subgraphs) + self.subgraphs = subgraphs + def optimize(self): """Optimize the finite state machine""" - # Search the patterns outside the markings and try to merge + self.create_internal(force=True) + # Search the patterns outside the markings and find exact same strings + nodes = [a for a in self.subgraphs['none'][0]] + connections = self.subgraphs['none'][1] + groups = [] + while nodes: + visited = set() + strings = [] + self.walk(nodes[0][0], connections, [], strings, visited) + for v in nodes[:]: + if v[0] in visited: + del(nodes[nodes.index(v)]) + groups.append((visited, strings)) + for i, group in enumerate(groups): + paths = [[r for r in rr if r[1][0] != '#'] for rr in group[1]] + if filter(None, paths): + self.process_multipath(paths) + + def process_multipath(self, path): pass + def walk(self, current, conn, currents=[], strings=[], vis=set()): + """Walk the graph and keep track of vis conn + + Required arguments: + current -- current visiting node + conn -- list of available connections + + Keyword arguments: + lst -- list of vis connections + vis -- set of vis node + """ + vis.add(current) + currents.append((current, self.nodes[current])) + children = [c for c in conn if c[0] == current] + # Endpoint reached + if not children: + strings.append(currents) + # Children left + else: + for c in [c for c in conn if c[0] == current]: + self.walk(c[1], conn, currents[:], strings, vis) + def graphviz(self, fp='-'): """Print the finite state machine in graphviz format @@ -128,19 +179,18 @@ class fsm(): fp = sys.stdout if fp == '-' else open(fp, 'w') fp.write('digraph fsm{\n') - # Find subgraphs travers the graph - subgraphs = {a: ([], []) for a in self.markings.keys() + ['none']} - self.travers(-1, subgraphs) + # Create internal structure if it wasn't there + self.create_internal() # Print the nodes that are not in a subgraph - for nodes in [self.nodestr.format(*n) for n in subgraphs['none'][0]] +\ - [self.edgestr.format(*n) for n in subgraphs['none'][1]]: + for nodes in\ + [self.nodestr.format(*n) for n in self.subgraphs['none'][0]] +\ + [self.edgestr.format(*n) for n in self.subgraphs['none'][1]]: fp.write('{:<2}{}\n'.format(' ', nodes)) - del(subgraphs['none']) #Print the nodes that are in a subgraph - for key, sg in subgraphs.iteritems(): - fp.write('{0:<2}subgraph cluster_{1} {{\n'.format(' ', key)) + for k, sg in [a for a in self.subgraphs.iteritems() if a[0] != 'none']: + fp.write('{0:<2}subgraph cluster_{1} {{\n'.format(' ', k)) for nodes in [self.nodestr.format(*n) for n in sg[0]] +\ [self.edgestr.format(*n) for n in sg[1]]: fp.write('{:<4}{}\n'.format(' ', nodes))