another weekly update
author Mart Lubbers <mart@martlubbers.net>
Thu, 10 Jul 2014 07:19:15 +0000 (09:19 +0200)
committer Mart Lubbers <mart@martlubbers.net>
Thu, 10 Jul 2014 07:19:15 +0000 (09:19 +0200)
program/regexex/fsm.py

index c3b6357..9b2c2b1 100644 (file)
@@ -113,11 +113,62 @@ class fsm():
         # Connect to the end point
         self.add_connection(last, -2)
 
+    def create_internal(self, force=False):
+        """Build the internal subgraph structure and store it on the object
+
+        Keyword arguments:
+        force -- rebuild the structure even when one is already stored
+        """
+        # Keep the stored structure unless a rebuild was asked for
+        if not force and self.subgraphs:
+            return
+        # One pair of empty lists per marking, plus one under 'none'
+        fresh = {}
+        for name in self.markings.keys() + ['none']:
+            fresh[name] = ([], [])
+        self.travers(-1, fresh)
+        self.subgraphs = fresh
+
     def optimize(self):
         """Optimize the finite state machine"""
-        # Search the patterns outside the markings and try to merge
+        self.create_internal(force=True)
+        # Search the patterns outside the markings and find exact same strings
+        nodes = [a for a in self.subgraphs['none'][0]]
+        connections = self.subgraphs['none'][1]
+        groups = []
+        while nodes:
+            visited = set()
+            strings = []
+            self.walk(nodes[0][0], connections, [], strings, visited)
+            for v in nodes[:]:
+                if v[0] in visited:
+                    del(nodes[nodes.index(v)])
+            groups.append((visited, strings))
+        for i, group in enumerate(groups):
+            paths = [[r for r in rr if r[1][0] != '#'] for rr in group[1]]
+            if filter(None, paths):
+                self.process_multipath(paths)
+
+    def process_multipath(self, path):
+        """Placeholder: receives the paths of one group from optimize
+
+        Required arguments:
+        path -- list of paths handed over by optimize; not used yet
+        """
         pass
 
+    def walk(self, current, conn, currents=None, strings=None, vis=None):
+        """Walk the graph depth-first, recording visited nodes and full paths
+
+        Required arguments:
+        current -- current visiting node
+        conn    -- list of available connections
+
+        Keyword arguments:
+        currents -- path walked so far, as (node, self.nodes[node]) pairs
+        strings  -- list that receives every path that reaches an endpoint
+        vis      -- set that receives every visited node
+        """
+        # Fresh containers per call: mutable default values would be shared
+        # by every call that leaves them out
+        currents = [] if currents is None else currents
+        strings = [] if strings is None else strings
+        vis = set() if vis is None else vis
+
+        vis.add(current)
+        currents.append((current, self.nodes[current]))
+        children = [c for c in conn if c[0] == current]
+        # Endpoint reached
+        if not children:
+            strings.append(currents)
+            return
+        # Children left: every branch continues on its own copy of the path
+        # NOTE(review): vis is filled but never checked, so a cycle in conn
+        # would recurse without end -- confirm conn is acyclic here
+        for c in children:
+            self.walk(c[1], conn, currents[:], strings, vis)
+
     def graphviz(self, fp='-'):
         """Print the finite state machine in graphviz format
 
@@ -128,19 +179,18 @@ class fsm():
         fp = sys.stdout if fp == '-' else open(fp, 'w')
         fp.write('digraph fsm{\n')
 
-        # Find subgraphs travers the graph
-        subgraphs = {a: ([], []) for a in self.markings.keys() + ['none']}
-        self.travers(-1, subgraphs)
+        # Create internal structure if it wasn't there
+        self.create_internal()
 
         # Print the nodes that are not in a subgraph
-        for nodes in [self.nodestr.format(*n) for n in subgraphs['none'][0]] +\
-                [self.edgestr.format(*n) for n in subgraphs['none'][1]]:
+        for nodes in\
+                [self.nodestr.format(*n) for n in self.subgraphs['none'][0]] +\
+                [self.edgestr.format(*n) for n in self.subgraphs['none'][1]]:
             fp.write('{:<2}{}\n'.format(' ', nodes))
-        del(subgraphs['none'])
 
         #Print the nodes that are in a subgraph
-        for key, sg in subgraphs.iteritems():
-            fp.write('{0:<2}subgraph cluster_{1} {{\n'.format(' ', key))
+        for k, sg in [a for a in self.subgraphs.iteritems() if a[0] != 'none']:
+            fp.write('{0:<2}subgraph cluster_{1} {{\n'.format(' ', k))
             for nodes in [self.nodestr.format(*n) for n in sg[0]] +\
                     [self.edgestr.format(*n) for n in sg[1]]:
                 fp.write('{:<4}{}\n'.format(' ', nodes))