fadsda
authorMart Lubbers <mart@martlubbers.net>
Fri, 18 Jul 2014 12:15:57 +0000 (14:15 +0200)
committerMart Lubbers <mart@martlubbers.net>
Fri, 18 Jul 2014 12:15:57 +0000 (14:15 +0200)
program/regexex/dafsa.py
program/regexex/f.dot

index f855c7a..af9542d 100644 (file)
@@ -9,18 +9,38 @@ class Afda():
         pass
 
     def common_prefix_p(self, cons, word, res, c_node, c_index, cur):
+        """Finds the common prefix within a network
+
+        Required arguments:
+        cons    -- connection dictionary
+        word    -- word to which to find the prefix
+        res     -- results list
+        c_node  -- current node
+        c_index -- current word index
+        cur     -- current prefix
+        """
+        # If the end of the word is reached there are no children to explore
         if c_index >= len(word):
             childs = []
+        # End is not reached, find all the children with the correct letter
         else:
             childs = [c for c in cons[c_node] if c[0] == word[c_index]]
+        # If there are no childs left, append the result
         if not childs or c_index >= len(word):
             res.append(cur + [c_node])
+        # Search through all the children for longer paths
         else:
             for child in childs:
                 self.common_prefix_p(cons, word, res, child[1], c_index+1,
                                      cur+[(c_node, child)])
 
     def common_prefix(self, cons, word):
+        """Find the common prefix within a network
+
+        Required arguments:
+        cons -- connection dictionary
+        word -- word to which to find the prefix
+        """
         results = []
         self.common_prefix_p(cons, word, results, -1, 0, [])
         return max(results, key=lambda x: len(x))
@@ -96,71 +116,87 @@ class Afda():
     def new_num(self, connections):
         return max(connections.keys())+1
 
-    def add_suffix(self, connections, last_state, current_suffix, first=False):
-        if first:
-            this = current_suffix
-            that = None
-        else:
-            this = current_suffix[:-1]
-            that = current_suffix[-1]
-        for c in this:
+    def add_suffix(self, connections, last_state, current_suffix):
+        suffix = []
+        for c in current_suffix:
             newnum = self.new_num(connections)
             connections[last_state].append((c, newnum))
             connections[newnum] = []
+            suffix.append((last_state, (c, newnum)))
             last_state = newnum
-        if that:
-            endstate = [i for i in connections.iteritems() if not i[1]][0]
-            connections[last_state].append((that, endstate[0]))
+        return suffix
+
+    def replace_or_register(self, connections, state, symbol):
+        pass
 
     def train(self, wlist):
-#       register = None
+# 0. start
+        register = set()
 #        connections = {-1: [('a', 2), ('b', 4)], 2: [('b', 3)], 3: [('d', 5)],
 #                       4: [('a', 3)], 5: []}
         connections = {-1: []}
-        first = True
         for word in wlist:
-            if first:
-                self.add_suffix(connections, -1, word, True)
-                first = False
             print word
-# Find the common prefix and suffix and skip if word is in the adfa
+# v1. Find the common prefix and suffix and skip if word is in the adfa
             common_prefix = self.common_prefix(connections, word)
             current_suffix = word[len(common_prefix)-1:]
             print 'common_prefix: {}'.format(common_prefix)
             print 'current_suffix: {}'.format(current_suffix)
-            if not current_suffix:
+            if not current_suffix and not connections[common_prefix[-1]]:
                 continue
 
-# Find the first confluence state in the common prefix path
+# v2. Find the first confluence state in the common prefix path
             first_state = self.first_cstate(connections, common_prefix)
             print 'first_state: {}'.format(first_state)
-            if first_state:
+            if not first_state:
+                print 'not cloning'
+                last_state = common_prefix[-1]
+            else:
+                print 'cloning'
                 last_state = self.clone(
                     connections, common_prefix, first_state)
-            else:
-                last_state = common_prefix[-1]
             print 'last_state: {}'.format(last_state)
-            self.add_suffix(connections, last_state, current_suffix)
-
-# If there is a confluence state and there has been cloning, loop through the
-# new states backwards and replace or register the states
+            print 'adding suffix: {}'.format(current_suffix)
+            suffix = self.add_suffix(connections, last_state, current_suffix)
+            print 'suffix added: {}'.format(suffix)
+            path = [c[0] for c in common_prefix + suffix
+                    if not isinstance(c, int)]
+            print 'path: {}'.format(path)
+
+# 3a. If there is a confluence state and there has been cloning, loop through
+# the new states backwards and replace or register the states
             if first_state:
                 print 'consider replace register clone'
                 first_state = self.first_cstate(connections, common_prefix)
                 print 'first_state {}'.format(first_state)
                 current_index = common_prefix.index(first_state)
-                print range(len(common_prefix, current_index, -1))
+                print 'range: ', range(len(common_prefix, current_index, -1))
                 for i in range(len(common_prefix, current_index, -1)):
                     pass
-# There is no confluence state so the current index will be the last of the
-# common prefix
+# v3b. There is no confluence state so the current index will be the last of
+# the common prefix
             else:
                 current_index = len(common_prefix)
 
-# While we change things try to find equivalent states in the suffix to
+# 4. While we change things try to find equivalent states in the suffix to
 # replace or register
-            #changed = True
-            raw_input('continue...\n')
+            changed = True
+            while changed:
+                current_index -= 1
+                current_state = None
+                current_state = path[current_index]
+                print 'current state: {}'.format(current_state)
+                old_state = last_state
+                if current_index > 0:
+                    register = register - {last_state}
+#                self.replace_or_register(connections, current_state)
+                changed = old_state != last_state
+
+# v5. If there has been no change and there are characters left
+            if not changed and current_index > 0:
+                register = register.union({current_state})
+            import pdb
+            pdb.set_trace()
 
         self.graphviz(connections, 'f.dot')
 
index 4411aa4..b1b9188 100644 (file)
@@ -1,11 +1,11 @@
 digraph finite_state_machine {
-       node [shape = doublecircle]; 2
+       node [shape = doublecircle]; 2 5 6
        node [shape = circle]; 0 1 3 4 -1
        0 -> 1 [label = "b"];
        1 -> 2 [label = "d"];
        3 -> 4 [label = "a"];
-       4 -> 2 [label = "d"];
-       4 -> 2 [label = "e"];
+       4 -> 5 [label = "d"];
+       4 -> 6 [label = "e"];
        -1 -> 0 [label = "a"];
        -1 -> 3 [label = "b"];
 }
\ No newline at end of file