replace and register done
authorMart Lubbers <mart@martlubbers.net>
Mon, 21 Jul 2014 12:48:51 +0000 (14:48 +0200)
committerMart Lubbers <mart@martlubbers.net>
Mon, 21 Jul 2014 12:48:51 +0000 (14:48 +0200)
program/regexex/dafsa.py
program/regexex/f.dot
program/regexex/log.txt [new file with mode: 0644]

index af9542d..d9d4766 100644 (file)
@@ -46,6 +46,12 @@ class Afda():
         return max(results, key=lambda x: len(x))
 
     def first_cstate(self, connections, common_prefix):
+        """Find the first confluence state within the states
+
+        Required arguments:
+        connections   -- connections dictionary
+        common_prefix -- common prefix
+        """
         print common_prefix
         for node in common_prefix:
             print node
@@ -59,10 +65,22 @@ class Afda():
                 return node
 
     def find_value(self, connections, from_node, to_node):
+        """Find the value from a node to another
+
+        Required arguments:
+        connections -- connections dictionary
+        from_node   -- from
+        to_node     -- to
+        """
         return [c for c in connections[from_node] if c[1] == to_node][0][0]
 
     def clone_p(self, connections, point):
-        """clone everything from this point on"""
+        """Clone everything from this point on
+
+        Required arguments:
+        connections -- connections dictionary
+        point       -- point to clone from
+        """
         # Translation dictionary for the new points
         newnames = {point[0]: point[0], point[1]: self.new_num(connections)}
         # Fix point all the endpoints(should be only one
@@ -97,6 +115,13 @@ class Afda():
         return newnames[point[1]]
 
     def clone(self, connections, states, from_node):
+        """Clone a subtree
+
+        Required arguments:
+        connections  -- connections dictionary
+        states       -- states to clone
+        from_node    -- node to start from
+        """
         # Remember the last node number
         last_node = states[0] if isinstance(states[0], int) else states[0][0]
         # While the node is not found
@@ -114,20 +139,86 @@ class Afda():
             last_node = node
 
     def new_num(self, connections):
+        """Generate a new node number
+
+        Required arguments:
+        connections -- connections dictionary
+        """
         return max(connections.keys())+1
 
+    def replace_or_register(self, connections, tra):
+        """Replace or register the newly created state, will return the parent
+
+        Required arguments:
+        connections -- connections dictionary
+        tra         -- parent, (symbol, state)
+        """
+        for c in sorted(connections.iteritems()):
+            print c
+        print 'replace or register: {}'.format(tra)
+        parent, (symbol, state) = tra
+        print 'parent, symbol, state = {} {} {}'.format(parent, symbol, state)
+        replace = None
+        for cparent, children in connections.iteritems():
+            for csymbol, cstate in children:
+                if symbol == csymbol and state == cstate and parent != cparent:
+                    replace = (cparent, (csymbol, cstate))
+                    print 'found possible replace match: {}'.format(replace)
+#        matches = [c for c in connections[parent]
+#                   if c[0] == symbol and c[1] != state]
+        if replace:
+            # Delete the child
+            # Register the child to the existing state
+            print 'replacing {} with {}'.format(tra, replace)
+            return replace
+        else:
+            # It's already in the register, just go on
+            print 'registering'
+            return tra
+
     def add_suffix(self, connections, last_state, current_suffix):
+        """Add a suffix to the graph
+
+        Required arguments:
+        connections    -- connections dictionary
+        last_state     -- last state of the common prefix
+        current_suffix -- suffix to be added
+        """
         suffix = []
-        for c in current_suffix:
-            newnum = self.new_num(connections)
+        endpoints = [i[0] for i in connections.iteritems()
+                     if not i[1] and i[0] != -1]
+        for i, c in enumerate(current_suffix):
+            if i == len(current_suffix)-1:
+                print 'last_state reached in adding suffix {}'.format(c)
+                print 'endpoints: {}'.format(endpoints)
+                if not endpoints:
+                    print 'no end point yet'
+                    newnum = self.new_num(connections)
+                    connections[newnum] = []
+                else:
+                    print 'end point found'
+                    newnum = endpoints[0]
+            else:
+                newnum = self.new_num(connections)
+                connections[newnum] = []
             connections[last_state].append((c, newnum))
-            connections[newnum] = []
             suffix.append((last_state, (c, newnum)))
             last_state = newnum
-        return suffix
 
-    def replace_or_register(self, connections, state, symbol):
-        pass
+        print 'registering or replacing, add_suffix'
+        prev = None
+        for i, tra in list(reversed(list(enumerate(suffix)))):
+            if prev:
+                connections[tra[0]] = [(tra[1][0], prev[0])]
+                prev = None
+            print '\nfrom {}'.format(tra)
+            tra2 = self.replace_or_register(connections, tra)
+            if tra2 != tra:
+                prev = tra2
+                suffix[i] = tra2
+                del(connections[tra[0]])
+            print 'to {}'.format(tra2)
+        return suffix
 
     def train(self, wlist):
 # 0. start
@@ -138,6 +229,7 @@ class Afda():
         for word in wlist:
             print word
 # v1. Find the common prefix and suffix and skip if word is in the adfa
+            print '\tI\n'
             common_prefix = self.common_prefix(connections, word)
             current_suffix = word[len(common_prefix)-1:]
             print 'common_prefix: {}'.format(common_prefix)
@@ -146,6 +238,7 @@ class Afda():
                 continue
 
 # v2. Find the first confluence state in the common prefix path
+            print '\tII\n'
             first_state = self.first_cstate(connections, common_prefix)
             print 'first_state: {}'.format(first_state)
             if not first_state:
@@ -155,7 +248,6 @@ class Afda():
                 print 'cloning'
                 last_state = self.clone(
                     connections, common_prefix, first_state)
-            print 'last_state: {}'.format(last_state)
             print 'adding suffix: {}'.format(current_suffix)
             suffix = self.add_suffix(connections, last_state, current_suffix)
             print 'suffix added: {}'.format(suffix)
@@ -165,7 +257,9 @@ class Afda():
 
 # 3a. If there is a confluence state and there has been cloning, loop through
 # the new states backwards and replace or register the states
+            print '\tIII\n'
             if first_state:
+                continue
                 print 'consider replace register clone'
                 first_state = self.first_cstate(connections, common_prefix)
                 print 'first_state {}'.format(first_state)
@@ -180,19 +274,24 @@ class Afda():
 
 # 4. While we change things try to find equivalent states in the suffix to
 # replace or register
+            print '\tIV\n'
             changed = True
             while changed:
+                print 'current_index: {}'.format(current_index)
+                print 'last_state: {}'.format(last_state)
                 current_index -= 1
                 current_state = None
                 current_state = path[current_index]
                 print 'current state: {}'.format(current_state)
                 old_state = last_state
                 if current_index > 0:
+                    print 'replace or register changing state'
                     register = register - {last_state}
 #                self.replace_or_register(connections, current_state)
                 changed = old_state != last_state
 
 # v5. If there has been no change and there are characters left
+            print '\tV\n'
             if not changed and current_index > 0:
                 register = register.union({current_state})
             import pdb
index b1b9188..4411aa4 100644 (file)
@@ -1,11 +1,11 @@
 digraph finite_state_machine {
-       node [shape = doublecircle]; 2 5 6
+       node [shape = doublecircle]; 2
        node [shape = circle]; 0 1 3 4 -1
        0 -> 1 [label = "b"];
        1 -> 2 [label = "d"];
        3 -> 4 [label = "a"];
-       4 -> 5 [label = "d"];
-       4 -> 6 [label = "e"];
+       4 -> 2 [label = "d"];
+       4 -> 2 [label = "e"];
        -1 -> 0 [label = "a"];
        -1 -> 3 [label = "b"];
 }
\ No newline at end of file
diff --git a/program/regexex/log.txt b/program/regexex/log.txt
new file mode 100644 (file)
index 0000000..17b8693
--- /dev/null
@@ -0,0 +1 @@
+python: can't open file 'voca.py': [Errno 2] No such file or directory