From c2c8d14d71e262aa7c57dac9aa8ffdd3fc6b6f3a Mon Sep 17 00:00:00 2001 From: Mart Lubbers Date: Mon, 21 Jul 2014 14:48:51 +0200 Subject: [PATCH] replace and register done --- program/regexex/dafsa.py | 115 ++++++++++++++++++++++++++++++++++++--- program/regexex/f.dot | 6 +- program/regexex/log.txt | 1 + 3 files changed, 111 insertions(+), 11 deletions(-) create mode 100644 program/regexex/log.txt diff --git a/program/regexex/dafsa.py b/program/regexex/dafsa.py index af9542d..d9d4766 100644 --- a/program/regexex/dafsa.py +++ b/program/regexex/dafsa.py @@ -46,6 +46,12 @@ class Afda(): return max(results, key=lambda x: len(x)) def first_cstate(self, connections, common_prefix): + """Find the first confluence state within the states + + Required arguments: + connections -- connections dictionary + common_prefix -- common prefix + """ print common_prefix for node in common_prefix: print node @@ -59,10 +65,22 @@ class Afda(): return node def find_value(self, connections, from_node, to_node): + """Find the value from a node to another + + Required arguments: + connections -- connections dictionary + from_node -- from + to_node -- to + """ return [c for c in connections[from_node] if c[1] == to_node][0][0] def clone_p(self, connections, point): - """clone everything from this point on""" + """Clone everything from this point on + + Required arguments: + connections -- connections dictionary + point -- point to clone from + """ # Translation dictionary for the new points newnames = {point[0]: point[0], point[1]: self.new_num(connections)} # Fix point all the endpoints(should be only one @@ -97,6 +115,13 @@ class Afda(): return newnames[point[1]] def clone(self, connections, states, from_node): + """Clone a subtree + + Required arguments: + connections -- connections dictionary + states -- states to clone + from_node -- node to start from + """ # Remember the last node number last_node = states[0] if isinstance(states[0], int) else states[0][0] # While the node is not found @@ -114,20 +139,86 @@ class Afda(): last_node = node def new_num(self, connections): + """Generate a new node number + + Required arguments: + connections -- connections dictionary + """ return max(connections.keys())+1 + def replace_or_register(self, connections, tra): + """Replace or register the newly created state, will return the parent + + Required arguments: + connections -- connections dictionary + tra -- parent, (symbol, state) + """ + for c in sorted(connections.iteritems()): + print c + print 'replace or register: {}'.format(tra) + parent, (symbol, state) = tra + print 'parent, symbol, state = {} {} {}'.format(parent, symbol, state) + replace = None + for cparent, children in connections.iteritems(): + for csymbol, cstate in children: + if symbol == csymbol and state == cstate and parent != cparent: + replace = (cparent, (csymbol, cstate)) + print 'found possible replace match: {}'.format(replace) +# matches = [c for c in connections[parent] +# if c[0] == symbol and c[1] != state] + if replace: + # Delete the child + # Register the child to the existing state + print 'replacing {} with {}'.format(tra, replace) + return replace + else: + # It's already in the register, just go on + print 'registering' + return tra + def add_suffix(self, connections, last_state, current_suffix): + """Add a suffix to the graph + + Required arguments: + connections -- connections dictionary + last_state -- last state of the common prefix + current_suffix -- suffix to be added + """ suffix = [] - for c in current_suffix: - newnum = self.new_num(connections) + endpoints = [i[0] for i in connections.iteritems() + if not i[1] and i[0] != -1] + for i, c in enumerate(current_suffix): + if i == len(current_suffix)-1: + print 'last_state reached in adding suffix {}'.format(c) + print 'endpoints: {}'.format(endpoints) + if not endpoints: + print 'no end point yet' + newnum = self.new_num(connections) + connections[newnum] = [] + else: + print 'end point found' + newnum = endpoints[0] + else: + newnum = self.new_num(connections) + connections[newnum] = [] connections[last_state].append((c, newnum)) - connections[newnum] = [] suffix.append((last_state, (c, newnum))) last_state = newnum - return suffix - def replace_or_register(self, connections, state, symbol): - pass + print 'registering or replacing, add_suffix' + prev = None + for i, tra in list(reversed(list(enumerate(suffix)))): + if prev: + connections[tra[0]] = [(tra[1][0], prev[0])] + prev = None + print '\nfrom {}'.format(tra) + tra2 = self.replace_or_register(connections, tra) + if tra2 != tra: + prev = tra2 + suffix[i] = tra2 + del(connections[tra[0]]) + print 'to {}'.format(tra2) + return suffix def train(self, wlist): # 0. start @@ -138,6 +229,7 @@ class Afda(): for word in wlist: print word # v1. Find the common prefix and suffix and skip if word is in the adfa + print '\tI\n' common_prefix = self.common_prefix(connections, word) current_suffix = word[len(common_prefix)-1:] print 'common_prefix: {}'.format(common_prefix) @@ -146,6 +238,7 @@ class Afda(): continue # v2. Find the first confluence state in the common prefix path + print '\tII\n' first_state = self.first_cstate(connections, common_prefix) print 'first_state: {}'.format(first_state) if not first_state: @@ -155,7 +248,6 @@ class Afda(): print 'cloning' last_state = self.clone( connections, common_prefix, first_state) - print 'last_state: {}'.format(last_state) print 'adding suffix: {}'.format(current_suffix) suffix = self.add_suffix(connections, last_state, current_suffix) print 'suffix added: {}'.format(suffix) @@ -165,7 +257,9 @@ class Afda(): # 3a. If there is a confluence state and there has been cloning, loop through # the new states backwards and replace or register the states + print '\tIII\n' if first_state: + continue print 'consider replace register clone' first_state = self.first_cstate(connections, common_prefix) print 'first_state {}'.format(first_state) @@ -180,19 +274,24 @@ class Afda(): # 4. While we change things try to find equivalent states in the suffix to # replace or register + print '\tIV\n' changed = True while changed: + print 'current_index: {}'.format(current_index) + print 'last_state: {}'.format(last_state) current_index -= 1 current_state = None current_state = path[current_index] print 'current state: {}'.format(current_state) old_state = last_state if current_index > 0: + print 'replace or register changing state' register = register - {last_state} # self.replace_or_register(connections, current_state) changed = old_state != last_state # v5. If there has been no change and there are characters left + print '\tV\n' if not changed and current_index > 0: register = register.union({current_state}) import pdb diff --git a/program/regexex/f.dot b/program/regexex/f.dot index b1b9188..4411aa4 100644 --- a/program/regexex/f.dot +++ b/program/regexex/f.dot @@ -1,11 +1,11 @@ digraph finite_state_machine { - node [shape = doublecircle]; 2 5 6 + node [shape = doublecircle]; 2 node [shape = circle]; 0 1 3 4 -1 0 -> 1 [label = "b"]; 1 -> 2 [label = "d"]; 3 -> 4 [label = "a"]; - 4 -> 5 [label = "d"]; - 4 -> 6 [label = "e"]; + 4 -> 2 [label = "d"]; + 4 -> 2 [label = "e"]; -1 -> 0 [label = "a"]; -1 -> 3 [label = "b"]; } \ No newline at end of file diff --git a/program/regexex/log.txt b/program/regexex/log.txt new file mode 100644 index 0000000..17b8693 --- /dev/null +++ b/program/regexex/log.txt @@ -0,0 +1 @@ +python: can't open file 'voca.py': [Errno 2] No such file or directory -- 2.20.1