From e5ea9e18ee875eb99c088e027d2f4ae789007e1a Mon Sep 17 00:00:00 2001 From: Mart Lubbers Date: Fri, 18 Jul 2014 14:15:57 +0200 Subject: [PATCH] fadsda --- program/regexex/dafsa.py | 100 ++++++++++++++++++++++++++------------- program/regexex/f.dot | 6 +-- 2 files changed, 71 insertions(+), 35 deletions(-) diff --git a/program/regexex/dafsa.py b/program/regexex/dafsa.py index f855c7a..af9542d 100644 --- a/program/regexex/dafsa.py +++ b/program/regexex/dafsa.py @@ -9,18 +9,38 @@ class Afda(): pass def common_prefix_p(self, cons, word, res, c_node, c_index, cur): + """Finds the common prefix within a network + + Required arguments: + cons -- connection dictionary + word -- word to which to find the prefix + res -- results list + c_node -- current node + c_index -- current word index + cur -- current prefix + """ + # If the end of the word is reached there are no children to explore if c_index >= len(word): childs = [] + # End is not reached, find all the children with the correct letter else: childs = [c for c in cons[c_node] if c[0] == word[c_index]] + # If there are no childs left, append the result if not childs or c_index >= len(word): res.append(cur + [c_node]) + # Search through all the children for longer paths else: for child in childs: self.common_prefix_p(cons, word, res, child[1], c_index+1, cur+[(c_node, child)]) def common_prefix(self, cons, word): + """Find the common prefix within a network + + Required arguments: + cons -- connection dictionary + word -- word to which to find the prefix + """ results = [] self.common_prefix_p(cons, word, results, -1, 0, []) return max(results, key=lambda x: len(x)) @@ -96,71 +116,87 @@ class Afda(): def new_num(self, connections): return max(connections.keys())+1 - def add_suffix(self, connections, last_state, current_suffix, first=False): - if first: - this = current_suffix - that = None - else: - this = current_suffix[:-1] - that = current_suffix[-1] - for c in this: + def add_suffix(self, connections, last_state, current_suffix): + suffix = [] + for c in current_suffix: newnum = self.new_num(connections) connections[last_state].append((c, newnum)) connections[newnum] = [] + suffix.append((last_state, (c, newnum))) last_state = newnum - if that: - endstate = [i for i in connections.iteritems() if not i[1]][0] - connections[last_state].append((that, endstate[0])) + return suffix + + def replace_or_register(self, connections, state, symbol): + pass def train(self, wlist): -# register = None +# 0. start + register = set() # connections = {-1: [('a', 2), ('b', 4)], 2: [('b', 3)], 3: [('d', 5)], # 4: [('a', 3)], 5: []} connections = {-1: []} - first = True for word in wlist: - if first: - self.add_suffix(connections, -1, word, True) - first = False print word -# Find the common prefix and suffix and skip if word is in the adfa +# v1. Find the common prefix and suffix and skip if word is in the adfa common_prefix = self.common_prefix(connections, word) current_suffix = word[len(common_prefix)-1:] print 'common_prefix: {}'.format(common_prefix) print 'current_suffix: {}'.format(current_suffix) - if not current_suffix: + if not current_suffix and not connections[common_prefix[-1]]: continue -# Find the first confluence state in the common prefix path +# v2. Find the first confluence state in the common prefix path first_state = self.first_cstate(connections, common_prefix) print 'first_state: {}'.format(first_state) - if first_state: + if not first_state: + print 'not cloning' + last_state = common_prefix[-1] + else: + print 'cloning' last_state = self.clone( connections, common_prefix, first_state) - else: - last_state = common_prefix[-1] print 'last_state: {}'.format(last_state) - self.add_suffix(connections, last_state, current_suffix) - -# If there is a confluence state and there has been cloning, loop through the -# new states backwards and replace or register the states + print 'adding suffix: {}'.format(current_suffix) + suffix = self.add_suffix(connections, last_state, current_suffix) + print 'suffix added: {}'.format(suffix) + path = [c[0] for c in common_prefix + suffix + if not isinstance(c, int)] + print 'path: {}'.format(path) + +# 3a. If there is a confluence state and there has been cloning, loop through +# the new states backwards and replace or register the states if first_state: print 'consider replace register clone' first_state = self.first_cstate(connections, common_prefix) print 'first_state {}'.format(first_state) current_index = common_prefix.index(first_state) - print range(len(common_prefix, current_index, -1)) + print 'range: ', range(len(common_prefix, current_index, -1)) for i in range(len(common_prefix, current_index, -1)): pass -# There is no confluence state so the current index will be the last of the -# common prefix +# v3b. There is no confluence state so the current index will be the last of +# the common prefix else: current_index = len(common_prefix) -# While we change things try to find equivalent states in the suffix to +# 4. While we change things try to find equivalent states in the suffix to # replace or register - #changed = True - raw_input('continue...\n') + changed = True + while changed: + current_index -= 1 + current_state = None + current_state = path[current_index] + print 'current state: {}'.format(current_state) + old_state = last_state + if current_index > 0: + register = register - {last_state} +# self.replace_or_register(connections, current_state) + changed = old_state != last_state + +# v5. If there has been no change and there are characters left + if not changed and current_index > 0: + register = register.union({current_state}) + import pdb + pdb.set_trace() self.graphviz(connections, 'f.dot') diff --git a/program/regexex/f.dot b/program/regexex/f.dot index 4411aa4..b1b9188 100644 --- a/program/regexex/f.dot +++ b/program/regexex/f.dot @@ -1,11 +1,11 @@ digraph finite_state_machine { - node [shape = doublecircle]; 2 + node [shape = doublecircle]; 2 5 6 node [shape = circle]; 0 1 3 4 -1 0 -> 1 [label = "b"]; 1 -> 2 [label = "d"]; 3 -> 4 [label = "a"]; - 4 -> 2 [label = "d"]; - 4 -> 2 [label = "e"]; + 4 -> 5 [label = "d"]; + 4 -> 6 [label = "e"]; -1 -> 0 [label = "a"]; -1 -> 3 [label = "b"]; } \ No newline at end of file -- 2.20.1