pass
def common_prefix_p(self, cons, word, res, c_node, c_index, cur):
+ """Finds the common prefix within a network
+
+ Required arguments:
+ cons -- connection dictionary
+ word -- word to which to find the prefix
+ res -- results list
+ c_node -- current node
+ c_index -- current word index
+ cur -- current prefix
+ """
+ # If the end of the word is reached there are no children to explore
if c_index >= len(word):
childs = []
+ # End is not reached, find all the children with the correct letter
else:
childs = [c for c in cons[c_node] if c[0] == word[c_index]]
+ # If there are no childs left, append the result
if not childs or c_index >= len(word):
res.append(cur + [c_node])
+ # Search through all the children for longer paths
else:
for child in childs:
self.common_prefix_p(cons, word, res, child[1], c_index+1,
cur+[(c_node, child)])
def common_prefix(self, cons, word):
+ """Find the common prefix within a network
+
+ Required arguments:
+ cons -- connection dictionary
+ word -- word to which to find the prefix
+ """
results = []
self.common_prefix_p(cons, word, results, -1, 0, [])
return max(results, key=lambda x: len(x))
def new_num(self, connections):
return max(connections.keys())+1
- def add_suffix(self, connections, last_state, current_suffix, first=False):
- if first:
- this = current_suffix
- that = None
- else:
- this = current_suffix[:-1]
- that = current_suffix[-1]
- for c in this:
+ def add_suffix(self, connections, last_state, current_suffix):
+ suffix = []
+ for c in current_suffix:
newnum = self.new_num(connections)
connections[last_state].append((c, newnum))
connections[newnum] = []
+ suffix.append((last_state, (c, newnum)))
last_state = newnum
- if that:
- endstate = [i for i in connections.iteritems() if not i[1]][0]
- connections[last_state].append((that, endstate[0]))
+ return suffix
+
+ def replace_or_register(self, connections, state, symbol):
+ pass
def train(self, wlist):
-# register = None
+# 0. start
+ register = set()
# connections = {-1: [('a', 2), ('b', 4)], 2: [('b', 3)], 3: [('d', 5)],
# 4: [('a', 3)], 5: []}
connections = {-1: []}
- first = True
for word in wlist:
- if first:
- self.add_suffix(connections, -1, word, True)
- first = False
print word
-# Find the common prefix and suffix and skip if word is in the adfa
+# v1. Find the common prefix and suffix and skip if word is in the adfa
common_prefix = self.common_prefix(connections, word)
current_suffix = word[len(common_prefix)-1:]
print 'common_prefix: {}'.format(common_prefix)
print 'current_suffix: {}'.format(current_suffix)
- if not current_suffix:
+ if not current_suffix and not connections[common_prefix[-1]]:
continue
-# Find the first confluence state in the common prefix path
+# v2. Find the first confluence state in the common prefix path
first_state = self.first_cstate(connections, common_prefix)
print 'first_state: {}'.format(first_state)
- if first_state:
+ if not first_state:
+ print 'not cloning'
+ last_state = common_prefix[-1]
+ else:
+ print 'cloning'
last_state = self.clone(
connections, common_prefix, first_state)
- else:
- last_state = common_prefix[-1]
print 'last_state: {}'.format(last_state)
- self.add_suffix(connections, last_state, current_suffix)
-
-# If there is a confluence state and there has been cloning, loop through the
-# new states backwards and replace or register the states
+ print 'adding suffix: {}'.format(current_suffix)
+ suffix = self.add_suffix(connections, last_state, current_suffix)
+ print 'suffix added: {}'.format(suffix)
+ path = [c[0] for c in common_prefix + suffix
+ if not isinstance(c, int)]
+ print 'path: {}'.format(path)
+
+# 3a. If there is a confluence state and there has been cloning, loop through
+# the new states backwards and replace or register the states
if first_state:
print 'consider replace register clone'
first_state = self.first_cstate(connections, common_prefix)
print 'first_state {}'.format(first_state)
current_index = common_prefix.index(first_state)
- print range(len(common_prefix, current_index, -1))
+ print 'range: ', range(len(common_prefix, current_index, -1))
for i in range(len(common_prefix, current_index, -1)):
pass
-# There is no confluence state so the current index will be the last of the
-# common prefix
+# v3b. There is no confluence state so the current index will be the last of
+# the common prefix
else:
current_index = len(common_prefix)
-# While we change things try to find equivalent states in the suffix to
+# 4. While we change things try to find equivalent states in the suffix to
# replace or register
- #changed = True
- raw_input('continue...\n')
+ changed = True
+ while changed:
+ current_index -= 1
+ current_state = None
+ current_state = path[current_index]
+ print 'current state: {}'.format(current_state)
+ old_state = last_state
+ if current_index > 0:
+ register = register - {last_state}
+# self.replace_or_register(connections, current_state)
+ changed = old_state != last_state
+
+# v5. If there has been no change and there are characters left
+ if not changed and current_index > 0:
+ register = register.union({current_state})
+ import pdb
+ pdb.set_trace()
self.graphviz(connections, 'f.dot')