return max(results, key=lambda x: len(x))
def first_cstate(self, connections, common_prefix):
+ """Find the first confluence state within the states
+
+ Required arguments:
+ connections -- connections dictionary
+ common_prefix -- common prefix
+ """
print common_prefix
for node in common_prefix:
print node
return node
def find_value(self, connections, from_node, to_node):
+ """Find the value from a node to another
+
+ Required arguments:
+ connections -- connections dictionary
+ from_node -- from
+ to_node -- to
+ """
return [c for c in connections[from_node] if c[1] == to_node][0][0]
def clone_p(self, connections, point):
- """clone everything from this point on"""
+ """Clone everything from this point on
+
+ Required arguments:
+ connections -- connections dictionary
+ point -- point to clone from
+ """
# Translation dictionary for the new points
newnames = {point[0]: point[0], point[1]: self.new_num(connections)}
# Fix point all the endpoints(should be only one
return newnames[point[1]]
def clone(self, connections, states, from_node):
+ """Clone a subtree
+
+ Required arguments:
+ connections -- connections dictionary
+ states -- states to clone
+ from_node -- node to start from
+ """
# Remember the last node number
last_node = states[0] if isinstance(states[0], int) else states[0][0]
# While the node is not found
last_node = node
def new_num(self, connections):
+ """Generate a new node number
+
+ Required arguments:
+ connections -- connections dictionary
+ """
return max(connections.keys())+1
+ def replace_or_register(self, connections, tra):
+ """Replace or register the newly created state, will return the parent
+
+ Required arguments:
+ connections -- connections dictionary
+ tra -- parent, (symbol, state)
+ """
+ for c in sorted(connections.iteritems()):
+ print c
+ print 'replace or register: {}'.format(tra)
+ parent, (symbol, state) = tra
+ print 'parent, symbol, state = {} {} {}'.format(parent, symbol, state)
+ replace = None
+ for cparent, children in connections.iteritems():
+ for csymbol, cstate in children:
+ if symbol == csymbol and state == cstate and parent != cparent:
+ replace = (cparent, (csymbol, cstate))
+ print 'found possible replace match: {}'.format(replace)
+# matches = [c for c in connections[parent]
+# if c[0] == symbol and c[1] != state]
+ if replace:
+ # Delete the child
+ # Register the child to the existing state
+ print 'replacing {} with {}'.format(tra, replace)
+ return replace
+ else:
+ # It's already in the register, just go on
+ print 'registering'
+ return tra
+
def add_suffix(self, connections, last_state, current_suffix):
+ """Add a suffix to the graph
+
+ Required arguments:
+ connections -- connections dictionary
+ last_state -- last state of the common prefix
+ current_suffix -- suffix to be added
+ """
suffix = []
- for c in current_suffix:
- newnum = self.new_num(connections)
+ endpoints = [i[0] for i in connections.iteritems()
+ if not i[1] and i[0] != -1]
+ for i, c in enumerate(current_suffix):
+ if i == len(current_suffix)-1:
+ print 'last_state reached in adding suffix {}'.format(c)
+ print 'endpoints: {}'.format(endpoints)
+ if not endpoints:
+ print 'no end point yet'
+ newnum = self.new_num(connections)
+ connections[newnum] = []
+ else:
+ print 'end point found'
+ newnum = endpoints[0]
+ else:
+ newnum = self.new_num(connections)
+ connections[newnum] = []
connections[last_state].append((c, newnum))
- connections[newnum] = []
suffix.append((last_state, (c, newnum)))
last_state = newnum
- return suffix
- def replace_or_register(self, connections, state, symbol):
- pass
+ print 'registering or replacing, add_suffix'
+ prev = None
+ for i, tra in list(reversed(list(enumerate(suffix)))):
+ if prev:
+ connections[tra[0]] = [(tra[1][0], prev[0])]
+ prev = None
+ print '\nfrom {}'.format(tra)
+ tra2 = self.replace_or_register(connections, tra)
+ if tra2 != tra:
+ prev = tra2
+ suffix[i] = tra2
+ del(connections[tra[0]])
+ print 'to {}'.format(tra2)
+ return suffix
def train(self, wlist):
# 0. start
for word in wlist:
print word
# v1. Find the common prefix and suffix and skip if word is in the adfa
+ print '\tI\n'
common_prefix = self.common_prefix(connections, word)
current_suffix = word[len(common_prefix)-1:]
print 'common_prefix: {}'.format(common_prefix)
continue
# v2. Find the first confluence state in the common prefix path
+ print '\tII\n'
first_state = self.first_cstate(connections, common_prefix)
print 'first_state: {}'.format(first_state)
if not first_state:
print 'cloning'
last_state = self.clone(
connections, common_prefix, first_state)
- print 'last_state: {}'.format(last_state)
print 'adding suffix: {}'.format(current_suffix)
suffix = self.add_suffix(connections, last_state, current_suffix)
print 'suffix added: {}'.format(suffix)
# 3a. If there is a confluence state and there has been cloning, loop through
# the new states backwards and replace or register the states
+ print '\tIII\n'
if first_state:
+ continue
print 'consider replace register clone'
first_state = self.first_cstate(connections, common_prefix)
print 'first_state {}'.format(first_state)
# 4. While we change things try to find equivalent states in the suffix to
# replace or register
+ print '\tIV\n'
changed = True
while changed:
+ print 'current_index: {}'.format(current_index)
+ print 'last_state: {}'.format(last_state)
current_index -= 1
current_state = None
current_state = path[current_index]
print 'current state: {}'.format(current_state)
old_state = last_state
if current_index > 0:
+ print 'replace or register changing state'
register = register - {last_state}
# self.replace_or_register(connections, current_state)
changed = old_state != last_state
# v5. If there has been no change and there are characters left
+ print '\tV\n'
if not changed and current_index > 0:
register = register.union({current_state})
import pdb