childs = []
# End is not reached, find all the children with the correct letter
else:
- childs = [c for c in cons[c_node] if c[0] == word[c_index]]
+ childs = [c for c in cons[c_node][0] if c[0] == word[c_index]]
# If there are no childs left, append the result
if not childs or c_index >= len(word):
res.append(cur + [c_node])
connections -- connections dictionary
common_prefix -- common prefix
"""
- print common_prefix
for node in common_prefix:
- print node
if not isinstance(node, int):
node = node[1][1]
num = 0
- for c, n in connections.iteritems():
+ for c, (n, _) in connections.iteritems():
if node in [m[1] for m in n]:
num += 1
if num > 1:
return node
- def find_value(self, connections, from_node, to_node):
- """Find the value from a node to another
-
- Required arguments:
- connections -- connections dictionary
- from_node -- from
- to_node -- to
- """
- return [c for c in connections[from_node] if c[1] == to_node][0][0]
-
- def clone_p(self, connections, point):
- """Clone everything from this point on
-
- Required arguments:
- connections -- connections dictionary
- point -- point to clone from
- """
- # Translation dictionary for the new points
- newnames = {point[0]: point[0], point[1]: self.new_num(connections)}
- # Fix point all the endpoints(should be only one
- for c, n in connections.iteritems():
- if len(n) == 0:
- newnames[c] = c
- # Make the first connection
- connections[point[0]].append(
- (self.find_value(connections, point[0], point[1]),
- newnames[point[1]]))
- # Add the first to-clone point to the frontier and give it a new node
- toclone = [point[1]]
- connections[newnames[point[1]]] = []
- # While there are still nodes to be cloned, add them and visit their
- # children
- while toclone:
- to = toclone.pop()
- for child in connections[to]:
- if child[1] not in newnames:
- # If the node is an endpoint, dont create new node
- if not connections[child[1]]:
- newnames[child[1]] = newnames[child[1]]
- # If the node is not an endpoint create a new node and add
- # it to the translation dictionary
- else:
- newnames[child[1]] = self.new_num(connections)
- connections[newnames[child[1]]] = []
- toclone.append(child[1])
- # Make the connection
- connections[newnames[to]].append(
- (child[0], newnames[child[1]]))
- return newnames[point[1]]
-
def clone(self, connections, states, from_node):
- """Clone a subtree
-
- Required arguments:
- connections -- connections dictionary
- states -- states to clone
- from_node -- node to start from
- """
- # Remember the last node number
- last_node = states[0] if isinstance(states[0], int) else states[0][0]
- # While the node is not found
- for c in states:
- # Derive the node number
- node = c if isinstance(c, int) else c[0]
- # If the node is the one we search for, search for its parent and
- # start cloning
- if node == from_node:
- for i, rem in enumerate(connections[last_node]):
- if rem[1] == node:
- result = self.clone_p(connections, (last_node, rem[1]))
- del(connections[last_node][i])
- return result
- last_node = node
+ print 'clone: {} from {}'.format(states, from_node)
+ newnames = {}
+ last_state = None
+ for i, c in enumerate(states):
+ node_number = c if isinstance(c, int) else c[0]
+ if node_number == from_node:
+ newnames[node_number] = self.new_num(connections)
+ connections[newnames[node_number]] =\
+ ([], connections[node_number][1])
+ new_con = (connections[last_state[0]][0][0][0],
+ newnames[node_number])
+ connections[last_state[0]] =\
+ ([new_con], connections[last_state[0]][1])
+ last_state = newnames[node_number]
+ break
+ last_state = node_number, connections[node_number][0]
+ toclone = [newnames.keys()[0]]
+ for node, (cs, _) in connections.iteritems():
+ if not cs:
+ newnames[node] = node
+ while toclone:
+ current = toclone.pop()
+ for symbol, childnode in connections[current][0]:
+ #New point, clone
+ if childnode not in newnames:
+ newnames[childnode] = self.new_num(connections)
+ if newnames[current] not in connections:
+ connections[newnames[current]] =\
+ ([], connections[current][1])
+ connections[newnames[current]][0].append(
+ (symbol, newnames[childnode]))
+ toclone.append(childnode)
+ for i, state in enumerate(states[:]):
+ if isinstance(state, int):
+ states[i] = newnames.get(state, state)
+ else:
+ fr, (sym, to) = state
+ states[i] = (newnames.get(fr, fr), (sym, newnames.get(to, to)))
+ return states, states[-1]
def new_num(self, connections):
"""Generate a new node number
connections -- connections dictionary
tra -- parent, (symbol, state)
"""
- for c in sorted(connections.iteritems()):
- print c
- print 'replace or register: {}'.format(tra)
parent, (symbol, state) = tra
- print 'parent, symbol, state = {} {} {}'.format(parent, symbol, state)
replace = None
- for cparent, children in connections.iteritems():
+ for cparent, (children, _) in connections.iteritems():
for csymbol, cstate in children:
if symbol == csymbol and state == cstate and parent != cparent:
replace = (cparent, (csymbol, cstate))
- print 'found possible replace match: {}'.format(replace)
-# matches = [c for c in connections[parent]
-# if c[0] == symbol and c[1] != state]
if replace:
# Delete the child
# Register the child to the existing state
- print 'replacing {} with {}'.format(tra, replace)
+ for node, (conns, _) in connections.items():
+ for enum, (symbolchild, nodechild) in\
+ reversed(list(enumerate(conns[:]))):
+ if nodechild == tra[0]:
+ del(connections[node][0][enum])
+ connections[node][0].insert(
+ enum, (symbolchild, replace[0]))
+ for enum, child in\
+ reversed(list(enumerate(connections[tra[0]][0]))):
+ if child == tra[1]:
+ del(connections[tra[0]][0][enum])
+ if not connections[tra[0]][0]:
+ del(connections[tra[0]])
return replace
else:
# It's already in the register, just go on
- print 'registering'
return tra
def add_suffix(self, connections, last_state, current_suffix):
"""
suffix = []
endpoints = [i[0] for i in connections.iteritems()
- if not i[1] and i[0] != -1]
+ if i[1][1]]
for i, c in enumerate(current_suffix):
if i == len(current_suffix)-1:
- print 'last_state reached in adding suffix {}'.format(c)
- print 'endpoints: {}'.format(endpoints)
if not endpoints:
- print 'no end point yet'
newnum = self.new_num(connections)
- connections[newnum] = []
+ connections[newnum] = ([], True)
else:
- print 'end point found'
- newnum = endpoints[0]
+ newnum = min(endpoints)
else:
newnum = self.new_num(connections)
- connections[newnum] = []
- connections[last_state].append((c, newnum))
+ connections[newnum] = ([], False)
+ connections[last_state][0].append((c, newnum))
suffix.append((last_state, (c, newnum)))
last_state = newnum
- print 'registering or replacing, add_suffix'
prev = None
- for i, tra in list(reversed(list(enumerate(suffix)))):
+ for i, tra in list(reversed(list(enumerate(suffix[1:])))):
if prev:
- connections[tra[0]] = [(tra[1][0], prev[0])]
+ tra = (tra[0], (tra[1][0], prev[0]))
prev = None
- print '\nfrom {}'.format(tra)
tra2 = self.replace_or_register(connections, tra)
if tra2 != tra:
prev = tra2
suffix[i] = tra2
- del(connections[tra[0]])
- print 'to {}'.format(tra2)
return suffix
def train(self, wlist):
-# 0. start
- register = set()
+# v0. start
# connections = {-1: [('a', 2), ('b', 4)], 2: [('b', 3)], 3: [('d', 5)],
# 4: [('a', 3)], 5: []}
- connections = {-1: []}
+ connections = {-1: ([], False)}
for word in wlist:
- print word
+# v0. prepare
+ raw_input('Press enter to train the next word\n\n')
+ print 'word := {}'.format(word)
+
# v1. Find the common prefix and suffix and skip if word is in the adfa
- print '\tI\n'
common_prefix = self.common_prefix(connections, word)
+ print 'CommonPrefix := {}'.format(common_prefix)
current_suffix = word[len(common_prefix)-1:]
- print 'common_prefix: {}'.format(common_prefix)
- print 'current_suffix: {}'.format(current_suffix)
- if not current_suffix and not connections[common_prefix[-1]]:
+ print 'CurrentSuffix := {}'.format(current_suffix)
+ print 'if CurrentSuffix = ...'
+ if not current_suffix:
+ if not connections[common_prefix[-1]][1]:
+ connections[common_prefix[-1]] =\
+ (connections[common_prefix[-1]][0], True)
+ print 'continue'
continue
# v2. Find the first confluence state in the common prefix path
- print '\tII\n'
first_state = self.first_cstate(connections, common_prefix)
- print 'first_state: {}'.format(first_state)
+ print 'FirstState := {}'.format(first_state)
+ print 'if FirstState = ...'
if not first_state:
- print 'not cloning'
last_state = common_prefix[-1]
+ print 'LastState := {}'.format(last_state)
else:
- print 'cloning'
- last_state = self.clone(
+ commen_prefix, last_state = self.clone(
connections, common_prefix, first_state)
- print 'adding suffix: {}'.format(current_suffix)
+ print 'LastState := {} (cloned new common prefix: {}'.format(
+ last_state, common_prefix)
suffix = self.add_suffix(connections, last_state, current_suffix)
- print 'suffix added: {}'.format(suffix)
- path = [c[0] for c in common_prefix + suffix
- if not isinstance(c, int)]
- print 'path: {}'.format(path)
+ print 'add_suffix({}, {})'.format(last_state, current_suffix)
# 3a. If there is a confluence state and there has been cloning, loop through
# the new states backwards and replace or register the states
- print '\tIII\n'
+ print 'if FirstState is not ...'
if first_state:
- continue
- print 'consider replace register clone'
first_state = self.first_cstate(connections, common_prefix)
- print 'first_state {}'.format(first_state)
- current_index = common_prefix.index(first_state)
- print 'range: ', range(len(common_prefix, current_index, -1))
- for i in range(len(common_prefix, current_index, -1)):
- pass
+ print 'First_state := {}'.format(first_state)
+ if first_state:
+ current_index = None
+ for enum, c in enumerate(common_prefix):
+ if c[0] == first_state:
+ current_index = enum
+ break
+ print 'CurrentIndex := {}'.format(current_index)
+# for ind in range(len(common_prefix)-1, current_index-1,
+# -1): print 'index: {} maps to {}'.format(ind,
+# common_prefix[ind])
+# self.clone(connections, common_prefix[:ind],
+# first_state)
+# break
+# #Do stuff
+# pass
+ current_index = len(common_prefix)
# v3b. There is no confluence state so the current index will be the last of
# the common prefix
else:
current_index = len(common_prefix)
+ print 'CurrentIndex := {}'.format(current_index)
# 4. While we change things try to find equivalent states in the suffix to
# replace or register
- print '\tIV\n'
+ print 'part IV: pref: {}, suf: {}'.format(common_prefix,
+ suffix)
changed = True
while changed:
+ break
print 'current_index: {}'.format(current_index)
print 'last_state: {}'.format(last_state)
current_index -= 1
current_state = None
- current_state = path[current_index]
+# current_state = path[current_index]
print 'current state: {}'.format(current_state)
old_state = last_state
if current_index > 0:
print 'replace or register changing state'
- register = register - {last_state}
+# register = register - {last_state}
# self.replace_or_register(connections, current_state)
changed = old_state != last_state
# v5. If there has been no change and there are characters left
- print '\tV\n'
if not changed and current_index > 0:
- register = register.union({current_state})
- import pdb
- pdb.set_trace()
+ pass
+# register = register.union({current_state})
self.graphviz(connections, 'f.dot')
fout = sys.stdout if fp == '-' else open(fp, 'w')
fout.write('digraph finite_state_machine {\n')
fout.write('\tnode [shape = doublecircle]; {}\n'.format(
- ' '.join(str(c) for c in connections if not connections[c])))
+ ' '.join(str(c) for c in connections if connections[c][1])))
fout.write('\tnode [shape = circle]; {}\n'.format(
- ' '.join(str(c) for c in connections if connections[c])))
- for node, targets in connections.iteritems():
+ ' '.join(str(c) for c in connections if not connections[c][1])))
+ for node, (targets, _) in connections.iteritems():
for target in targets:
fout.write('\t{} -> {} [label = "{}"];\n'.format(
node, target[1], target[0]))
if __name__ == '__main__':
afda = Afda()
- afda.train(['abd', 'bad', 'bae'])
+# afda.train(['abcde', 'fghde', 'fghdghde'])
+ afda.train([['wie', '_', 'h', '_', 'wat'],
+ ['wie', '_', 'h', '_', 'wat', '_', 'h', '_', 'wanneer'],
+ ['wie', '_', 'h', '_', 'wat']])
+# afda.train(['abd', 'ab', 'bad', 'bae'])