--- /dev/null
+#!/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+
+
+class Afda():
+ def __init__(self):
+ pass
+
+ def common_prefix_p(self, cons, word, res, c_node, c_index, cur):
+ if c_index >= len(word):
+ childs = []
+ else:
+ childs = [c for c in cons[c_node] if c[0] == word[c_index]]
+ if not childs or c_index >= len(word):
+ res.append(cur + [c_node])
+ else:
+ for child in childs:
+ self.common_prefix_p(cons, word, res, child[1], c_index+1,
+ cur+[(c_node, child)])
+
+ def common_prefix(self, cons, word):
+ results = []
+ self.common_prefix_p(cons, word, results, -1, 0, [])
+ return max(results, key=lambda x: len(x))
+
+ def first_cstate(self, connections, common_prefix):
+ print common_prefix
+ for node in common_prefix:
+ print node
+ if not isinstance(node, int):
+ node = node[1][1]
+ num = 0
+ for c, n in connections.iteritems():
+ if node in [m[1] for m in n]:
+ num += 1
+ if num > 1:
+ return node
+
+ def find_value(self, connections, from_node, to_node):
+ return [c for c in connections[from_node] if c[1] == to_node][0][0]
+
+ def clone_p(self, connections, point):
+ """clone everything from this point on"""
+ # Translation dictionary for the new points
+ newnames = {point[0]: point[0], point[1]: self.new_num(connections)}
+ # Fix point all the endpoints(should be only one
+ for c, n in connections.iteritems():
+ if len(n) == 0:
+ newnames[c] = c
+ # Make the first connection
+ connections[point[0]].append(
+ (self.find_value(connections, point[0], point[1]),
+ newnames[point[1]]))
+ # Add the first to-clone point to the frontier and give it a new node
+ toclone = [point[1]]
+ connections[newnames[point[1]]] = []
+ # While there are still nodes to be cloned, add them and visit their
+ # children
+ while toclone:
+ to = toclone.pop()
+ for child in connections[to]:
+ if child[1] not in newnames:
+ # If the node is an endpoint, dont create new node
+ if not connections[child[1]]:
+ newnames[child[1]] = newnames[child[1]]
+ # If the node is not an endpoint create a new node and add
+ # it to the translation dictionary
+ else:
+ newnames[child[1]] = self.new_num(connections)
+ connections[newnames[child[1]]] = []
+ toclone.append(child[1])
+ # Make the connection
+ connections[newnames[to]].append(
+ (child[0], newnames[child[1]]))
+ return newnames[point[1]]
+
+ def clone(self, connections, states, from_node):
+ # Remember the last node number
+ last_node = states[0] if isinstance(states[0], int) else states[0][0]
+ # While the node is not found
+ for c in states:
+ # Derive the node number
+ node = c if isinstance(c, int) else c[0]
+ # If the node is the one we search for, search for its parent and
+ # start cloning
+ if node == from_node:
+ for i, rem in enumerate(connections[last_node]):
+ if rem[1] == node:
+ result = self.clone_p(connections, (last_node, rem[1]))
+ del(connections[last_node][i])
+ return result
+ last_node = node
+
+ def new_num(self, connections):
+ return max(connections.keys())+1
+
+ def add_suffix(self, connections, last_state, current_suffix, first=False):
+ if first:
+ this = current_suffix
+ that = None
+ else:
+ this = current_suffix[:-1]
+ that = current_suffix[-1]
+ for c in this:
+ newnum = self.new_num(connections)
+ connections[last_state].append((c, newnum))
+ connections[newnum] = []
+ last_state = newnum
+ if that:
+ endstate = [i for i in connections.iteritems() if not i[1]][0]
+ connections[last_state].append((that, endstate[0]))
+
+ def train(self, wlist):
+# register = None
+# connections = {-1: [('a', 2), ('b', 4)], 2: [('b', 3)], 3: [('d', 5)],
+# 4: [('a', 3)], 5: []}
+ connections = {-1: []}
+ first = True
+ for word in wlist:
+ if first:
+ self.add_suffix(connections, -1, word, True)
+ first = False
+ print word
+# Find the common prefix and suffix and skip if word is in the adfa
+ common_prefix = self.common_prefix(connections, word)
+ current_suffix = word[len(common_prefix)-1:]
+ print 'common_prefix: {}'.format(common_prefix)
+ print 'current_suffix: {}'.format(current_suffix)
+ if not current_suffix:
+ continue
+
+# Find the first confluence state in the common prefix path
+ first_state = self.first_cstate(connections, common_prefix)
+ print 'first_state: {}'.format(first_state)
+ if first_state:
+ last_state = self.clone(
+ connections, common_prefix, first_state)
+ else:
+ last_state = common_prefix[-1]
+ print 'last_state: {}'.format(last_state)
+ self.add_suffix(connections, last_state, current_suffix)
+
+# If there is a confluence state and there has been cloning, loop through the
+# new states backwards and replace or register the states
+ if first_state:
+ print 'consider replace register clone'
+ first_state = self.first_cstate(connections, common_prefix)
+ print 'first_state {}'.format(first_state)
+ current_index = common_prefix.index(first_state)
+ print range(len(common_prefix, current_index, -1))
+ for i in range(len(common_prefix, current_index, -1)):
+ pass
+# There is no confluence state so the current index will be the last of the
+# common prefix
+ else:
+ current_index = len(common_prefix)
+
+# While we change things try to find equivalent states in the suffix to
+# replace or register
+ #changed = True
+ raw_input('continue...\n')
+
+ self.graphviz(connections, 'f.dot')
+
+ def graphviz(self, connections, fp='-'):
+ fout = sys.stdout if fp == '-' else open(fp, 'w')
+ fout.write('digraph finite_state_machine {\n')
+ fout.write('\tnode [shape = doublecircle]; {}\n'.format(
+ ' '.join(str(c) for c in connections if not connections[c])))
+ fout.write('\tnode [shape = circle]; {}\n'.format(
+ ' '.join(str(c) for c in connections if connections[c])))
+ for node, targets in connections.iteritems():
+ for target in targets:
+ fout.write('\t{} -> {} [label = "{}"];\n'.format(
+ node, target[1], target[0]))
+ fout.write('}')
+ if fp != '-':
+ fout.close()
+
+
+if __name__ == '__main__':
+ afda = Afda()
+ afda.train(['abd', 'bad', 'bae'])