--- /dev/null
+import sys
+
+
+class DAWG:
+ def __init__(self):
+ self.nodes = {'q0'}
+ self.delta = {'q0': {}}
+ self.final = set()
+ self.start = 'q0'
+ self.register = None
+
+ def common_prefix(self, word, current=None):
+ if current is None:
+ current = self.start
+ if word == '':
+ return ''
+ char, rest = word[0], word[1:]
+ if char in self.delta[current]:
+ return char + self.common_prefix(rest, self.delta[current][char])
+ else:
+ return ''
+
+ def delta_star(self, start, word):
+ if not word:
+ return start
+ else:
+ return self.delta_star(self.delta[start][word[0]], word[1:])
+
+ def equiv(self, a, b):
+ return (a in self.final) == (b in self.final) and\
+ len(self.delta[a].keys()) == len(self.delta[b].keys()) and\
+ self.delta[a].keys() == self.delta[b].keys()
+
+ def replace_or_register(self, suffix):
+ while suffix:
+ parent, char, state = suffix.pop()
+ for r in self.register:
+ if self.equiv(state, r):
+ self.delta[parent][char] = r
+ if state in self.final:
+ self.final.remove(state)
+ self.nodes.remove(state)
+ del(self.delta[state])
+ break
+ else:
+ self.register.add(state)
+
+ def add_suffix(self, state, current_suffix):
+ nodenum = max(int(w[1:]) for w in self.nodes)+1
+ reverse = []
+ for c in current_suffix:
+ newnode = 'q{}'.format(nodenum)
+ self.nodes.add(newnode)
+ nodenum += 1
+ self.delta[state][c] = newnode
+ self.delta[newnode] = {}
+ reverse.append((state, c, newnode))
+ state = newnode
+ self.final.add(newnode)
+ return reverse
+
+ def add_words_sorted(self, words, start=None):
+ self.register = set()
+# words = sorted(words)
+ while words:
+ word = words.pop()
+ common_prefix = self.common_prefix(word)
+ print('common_prefix=', common_prefix)
+ last_state = self.delta_star(self.start, common_prefix)
+ print('last_state=', last_state)
+ current_suffix = word[len(common_prefix):]
+ print('current_suffix=', current_suffix)
+ suffix = self.add_suffix(last_state, current_suffix)
+ print('added_suffix=', suffix)
+ self.replace_or_register(suffix)
+ print('after: register: ', self.register)
+
+ def to_dot(self, options=''):
+ s = 'digraph {{\n{}\nn0 [style=invis]\n'.format(options)
+ for node in self.nodes:
+ s += '{} [shape={}circle]\n'.format(
+ node, 'double' if node in self.final else '')
+ s += 'n0 -> {}\n'.format(self.start)
+ for node, transitions in self.delta.items():
+ for letter, state in transitions.items():
+ s += '{} -> {} [label="{}"]\n'.format(node, state, letter)
+ s += '}\n'
+ return s
+
+
+if __name__ == '__main__':
+ d = DAWG()
+ d.add_words_sorted(['abd', 'bad', 'bae'])
+# d.add_words_sorted(['abcde', 'fghde', 'fghdghde'])
+ sys.stderr.write(d.to_dot(options='rankdir=LR'))