From 19725dea916b3042cb8894bfb1d8e05c56437e25 Mon Sep 17 00:00:00 2001 From: Mart Lubbers Date: Mon, 9 Feb 2015 12:29:20 +0100 Subject: [PATCH] small dawg implementation --- program/dawg/dawg.py | 95 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 program/dawg/dawg.py diff --git a/program/dawg/dawg.py b/program/dawg/dawg.py new file mode 100644 index 0000000..82a0cda --- /dev/null +++ b/program/dawg/dawg.py @@ -0,0 +1,95 @@ +import sys + + +class DAWG: + def __init__(self): + self.nodes = {'q0'} + self.delta = {'q0': {}} + self.final = set() + self.start = 'q0' + self.register = None + + def common_prefix(self, word, current=None): + if current is None: + current = self.start + if word == '': + return '' + char, rest = word[0], word[1:] + if char in self.delta[current]: + return char + self.common_prefix(rest, self.delta[current][char]) + else: + return '' + + def delta_star(self, start, word): + if not word: + return start + else: + return self.delta_star(self.delta[start][word[0]], word[1:]) + + def equiv(self, a, b): + return (a in self.final) == (b in self.final) and\ + len(self.delta[a].keys()) == len(self.delta[b].keys()) and\ + self.delta[a].keys() == self.delta[b].keys() + + def replace_or_register(self, suffix): + while suffix: + parent, char, state = suffix.pop() + for r in self.register: + if self.equiv(state, r): + self.delta[parent][char] = r + if state in self.final: + self.final.remove(state) + self.nodes.remove(state) + del(self.delta[state]) + break + else: + self.register.add(state) + + def add_suffix(self, state, current_suffix): + nodenum = max(int(w[1:]) for w in self.nodes)+1 + reverse = [] + for c in current_suffix: + newnode = 'q{}'.format(nodenum) + self.nodes.add(newnode) + nodenum += 1 + self.delta[state][c] = newnode + self.delta[newnode] = {} + reverse.append((state, c, newnode)) + state = newnode + self.final.add(newnode) + return reverse + + def add_words_sorted(self, words, start=None): + self.register = set() +# words = sorted(words) + while words: + word = words.pop() + common_prefix = self.common_prefix(word) + print('common_prefix=', common_prefix) + last_state = self.delta_star(self.start, common_prefix) + print('last_state=', last_state) + current_suffix = word[len(common_prefix):] + print('current_suffix=', current_suffix) + suffix = self.add_suffix(last_state, current_suffix) + print('added_suffix=', suffix) + self.replace_or_register(suffix) + print('after: register: ', self.register) + + def to_dot(self, options=''): + s = 'digraph {{\n{}\nn0 [style=invis]\n'.format(options) + for node in self.nodes: + s += '{} [shape={}circle]\n'.format( + node, 'double' if node in self.final else '') + s += 'n0 -> {}\n'.format(self.start) + for node, transitions in self.delta.items(): + for letter, state in transitions.items(): + s += '{} -> {} [label="{}"]\n'.format(node, state, letter) + s += '}\n' + return s + + +if __name__ == '__main__': + d = DAWG() + d.add_words_sorted(['abd', 'bad', 'bae']) +# d.add_words_sorted(['abcde', 'fghde', 'fghdghde']) + sys.stderr.write(d.to_dot(options='rankdir=LR')) -- 2.20.1