feefcd0e040b6ca254c4a53beab6b9ad7c648573
2 # -*- coding: utf-8 -*-
6 """Class representing a DAWG
11 f - Set of final nodes
12 delta - Delta function
15 """Initialize with null graph."""
17 self
.delta
= {'q0': {}}
22 def common_prefix(self
, word
, current
):
23 """Find the common prefix of word starting from current."""
25 char
, rest
= word
[0], word
[1:]
26 return char
+ self
.common_prefix(rest
, self
.delta
[current
][char
])
27 except (KeyError, IndexError):
30 def delta_star(self
, c
, w
):
31 """Calculate the final node when traversing the graph from node c with
33 return c
if not w
else self
.delta_star(self
.delta
[c
][w
[0]], w
[1:])
35 def equiv(self
, a
, b
):
36 """Determine if two nodes are equivalent. This is the case when they
37 have the same final flag, the same number of children and their
38 children are equal."""
39 if (a
in self
.f
) != (b
in self
.f
) or\
40 self
.delta
[a
].keys() != self
.delta
[b
].keys():
42 return all(self
.equiv(x
, y
) for x
, y
in
43 zip(self
.delta
[a
].values(), self
.delta
[b
].values()))
45 def replace_or_register(self
, suffix
):
46 """Starting from the back try to merge nodes in the suffix."""
48 parent
, char
, state
= suffix
.pop()
49 for r
in self
.register
:
50 if self
.equiv(state
, r
):
51 self
.delta
[parent
][char
] = r
55 del(self
.delta
[state
])
58 self
.register
.add(state
)
60 def add_suffix(self
, state
, current_suffix
):
61 """Add the current_suffix to the graph from state and return it"""
62 nodenum
= max(int(w
[1:]) for w
in self
.v
)+1
64 for c
in current_suffix
:
65 newnode
= 'q{}'.format(nodenum
)
68 self
.delta
[state
][c
] = newnode
69 self
.delta
[newnode
] = {}
70 suffix
.append((state
, c
, newnode
))
75 def add_words(self
, words
):
76 """Add words to the dawg"""
81 common_prefix
= self
.common_prefix(word
, self
.v0
)
82 last_state
= self
.delta_star(self
.v0
, common_prefix
)
83 current_suffix
= word
[len(common_prefix
):]
85 suffix
= self
.add_suffix(last_state
, current_suffix
)
86 self
.replace_or_register(suffix
)
88 self
.f
.add(last_state
)
90 def to_dot(self
, options
=''):
91 """Return the graphviz(dot) string representation of the graph"""
92 s
= 'digraph {{\n{}\nn0 [style=invis]\n'.format(options
)
94 s
+= '{} [shape={}circle]\n'.format(
95 node
, 'double' if node
in self
.f
else '')
96 s
+= 'n0 -> {}\n'.format(self
.v0
)
97 for node
, transitions
in self
.delta
.items():
98 for letter
, state
in transitions
.items():
99 s
+= '{} -> {} [label="{}"]\n'.format(node
, state
, letter
)
103 if __name__
== '__main__':
105 d
.add_words(['abd', 'bad', 'bae'])
106 # d.add_words(['abcde', 'fghde', 'fghdghde'])
107 print repr(d
.common_prefix('', d
.v0
))
108 print d
.to_dot(options
='rankdir=LR')