# -*- coding: utf-8 -*-
import sys
-import re
class fsm():
- split = re.compile(r"""
-(
- (?:
- \[.*?\]| # character group
- \(.*\)| # group
- \\?. # single character
- )
- (?:
- \{[0-9,]+\}| # strict quantifier
- [?+*] # loose quantifier
- )?
-)""")
+ """Class describing a finite state machine in the form of a directed graph
+
+ Internal variables:
+ nodes -- dict mapping every node id to its label,
+ {id -> string}
+ connections -- dict mapping every node to the nodes it connects to,
+ {from_id -> set of to_ids}
+ strings -- dict containing the strings the fsm is trained on,
+ {string -> [(marking_start, marking_end), ...]}
+ markings -- dict containing the markings within the fsm,
+ {marking_name -> (node_start, node_end)}
+ """
+ nodestr = '{} [label="{}"]'
+ edgestr = '{} -> {}'
def __init__(self):
# Create an empty machine: only the two sentinel nodes exist,
# -1 labelled '#start' and -2 labelled '#end'.
- # List of nodes number as key, info as value
self.nodes = {-1: '#start', -2: '#end'}
- # List of connection tuples
-# self.connections = {-2: set()}
- self.connections = {-2: list()}
- # Dictionary of string that are added and have to be integrated
# The end node (-2) starts out with an empty set of outgoing connections.
+ self.connections = {-2: set()}
# No strings and no markings are registered yet.
self.strings = {}
- # Dictionary of marking locations in the graph
self.markings = {}
def add_node(self, string, key=-1):
# NOTE(review): the statements below use `check`, `node_from` and
# `node_to`, none of which add_node declares. Presumably the real body of
# add_node and the header of the following method are elided between two
# diff hunks -- confirm against the full file.
keys = self.nodes.keys()
# The connection is stored when `check` is truthy or when both endpoints
# are known node keys; otherwise an exception is raised.
if check or (node_from in keys and node_to in keys):
# First connection leaving this node: create its container.
if node_from not in self.connections:
-# self.connections[node_from] = set()
- self.connections[node_from] = list()
-# self.connections[node_from].add(node_to)
- self.connections[node_from].append(node_to)
+ self.connections[node_from] = set()
+ self.connections[node_from].add(node_to)
else:
raise Exception('One or more nodes not found')
def optimize(self):
"""Optimize the finite state machine

Not implemented yet: the body is a placeholder that does nothing.
"""
+ # TODO: search the patterns outside the markings and try to merge them
pass
def graphviz(self, fp='-'):
# NOTE(review): `subgraphs` is used below but never created in the visible
# lines, and `fp` defaults to the string '-' yet is written to and closed
# like a file object. Presumably the start of this method is elided between
# diff hunks -- confirm against the full file.
# Fill `subgraphs` by walking the graph from the start node (-1).
self.travers(-1, subgraphs)
# Print the nodes that are not in a subgraph
- for nodes in subgraphs['none'][0] + subgraphs['none'][1]:
# travers() stores plain tuples; they are turned into text here.
+ for nodes in [self.nodestr.format(*n) for n in subgraphs['none'][0]] +\
+ [self.edgestr.format(*n) for n in subgraphs['none'][1]]:
fp.write('{:<2}{}\n'.format(' ', nodes))
# Drop the 'none' entry so the loop below only sees real subgraphs.
del(subgraphs['none'])
#Print the nodes that are in a subgraph
# NOTE(review): dict.iteritems() exists only in Python 2.
for key, sg in subgraphs.iteritems():
fp.write('{0:<2}subgraph cluster_{1} {{\n'.format(' ', key))
- for nodes in sg[0] + sg[1]:
+ for nodes in [self.nodestr.format(*n) for n in sg[0]] +\
+ [self.edgestr.format(*n) for n in sg[1]]:
fp.write('{:<4}{}\n'.format(' ', nodes))
fp.write('{:<2}}}\n'.format(' '))
# Never close the interpreter's standard output.
if fp != sys.stdout:
fp.close()
def travers(self, current, subgraphs, state='none', visited=None):
    """Traverse the graph depth-first and collect data for graphviz output

    Every node reachable from `current` is recorded once, as a
    (node key, label) tuple, and every outgoing connection of such a node
    is recorded as a (from key, to key) tuple.  Both end up in the subgraph
    the traverser is in when it reaches the node.

    Required arguments:
    current   -- current node key
    subgraphs -- dictionary of subgraphs of the form:
                 {'subgraph1': ([label], [edge]),
                  'subgraph2': ([label], [edge]),
                  ... }
                 It is filled in place and needs an entry for 'none' and
                 for every marking name that is reached.

    Keyword arguments:
    state     -- current state the traverser is in, i.e. the key of the
                 subgraph the current node is filed under
    visited   -- set of visited node keys; a fresh set is created when
                 it is omitted
    """
    # A set() default would be created only once and shared by every call,
    # so a second traversal would find all nodes already visited.
    if visited is None:
        visited = set()
    # Stop when the node is already visited
    if current in visited:
        return
    # Add the node and mark it visited
    visited.add(current)
    subgraphs[state][0].append((current, self.nodes[current]))
    # Go through all the connections from this node; a node without any
    # outgoing connection may have no entry in the dict at all
    for target in self.connections.get(current, ()):
        subgraphs[state][1].append((current, target))
        newstate = state
        # A node that starts a marking switches to that subgraph, a node
        # that ends a marking switches back to 'none'
        for name, marking in self.markings.items():
            if marking[0] == target:
                newstate = name
            elif marking[1] == target:
                newstate = 'none'
        # Traverse the node, sharing the set of visited nodes explicitly
        self.travers(target, subgraphs, newstate, visited)
if __name__ == '__main__':