From: Mart Lubbers Date: Mon, 7 Jul 2014 15:12:06 +0000 (+0200) Subject: small commit X-Git-Url: https://git.martlubbers.net/?a=commitdiff_plain;h=ad8981a1d3c7dbbf450437e1944266f0feee17c1;p=bsc-thesis1415.git small commit --- diff --git a/program/regexex/fsm.py b/program/regexex/fsm.py index 1a24e88..c3b6357 100644 --- a/program/regexex/fsm.py +++ b/program/regexex/fsm.py @@ -2,32 +2,28 @@ # -*- coding: utf-8 -*- import sys -import re class fsm(): - split = re.compile(r""" -( - (?: - \[.*?\]| # character group - \(.*\)| # group - \\?. # single character - ) - (?: - \{[0-9,]+\}| # strict quantifier - [?+*] # loose quantifier - )? -)""") + """Class describing a finite state machine in the form of a directed graph + + Internal variables: + nodes -- dict for the node information, + {id -> string} + connections -- dict for all nodes the connections to other nodes, + {from_id -> to_id} + strings -- dict containing the strings the fsm is trained on: + {string -> [(marking_start, marking_end), ...]} + markings -- dictionary containing markings within the fsm + {marking_name -> (node_start, node_end)} + """ + nodestr = '{} [label="{}"]' + edgestr = '{} -> {}' def __init__(self): - # List of nodes number as key, info as value self.nodes = {-1: '#start', -2: '#end'} - # List of connection tuples -# self.connections = {-2: set()} - self.connections = {-2: list()} - # Dictionary of string that are added and have to be integrated + self.connections = {-2: set()} self.strings = {} - # Dictionary of marking locations in the graph self.markings = {} def add_node(self, string, key=-1): @@ -61,10 +57,8 @@ class fsm(): keys = self.nodes.keys() if check or (node_from in keys and node_to in keys): if node_from not in self.connections: -# self.connections[node_from] = set() - self.connections[node_from] = list() -# self.connections[node_from].add(node_to) - self.connections[node_from].append(node_to) + self.connections[node_from] = set() + self.connections[node_from].add(node_to) else: raise Exception('One or more nodes not found') @@ -121,6 +115,7 @@ class fsm(): def optimize(self): """Optimize the finite state machine""" + # Search the patterns outside the markings and try to merge pass def graphviz(self, fp='-'): @@ -138,14 +133,16 @@ class fsm(): self.travers(-1, subgraphs) # Print the nodes that are not in a subgraph - for nodes in subgraphs['none'][0] + subgraphs['none'][1]: + for nodes in [self.nodestr.format(*n) for n in subgraphs['none'][0]] +\ + [self.edgestr.format(*n) for n in subgraphs['none'][1]]: fp.write('{:<2}{}\n'.format(' ', nodes)) del(subgraphs['none']) #Print the nodes that are in a subgraph for key, sg in subgraphs.iteritems(): fp.write('{0:<2}subgraph cluster_{1} {{\n'.format(' ', key)) - for nodes in sg[0] + sg[1]: + for nodes in [self.nodestr.format(*n) for n in sg[0]] +\ + [self.edgestr.format(*n) for n in sg[1]]: fp.write('{:<4}{}\n'.format(' ', nodes)) fp.write('{:<2}}}\n'.format(' ')) @@ -154,35 +151,39 @@ class fsm(): if fp != sys.stdout: fp.close() - def travers(self, current, subgraphs, state='none', visited=set(), - nodestring='{} [label="{}"]', edgestring='{} -> {}'): + def travers(self, current, subgraphs, state='none', visited=set()): """Traverse the graph and fill the dictionary for graphviz output Required arguments: - current -- current node key - subgraphs -- dictionary of subgraphs of the form: - {'subgraph1': ([], []), 'subgraph2': ([], []) ... } + current -- current node key + subgraphs -- dictionary of subgraphs of the form: + {'subgraph1': ([label], [edge]), + 'subgraph2': ([label], [edge]), + ... } Keyword arguments: - state -- current state the traverser is in - visited -- set of visited nodes - nodestring-- format string for the node-dot output - edgestring-- format string for the edge-dot output + state -- current state the traverser is in + visited -- set of visited nodes + nodestr -- format string for the node-dot output + edgestr -- format string for the edge-dot output """ + # Stop when the node is already visited if current not in visited: + # Add the node and mark visited visited.add(current) - subgraphs[state][0].append(nodestring.format( - current, self.nodes[current])) + subgraphs[state][0].append((current, self.nodes[current])) + # Go through all the connections from this node for c in self.connections[current]: - subgraphs[state][1].append(edgestring.format(current, c)) + subgraphs[state][1].append((current, c)) + newstate = state + # Check if the current node is a border case for a category for name, markings in self.markings.iteritems(): if markings[0] == c: - state = name - break + newstate = name elif markings[1] == c: - state = 'none' - break - self.travers(c, subgraphs, state) + newstate = 'none' + # Traverse the node + self.travers(c, subgraphs, newstate) if __name__ == '__main__':