From 2ec328a28e8f9b16d7b17dd5f4d7461c1e73dc6d Mon Sep 17 00:00:00 2001 From: Mart Lubbers Date: Wed, 29 Oct 2014 20:37:43 +0100 Subject: [PATCH] up --- program/everything/input_app.py | 2 +- thesis2/2.methods.tex | 25 +++++++++++++++---------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/program/everything/input_app.py b/program/everything/input_app.py index 532e2c0..ef746a1 100644 --- a/program/everything/input_app.py +++ b/program/everything/input_app.py @@ -138,7 +138,7 @@ def to_dot(q0): print '}' -def data_main(d): +def ata_main(d): d = {k: str(v) for k, v in dict(d).iteritems() if k != 'write'} structure_data(d) d['matchdata'] = [] diff --git a/thesis2/2.methods.tex b/thesis2/2.methods.tex index 3f67cb3..a0afd71 100644 --- a/thesis2/2.methods.tex +++ b/thesis2/2.methods.tex @@ -31,17 +31,22 @@ After that the entries are extracted and processed line by line. The line processing converts the raw string of html data from a table row to a string. The string is stripped of all the html tags and is accompanied by a -list of marker items. - -The entries that don't contain any markers are left out in the next step of -processing. All data, including entries without user markers, is stored in the -object too for possible later reference, for example for editing the patterns. +list of marker items. The entries that don't contain any markers are left out +in the next step of processing. All data, including entries without user +markers, is stored in the object too for possible later reference, for example +for editing the patterns. The last step is when the entries with markers are then processed to build -node-lists. Node-lists are basically strings where the user markers are -replaced by patterns so that the variable data, the isolated data, is not used -in the node-lists. - -\subsection{Directed acyclic graphs} +node-lists. Node-lists are basically lists of words that, when concatenated, +form the original entry. 
A word isn't a word in the linguistic sense. A word +can be one letter or a category. The node-list is generated by putting all the +separate characters one by one in the list and when a user marking is +encountered, this marking is translated to the category code and that code is +then added as a word. The node-lists are then sent to the actual algorithm to be +converted to a graph representation. + +\subsection{Directed acyclic graphs (DAG)} +Directed acyclic graphs are a special kind of graph that is used to store big +sets of words and has linear #TODO, CITE THIS# access times. -- 2.20.1