From c5529a0e61ac8c36e2de69e52739aa0f8edbae63 Mon Sep 17 00:00:00 2001 From: Mart Lubbers Date: Tue, 14 Jun 2016 17:37:59 +0200 Subject: [PATCH] dikke update --- deliverables/report/Makefile | 2 +- deliverables/report/ext.tex | 121 --------------------------------- deliverables/report/gen.tex | 51 ++++++++++++++ deliverables/report/intro.tex | 16 +++++ deliverables/report/pars.tex | 63 +++++++++++++++++ deliverables/report/report.tex | 2 - deliverables/report/sem.tex | 31 ++++++++- 7 files changed, 161 insertions(+), 125 deletions(-) delete mode 100644 deliverables/report/ext.tex diff --git a/deliverables/report/Makefile b/deliverables/report/Makefile index a97d801..0c76671 100644 --- a/deliverables/report/Makefile +++ b/deliverables/report/Makefile @@ -6,7 +6,7 @@ DOCUMENT:=report all: $(DOCUMENT).pdf -%.pdf: %.tex ext.tex gen.tex pars.tex sem.tex intro.tex eval.tex +%.pdf: %.tex gen.tex pars.tex sem.tex intro.tex eval.tex $(LATEX) $(LATEXFLAGS) $< $(LATEX) $(LATEXFLAGS) $< diff --git a/deliverables/report/ext.tex b/deliverables/report/ext.tex deleted file mode 100644 index 446f7bf..0000000 --- a/deliverables/report/ext.tex +++ /dev/null @@ -1,121 +0,0 @@ -\section{Extensions}\label{sec:ext} -\subsection{Higher order functions} -The nature of the type checking algorithm already included type checking and -inferring the type of higher order functions. Since we allow constants there is -a difference between \SI{read} and \SI{read()}. To make this difference we -introduced a separate type of expression called a \SI{FuncType}, this type -encodes a function with arity $0$. - -As all other data types a function type object occupies one item on the stack -which contains a heap pointer. The heap pointer points to a position on the -heap that stores, in increasing addresses, the function identification number, -the number of arguments already given and finally the argument values. -For example the following snippet shows the representation of the incomplete -function \SI{plus} that already has two arguments. -\begin{SPLCode} -//SPLCode -plus(x, y, z){ return x + y + z; } - -main(){ - var a = plus(4, 3); -} - -/*# | Stack -001 | 001 //Pointer to the -... | - - # | Heap -001 | 008 //Function identification number -002 | 002 //Available arguments -003 | 004 //Argument 1 -004 | 003 //Argument 2 -*/ -\end{SPLCode} - -When an incomplete function is declared we save the remaining arity in our -state to be able to know when a function is complete and can be executed. When -a function is still not complete the function is copied in totality and the -available arguments position on the heap is increased accordingly and the -arguments are added on the heap. - -Function numbers are integers that identify the function. Normally we address -functions using labels but it is not possible to store labels on the heap or on -the stack. Therefore we include a function in the preamble that is basically a -dictionary from integers to labels. All functions have their unique number that -identifies them via the function dictionary called \SI{1func}. When we want to -call a function that is identified by an address instead of a label we first -collect all the arguments from the heap and add the arguments given in the -function call. Calculating the number of arguments that have to be popped from -the heap is done at runtime using the \emph{Available Arguments} value stored -on the heap. Instead of jumping to a label we jump to the \SI{1func} function -and make sure the identification is situated in register R5. \SI{1func} then -makes sure the correct function is branched to and the caller can resume the -normal execution path. - - -\subsection{Syntactic sugars}\label{ssec:synsugar} -Several small syntactic sugars have been added on several levels of processing -to make writing programs more easy. - -\subsubsection{Literal strings} -While there are no literal strings one can print lists of characters. Entering -lists of characters can be cumbersome and therefore a shorthand notation is -added to define literal strings using double quotes. During the lexing phase -the string is lexed as a single token and during parsing the token is -transformed to a equal character list representation. The following example -shows the syntax before transformation and the syntax after. Note that just as -in characters, literal strings can contain escape characters. - -\begin{SPLCode} -//Before transformation -var a = "Hello world!"; -//After transformation -var a = 'H':'e':'l':'l':'o':' ':'w':'o':'r':'l':'d':'!':[] -\end{SPLCode} - -\subsubsection{Anonymous functions} -When the programmers wants to use small functions that consist of one line to -for example use the operators in a functional way the programmer has to make a -named function for every operator. In some cases it is easy to do this inline -using anonymous functions. A small syntactic sugar has been added that will -inline anonymous functions to non-anonymous functions during the parsing phase. -The following snippet is an example of a filter on even values before and after -transformation. - -\begin{SPLCode} -//Before transformation -main(){ - var a = filter(\x->x % 2 == 0, 1 : 2 : 3 : 4 : 5 : []); -} - -//After transformation -1lambda_23(x){ - return x % 2 == 0; -} - -main(){ - var a = filter(1lambda_23, 1 : 2 : 3 : 4 : 5 : []); -} -\end{SPLCode} - -Every anonymous functions gets a unique name outside the namespace of legal -names for functions making sure the name is unique. By implementing the -transformation in the parsing phase the function is automatically checked -or inferred on type. - -\subsubsection{Variable arguments printing} -To ease the programmer we have included a parse-level transformation that -transforms \SI{print} with multiple arguments to multiple \SI{print} function -calls with a single argument. The following example shows the syntax before the -transformation and the syntax after the transformation. - -\begin{SPLCode} -//Before transformation -print("abc", 42, 'c', "da\n"); - -//After transformation -print('a':'b':'c':[]); -print(42); -print('c'); -print('d':'a':'\n':[]); -\end{SPLCode} diff --git a/deliverables/report/gen.tex b/deliverables/report/gen.tex index 5881d16..0695174 100644 --- a/deliverables/report/gen.tex +++ b/deliverables/report/gen.tex @@ -6,3 +6,54 @@ %gewoon hoe het werkt \subsection{Higher order functions} +The nature of the type checking algorithm already included type checking and +inferring the type of higher order functions. Since we allow constants there is +a difference between \SI{read} and \SI{read()}. To make this difference we +introduced a separate type of expression called a \SI{FuncType}, this type +encodes a function with arity $0$. + +As all other data types a function type object occupies one item on the stack +which contains a heap pointer. The heap pointer points to a position on the +heap that stores, in increasing addresses, the function identification number, +the number of arguments already given and finally the argument values. +For example the following snippet shows the representation of the incomplete +function \SI{plus} that already has two arguments. +\begin{SPLCode} +//SPLCode +plus(x, y, z){ return x + y + z; } + +main(){ + var a = plus(4, 3); +} + +/*# | Stack +001 | 001 //Pointer to the +... | + + # | Heap +001 | 008 //Function identification number +002 | 002 //Available arguments +003 | 004 //Argument 1 +004 | 003 //Argument 2 +*/ +\end{SPLCode} + +When an incomplete function is declared we save the remaining arity in our +state to be able to know when a function is complete and can be executed. When +a function is still not complete the function is copied in totality and the +available arguments position on the heap is increased accordingly and the +arguments are added on the heap. + +Function numbers are integers that identify the function. Normally we address +functions using labels but it is not possible to store labels on the heap or on +the stack. Therefore we include a function in the preamble that is basically a +dictionary from integers to labels. All functions have their unique number that +identifies them via the function dictionary called \SI{1func}. When we want to +call a function that is identified by an address instead of a label we first +collect all the arguments from the heap and add the arguments given in the +function call. Calculating the number of arguments that have to be popped from +the heap is done at runtime using the \emph{Available Arguments} value stored +on the heap. Instead of jumping to a label we jump to the \SI{1func} function +and make sure the identification is situated in register R5. \SI{1func} then +makes sure the correct function is branched to and the caller can resume the +normal execution path. diff --git a/deliverables/report/intro.tex b/deliverables/report/intro.tex index a1af621..7f0e7d7 100644 --- a/deliverables/report/intro.tex +++ b/deliverables/report/intro.tex @@ -65,3 +65,19 @@ diff -q \ <(spl --parse --no-code input.spl) \ <(spl --parse --no-code input.spl | spl --parse --no-code) \end{lstlisting} + +\subsection{Extensions} +Several bigger and smaller extensions to the specification have been made for +the sake of the assignment but also to make programming in \SPL{} more easy. +The big extension for the assignment is the addition of higher order functions. +We have chosen to not implement them in the way of popular functional languages +such as \emph{Clean} and \emph{Haskell} with full closure but to implement them +with \emph{C}-style function pointers. Besides function pointers it is also +possible to already curry some arguments in the function. Details of this are +shown in the higher order function part of Section~\ref{sec:gen}. + +Several syntactic sugars have also been added to make programming more easy. +The sugars added are literal lists, literal strings, let expressions, lambda +functions and variable argument printing. All sugars are implemented in the +parsing or lexing phase and thus shown in Section~\ref{sec:pars} except for +lambda expressions which are shown in Section~\ref{sec:sem}. diff --git a/deliverables/report/pars.tex b/deliverables/report/pars.tex index 56fe1f7..d64ec69 100644 --- a/deliverables/report/pars.tex +++ b/deliverables/report/pars.tex @@ -139,3 +139,66 @@ parseFunDecl = liftM6 FunDecl (satTok CBraceOpenToken *> many parseVarDecl) (flatten <$> (many parseStmt <* satTok CBraceCloseToken)) \end{lstlisting} + +\subsection{Sugars} +Some syntactic sugars are procesessed in the lexing or parsing phase. Entering +literal lists and strings is a tedious job because you can only glue them +together with cons opererators. Thus we introduced a literal lists notation as +visible in the grammar. Literal strings are lex as single tokens and expanded +during parsing in to normal lists of characters. Literal lists are processed in +the parsing phase and expanded to their similar form with the \texttt{Cons} +operator. An example of this is shown in Listing~\ref{lst:litlist} + +\begin{lstlisting}[ + language=SPLCode, + caption={Literal lists}, + label={lst:litlist}] +//Before transformation +var a = "Hello world!"; +var b = [3, 1, 4, 1, 5]; + +//After transformation +var a = 'H':'e':'l':'l':'o':' ':'w':'o':'r':'l':'d':'!':[] +var b = 3 : 1 : 4 : 1 : 5 : []; +\end{lstlisting} + +Another sugar added is variable arguments printing. To enable the user to +pretty print data the number of arguments of the \SI{print} function is not +fixed and it is expanded at compiletime. With this sugar the programmer can +print several different types in one line. Listing~\ref{lst:varprint} shows an +example of the sugar. In the semantic analysis phase these statements are +actually refined even more to represent the specific print function per type +since the compiler allows printing of \SI{Int}, \SI{Bool}, \SI{Char} and +\SI{[Char]} +\begin{lstlisting}[ + language=SPLCode, + caption={Variable arguments printing}, + label={lst:varprint}] +//Before transformation +print("abc", 42, 'c', "da\n"); + +//After transformation +print('a':'b':'c':[]); +print(42); +print('c'); +print('d':'a':'\n':[]); +\end{lstlisting} + +The last sugar processed in the parsing or lexing phase are the \SI{let} +constructions. These constructions provide an easy way of defining global +constants without real constants. While we do not allow global variables one +might want to introduce global constants and with these constructs this can be +done very easily. Listing~\ref{lst:letgl} shows the expansion of \SI{let} +declarations. +\begin{lstlisting}[ + language=SPLCode, + caption={\SI{let} expansion}, + label={lst:letgl}] +//Before transformation +let Int x = 42; + +//After transformation +x() :: -> Int{ + return 42; +} +\end{lstlisting} diff --git a/deliverables/report/report.tex b/deliverables/report/report.tex index 152a139..fd850da 100644 --- a/deliverables/report/report.tex +++ b/deliverables/report/report.tex @@ -40,8 +40,6 @@ \input{gen.tex} -\input{ext.tex} - \input{eval.tex} \newpage diff --git a/deliverables/report/sem.tex b/deliverables/report/sem.tex index 6b49e68..ca61d2b 100644 --- a/deliverables/report/sem.tex +++ b/deliverables/report/sem.tex @@ -16,6 +16,35 @@ %the team. \newcommand{\unif}{{\scriptscriptstyle\cup}} +%\subsubsection{Anonymous functions} +%When the programmers wants to use small functions that consist of one line to +%for example use the operators in a functional way the programmer has to make a +%named function for every operator. In some cases it is easy to do this inline +%using anonymous functions. A small syntactic sugar has been added that will +%inline anonymous functions to non-anonymous functions during the parsing phase. +%The following snippet is an example of a filter on even values before and after +%transformation. +% +%\begin{SPLCode} +%//Before transformation +%main(){ +% var a = filter(\x->x % 2 == 0, 1 : 2 : 3 : 4 : 5 : []); +%} +% +%//After transformation +%1lambda_23(x){ +% return x % 2 == 0; +%} +% +%main(){ +% var a = filter(1lambda_23, 1 : 2 : 3 : 4 : 5 : []); +%} +%\end{SPLCode} + +Every anonymous functions gets a unique name outside the namespace of legal +names for functions making sure the name is unique. By implementing the +transformation in the parsing phase the function is automatically checked +or inferred on type. The semantic analysis phase asses if a -grammatically correct- program is also semantically correct and decorates the program with extra information it @@ -363,4 +392,4 @@ blabla, verschil tussen wel en niet iets returnen (\Gamma^\star, \star, \tau, \underline{\textrm{return }} e') } {\Gamma \vdash e \Rightarrow (\Gamma^\star, \star, \tau, e')} -\end{equation} \ No newline at end of file +\end{equation} -- 2.20.1