a524e314c11f7961385be9b8f0d3e120268a36cc
[phd-thesis.git] / dsl / class_deep_embedding.tex
1 \documentclass[../thesis.tex]{subfiles}
2
3 \include{subfilepreamble}
4
5 \begin{document}
6 \chapter{Deep embedding with class}%
7 \label{chp:classy_deep_embedding}
8
9 \begin{chapterabstract}
10 The two flavours of \gls{DSL} embedding are shallow and deep embedding.
11 In functional languages, shallow embedding models the language constructs as functions in which the semantics are embedded.
12 Adding semantics is therefore cumbersome while adding constructs is a breeze.
13 Upgrading the functions to type classes lifts this limitation to a certain extent.
14
15 Deeply embedded languages represent their language constructs as data and the semantics are functions on it.
16 As a result, the language constructs are embedded in the semantics, hence adding new language constructs is laborious where adding semantics is trouble free.
17
18 This paper shows that by abstracting the semantics functions in deep embedding to type classes, it is possible to easily add language constructs as well.
19 So-called classy deep embedding results in \glspl{DSL} that are extensible both in language constructs and in semantics while maintaining a concrete abstract syntax tree.
20 Additionally, little type-level trickery or complicated boilerplate code is required to achieve this.
21 \end{chapterabstract}
22
23 \section{Introduction}
24 The two flavours of \gls{DSL} embedding are deep and shallow embedding~\citep{boulton_experience_1992}.
25 In \gls{FP} languages, shallow embedding models language constructs as functions in the host language.
26 As a result, adding new language constructs---extra functions---is easy.
27 However, the semantics of the language is embedded in these functions, making it troublesome to add semantics since it requires updating all existing language constructs.
28
29 On the other hand, deep embedding models language constructs as data in the host language.
30 The semantics of the language are represented by functions over the data.
31 Consequently, adding new semantics, i.e.\ novel functions, is straightforward.
32 It can be stated that the language constructs are embedded in the functions that form a semantics.
33 If one wants to add a language construct, all semantics functions must be revisited and revised to avoid ending up with partial functions.
34
35 This juxtaposition has been known for many years~\citep{reynolds_user-defined_1978} and discussed by many others~\citep{krishnamurthi_synthesizing_1998} but most famously dubbed the \emph{expression problem} by Wadler~\citep{wadler_expression_1998}:
36
37 \begin{quote}
38 The \emph{expression problem} is a new name for an old problem.
39 The goal is to define a data type by cases, where one can add new cases to the data type and new functions over the data type, without recompiling existing code, and while retaining static type safety (e.g., no casts).
40 \end{quote}
41
42 In shallow embedding, abstracting the functions to type classes disentangles the language constructs from the semantics, allowing extension both ways.
43 This technique is dubbed tagless-final embedding~\citep{carette_finally_2009}; nonetheless, it is no silver bullet.
44 Some semantics that require an intensional analysis of the syntax tree, such as transformation and optimisations, are difficult to implement in shallow embedding due to the lack of an explicit data structure representing the abstract syntax tree.
45 The semantics of the \gls{DSL} have to be combined and must hold some kind of state or context, so that structural information is not lost~\citep{kiselyov_typed_2012}.
46
47 \subsection{Research contribution}
48 This paper shows how to apply the technique observed in tagless-final embedding to deep embedding.
49 The presented basic technique, christened \emph{classy deep embedding}, does not require advanced type system extensions to be used.
50 However, it is suitable for type system extensions such as \glspl{GADT}.
51 While this paper is written as a literate
52 \Gls{HASKELL}~\citep{peyton_jones_haskell_2003} program using some minor extensions provided by \gls{GHC}~\citep{ghc_team_ghc_2021}, the idea is applicable to other languages as well\footnotemark{}.
53 \footnotetext{Lubbers, M. (2021): Literate Haskell/lhs2\TeX{} source code of the paper ``Deep Embedding
54 with Class'': TFP 2022.\ DANS.\ \url{https://doi.org/10.5281/zenodo.5081386}.}
55
56 \section{Deep embedding}
57 Pick a \gls{DSL}, any \gls{DSL}, pick the language of literal integers and addition.
58 In deep embedding, terms in the language are represented by data in the host language.
59 Hence, defining the constructs is as simple as creating the following algebraic data type\footnote{All data types and functions are subscripted to indicate the evolution.}.
60
61 \begin{lstHaskellLhstex}
62 data Expr_0
63 = Lit_0 Int
64 | Add_0 Expr_0 Expr_0
65 \end{lstHaskellLhstex}
66
67 Semantics are defined as functions on the \haskelllhstexinline{Expr_0} data type.
68 For example, a function transforming the term to an integer---an evaluator---is implemented as follows.
69
70 \begin{lstHaskellLhstex}
71 eval_0 :: Expr_0 -> Int
72 eval_0 (Lit_0 e) = e
73 eval_0 (Add_0 e1 e2) = eval_0 e1 + eval_0 e2
74 \end{lstHaskellLhstex}
75
76 Adding semantics---e.g.\ a printer---just means adding another function while the existing functions remain untouched.
77 I.e.\ the key property of deep embedding.
78 The following function, transforming the \haskelllhstexinline{Expr_0} data type to a string, defines a simple printer for our language.
79
80 \begin{lstHaskellLhstex}
81 print_0 :: Expr_0 -> String
82 print_0 (Lit_0 v) = show v
83 print_0 (Add_0 e1 e2) = "(" ++ print_0 e1 ++ "+" ++ print_0 e2 ++ ")"
84 \end{lstHaskellLhstex}
85
86 While the language is concise and elegant, it is not very expressive.
87 Traditionally, extending the language is achieved by adding a case to the \haskelllhstexinline{Expr_0} data type.
88 So, adding subtraction to the language results in the following revised data type.
89
90 \begin{lstHaskellLhstex}
91 data Expr_0
92 = Lit_0 Int
93 | Add_0 Expr_0 Expr_0
94 | Sub_0 Expr_0 Expr_0
95 \end{lstHaskellLhstex}
96
97 Extending the \gls{DSL} with language constructs exposes the Achilles' heel of deep embedding.
98 Adding a case to the data type means that all semantics functions have become partial and need to be updated to be able to handle this new case.
99 This does not seem like an insurmountable problem, but it does pose a problem if either the functions or the data type itself are written by others or are contained in a closed library.
100
101 \section{Shallow embedding}
102 Conversely, let us see how this would be done in shallow embedding.
103 First, the data type is represented by functions in the host language with embedded semantics.
104 Therefore, the evaluators for literals and addition both become a function in the host language as follows.
105
106 \begin{lstHaskellLhstex}
107 type Sem_s = Int
108
109 lit_s :: Int -> Sem_s
110 lit_s i = i
111
112 add_s :: Sem_s -> Sem_s -> Sem_s
113 add_s e1 e2 = e1 + e2
114 \end{lstHaskellLhstex}
115
116 Adding constructs to the language is done by adding functions.
117 Hence, the following function adds subtraction to our language.
118
119 \begin{lstHaskellLhstex}
120 sub_s :: Sem_s -> Sem_s -> Sem_s
121 sub_s e1 e2 = e1 - e2
122 \end{lstHaskellLhstex}
123
124 Adding semantics on the other hand---e.g.\ a printer---is not that simple because the semantics are part of the functions representing the language constructs.
125 One way to add semantics is to change all functions to execute both semantics at the same time.
126 In our case this means changing the type of \haskelllhstexinline{Sem_s} to be \haskelllhstexinline{(Int, String)} so that all functions operate on a tuple containing the result of the evaluator and the printed representation at the same time. %chktex 36
127 Alternatively, a single semantics can be defined that represents a fold over the language constructs~\citep{gibbons_folding_2014}, delaying the selection of semantics to the moment the fold is applied.
128
129 \subsection{Tagless-final embedding}
130 Tagless-final embedding overcomes the limitations of standard shallow embedding.
131 To upgrade to this embedding technique, the language constructs are changed from functions to type classes.
132 For our language this results in the following type class definition.
133
134 \begin{lstHaskellLhstex}
135 class Expr_t s where
136 lit_t :: Int -> s
137 add_t :: s -> s -> s
138 \end{lstHaskellLhstex}
139
140 Semantics become data types\footnotemark{} implementing these type classes, resulting in the following instance for the evaluator.
141 \footnotetext{%
142 In this case \haskelllhstexinline{newtype}s are used instead of regular \haskelllhstexinline{data} declarations.
143 A \haskelllhstexinline{newtype} is a special data type with a single constructor containing a single value only to which it is isomorphic.
144 It allows the programmer to define separate class instances than the instances of the isomorphic type without any overhead.
145 During compilation the constructor is completely removed~\citep[\citesection{4.2.3}]{peyton_jones_haskell_2003}.
146 }
147
148 \begin{lstHaskellLhstex}
149 newtype Eval_t = E_t Int
150
151 instance Expr_t Eval_t where
152 lit_t v = E_t v
153 add_t (E_t e1) (E_t e2) = E_t (e1 + e2)
154 \end{lstHaskellLhstex}
155
156 Adding constructs---e.g.\ subtraction---just results in an extra type class and corresponding instances.
157
158 \begin{lstHaskellLhstex}
159 class Sub_t s where
160 sub_t :: s -> s -> s
161
162 instance Sub_t Eval_t where
163 sub_t (E_t e1) (E_t e2) = E_t (e1 - e2)
164 \end{lstHaskellLhstex}
165
166 Finally, adding semantics such as a printer over the language is achieved by providing a data type representing the semantics accompanied by instances for the language constructs.
167
168 \begin{lstHaskellLhstex}
169 newtype Printer_t = P_t String
170
171 instance Expr_t Printer_t where
172 lit_t i = P_t (show i)
173 add_t (P_t e1) (P_t e2) = P_t ("(" ++ e1 ++ "+" ++ e2 ++ ")")
174
175 instance Sub_t Printer_t where
176 sub_t (P_t e1) (P_t e2) = P_t ("(" ++ e1 ++ "-" ++ e2 ++ ")")
177 \end{lstHaskellLhstex}
178
179 \section{Lifting the backends}%
180 Let us rethink the deeply embedded \gls{DSL} design.
181 Remember that in shallow embedding, the semantics are embedded in the language construct functions.
182 Obtaining extensibility both in constructs and semantics was accomplished by abstracting the semantics functions to type classes, making the constructs overloaded in the semantics.
183 In deep embedding, the constructs are embedded in the semantics functions instead.
184 So, let us apply the same technique, i.e.\ make the semantics overloaded in the language constructs by abstracting the semantics functions to type classes.
185 The same effect may be achieved when using similar techniques such as explicit dictionary passing or ML style modules.
186 In our language this results in the following type class.
187
188 \begin{lstHaskellLhstex}
189 class Eval_1 v where
190 eval_1 :: v -> Int
191
192 data Expr_1
193 = Lit_1 Int
194 | Add_1 Expr_1 Expr_1
195 \end{lstHaskellLhstex}
196
197 Implementing the semantics type class instances for the \haskelllhstexinline{Expr_1} data type is an elementary exercise.
198 By a copy-paste and some modifications, we come to the following implementation.
199
200 \begin{lstHaskellLhstex}
201 instance Eval_1 Expr_1 where
202 eval_1 (Lit_1 v) = v
203 eval_1 (Add_1 e1 e2) = eval_1 e1 + eval_1 e2
204 \end{lstHaskellLhstex}
205
206 Subtraction can now be defined in a separate data type, leaving the original data type intact.
207 Instances for the additional semantics can now be implemented separately as instances of the type classes.
208
209 \begin{lstHaskellLhstex}
210 data Sub_1 = Sub_1 Expr_1 Expr_1
211
212 instance Eval_1 Sub_1 where
213 eval_1 (Sub_1 e1 e2) = eval_1 e1 - eval_1 e2
214 \end{lstHaskellLhstex}
215
216 \section{Existential data types}%
217
218 The astute reader might have noticed that we have dissociated ourselves from the original data type.
219 It is only possible to create an expression with a subtraction on the top level.
220 The recursive knot is left untied and as a result, \haskelllhstexinline{Sub_1} can never be reached from an \haskelllhstexinline{Expr_1}.
221
222 Luckily, we can reconnect them by adding a special constructor to the \haskelllhstexinline{Expr_1} data type for housing extensions.
223 It contains an existentially quantified~\citep{mitchell_abstract_1988} type with type class constraints~\citep{laufer_combining_1994,laufer_type_1996} for all semantics type classes~\citep[\citesection{6.4.6}]{ghc_team_ghc_2021} to allow it to house not just subtraction but any future extension.
224
225 \begin{lstHaskellLhstex}
226 data Expr_2
227 = Lit_2 Int
228 | Add_2 Expr_2 Expr_2
229 | forall x. Eval_2 x => Ext_2 x
230 \end{lstHaskellLhstex}
231
232 The implementation of the extension case in the semantics type classes is in most cases just a matter of calling the function for the argument as can be seen in the semantics instances shown below.
233
234 \begin{lstHaskellLhstex}
235 instance Eval_2 Expr_2 where
236 eval_2 (Lit_2 v) = v
237 eval_2 (Add_2 e1 e2) = eval_2 e1 + eval_2 e2
238 eval_2 (Ext_2 x) = eval_2 x
239 \end{lstHaskellLhstex}
240
241 Adding language construct extensions in different data types does mean that an extra \haskelllhstexinline{Ext_2} tag is introduced when using the extension.
242 This burden can be relieved by creating a smart constructor for it that automatically wraps the extension with the \haskelllhstexinline{Ext_2} constructor so that it is of the type of the main data type.
243
244 \begin{lstHaskellLhstex}
245 sub_2 :: Expr_2 -> Expr_2 -> Expr_2
246 sub_2 e1 e2 = Ext_2 (Sub_2 e1 e2)
247 \end{lstHaskellLhstex}
248
249 In our example this means that the programmer can write\footnotemark{}:
250 \footnotetext{%
251 Backticks are used to use functions or constructors in an infix fashion~\citep[\citesection{4.3.3}]{peyton_jones_haskell_2003}.
252 }
253 \begin{lstHaskellLhstex}
254 e2 :: Expr_2
255 e2 = Lit_2 42 `sub_2` Lit_2 1
256 \end{lstHaskellLhstex}
257 instead of having to write
258 \begin{lstHaskellLhstex}
259 e2p :: Expr_2
260 e2p = Ext_2 (Lit_2 42 `Sub_2` Lit_2 1)
261 \end{lstHaskellLhstex}
262
263 \subsection{Unbraiding the semantics from the data}
264 This approach does reveal a minor problem.
265 Namely, that all semantics type classes are braided into our datatypes via the \haskelllhstexinline{Ext_2} constructor.
266 Say if we add the printer again, the \haskelllhstexinline{Ext_2} constructor has to be modified to contain the printer type class constraint as well\footnote{Resulting in the following constructor: \haskelllhstexinline{forall x.(Eval_2 x, Print_2 x) => Ext_2 x}.}. %chktex 36
267 Thus, if we add semantics, the main data type's type class constraints in the \haskelllhstexinline{Ext_2} constructor need to be updated.
268 To avoid this, the type classes can be bundled in a type class alias or type class collection as follows.
269
270 \begin{lstHaskellLhstex}
271 class (Eval_2 x, Print_2 x) => Semantics_2 x
272
273 data Expr_2
274 = Lit_2 Int
275 | Add_2 Expr_2 Expr_2
276 | forall x. Semantics_2 x => Ext_2 x
277 \end{lstHaskellLhstex}
278
279 The class alias removes the need for the programmer to visit the main data type when adding additional semantics.
280 Unfortunately, the compiler does need to visit the main data type again.
281 Some may argue that adding semantics happens less frequently than adding language constructs but in reality it means that we have to concede that the language is not as easily extensible in semantics as in language constructs.
282 More exotic type system extensions such as constraint kinds~\citep{bolingbroke_constraint_2011,yorgey_giving_2012} can untangle the semantics from the data types by making the data types parametrised by the particular semantics.
283 However, by adding some boilerplate, even without this extension, the language constructs can be parametrised by the semantics by putting the semantics functions in a data type.
284 First the data types for the language constructs are parametrised by the type variable \haskelllhstexinline{d} as follows.
285
286 \begin{lstHaskellLhstex}
287 data Expr_3 d
288 = Lit_3 Int
289 | Add_3 (Expr_3 d) (Expr_3 d)
290 | forall x. Ext_3 (d x) x
291
292 data Sub_3 d = Sub_3 (Expr_3 d) (Expr_3 d)
293 \end{lstHaskellLhstex}
294
295 The \haskelllhstexinline{d} type variable is inhabited by an explicit dictionary for the semantics, i.e.\ a witness to the class instance.
296 Therefore, for all semantics type classes, a data type is made that contains the semantics function for the given semantics.
297 This means that for \haskelllhstexinline{Eval_3}, a dictionary with the function \haskelllhstexinline{EvalDict_3} is defined, a type class \haskelllhstexinline{HasEval_3} for retrieving the function from the dictionary and an instance for \haskelllhstexinline{HasEval_3} for \haskelllhstexinline{EvalDict_3}.
298
299 \begin{lstHaskellLhstex}
300 newtype EvalDict_3 v = EvalDict_3 (v -> Int)
301
302 class HasEval_3 d where
303 getEval_3 :: d v -> v -> Int
304
305 instance HasEval_3 EvalDict_3 where
306 getEval_3 (EvalDict_3 e) = e
307 \end{lstHaskellLhstex}
308
309 The instances for the type classes change as well according to the change in the datatype.
310 Given that there is a \haskelllhstexinline{HasEval_3} instance for the witness type \haskelllhstexinline{d}, we can provide an implementation of \haskelllhstexinline{Eval_3} for \haskelllhstexinline{Expr_3 d}.
311
312 \begin{lstHaskellLhstex}
313 instance HasEval_3 d => Eval_3 (Expr_3 d) where
314 eval_3 (Lit_3 v) = v
315 eval_3 (Add_3 e1 e2) = eval_3 e1 + eval_3 e2
316 eval_3 (Ext_3 d x) = getEval_3 d x
317
318 instance HasEval_3 d => Eval_3 (Sub_3 d) where
319 eval_3 (Sub_3 e1 e2) = eval_3 e1 - eval_3 e2
320 \end{lstHaskellLhstex}
321
322 Because the \haskelllhstexinline{Ext_3} constructor from \haskelllhstexinline{Expr_3} now contains a value of type \haskelllhstexinline{d}, the smart constructor for \haskelllhstexinline{Sub_3} must somehow come up with this value.
323 To achieve this, a type class is introduced that allows the generation of such a dictionary.
324
325 \begin{lstHaskellLhstex}
326 class GDict a where
327 gdict :: a
328 \end{lstHaskellLhstex}
329
330 This type class has individual instances for all semantics dictionaries, linking the class instance to the witness value.
331 I.e.\ if there is a type class instance known, a witness value can be conjured using the \haskelllhstexinline{gdict} function.
332
333 \begin{lstHaskellLhstex}
334 instance Eval_3 v => GDict (EvalDict_3 v) where
335 gdict = EvalDict_3 eval_3
336 \end{lstHaskellLhstex}
337
338 With these instances, the semantics function can be retrieved from the \haskelllhstexinline{Ext_3} constructor and in the smart constructors they can be generated as follows:
339
340 \begin{lstHaskellLhstex}
341 sub_3 :: GDict (d (Sub_3 d)) => Expr_3 d -> Expr_3 d -> Expr_3 d
342 sub_3 e1 e2 = Ext_3 gdict (Sub_3 e1 e2)
343 \end{lstHaskellLhstex}
344
345 Finally, we reached the end goal: orthogonal extension of both the language constructs, as shown by adding subtraction to the language, and the language semantics.
346 Adding the printer can now be done without touching the original code as follows.
347 First the printer type class, dictionaries and instances for \haskelllhstexinline{GDict} are defined.
348
349 \begin{lstHaskellLhstex}
350 class Print_3 v where
351 print_3 :: v -> String
352
353 newtype PrintDict_3 v = PrintDict_3 (v -> String)
354
355 class HasPrint_3 d where
356 getPrint_3 :: d v -> v -> String
357
358 instance HasPrint_3 PrintDict_3 where
359 getPrint_3 (PrintDict_3 e) = e
360
361 instance Print_3 v => GDict (PrintDict_3 v) where
362 gdict = PrintDict_3 print_3
363 \end{lstHaskellLhstex}
364
365 Then the instances for \haskelllhstexinline{Print_3} of all the language constructs can be defined.
366
367 \begin{lstHaskellLhstex}
368 instance HasPrint_3 d => Print_3 (Expr_3 d) where
369 print_3 (Lit_3 v) = show v
370 print_3 (Add_3 e1 e2) = "(" ++ print_3 e1 ++ "+" ++ print_3 e2 ++ ")"
371 print_3 (Ext_3 d x) = getPrint_3 d x
372 instance HasPrint_3 d => Print_3 (Sub_3 d) where
373 print_3 (Sub_3 e1 e2) = "(" ++ print_3 e1 ++ "-" ++ print_3 e2 ++ ")"
374 \end{lstHaskellLhstex}
375
376 \section{Transformation semantics}
377 Most semantics convert a term to some final representation and can be expressed just by functions on the cases.
378 However, the implementation of semantics such as transformation or optimisation may benefit from a so-called intensional analysis of the abstract syntax tree.
379 In shallow embedding, the implementation for these types of semantics is difficult because there is no tangible abstract syntax tree.
380 In off-the-shelf deep embedding this is effortless since the function can pattern match on the constructor or structures of constructors.
381
382 To demonstrate intensional analyses in classy deep embedding we write an optimizer that removes addition and subtraction by zero.
383 In classy deep embedding, adding new semantics means first adding a new type class housing the function including the machinery for the extension constructor.
384
385 \begin{lstHaskellLhstex}
386 class Opt_3 v where
387 opt_3 :: v -> v
388
389 newtype OptDict_3 v = OptDict_3 (v -> v)
390
391 class HasOpt_3 d where
392 getOpt_3 :: d v -> v -> v
393
394 instance HasOpt_3 OptDict_3 where
395 getOpt_3 (OptDict_3 e) = e
396
397 instance Opt_3 v => GDict (OptDict_3 v) where
398 gdict = OptDict_3 opt_3
399 \end{lstHaskellLhstex}
400
401 The implementation of the optimizer for the \haskelllhstexinline{Expr_3} data type is no complicated task.
402 The only interesting bit occurs in the \haskelllhstexinline{Add_3} constructor, where we pattern match on the optimised children to determine whether an addition with zero is performed.
403 If this is the case, the addition is removed.
404
405 \begin{lstHaskellLhstex}
406 instance HasOpt_3 d => Opt_3 (Expr_3 d) where
407 opt_3 (Lit_3 v) = Lit_3 v
408 opt_3 (Add_3 e1 e2) = case (opt_3 e1, opt_3 e2) of
409 (Lit_3 0, e2p ) -> e2p
410 (e1p, Lit_3 0) -> e1p
411 (e1p, e2p ) -> Add_3 e1p e2p
412 opt_3 (Ext_3 d x) = Ext_3 d (getOpt_3 d x)
413 \end{lstHaskellLhstex}
414
415 Replicating this for the \haskelllhstexinline{Opt_3} instance of \haskelllhstexinline{Sub_3} seems a clear-cut task at first glance.
416
417 \begin{lstHaskellLhstex}
418 instance HasOpt_3 d => Opt_3 (Sub_3 d) where
419 opt_3 (Sub_3 e1 e2) = case (opt_3 e1, opt_3 e2) of
420 (e1p, Lit_3 0) -> e1p
421 (e1p, e2p ) -> Sub_3 e1p e2p
422 \end{lstHaskellLhstex}
423
424 Unsurprisingly, this code is rejected by the compiler.
425 When a literal zero is matched as the right-hand side of a subtraction, the left-hand side of type \haskelllhstexinline{Expr_3} is returned.
426 However, the type signature of the function dictates that it should be of type \haskelllhstexinline{Sub_3}.
427 To overcome this problem we add a convolution constructor.
428
429 \subsection{Convolution}
430 Adding a loopback case or convolution constructor to \haskelllhstexinline{Sub_3} allows the removal of the \haskelllhstexinline{Sub_3} constructor while retaining the \haskelllhstexinline{Sub_3} type.
431 It should be noted that a loopback case is \emph{only} required if the transformation actually removes tags.
432 This changes the \haskelllhstexinline{Sub_3} data type as follows.
433
434 \begin{lstHaskellLhstex}
435 data Sub_4 d
436 = Sub_4 (Expr_4 d) (Expr_4 d)
437 | SubLoop_4 (Expr_4 d)
438
439 instance HasEval_4 d => Eval_4 (Sub_4 d) where
440 eval_4 (Sub_4 e1 e2) = eval_4 e1 - eval_4 e2
441 eval_4 (SubLoop_4 e1) = eval_4 e1
442 \end{lstHaskellLhstex}
443
444 With this loopback case in the toolbox, the following \haskelllhstexinline{Sub} instance optimises away subtraction with zero literals.
445
446 \begin{lstHaskellLhstex}
447 instance HasOpt_4 d => Opt_4 (Sub_4 d) where
448 opt_4 (Sub_4 e1 e2) = case (opt_4 e1, opt_4 e2) of
449 (e1p, Lit_4 0) -> SubLoop_4 e1p
450 (e1p, e2p ) -> Sub_4 e1p e2p
451 opt_4 (SubLoop_4 e) = SubLoop_4 (opt_4 e)
452 \end{lstHaskellLhstex}
453
454 \subsection{Pattern matching}
455 Pattern matching within datatypes and from an extension to the main data type works out of the box.
456 Cross-extensional pattern matching on the other hand---matching on a particular extension---is something that requires a bit of extra care.
457 Take for example negation propagation and double negation elimination.
458 Pattern matching on values with an existential type is not possible without leveraging dynamic typing~\citep{abadi_dynamic_1991,baars_typing_2002}.
459 To enable dynamic typing support, the \haskelllhstexinline{Typeable} type class as provided by \haskelllhstexinline{Data.Dynamic}~\citep{ghc_team_datadynamic_2021} is added to the list of constraints in all places where we need to pattern match across extensions.
460 As a result, the \haskelllhstexinline{Typeable} type class constraints are added to the quantified type variable \haskelllhstexinline{x} of the \haskelllhstexinline{Ext_4} constructor and to \haskelllhstexinline{d}s in the smart constructors.
461
462 \begin{lstHaskellLhstex}
463 data Expr_4 d
464 = Lit_4 Int
465 | Add_4 (Expr_4 d) (Expr_4 d)
466 | forall x. Typeable x => Ext_4 (d x) x
467 \end{lstHaskellLhstex}
468
469 First let us add negation to the language by defining a datatype representing it.
470 Negation elimination requires the removal of negation constructors, so a convolution constructor is defined as well.
471
472 \begin{lstHaskellLhstex}
473 data Neg_4 d
474 = Neg_4 (Expr_4 d)
475 | NegLoop_4 (Expr_4 d)
476
477 neg_4 :: (Typeable d, GDict (d (Neg_4 d))) => Expr_4 d -> Expr_4 d
478 neg_4 e = Ext_4 gdict (Neg_4 e)
479 \end{lstHaskellLhstex}
480
481 The evaluation and printer instances for the \haskelllhstexinline{Neg_4} datatype are defined as follows.
482
483 \begin{lstHaskellLhstex}
484 instance HasEval_4 d => Eval_4 (Neg_4 d) where
485 eval_4 (Neg_4 e) = negate (eval_4 e)
486 eval_4 (NegLoop_4 e) = eval_4 e
487
488 instance HasPrint_4 d => Print_4 (Neg_4 d) where
489 print_4 (Neg_4 e) = "(~" ++ print_4 e ++ ")"
490 print_4 (NegLoop_4 e) = print_4 e
491 \end{lstHaskellLhstex}
492
493 The \haskelllhstexinline{Opt_4} instance contains the interesting bit.
494 If the sub expression of a negation is an addition, negation is propagated downwards.
495 If the sub expression is again a negation, something that can only be found out by a dynamic pattern match, it is replaced by a \haskelllhstexinline{NegLoop_4} constructor.
496
497 \begin{lstHaskellLhstex}
498 instance (Typeable d, GDict (d (Neg_4 d)), HasOpt_4 d) => Opt_4 (Neg_4 d) where
499 opt_4 (Neg_4 (Add_4 e1 e2))
500 = NegLoop_4 (Add_4 (opt_4 (neg_4 e1)) (opt_4 (neg_4 e2)))
501 opt_4 (Neg_4 (Ext_4 d x))
502 = case fromDynamic (toDyn (getOpt_4 d x)) of
503 Just (Neg_4 e) -> NegLoop_4 e
504 _ -> Neg_4 (Ext_4 d (getOpt_4 d x))
505 opt_4 (Neg_4 e) = Neg_4 (opt_4 e)
506 opt_4 (NegLoop_4 e) = NegLoop_4 (opt_4 e)
507 \end{lstHaskellLhstex}
508
509 Loopback cases do make cross-extensional pattern matching less modular in general.
510 For example, \haskelllhstexinline{Ext_4 d (SubLoop_4 (Lit_4 0))} is equivalent to \haskelllhstexinline{Lit_4 0} in the optimisation semantics and would require an extra pattern match.
511 Fortunately, this problem can be mitigated---if required---by just introducing an additional optimisation semantics that removes loopback cases.
512 Luckily, one does not need to resort to these arguably blunt matters often.
513 Dependent language functionality often does not need to span extensions, i.e.\ it is possible to group them in the same data type.
514
515 \subsection{Chaining semantics}
516 Now that the data types are parametrised by the semantics a final problem needs to be overcome.
517 The data type is parametrised by the semantics, thus, using multiple semantics, such as evaluation after optimising is not straightforwardly possible.
518 Luckily, a solution is readily at hand: introduce an ad-hoc combination semantics.
519
520 \begin{lstHaskellLhstex}
521 data OptPrintDict_4 v = OPD_4 (OptDict_4 v) (PrintDict_4 v)
522
523 instance HasOpt_4 OptPrintDict_4 where
524 getOpt_4 (OPD_4 v _) = getOpt_4 v
525 instance HasPrint_4 OptPrintDict_4 where
526 getPrint_4 (OPD_4 _ v) = getPrint_4 v
527
528 instance (Opt_4 v, Print_4 v) => GDict (OptPrintDict_4 v) where
529 gdict = OPD_4 gdict gdict
530 \end{lstHaskellLhstex}
531
532 And this allows us to write \haskelllhstexinline{print_4 (opt_4 e1)} resulting in \verb|"((~42)+(~38))"| when \haskelllhstexinline{e1} represents $(\sim(42+38))-0$ and is thus defined as follows.
533
534 \begin{lstHaskellLhstex}
535 e1 :: Expr_4 OptPrintDict_4
536 e1 = neg_4 (Lit_4 42 `Add_4` Lit_4 38) `sub_4` Lit_4 0
537 \end{lstHaskellLhstex}
538
539 When using classy deep embedding to the fullest, the ability of the compiler to infer very general types expires.
540 As a consequence, defining reusable expressions that are overloaded in their semantics requires quite some type class constraints that cannot be inferred by the compiler (yet) if they use many extensions.
541 Solving this remains future work.
542 For example, the expression $\sim(42-38)+1$ has to be defined as:
543
544 \begin{lstHaskellLhstex}
545 e3 :: (Typeable d, GDict (d (Neg_4 d)), GDict (d (Sub_4 d))) => Expr_4 d
546 e3 = neg_4 (Lit_4 42 `sub_4` Lit_4 38) `Add_4` Lit_4 1
547 \end{lstHaskellLhstex}
548
549 \section{\texorpdfstring{\Glsxtrlongpl{GADT}}{Generalised algebraic data types}}%
550 \Glspl{GADT} are enriched data types that allow the type instantiation of the constructor to be explicitly defined~\citep{cheney_first-class_2003,hinze_fun_2003}.
551 Leveraging \glspl{GADT}, deeply embedded \glspl{DSL} can be made statically type safe even when different value types are supported.
552 Even when \glspl{GADT} are not supported natively in the language, they can be simulated using embedding-projection pairs or equivalence types~\citep[\citesection{2.2}]{cheney_lightweight_2002}.
553 Where some solutions to the expression problem do not easily generalise to \glspl{GADT} (see \cref{sec:cde:related}), classy deep embedding does.
554 Generalising the data structure of our \gls{DSL} is fairly straightforward and to spice things up a bit, we add an equality and boolean not language construct.
555 To make the existing \gls{DSL} constructs more general, we relax the types of those constructors.
556 For example, operations on integers now work on all numerals instead.
557 Moreover, the \haskelllhstexinline{Lit_g} constructor can be used to lift values of any type to the \gls{DSL} domain as long as they have a \haskelllhstexinline{Show} instance, required for the printer.
558 Since some optimisations on \haskelllhstexinline{Not_g} remove constructors and therefore use cross-extensional pattern matches, \haskelllhstexinline{Typeable} constraints are added to \haskelllhstexinline{a}.
559 Furthermore, because the optimisations for \haskelllhstexinline{Add_g} and \haskelllhstexinline{Sub_g} are now more general, they do not only work for \haskelllhstexinline{Int}s but for any type with a \haskelllhstexinline{Num} instance, the \haskelllhstexinline{Eq} constraint is added to these constructors as well.
560 Finally, not to repeat ourselves too much, we only show the parts that substantially changed.
561 The omitted definitions and implementation can be found in \cref{sec:cde:appendix}.
562
563 \begin{lstHaskellLhstex}
564 data Expr_g d a where
565 Lit_g :: Show a => a -> Expr_g d a
566 Add_g :: (Eq a, Num a) => Expr_g d a -> Expr_g d a -> Expr_g d a
567 Ext_g :: Typeable x => d x -> x a -> Expr_g d a
568 data Neg_g d a where
569 Neg_g :: (Typeable a, Num a) => Expr_g d a -> Neg_g d a
570 NegLoop_g :: Expr_g d a -> Neg_g d a
571 data Not_g d a where
572 Not_g :: Expr_g d Bool -> Not_g d Bool
573 NotLoop_g :: Expr_g d a -> Not_g d a
574 \end{lstHaskellLhstex}
575
576 The smart constructors for the language extensions inherit the class constraints of their data types and include a \haskelllhstexinline{Typeable} constraint on the \haskelllhstexinline{d} type variable for it to be usable in the \haskelllhstexinline{Ext_g} constructor as can be seen in the smart constructor for \haskelllhstexinline{Neg_g}:
577
578 \begin{lstHaskellLhstex}
579 neg_g :: (Typeable d, GDict (d (Neg_g d)), Typeable a, Num a) =>
580 Expr_g d a -> Expr_g d a
581 neg_g e = Ext_g gdict (Neg_g e)
582
583 not_g :: (Typeable d, GDict (d (Not_g d))) =>
584 Expr_g d Bool -> Expr_g d Bool
585 not_g e = Ext_g gdict (Not_g e)
586 \end{lstHaskellLhstex}
587
588 Upgrading the semantics type classes to support \glspl{GADT} is done by an easy textual search and replace.
589 All occurrences of \haskelllhstexinline{v} are now parametrised by type variable \haskelllhstexinline{a}:
590
591 \begin{lstHaskellLhstex}
592 class Eval_g v where
593 eval_g :: v a -> a
594 class Print_g v where
595 print_g :: v a -> String
596 class Opt_g v where
597 opt_g :: v a -> v a
598 \end{lstHaskellLhstex}
599
600 Now that the shape of the type classes has changed, the dictionary data types and the type classes need to be adapted as well.
601 The introduced type variable \haskelllhstexinline{a} is not an argument to the type class, so it should not be an argument to the dictionary data type.
602 To represent this type class function, a rank-2 polymorphic function is needed~\citep[\citesection{6.4.15}]{ghc_team_ghc_2021}\citep{odersky_putting_1996}.
603 Concretely, for the evaluator this results in the following definitions:
604
605 \begin{lstHaskellLhstex}
606 newtype EvalDict_g v = EvalDict_g (forall a. v a -> a)
607 class HasEval_g d where
608 getEval_g :: d v -> v a -> a
609 instance HasEval_g EvalDict_g where
610 getEval_g (EvalDict_g e) = e
611 \end{lstHaskellLhstex}
612
613 The \haskelllhstexinline{GDict} type class is general enough, so the instances can remain the same.
614 The \haskelllhstexinline{Eval_g} instance of \haskelllhstexinline{GDict} looks as follows:
615
616 \begin{lstHaskellLhstex}
617 instance Eval_g v => GDict (EvalDict_g v) where
618 gdict = EvalDict_g eval_g
619 \end{lstHaskellLhstex}
620
621 Finally, the implementations for the instances can be ported without complication, as shown using the optimisation instance of \haskelllhstexinline{Not_g}:
622
623 \begin{lstHaskellLhstex}
624 instance (Typeable d, GDict (d (Not_g d)), HasOpt_g d) => Opt_g (Not_g d) where
625 opt_g (Not_g (Ext_g d x))
626 = case fromDynamic (toDyn (getOpt_g d x)) :: Maybe (Not_g d Bool) of
627 Just (Not_g e) -> NotLoop_g e
628 _ -> Not_g (Ext_g d (getOpt_g d x))
629 opt_g (Not_g e) = Not_g (opt_g e)
630 opt_g (NotLoop_g e) = NotLoop_g (opt_g e)
631 \end{lstHaskellLhstex}
632
633 \section{Conclusion}%
634
635 Classy deep embedding is a novel, organically grown embedding technique that relieves deep embedding of the extensibility problem in most cases.
636
637 By abstracting the semantics functions to type classes they become overloaded in the language constructs.
638 This makes it possible to add new language constructs in a separate type.
639 These extensions are brought together in a special extension constructor residing in the main data type.
640 This extension case is overloaded by the language construct using a data type containing the class dictionary.
641 As a result, orthogonal extension is possible for language constructs and semantics using only little syntactic overhead or type annotations.
642 The basic technique only requires---well established through history and relatively standard---existential data types.
643 However, if needed, the technique generalises to \glspl{GADT} as well, adding rank-2 types to the list of type system requirements.
644 Finally, the abstract syntax tree remains observable which makes it suitable for intensional analyses, albeit using occasional dynamic typing for truly cross-extensional transformations.
645
646 Defining reusable expressions overloaded in semantics or using multiple semantics on a single expression still requires some boilerplate; getting around this remains future work.
647 \Cref{sec:classy_reprise} shows how the boilerplate can be minimised using advanced type system extensions.
648
649 \section{Related work}%
650 \label{sec:cde:related}
651
652 Embedded \gls{DSL} techniques in functional languages have been a topic of research for many years, thus we do not claim a complete overview of related work.
653
654 Clearly, classy deep embedding bears most similarity to \emph{Data types \`a la carte}~\citep{swierstra_data_2008}.
655 In \citeauthor{swierstra_data_2008}'s approach, semantics are lifted to type classes similarly to classy deep embedding.
656 Each language construct is its own data type parametrised by a type parameter.
657 This parameter contains some type level representation of language constructs that are in use.
658 In classy deep embedding, extensions only have to be enumerated at the type level when the term is required to be overloaded, in all other cases they are captured in the extension case.
659 Because all the constructs are expressed in the type system, nifty type system tricks need to be employed to convince the compiler that everything is type safe and the class constraints can be solved.
660 Furthermore, it requires some boilerplate code such as functor instances for the data types.
661 In return, pattern matching is easier and does not require dynamic typing.
662 Classy deep embedding only strains the programmer with writing the extension case for the main data type and the occasional loopback constructor.
663
664 \Citet{loh_open_2006} proposed a language extension that allows open data types and open functions, i.e.\ functions and data types that can be extended with more cases later on.
665 They hinted at the possibility of using type classes for open functions but had serious concerns that pattern matching would be crippled because constructors are becoming types, thus ultimately becoming impossible to type.
666 In contrast, this paper shows that pattern matching is easily attainable---albeit using dynamic types---and that the terms can be typed without complicated type system extensions.
667
668 A technique similar to classy deep embedding was proposed by \citet{najd_trees_2017} to tackle a slightly different problem, namely that of reusing a data type for multiple purposes in a slightly different form.
669 An example is decorating the abstract syntax tree of a compiler differently for each phase of the compiler.
670 They propose to add an extension descriptor as a type variable to a data type and a type family that can be used to decorate constructors with extra information and add additional constructors to the data type using an extension constructor.
671 Classy deep embedding works similarly but uses existentially quantified type variables to describe possible extensions instead of type variables and type families.
672 In classy deep embedding, the extensions do not need to be encoded in the type system and less boilerplate is required.
673 Furthermore, pattern matching on extensions becomes a bit more complicated but in return it allows for multiple extensions to be added orthogonally and avoids the necessity for type system extensions.
674
675 Tagless-final embedding is the shallowly embedded counterpart of classy deep embedding and was invented for the same purpose: overcoming the issues with standard shallow embedding~\citep{carette_finally_2009}.
676 Classy deep embedding was organically grown from observing the evolution of tagless-final embedding.
677 The main difference between tagless-final embedding and classy deep embedding---and in general between shallow and deep embedding---is that in the former, intensional analysis of the abstract syntax tree is more difficult because there is no tangible abstract syntax tree data structure.
678 In classy deep embedding, it is possible to define transformations even across extensions.
679 Furthermore, in classy deep embedding, defining (mutually) dependent interpretations is automatically supported whereas in tagless-final embedding this requires some amount of code duplication~\citep{sun_compositional_2022}.
680
681 Hybrid approaches between deep and shallow embedding exist as well.
682 For example, \citet{svenningsson_combining_2013} show that by expressing the deeply embedded language in a shallowly embedded core language, extensions can be made orthogonally as well.
683 This paper differs from those approaches in the sense that it does not require a core language in which all extensions need to be expressible.
684
685 \section*{Acknowledgements}
686 This research is partly funded by the Royal Netherlands Navy.
687 Furthermore, I would like to thank Pieter and Rinus for the fruitful discussions, Ralf for inspiring me to write a functional pearl, and the anonymous reviewers for their valuable and honest comments.
688
689 \begin{subappendices}
690 \section{Reprise: reducing boilerplate}%
691 \label{sec:classy_reprise}
692 \todo{Improve text}
693 One of the unique selling points of this novel \gls{DSL} embedding technique is that it, in its basic form, does not require advanced type system extensions nor a lot of boilerplate.
694 However, generalising the technique to \glspl{GADT} arguably unleashes a cesspool of \emph{unsafe} compiler extensions.
695 If we are willing to work with extensions, almost all of the boilerplate can be inferred or generated.
696
697 In classy deep embedding, the \gls{DSL} datatype is parametrised by a type variable providing a witness to the interpretation on the language.
698 When using multiple interpretations, these need to be bundled in a data type.
699 Using the \gls{GHC}'s \GHCmod{ConstraintKinds} extension, we can make these witnesses explicit, tying into \gls{HASKELL}'s type system immediately.
700 Furthermore, this constraint does not necessarily have to be a single constraint: after enabling \GHCmod{DataKinds} and \GHCmod{TypeOperators}, we can encode lists of witnesses instead.
701 The data type for this list of witnesses is \haskelllhstexinline{Record} as shown in \cref{lst_cbde:record_type}.
702 This \gls{GADT} is parametrised by two type variables.
703 The first type variable (\haskelllhstexinline{dt}) is the type or type constructor on which the constraints can be applied and the second type variable (\haskelllhstexinline{clist}) is the list of constraint constructors itself.
704 This means that when \haskelllhstexinline{Cons} is pattern matched, the overloading of the type class constraint for \haskelllhstexinline{c dt} can be solved by the compiler.
705 \GHCmod{KindSignatures} is used to force the kinds of the type parameters and the kind of \haskelllhstexinline{dt} is polymorphic (\GHCmod{PolyKinds}) so that the \haskelllhstexinline{Record} data type can be used for \glspl{DSL} using type classes but also type constructor classes (e.g.\ when using \glspl{GADT}).
706
707 \begin{lstHaskellLhstex}[label={lst_cbde:record_type},caption={Data type for a list of constraints}]
708 data Record (dt :: k) (clist :: [k -> Constraint]) where
709 Nil :: Record dt '[]
710 Cons :: c dt => Record dt cs -> Record dt (c ': cs)
711 \end{lstHaskellLhstex}
712
713 To incorporate this type in the \haskelllhstexinline{Expr} type, the \haskelllhstexinline{Ext} constructor changes from containing a single witness dictionary to a \haskelllhstexinline{Record} type containing all the required dictionaries.
714
715 \begin{lstHaskellLhstex}[caption={Main data type containing a record of witnesses}]
716 data Expr c
717 = Lit Int
718 | Add (Expr c) (Expr c)
719 | Ext (Record x c) x
720 \end{lstHaskellLhstex}
721
722 Furthermore, we define a type class (\haskelllhstexinline{In}) that allows us to extract explicit dictionaries \haskelllhstexinline{Dict} from these records if the constraint is present in the list.
723 Since the constraints become available as soon as the \haskelllhstexinline{Cons} constructor is matched, the implementation is a trivial type-level list traversal.
724
725 \begin{lstHaskellLhstex}[caption={Membership functions for constraints}]
726 class c `In` cs where
727 project :: Record dt cs -> Dict (c dt)
728 instance {-# OVERLAPPING #-} c `In` (c ': cs) where
729 project (Cons _) = Dict
730 instance {-# OVERLAPPING #-} c `In` cs => c `In` (b ': cs) where
731 project (Cons xs) = project xs
732 \end{lstHaskellLhstex}
733
734 The final scaffolding is a multi-parameter type class \haskelllhstexinline{CreateRecord} (requiring the \GHCmod{MultiParamTypeClasses} and \GHCmod{FlexibleInstances} extensions) to create these \haskelllhstexinline{Record} witnesses automatically.
735 This type class creates a record structure cons by cons if and only if all type class constraints are available in the list of constraints.
736 It is not required to provide instances of this class for specific records or type classes; the two instances describe all the required constraints.
737
738 \begin{lstHaskellLhstex}[caption={Automatic creation of witness records}]
739 class CreateRecord dt c where
740 createRecord :: Record dt c
741 instance CreateRecord d '[] where
742 createRecord = Nil
743 instance (c (d c0), CreateRecord (d c0) cs) =>
744 CreateRecord (d c0) (c ': cs) where
745 createRecord = Cons createRecord
746 \end{lstHaskellLhstex}
747
748 The class constraints for the interpretation instances can now be greatly simplified, as shown in the evaluation instance for \haskelllhstexinline{Expr}.
749 The implementation remains the same, except that for the extension case, a trick needs to be applied to convince the compiler of the correct instances.
750 Using \haskelllhstexinline{`In`}'s \haskelllhstexinline{project} function, a dictionary can be brought into scope.
751 This dictionary can then subsequently be used to apply the type class function on the extension using the \haskelllhstexinline{withDict} function from the \haskelllhstexinline{Data.Constraint} library\footnote{\haskelllhstexinline{withDict :: Dict c -> (c => r) -> r}}.
752 The \GHCmod{ScopedTypeVariables} extension is used to make sure the existentially quantified type variable for the extension is matched to the type of the dictionary.
753 Furthermore, because the class constraint is not smaller than the instance head, \GHCmod{UndecidableInstances} should be enabled.
754
755 \begin{lstHaskellLhstex}[caption={Evaluation instance for the main data type}]
756 class Eval v where
757 eval :: v -> Int
758
759 instance Eval `In` s => Eval (Expr s) where
760 eval (Lit i) = i
761 eval (Add l r) = eval l + eval r
762 eval (Ext r (e :: x)) = withDict (project r :: Dict (Eval x)) eval e
763 \end{lstHaskellLhstex}
764
765 Smart constructors need to be adapted as well, as can be seen from the smart constructor \haskelllhstexinline{subst}.
766 Instead of a \haskelllhstexinline{GDict} class constraint, a \haskelllhstexinline{CreateRecord} class constraint needs to be added.
767
768 \begin{lstHaskellLhstex}[caption={Subtraction smart constructor}]
769 subst :: (Typeable c, CreateRecord (Subt c) c) => Expr c -> Expr c -> Expr c
770 subst l r = Ext createRecord (l `Subt` r)
771 \end{lstHaskellLhstex}
772
773 Finally, defining terms in the language can be done immediately if the interpretations are known.
774 For example, if we want to print and/or optimise the term ${\sim}({\sim}(42+(38-4)))$, we can define it as follows:
775
776 \begin{lstHaskellLhstex}[caption={Term with a concrete list of interpretations}]
777 e0 :: Expr '[Print,Opt]
778 e0 = neg (neg (Lit 42 `Add` (Lit 38 `subt` Lit 4)))
779 \end{lstHaskellLhstex}
780
781 It is also possible to define terms in the \gls{DSL} as being overloaded in the interpretation.
782 This does require enumerating all the \haskelllhstexinline{CreateRecord} type classes for every extension in a similar fashion as was required for \haskelllhstexinline{GDict}.
783 At the call site, the concrete list of constraints must be known.
784
785 \begin{lstHaskellLhstex}[caption={Term overloaded in the interpretations}]
786 e1 :: (Typeable c
787 , CreateRecord (Neg c) c
788 , CreateRecord (Subst c) c
789 ) => Expr c
790 e1 = neg (neg (Lit 42 `Add` (Lit 38 `subt` Lit 4)))
791 \end{lstHaskellLhstex}
792
793 Finally, using the \GHCmod{TypeFamilies} extension, type families can be created for bundling \haskelllhstexinline{CreateRecord} constraints (\haskelllhstexinline{UsingExt}) and \haskelllhstexinline{`In`} constraints (\haskelllhstexinline{DependsOn}), making the syntax even more descriptive.
794 E.g.\ \haskelllhstexinline{UsingExt '[A, B, C] c} expands to \haskelllhstexinline{(CreateRecord (A c) c, CreateRecord (B c) c, CreateRecord (C c) c)} and \haskelllhstexinline{DependsOn '[A, B, C] s} expands to \haskelllhstexinline{(A `In` s, B `In` s, C `In` s)}.
795
796 \begin{lstHaskellLhstex}
797 type family UsingExt cs c :: Constraint where
798 UsingExt '[] c = ()
799 UsingExt (d ': cs) c = (CreateRecord (d c) c, UsingExt cs c)
800
801 type family DependsOn cs c :: Constraint where
802 DependsOn '[] c = ()
803 DependsOn (d ': cs) c = (d `In` c, DependsOn cs c)
804 \end{lstHaskellLhstex}
805
806 Defining the previous expression can now be done with the following shortened type that describes the semantics better:
807
808 \begin{lstHaskellLhstex}
809 e1 :: (Typeable c, UsingExt '[Neg, Subst]) => Expr c
810 \end{lstHaskellLhstex}
811
812 Giving an instance for \haskelllhstexinline{Interp} for \haskelllhstexinline{DataType} that uses the extensions \haskelllhstexinline{e_1, e_2, ...} and depends on interpretations \haskelllhstexinline{i_1, i_2, ...} is done as follows:
813
814 \begin{lstHaskellLhstex}
815 instance ( UsingExt '[e_1,e_2,...] s
816 , DependsOn '[i_1, i_2,...] s
817 ) => Interp (DataType s) where
818 ...
819 \end{lstHaskellLhstex}
820
821 With these enhancements, there is hardly any boilerplate required to use classy deep embedding.
822 The \haskelllhstexinline{Record} data type; the \haskelllhstexinline{CreateRecord} type class; and the \haskelllhstexinline{UsingExt} and \haskelllhstexinline{DependsOn} type families can be provided as a library only requiring the programmer to create the extension constructors with their respective implementations and smart constructors for language construct extensions.
823 The source code for this extension can be found here: \url{https://gitlab.com/mlubbers/classydeepembedding}.
824
825 \section{Data types and definitions}%
826 \label{sec:cde:appendix}
827 \begin{lstHaskellLhstex}[caption={Data type definitions.}]
828 data Sub_g d a where
829 Sub_g :: (Eq a, Num a) => Expr_g d a -> Expr_g d a -> Sub_g d a
830 SubLoop_g :: Expr_g d a -> Sub_g d a
831
832 data Eq_g d a where
833 Eq_g :: (Typeable a, Eq a) => Expr_g d a -> Expr_g d a -> Eq_g d Bool
834 EqLoop_g :: Expr_g d a -> Eq_g d a
835 \end{lstHaskellLhstex}
836
837 \begin{lstHaskellLhstex}[caption={Smart constructors.}]
838 sub_g :: (Typeable d, GDict (d (Sub_g d)), Eq a, Num a) =>
839 Expr_g d a -> Expr_g d a -> Expr_g d a
840 sub_g e1 e2 = Ext_g gdict (Sub_g e1 e2)
841
842 eq_g :: (Typeable d, GDict (d (Eq_g d)), Eq a, Typeable a) =>
843 Expr_g d a -> Expr_g d a -> Expr_g d Bool
844 eq_g e1 e2 = Ext_g gdict (Eq_g e1 e2)
845 \end{lstHaskellLhstex}
846
847 \begin{lstHaskellLhstex}[caption={Semantics classes and data types.}]
848 newtype PrintDict_g v = PrintDict_g (forall a.v a -> String)
849
850 class HasPrint_g d where
851 getPrint_g :: d v -> v a -> String
852
853 instance HasPrint_g PrintDict_g where
854 getPrint_g (PrintDict_g e) = e
855
856 newtype OptDict_g v = OptDict_g (forall a.v a -> v a)
857
858 class HasOpt_g d where
859 getOpt_g :: d v -> v a -> v a
860
861 instance HasOpt_g OptDict_g where
862 getOpt_g (OptDict_g e) = e
863 \end{lstHaskellLhstex}
864
865 \begin{lstHaskellLhstex}[caption={\texorpdfstring{\haskelllhstexinline{GDict}}{GDict} instances}]
866 instance Print_g v => GDict (PrintDict_g v) where
867 gdict = PrintDict_g print_g
868 instance Opt_g v => GDict (OptDict_g v) where
869 gdict = OptDict_g opt_g
870 \end{lstHaskellLhstex}
871
872 \begin{lstHaskellLhstex}[caption={Evaluator instances}]
873 instance HasEval_g d => Eval_g (Expr_g d) where
874 eval_g (Lit_g v) = v
875 eval_g (Add_g e1 e2) = eval_g e1 + eval_g e2
876 eval_g (Ext_g d x) = getEval_g d x
877
878 instance HasEval_g d => Eval_g (Sub_g d) where
879 eval_g (Sub_g e1 e2) = eval_g e1 - eval_g e2
880 eval_g (SubLoop_g e) = eval_g e
881
882 instance HasEval_g d => Eval_g (Neg_g d) where
883 eval_g (Neg_g e) = negate (eval_g e)
884 eval_g (NegLoop_g e) = eval_g e
885
886 instance HasEval_g d => Eval_g (Eq_g d) where
887 eval_g (Eq_g e1 e2) = eval_g e1 == eval_g e2
888 eval_g (EqLoop_g e) = eval_g e
889
890 instance HasEval_g d => Eval_g (Not_g d) where
891 eval_g (Not_g e) = not (eval_g e)
892 eval_g (NotLoop_g e) = eval_g e
893 \end{lstHaskellLhstex}
894
895 \begin{lstHaskellLhstex}[caption={Printer instances}]
896 instance HasPrint_g d => Print_g (Expr_g d) where
897 print_g (Lit_g v) = show v
898 print_g (Add_g e1 e2) = "(" ++ print_g e1 ++ "+" ++ print_g e2 ++ ")"
899 print_g (Ext_g d x) = getPrint_g d x
900
901 instance HasPrint_g d => Print_g (Sub_g d) where
902 print_g (Sub_g e1 e2) = "(" ++ print_g e1 ++ "-" ++ print_g e2 ++ ")"
903 print_g (SubLoop_g e) = print_g e
904
905 instance HasPrint_g d => Print_g (Neg_g d) where
906 print_g (Neg_g e) = "(negate " ++ print_g e ++ ")"
907 print_g (NegLoop_g e) = print_g e
908
909 instance HasPrint_g d => Print_g (Eq_g d) where
910 print_g (Eq_g e1 e2) = "(" ++ print_g e1 ++ "==" ++ print_g e2 ++ ")"
911 print_g (EqLoop_g e) = print_g e
912
913 instance HasPrint_g d => Print_g (Not_g d) where
914 print_g (Not_g e) = "(not " ++ print_g e ++ ")"
915 print_g (NotLoop_g e) = print_g e
916 \end{lstHaskellLhstex}
917
918 \begin{lstHaskellLhstex}[caption={Optimisation instances}]
919 instance HasOpt_g d => Opt_g (Expr_g d) where
920 opt_g (Lit_g v) = Lit_g v
921 opt_g (Add_g e1 e2) = case (opt_g e1, opt_g e2) of
922 (Lit_g 0, e2p ) -> e2p
923 (e1p, Lit_g 0) -> e1p
924 (e1p, e2p ) -> Add_g e1p e2p
925 opt_g (Ext_g d x) = Ext_g d (getOpt_g d x)
926
927 instance HasOpt_g d => Opt_g (Sub_g d) where
928 opt_g (Sub_g e1 e2) = case (opt_g e1, opt_g e2) of
929 (e1p, Lit_g 0) -> SubLoop_g e1p
930 (e1p, e2p ) -> Sub_g e1p e2p
931 opt_g (SubLoop_g e) = SubLoop_g (opt_g e)
932
933 instance (Typeable d, GDict (d (Neg_g d)), HasOpt_g d) => Opt_g (Neg_g d) where
934 opt_g (Neg_g (Add_g e1 e2))
935 = NegLoop_g (Add_g (opt_g (neg_g e1)) (opt_g (neg_g e2)))
936 opt_g (Neg_g (Ext_g d x))
937 = case fromDynamic (toDyn (getOpt_g d x)) of
938 Just (Neg_g e) -> NegLoop_g e
939 _ -> Neg_g (Ext_g d (getOpt_g d x))
940 opt_g (Neg_g e) = Neg_g (opt_g e)
941 opt_g (NegLoop_g e) = NegLoop_g (opt_g e)
942
943 instance HasOpt_g d => Opt_g (Eq_g d) where
944 opt_g (Eq_g e1 e2) = Eq_g (opt_g e1) (opt_g e2)
945 opt_g (EqLoop_g e) = EqLoop_g (opt_g e)
946 \end{lstHaskellLhstex}
947
948 \end{subappendices}
949
950 \input{subfilepostamble}
951 \end{document}