diff --git a/src/org/rascalmpl/library/lang/box/syntax/Box.rsc b/src/org/rascalmpl/library/lang/box/syntax/Box.rsc index e45113d41c4..f63eff82a72 100644 --- a/src/org/rascalmpl/library/lang/box/syntax/Box.rsc +++ b/src/org/rascalmpl/library/lang/box/syntax/Box.rsc @@ -5,119 +5,81 @@ which accompanies this distribution, and is available at http://www.eclipse.org/legal/epl-v10.html } +@contributor{Jurgen J. Vinju - Jurgen.Vinju@cwi.nl - CWI} @contributor{Bert Lisser - Bert.Lisser@cwi.nl (CWI)} - module lang::box::\syntax::Box -start syntax Main = Boxx WhitespaceAndComment*; - -syntax Boxx - = StrCon - | BoxOperator box_operator "[" Boxx* list "]" - | FontOperator font_operator "[" Boxx* list "]" - | "LBL" "[" StrCon "," Boxx "]" - | "REF" "[" StrCon "," Boxx "]" - | "CNT" "[" StrCon "," StrCon "]" - // | "O" SOptions "[" Boxx BoxString Boxx "]" - ; - +import List; -lexical StrCon - = [\"] StrChar* chars [\"] - ; - -lexical StrChar - = "\\" [\" \' \< \> \\ b f n r t] - | ![\" \' \< \> \\] +@synopsis{Every kind of box encodes one or more parameterized two-dimensional text constraints.} +@description{ +* `H` puts its elements next to each other on the same line, separated by `hs` spaces. +* `V` puts its elements below each other on their own line, separated by `vs` empty lines. +* `HOV` acts like `H` as long as the elements all fit on one line, otherwise it becomes a `V` +* `HV` acts like H until the line is full, and then continues on the next line like `V`. +* `I` is a `V` box that indents every line in the output with `is` spaces +* `WD` produces a number of spaces exactly as wide as the widest line of the constituent boxes +* `A` is a table formatter. The list of Rows is formatted with `H` but each cell is aligned vertically with the rows above and below. +* `SPACE` produces `space` spaces +* `L` produces a literal word.
This word may only contain printable characters and no spaces; this is a required property that the formatting algorithm depends on for correctness. +* `U` splices its contents in the surrounding box, for automatic flattening of overly nested structures in syntax trees. +* `G` is an additional group-by feature that reduces to the above core features +* `SL` is a convenience box for separated syntax lists based on `G` +* `NULL()` is the group that will disappear from its context, useful for skipping content. It is based on the `U` box. +} +@benefits{ +* Box expressions are a declarative mechanism to express formatting rules that are flexible enough to deal +with limited horizontal space, and satisfy the typical alignment and indentation principles found in +the coding standards for programming languages. +* The parameters of Box expressions allow for full configuration. It is up to the code that produces Box +expressions to present these parameters to the user or not. For example, indentation level `is` should be +set on every `I` Box according to the current preferences of the user. +} +@pitfalls{ +* `U(boxes)` is rendered as `H(boxes)` if it's the outermost Box. +} +data Box(int hs=1, int vs=0, int is=2) + = H(list[Box] boxes) + | V(list[Box] boxes) + | HOV(list[Box] boxes) + | HV(list[Box] boxes) + | I(list[Box] boxes) + | WD(list[Box] boxes) + | A(list[Row] rows, list[Alignment] columns=[l() | [R(list[Box] cs), *_] := rows, _ <- cs] /* learns the number of columns from the first row */) + | SPACE(int space) + | L(str word) + | U(list[Box] boxes) + | G(list[Box] boxes, Box(list[Box]) op = H, int gs=2) + | NULL() ; - -lexical NatCon = [0-9]+ ; +@synopsis{A row is a list of boxes that go into an `A` array/table.} +@description{ +Rows do not have parameters. These are set on the `A` level instead, +or per cell Box.
+} +data Row = R(list[Box] cells); + +data Alignment = l() | r() | c(); -syntax BoxOperator - = "A" AlignmentOptions alignments SpaceOption* options - | "R" - | "H" SpaceOption* options - | "V" SpaceOption* options - | "HV" SpaceOption* options - | "HOV" SpaceOption* options - | "I" SpaceOption* options - | "WD" - /* - | "COMM" - | "F" FontOption* options - | "G" GroupOption* options - | "SL" GroupOption* options - */ - ; - -syntax FontOperator - = "KW" - | "VAR" - | "NUM" - | "MATH" - | "ESC" - | "COMM" - | "STRING" - ; - -syntax AlignmentOption - = "l" SpaceOption* options - | "c" SpaceOption* options - | "r" SpaceOption* options - ; - -syntax AlignmentOptions - = "(" {AlignmentOption ","}* ")" - ; - -syntax SpaceSymbol - = "hs" - | "vs" - | "is" - | "ts" - ; - -syntax SpaceOption - = SpaceSymbol "=" NatCon - ; +@synopsis{NULL can be used to return a Box that will completely disappear in the surrounding context.} +@description{ +Consider `NULL()` as an alternative to producing `H([])` when you see unexpected +additional spaces generated. -syntax Context - = "H" - | "V" - ; -/* -syntax FontValue - = NatCon - | FontId - ; -syntax FontOption - = - FontParam "=" FontValue - ; -syntax FontParam - = "fn" - | "fm" - | "se" - | "sh" - | "sz" - | "cl" - ; +Typical applications for NULL would be to produce it for layout nodes that contain +only whitespace. If you encounter source code comments you can produce the appropriate `L` content, +but if you want the position ignored, use `NULL`. - */ -syntax GroupOption - = "gs" "=" NatCon - | "op" "=" BoxOperator - ; - -layout WhiteSpace = - WhitespaceAndComment* - !>> [\ \t\n\r] - !>> "%" - ; - -lexical WhitespaceAndComment - = [\ \t\n\r] - | "%" [!%]* "%" - | "%%" [!\n]* "\n" - ; +`NULL` depends on the splicing semantics of `U` to disappear completely before the layout +algorithm starts counting boxes and widths.
+} +@examples{ +* `H([L("A"), H([]),L("B")])` will produce `"A B"` with two spaces; +* `H([L("A"), NULL(),L("B")])` will produce `"A B"` with only one space. +} +@pitfalls{ +* Do not use `NULL` for empty Row cells, unless you do want your cells aligned to the left and filled up to the right with empty H boxes. +* NULL will be formatted as `H([])` if it's the outermost Box. +} +Box NULL() = U([]); \ No newline at end of file diff --git a/src/org/rascalmpl/library/lang/box/util/Box.rsc b/src/org/rascalmpl/library/lang/box/util/Box.rsc deleted file mode 100644 index 2ae67e1bd9c..00000000000 --- a/src/org/rascalmpl/library/lang/box/util/Box.rsc +++ /dev/null @@ -1,37 +0,0 @@ -@license{ - Copyright (c) 2009-2015 CWI - All rights reserved. This program and the accompanying materials - are made available under the terms of the Eclipse Public License v1.0 - which accompanies this distribution, and is available at - http://www.eclipse.org/legal/epl-v10.html -} -@contributor{Jurgen J. Vinju - Jurgen.Vinju@cwi.nl - CWI} -@contributor{Bert Lisser - Bert.Lisser@cwi.nl (CWI)} -module lang::box::util::Box - -data Box(int hs=-1, int vs=-1, int is=-1, int ts=-1, int width=-1, int height=-1) - = H (list[Box] h) - | V(list[Box] v) - | HOV (list[Box] hov) - | HV (list[Box] hv) - | I(list[Box] i) - | WD(list[Box] wd) - | R(list[Box] r) - | A(list[Box] a) - | SPACE(int space) - | L(str l) - | KW(Box kw) - | VAR(Box var) - | NM(Box nm) - | STRING(Box string) - | COMM(Box comm) - | MATH(Box math) - | ESC(Box esc) - | REF(int ref) - | NULL() - ; - -alias text = list[str]; - - - diff --git a/src/org/rascalmpl/library/lang/box/util/Box2Text.rsc b/src/org/rascalmpl/library/lang/box/util/Box2Text.rsc index 2ef83eb3d03..173418d6cab 100644 --- a/src/org/rascalmpl/library/lang/box/util/Box2Text.rsc +++ b/src/org/rascalmpl/library/lang/box/util/Box2Text.rsc @@ -7,613 +7,525 @@ } @contributor{Jurgen J. 
Vinju - Jurgen.Vinju@cwi.nl - CWI} @contributor{Bert Lisser - Bert.Lisser@cwi.nl (CWI)} -@synopsis{A 2-dimensional string layout algorithm, useful for formatting programming languages and pretty-printing syntax trees.} +@synopsis{Two-dimensional text layout algorithm} @description{ -This is an implementation of "From Box to Tex:An algebraic approach to the construction of documentation tools" -by Mark van den Brand and Eelco Visser (June 30, 1994) - -The main function `format` maps a box tree (which describes 2-dimensional layout constraints for a linear text) -to a string which satisfies these constraints. - -To create a formatter using this back-end, first connect a front-end that maps your languages to Box -terms. There exist "default formatters" that map any Rascal parse tree to Box, which you can specialize -for cases where the default mapper does something funny. -} -@benefits{ -* Box2text is a fast two-dimensional constraint algorithm which is versatile enough to deal with -limited horizontal space on the screen and paper. The intermediate format allows for many different -solutions that do not have to programmed manually because of this. This is comparable to what CSS does -for HTML layout. -* Box2text can create ANSI color codes and also HTML markup for syntax highlighting. +The input to Box2Text is a hierarchy of "Boxes" represented by the Box algebraic data-type. +These boxes put hard and soft relative positioning constraints on the embedded text fragments, and +there is the global soft constraints of the width of the screen (or the paper). Box2Text can also +add markup for syntax highlighting in either ANSI plaintext encoding, HTML font tags or LaTex macros. + +This implementation is a port from ASF+SDF to Rascal. The ASF+SDF implementation was published as +"From Box to Tex:An algebraic approach to the construction of documentation tools" by Mark van den Brand +and Eelco Visser (June 30, 1994). 
The original Box concept was introduced by Joel Coutaz in this technical report: +"The Box, A Layout Abstraction for User Interface Toolkits" (1984) Pittsburgh, PA: Carnegie Mellon University. + +The main function `format` maps a Box tree to a `str`: +* To obtain Box terms people typically transform ASTs or ((ParseTree))s to Box using pattern matching in Rascal. +* ((Options)) encode global default options for constraint parameters that only override local parameters if they were elided. +* ((MarkupLanguage)) configures which markup language to use for syntax highlighting purposes. } -@pitfalls{ -* When mapping a language to box, it is often forgotten to also map source code comments to the Box tree. +@examples{ +This demonstrates the semantics of the main hard constraints: +* `H` for horizontal; +* `V` for vertical; +* `I` for indentation. + +```rascal-shell +import lang::box::util::Box2Text; +import lang::box::\syntax::Box; +format(H([L("A"), L("B"), L("C")], hs=2)) +format(H([L("A"), L("B"), L("C")], hs=1)) +format(H([L("A"), L("B"), L("C")], hs=0)) +format(V([L("A"), L("B"), L("C")], vs=2)) +format(V([L("A"), L("B"), L("C")], vs=1)) +format(V([L("A"), L("B"), L("C")], vs=0)) +format(H([L("A"), V([L("B"), L("C")])])) +format(H([L("A"), I([L("B")]), L("C")])) +format(H([L("A"), V([L("B"), H([L("C"), L("D")])])])) +``` + +The "soft" constraints change their behavior based on available horizontal room: +```rascal-shell,continue +format(HV([L("W") | i <- [0..10]])); +format(HV([L("W") | i <- [0..20]])); +format(HV([L("W") | i <- [0..40]])); +format(HV([L("W") | i <- [0..80]])); +format(HV([L("W") | i <- [0..100]])); +format(HOV([L("W") | i <- [0..10]])); +format(HOV([L("W") | i <- [0..20]])); +format(HOV([L("W") | i <- [0..30]])); +``` + +By cleverly combining constraints, a specifically desired behavior is easy to achieve: +```rascal-shell,continue +format(H([L("if"), H([L("("), L("true"), L(")")], hs=0), HOV([L("doSomething")])])) +format(H([L("if"), H([L("("), 
L("true"), L(")")], hs=0), HOV([L("W") | i <- [0..30]])])) +format(H([L("if"), H([L("("), L("true"), L(")")], hs=0), HV([L("W") | i <- [0..30]])])) +``` } module lang::box::util::Box2Text +import util::Math; import List; import String; +import lang::box::\syntax::Box; import IO; -import lang::box::util::Box; - -int maxWidth = 80; -int hv2h_crit = 70; - -alias options = map [str, int]; -options oDefault = ("h":1,"v":0, "i":2, "t":10); -@synopsis{Print boxes} -public void fprint(Box b) { - print(format(b)); -} +@synopsis{Converts boxes into a string by finding an "optimal" two-dimensional layout} +@description{ +* This algorithm never changes the left-to-right order of the Boxes' constituents, such that +syntactical correctness is maintained +* This algorithm tries never to overrun the `maxWidth` parameter, but if it must in order to maintain +text order and the specified nesting of boxes, it will anyway. For example, if a table column doesn't +fit it will still be printed. We say `maxWidth` is a _soft_ constraint. +* Separator options like `is`, `hs` and `vs` are _hard_ constraints, they may lead to overriding `maxWidth`. +* H, V and I boxes represent hard constraints too. +* HV and HOV are the soft constraints that allow for better solutions, so use them where you can to allow for +flexible layout that can handle deeply nested expressions and statements.
+} +public str format(Box b, int maxWidth=80, int wrapAfter=70) + = " + '<}>"; -@synopsis{Print boxes followed by newline} -public void fprintln(Box b) { - println(format(b)); +@synopsis{Box2text uses list[str] as intermediate representation of the output during formatting} +@benefits{ +* Helps with fast concatenation +* Allows for measuring (max) width and height of intermediate results very quickly } - -@synopsis{Converts boxes into a string} -public str format(Box b) { - box2textmap=(); - text t = box2text(b); - return "\n<}>"; +@pitfalls{ +* Because of this representation, box2text does not recognize that unprintable characters have width 0. So, +ANSI escape codes, and characters like \r and \n in `L` boxes _will break_ the accuracy of the algorithm. } +alias Text = list[str]; +@synopsis{Converts boxes into list of lines (Unicode)} +public Text box2text(Box b, int maxWidth=80, int wrapAfter=70) + = box2data(b, options(maxWidth=maxWidth, wrapAfter=wrapAfter)); -@synopsis{Converts boxes into latex} -public text box2latex(Box b) { - // println("Start box2latex"); - text q = []; - if (box2textmap[b]?) q = box2textmap[b]; - else { - q = box2data(b); - box2textmap+=(b:q); - } - text t = readFileLines(|std:///lang/box/util/Start.tex|)+text2latex(q)+readFileLines(|std:///lang/box/util/End.tex|); - // println("End box2latex"); - return t; - } - -@synopsis{Converts boxes into html} -public text box2html(Box b) { - //println("Start box2html"); - text q = []; - if (box2textmap[b]?) q = box2textmap[b]; - else { - q = box2data(b); - box2textmap+=(b:q); - } - text t = readFileLines(|std:///lang/box/util/Start.html|)+text2html(q)+readFileLines(|std:///lang/box/util/End.html|); - //println("End box2html"); - return t; - } - -@synopsis{Converts boxes into list of lines (ASCII)} -public text box2text(Box b) { - text q = []; - if (box2textmap[b]?) 
q = box2textmap[b]; - else { - q = box2data(b); - box2textmap+=(b:q); - } - text t = text2txt(q); - return t; - } - -//------------------------------------------------------------------------------------------------- - -alias foptions = map[str, list[str]]; - - -map[Box, text] box2textmap=(); +////////// private functions below implement the intermediate data-structures +////////// and the constraint solver -data Box(list[str] format=[]); +@synopsis{Configuration options for a single formatting run.} +@description{ +This is used during the algorithm, not for external usage. + +* `hs` is the current separation between every horizontal element in H, HV and HOV boxes +* `vs` is the current separation between vertical elements in V, HV and HOV boxes +* `is` is the default (additional) indentation for indented boxes +* `maxWidth` is the number of columns (characters) of a single line on screen or on paper +* `wrapAfter` is the threshold criterium for line fullness, to go to the next line in a HV box and to switching +between horizontal and vertical for HOV boxes. +} +data Options = options( + int hs = 1, + int vs = 0, + int is = 2, + int maxWidth=80, + int wrapAfter=70 +); + +@synopsis{Quickly splice in any nested U boxes} +list[Box] u(list[Box] boxes) { + return [*((U(list[Box] nested) := b) ? u(nested) : [b]) | b <- boxes]; +} @synopsis{simple vertical concatenation (every list element is a line)} -text vv(text a, text b) = [*a, *b]; +private Text vv(Text a, Text b) = [*a, *b]; -str blank(str a) = right("", width(a)); +@synopsis{Create a string of spaces just as wide as the parameter a} +private str blank(str a) = right("", width(a)); @synopsis{Computes a white line with the length of the last line of a} -text wd([]) = []; -text wd([*_, str x]) = wd([x]); + Text wd([]) = []; + Text wd([*_, str x]) = [blank(x)]; @synopsis{Computes the length of unescaped string s} -int width(str s) = size(s); // replaceAll(s,"\r...",""); ?? 
+private int width(str s) = size(s); @synopsis{Computes the maximum width of text t} -int twidth(text t) = max([width(line) | line <- t]); +private int twidth([]) = 0; +private default int twidth(Text t) = max([width(line) | line <- t]); @synopsis{Computes the length of the last line of t} -int hwidth([]) = 0; -int hwidth([*_, str last]) = width(last); +private int hwidth([]) = 0; +private int hwidth([*_, str last]) = width(last); @synopsis{Prepends str a before text b, all lines of b will be shifted} -text bar(str a, []) = [a]; -text bar(str a, [str bh, *str bt]) = vv([a+bh], prepend(blank(a), bt)); +private Text bar(str a, []) = [a]; +private Text bar(str a, [str bh, *str bt]) = vv([""], prepend(blank(a), bt)); @synopsis{Produce text consisting of a white line of length n} -text hskip(int n) = [right("", n)]; +Text hskip(int n) = [right("", n)]; @synopsis{Produces text consisting of n white lines at length 0} -text vskip(int n) = ([] | vv(it, [""]) | _ <- [0..n]); - -@synopsis{Check if a string already consists of only blanks.} -bool isBlank(str a) = (a == blank(a)); +private Text vskip(int n) = ["" | _ <- [0..n]]; @synopsis{Prepend Every line in b with `a`} -text prepend(str a, text b) = ["" | line <- b]; +private Text prepend(str a, Text b) = ["" | line <- b]; @synopsis{Implements horizontal concatenation, also for multiple lines} -text hh([], text b) = b; -text hh(text a, []) = a; -text hh([a], text b) = bar(a, b); +private Text hh([], Text b) = b; +private Text hh(Text a, []) = a; +private Text hh([a], Text b) = bar(a, b); -default text hh(text a, text b) = vv(a[0..-1], bar(a[-1], b)); +private default Text hh(Text a, Text b) = vv(a[0..-1], bar(a[-1], b)); @synsopsis{Horizontal concatenation, but if the left text is empty return nothing.} -text lhh([], text _) = []; -default text lhh(a, b) = hh(a, b); - -@synsopsis{Vertical concatenation, but if the left text is empty return nothing.} -text lvv([], text _) = []; -default text lvv(text a, text b) = vv(a,b); 
+private Text lhh([], Text _) = []; +private default Text lhh(a, b) = hh(a, b); @synsopsis{Horizontal concatenation, but if the right text is empty return nothing.} -text rhh(text _, []) = []; -text rhh(text a, text b) = hh(a, b); +private Text rhh(Text _, []) = []; +private Text rhh(Text a, Text b) = hh(a, b); @synsopsis{Vertical concatenation, but if the right text is empty return nothing.} -text rvv(text _, []) = []; -default text rvv(text a, text b) = vv(a,b); +private Text rvv(Text _, []) = []; +private default Text rvv(Text a, Text b) = vv(a,b); -text LL(str s ) = [s]; - -/* -text HH(list[Box] b, Box c, options opts, int m) { - if (isEmpty(b)) return []; - int h = opts["h"]; - text t = O(b[0], H([]), opts, m); - int s = hwidth(t); - return hh(t, rhh(hskip(h), HH(tail(b), H([]), opts, m-s-h))); - } -*/ +private Text LL(str s ) = [s]; -text HH([], Box _, options opts, int m) = []; +private Text HH([], Box _, Options _opts, int _m) = []; -text HH(list[Box] b:[_, *_], Box _, options opts, int m) { - int h = opts["h"]; - text r = []; +private Text HH(list[Box] b:[_, *_], Box _, Options opts, int m) { + Text r = []; b = reverse(b); for (a <- b) { - text t = O(a, H([]), opts, m); + Text t = \continue(a, H([]), opts, m); int s = hwidth(t); - r = hh(t, rhh(hskip(h), r)); - m = m - s - h; + r = hh(t, rhh(hskip(opts.hs), r)); + m = m - s - opts.hs; } return r; } -text VV(list[Box] b, Box c, options opts, int m) { - if (isEmpty(b)) return []; - int v = opts["v"]; - text r = []; +private Text VV([], Box _c, Options _opts, int _m) = []; + +private Text VV(list[Box] b:[_, *_], Box c, Options opts, int m) { + Text r = []; b = reverse(b); - for (a<-b) { - if (V(_)!:=c || L("")!:=a) - { - text t = O(a, V([]), opts, m); - r = vv(t, rvv(vskip(v), r)); - } + for (a <- b) { + if (V(_) !:= c || L("") !:= a) { + Text t = \continue(a, V([]), opts, m); + r = vv(t, rvv(vskip(opts.vs), r)); + } } return r; } -/* -text VV(list[Box] b, Box c, options opts, int m) { - if (isEmpty(b)) 
return []; - int v = opts["v"]; - return vv(O(b[0], c , opts, m), rvv(vskip(v), VV(tail(b), V([]), opts, m))); - } -*/ - -text II([], Box c, options opts, int m) = []; +private Text II([], Box _c, Options _opts, int _m) = []; -text II(list[Box] b:[_,*_], c:H(list[Box] _), options opts, int m) = HH(b, c, opts, m); +private Text II(list[Box] b:[_, *_], c:H(list[Box] _), Options opts, int m) = HH(b, c, opts, m); -text II(list[Box] b:[_,*_], c:V(list[Box] _), options opts, int m) { - text t = O(b[0], c, opts, m - opts["i"]); - return rhh(hskip(i), hh(t, II(tail(b), c, opts, m - opts["i"] - hwidth(t)))); +private Text II(list[Box] b:[Box head, *Box tail], c:V(list[Box] _), Options opts, int m) { + Text t = \continue(head, c, opts, m - opts.is); + return rhh(hskip(opts.is), hh(t, II(tail, c, opts, m - opts.is - hwidth(t)))); } -text WDWD(list[Box] b, Box c , options opts, int m) { - if (isEmpty(b)) return []; - int h= b[0].hs?opts["h"]; - text t = O(b[0], c, opts, m); - int s = hwidth(t); - return hh(t , rhh(hskip(h) , WDWD(tail(b), c, opts, m-s-h))); - } +private Text WDWD([], Box _c , Options _opts, int _m) + = []; +private Text WDWD([Box head, *Box tail], Box c , Options opts, int m) { + int h = head.hs ? opts.hs; + Text t = \continue(head, c, opts, m); + int s = hwidth(t); + return hh(wd(t), rhh(hskip(h) , WDWD(tail, c, opts, m - s - h))); +} +private Text ifHOV([], Box b, Box c, Options opts, int m) = []; -text ifHOV(text t, Box b, Box c, options opts, int m) { - if (isEmpty(t)) return []; - if (size(t)==1) { - if (width(t[0])<=m) return t; - else - return O(b, c, opts, m); - } - return O(b, c, opts, m); - } +private Text ifHOV(Text t:[str head], Box b, Box c, Options opts, int m) + = width(head) <= m ? 
t : \continue(b, c, opts, m); - text HOVHOV(list[Box] b, Box c, options opts, int m) { - return ifHOV(HH(b, c, opts, m), V(b), c, opts, m); - } +private Text ifHOV(Text t:[str head, str _, *str_], Box b, Box c, Options opts, int m) + = \continue(b, c, opts, m); +private Text HOVHOV(list[Box] b, Box c, Options opts, int m) + = ifHOV(HH(b, c, opts, m), V(b), c, opts, m); /* Gets complicated HVHV */ -text HVHV(text T, int s, text a, Box A, list[Box] B, options opts, int m) { - int h= opts["h"]; - int v = opts["v"]; - int i= opts["i"]; - int n = h + hwidth(a); - if (size(a)>1) { // Multiple lines - text T1 = O(A, V([]), opts, m-i); - return vv(T, rvv(vskip(v), HVHV(T1, m-hwidth(T1), B, opts, m, H([])))); - } - if (n <= s) { // Box A fits in current line - return HVHV(hh(lhh(T, hskip(h)), a), s-n, B, opts, m, H([])); - } - else { +private Text HVHV(Text T, int s, Text a, Box A, list[Box] B, Options opts, int m) { + int h= opts.hs; + int v = opts.vs; + int i= opts.is; + int n = h + hwidth(a); + + if (size(a) > 1) { // Multiple lines + Text T1 = \continue(A, V([]), opts, m-i); + return vv(T, rvv(vskip(v), HVHV(T1, m-hwidth(T1), B, opts, m, H([])))); + } + + if (n <= s) { // Box A fits in current line + return HVHV(hh(lhh(T, hskip(h)), a), s-n, B, opts, m, H([])); + } + else { n -= h; // n == width(a) - if ((i+n)"); - if (isEmpty(b)) return []; - text T = O(b[0], V([]), opts, m); // Was H([]) - if (size(b)==1) return T; - return HVHV(T, m-hwidth(T), tail(b), opts, m, H([])); - } - -text font(text t, str tg) { - if (isEmpty(t)) return t; - str h = "\r{"+t[0]; - int n = size(t)-1; - if (n==0) { - h += "\r}12"; - return [h]; - } - text r = []; - r+=h; - for (int i <-[1, 2..n]) { - r+=t[i]; - } - r+=(t[n]+"\r}"); - return r; - } - -text QQ(Box b, Box c, options opts, foptions f, int m) { - // println("QQ: "); - switch(b) { - case L(str s): {return LL(s);} - case H(list[Box] bl): {return HH(bl, c, opts, m); } - case V(list[Box] bl): {return VV(bl, c, opts, m);} - case 
I(list[Box] bl):{return II(bl, c, opts, m);} - case WD(list[Box] bl):{return WDWD(bl, c, opts, m);} - case HOV(list[Box] bl):{return HOVHOV(bl, c, opts, m);} - case HV(list[Box] bl):{return HVHV(bl, c, opts, m);} - case SPACE(int n):{return hskip(n);} - case A(list[Box] bl):{return AA(bl, c, opts, f, m);} - //case R(list[Box] bl):{return RR(bl, c, opts, m);} // TODO: Return type should be subtype of `list[str]`, found `list[list[Box]]` - case KW(Box a):{return font(O(a, c, opts, m),"KW");} - case VAR(Box a):{return font(O( a, c, opts, m),"VR");} - case NM(Box a):{return font(O( a, c, opts, m),"NM");} - case STRING(Box a):{return font(O( a, c, opts, m),"SG");} - case COMM(Box a):{return font(O( a, c, opts, m),"CT");} - case MATH(Box a):{return font(O( a, c, opts, m),"MT");} - case ESC(Box a):{return font(O( a, c, opts, m),"SC");} - } -return []; +private Text HVHV(Text T, int _s, [], Options _opts, int _m, Box _c) = T; + +private Text HVHV(Text T, int s, [Box head, *Box tail], Options opts, int m, Box c) { + Text T1 = \continue(head, c , opts, s); + return HVHV(T, s, T1 , head, tail, opts, m); } -text O(Box b, Box c, options opts, int m) { - int h = opts["h"]; - int v = opts["v"]; - int i = opts["i"]; - // if ((b.vs)?) println("Start: "); - if ((b.hs)?) {opts["h"] = b.hs;} - if ((b.vs)?) {opts["v"] = b.vs;} - if ((b.is)?) {opts["i"] = b.is;} - foptions f =(); - if ((b.format)?) 
{f["f"] = b.format;} - text t = QQ(b, c, opts, f, m); - opts["h"]=h; - opts["v"]=v; - opts["i"]=i; - // println("End:"); - return t; -} +private Text HVHV([], Box _, Options opts, int m) + = []; -/* ------------------------------- Alignment ------------------------------------------------------------*/ +private Text HVHV(list[Box] b:[Box head], Box _, Options opts, int m) + = \continue(head, V([]), opts, m); -Box boxSize(Box b, Box c, options opts, int m) { - text s = O(b, c, opts, m); - b.width = twidth(s); - b.height = size(s); - return b; - } - -list[list[Box]] RR(list[Box] bl, Box c, options opts, int m) { - list[list[Box]] g = [ b |R(list[Box] b)<-bl]; - // println(g); - return [ [ boxSize(z, c, opts, m) | Box z <- b ] | list[Box] b<- g]; +private Text HVHV(list[Box] b:[Box head, Box next, *Box tail], Box _, Options opts, int m) { + Text T = \continue(head, V([]), opts, m); + return HVHV(T, m - hwidth(T), [next, *tail], opts, m, H([])); } -int getMaxWidth(list[Box] b) { - return max([c.width| Box c <- b]); -} +// empty lists do not need grouping +private Text GG([], Box(list[Box]) op, int gs, Box c, Options opts, int m) + = \continue(U([]), c, opts, m); -list[int] Awidth(list[list[Box]] a) { - if (isEmpty(a)) return []; - int m = size(head(a)); // Rows have the same length - list[int] r = []; - for (int k<-[0..m]) { - r+=[max([b[k].width|b<-a])]; - } - return r; - } - -text AA(list[Box] bl, Box c ,options opts, foptions f, int m) { - // println(bl); - list[list[Box]] r=RR(bl, c, opts, m); - list[int] mw0 = Awidth(r); - list[str] format0 = ((f["f"]?)?f["f"]:[]); - list[Box] vargs = []; - for (list[Box] bl2 <- r) { - list[int] mw = mw0; - list[str] format =format0; - list[Box] hargs = []; - for (Box b<- bl2) { - int width = b.width; - str f_str = !isEmpty(format)?head(format):"l"; - if (!isEmpty(format)) format = tail(format); - max_width = head(mw); - mw=tail(mw); - int h= opts["h"]; - switch(f_str) { - case "l": { - // b.hs=max_width - width+h; /*left 
alignment */ - hargs+=b; - hargs += SPACE(max_width - width); - } - case "r": { - // b.hs=max_width - width+h; /*left alignment */ - hargs += SPACE(max_width - width); - hargs+=b; - } - case "c": { - hargs += SPACE((max_width - width)/2); - hargs+=b; - hargs += SPACE((max_width - width)/2); - } - } -} - vargs += H(hargs); - } - return O(V(vargs), c, opts, m); -} +// the last elements are smaller than the group size, just wrap them up and finish +private Text GG([*Box last], Box(list[Box]) op, int gs, Box c, Options opts, int m) + = \continue(op(u(last))[hs=opts.hs][vs=opts.vs][is=opts.is], c, opts, m) + when size(last) < gs; -bool changeHV2H(list[Box] hv) { - int n = 0; - visit(hv) { - case L(str s): {n+=size(s);} - } - return n { - int h = (t.hs)?(-1); - int i = (t.is)?(-1); - int v = (t.vs)?(-1); - Box r = H(hv); - if (h>=0) r.hs = h; - if (i>=0) r.is = i; - if (v>=0) r.vs = v; - r; - } - when changeHV2H(hv) - }; -} +// This is a degenerate case, an outermost U-Box without a wrapper around it. +private Text continueWith(Box b:U(list[Box] bl) , Box c, Options opts, int m) = HH(u(bl), c, opts, m); -Box removeHOV(Box b) { -return innermost visit(b) { - case t:HOV(list[Box] hov) => { - int h = (t.hs)?(-1); - int i = (t.is)?(-1); - int v = (t.vs)?(-1); - Box r = changeHV2H(hov)?H(hov):V(hov); - if (h>=0) r.hs = h; - if (i>=0) r.is = i; - if (v>=0) r.vs = v; - // println("changed2"); - r; - } - // when changeHV2H(hov) - }; -} +private Text continueWith(Box b:A(list[Row] rows), Box c, Options opts, int m) + = AA(rows, c, b.columns, opts, m); + +private Text continueWith(Box b:G(list[Box] bl), Box c, Options opts, int m) = GG(u(bl), b.op, b.gs, c, opts, m); +@synopsis{General shape of a Box operator, as a parameter to `G`} +private alias BoxOp = Box(list[Box]); + +@synopsis{Option inheritance layer; then continue with the next box.} +@description{ +The next box is either configured by itself. 
Options are transferred from the +box to the opts parameter for easy passing on to recursive calls. +} +private Text \continue(Box b, Box c, Options opts, int m) + = continueWith(b, c, opts[hs=b.hs][vs=b.vs][is=b.is], m); +/* ------------------------------- Alignment ------------------------------------------------------------*/ +@synopsis{This is to store the result of the first pass of the algorithm over all the cells in an array/table} +data Box(int width=0, int height=1); +@synopsis{Completely layout a box and then measure its width and height, and annotate the result into the Box} +private Box boxSize(Box b, Box c, Options opts, int m) { + Text s = \continue(b, c, opts, m); + b.width = twidth(s); + b.height = size(s); + return b; +} -public text box2data(Box b) { - //println("BEGIN box2data"); - // println(b); - b = removeHV(b); - b = removeHOV(b); - text t = O(b, V([]), oDefault, maxWidth); - // println(t); - //println("END box2data"); - return t; - } - -public str convert2latex(str s) { - return visit (s) { - case /^\r\{/ => "\r{" - case /^\r\}/ => "\r}" - case /^`/ => "{\\textasciigrave}" - case /^\"/ => "{\\textacutedbl}" - case /^\{/ => "\\{" - case /^\}/ => "\\}" - case /^\\/ => "{\\textbackslash}" - case /^\ "{\\textless}" - case /^\>/ => "{\\textgreater}" - case /^\|/ => "{\\textbar}" - case /^%/ => "\\%" - // case /^-/ => "{\\textendash}" - } +private list[list[Box]] RR(list[Row] bl, Box c, Options opts, int m) { + list[list[Box]] g = [b | R(list[Box] b) <- bl]; + return [ [ boxSize(z, c, opts, m) | Box z <- b ] | list[Box] b <- g]; } +@synopsis{Compute the maximum number of columns of the rows in a table} +private int Acolumns(list[Row] rows) = (0 | max(it, size(row.cells)) | row <- rows); +@synopsis{Compute the maximum cell width for each column in an array} +private list[int] Awidth(list[list[Box]] rows) + = [(0 | max(it, row[col].width) | row <- rows ) | int col <- [0..size(head(rows))]]; -str text2latex(str t) { - t = convert2latex(t); - return 
visit(t) { - // case /^\r\{\r\}../ => "\\{}" - case /^\r\{/ => "\\{" - case /^\r\}../ => "}" - } - } +@synopsis{Adds empty cells to every row until every row has the same amount of columns.} +list[Row] AcompleteRows(list[Row] rows, int columns=Acolumns(rows)) + = [ R(u([*row.cells, *[H([]) | _ <- [0..columns - size(row.cells)]]])) | row <- rows]; +@synopsis{Helper function for aligning Text inside an array cell} +private Box align(l(), Box cell, int maxWidth) = maxWidth - cell.width > 0 + ? H([cell, SPACE(maxWidth - cell.width)], hs=0) + : cell; -str selectBeginTag(str tg, str key) { - if (tg=="KW") return "\"; - if (tg=="CT") return "\"; - if (tg=="SG") return "\"; - if (tg=="NM") return "\"; - if (tg=="SC") return "\"; - return key; +private Box align(r(), Box cell, int maxWidth) = maxWidth - cell.width > 0 + ? H([SPACE(maxWidth - cell.width), cell], hs=0) + : cell; + +private Box align(c(), Box cell, int maxWidth) = maxWidth - cell.width > 1 + ? H([SPACE((maxWidth - cell.width) / 2), cell, SPACE((maxWidth - cell.width) / 2)], hs=0) + : maxWidth - cell.width == 1 ? + align(l(), cell, maxWidth) + : cell; + +private Text AA(list[Row] table, Box c, list[Alignment] alignments, Options opts, int m) { + list[list[Box]] rows = RR(AcompleteRows(table), c, opts, m); + list[int] maxWidths = Awidth(rows); + + return \continue(V([ + H([align(al, cell, mw) | <- zip3(row, alignments, maxWidths)]) + | row <- rows + ]),c, opts, m); } -str selectEndTag(str tg) { - if (tg=="KW") return "\"; - if (tg=="CT") return "\"; - if (tg=="SG") return "\"; - if (tg=="NM") return "\"; - if (tg=="SC") return "\"; - return ""; +@synopsis{Check soft limit for HV and HOV boxes} +// TODO this seems to ignore SPACE boxes? 
+private bool noWidthOverflow(list[Box] hv, Options opts) + = (0 | it + size(s) | /L(s) := hv) < opts.wrapAfter; + +@synopsis{Changes all HV boxes that do fit horizontally into hard H boxes.} +private Box applyHVconstraints(Box b, Options opts) = innermost visit(b) { + case HV(boxes, hs=h, is=i, vs=v) => H(boxes, hs=h, is=i, vs=v) + when noWidthOverflow(boxes, opts) +}; + +@synopsis{Changes all HOV boxes that do fit horizontally into hard H boxes, +and the others into hard V boxes.} +private Box applyHOVconstraints(Box b, Options opts) = innermost visit(b) { + case HOV(boxes, hs=h, is=i, vs=v) => noWidthOverflow(boxes, opts) + ? H(boxes, hs=h, is=i, vs=v) + : V(boxes, hs=h, is=i, vs=v) +}; + +@synopsis{Workhorse, that first applies hard HV and HOV limits and then starts the general algorithm} +private Text box2data(Box b, Options opts) { + b = applyHVconstraints(b, opts); + b = applyHOVconstraints(b, opts); + return \continue(b, V([]), options(), opts.maxWidth); } - -public str convert2html(str s) { - return visit (s) { - case /^\r\{/ => "\r{" - case /^\r\}/ => "\r}" - case /^ / => " " - case /^\"/ => """ - case /^&/ => "&" - case /^\ "<" - case /^\>/ => ">" - case /^%/ => "\\%" - } + +///////////////// regression tests //////////////////////////////// + +test bool horizontalPlacement2() + = format(H([L("A"), L("B"), L("C")], hs=2)) + == "A B C + '"; + +test bool horizontalPlacement3() + = format(H([L("A"), L("B"), L("C")], hs=3)) + == "A B C + '"; + +test bool verticalPlacement0() + = format(V([L("A"), L("B"), L("C")], vs=0)) + == "A + 'B + 'C + '"; + +test bool verticalPlacement1() + = format(V([L("A"), L("B"), L("C")], vs=1)) + == "A + ' + 'B + ' + 'C + '"; + +test bool verticalIndentation2() + = format(V([L("A"), I([L("B")]), L("C")])) + == "A + ' B + 'C + '"; + +test bool blockIndent() + = format(V([L("A"), I([V([L("B"), L("C")])]), L("D")])) + == "A + ' B + ' C + 'D + '"; + +test bool wrappingIgnoreIndent() + = format(HV([L("A"), I([L("B")]), L("C")], hs=0), 
maxWidth=2, wrapAfter=2) + == "AB + 'C + '"; + +test bool wrappingWithIndent() + = format(HV([L("A"), I([L("B")]), I([L("C")])], hs=0), maxWidth=2, wrapAfter=2) + == "AB + ' C + '"; + +test bool flipping1NoIndent() + = format(HOV([L("A"), L("B"), L("C")], hs=0, vs=0), maxWidth=2, wrapAfter=2) + == "A + 'B + 'C + '"; + +test bool horizontalOfOneVertical() + = format(H([L("A"), V([L("B"), L("C")])])) + == "A B + ' C + '"; + +test bool stairCase() + = format(H([L("A"), V([L("B"), H([L("C"), V([L("D"), H([L("E"), L("F")])])])])])) + == "A B + ' C D + ' E F + '"; + +test bool simpleTable() + = format(A([R([L("1"),L("2"),L("3")]),R([L("4"), L("5"), L("6")]),R([L("7"), L("8"), L("9")])])) + == "1 2 3 + '4 5 6 + '7 8 9 + '"; + +test bool simpleAlignedTable() + = format(A([R([L("1"),L("2"),L("3")]),R([L("44"), L("55"), L("66")]),R([L("777"), L("888"), L("999")])], + columns=[l(),c(),r()])) + == "1 2 3 + '44 55 66 + '777 888 999 + '"; + +test bool simpleAlignedTableDifferentAlignment() + = format(A([R([L("1"),L("2"),L("3")]),R([L("44"), L("55"), L("66")]),R([L("777"), L("888"), L("999")])], + columns=[r(),c(),l()])) + == " 1 2 3 + ' 44 55 66 + '777 888 999 + '"; + +test bool WDtest() { + L1 = H([L("aap")] , hs=0); + L2 = H([WD([L1]), L("noot")], hs=0); + L3 = H([WD([L2]), L("mies")], hs=0); + + return format(V([L1, L2, L3])) + == "aap + ' noot + ' mies + '"; } -str text2html(str t) { - t = convert2html(t); - return visit(t) { - case /^\r\{/ => selectBeginTag(tg, key) - case /^\r\}/ => selectEndTag(tg) - } - } - -public str text2txt(str t) { - return visit(t) { - case /^\r\{../ => "" - case /^\r\}../ => "" - } - } - -text text2latex(text t) { - return [text2latex(s)|s<-t]; - } - -text text2html(text t) { - return ["\\\"|s<-t]; - } - -public text text2txt(text t) { - return [text2txt(s)|s<-t]; - } - - - -// JV TODO: these functions do not compile because writeData does not exist -//public value toText(Box b, loc src, loc dest) { -// text t = box2text(b); -// writeData(src, 
dest, t, ".txt"); -// return t; -// } -// -//public value toLatex(Box b, loc src, loc dest) { -// text t = box2latex(b); -// writeData(src, dest, t, ".tex"); -// return t; -// } -// -// -//public value toHtml(Box b, loc src, loc dest) { -// text t = box2html(b); -// writeData(src, dest, t, ".html"); -// return t; -// } - -void tst() { - Box b1 = R([L("ab"), L("c")]); - Box b2 = R([L("def"), L("hg")]); - Box b3 = R([L("ijkl"), L("m")]); - Box b = A([b1, b2, b3]); - b.format=["c","c"]; -} +test bool groupBy() { + lst = [L("") | i <- [0..10]]; + g1 = G(lst, op=H, gs=3); + lst2 = [H([L(""), L(""), L("")]) | i <- [0,3..7]] + [H([L("9")])]; -public str baseName(str input) { - str s = input; - str find = "/"; - if (/^\..*?$/:=input) { - s = ""; - if(/^.*$/ := s) { - s = post2; - } - } - return s; - } - -public void toExport(loc src,loc dest, text r,str suffix) { - str s=baseName(src.path); - loc g=|file:///|; - println("Written content in file:\"\""); - writeFile(g); - for (str q<-r) appendToFile(g,"\n"); - } - - public void main(Box b) { - fprintln(b); - } + return format(V([g1])) == format(V(lst2)); +} \ No newline at end of file diff --git a/src/org/rascalmpl/library/lang/box/util/End.html b/src/org/rascalmpl/library/lang/box/util/End.html deleted file mode 100644 index 15615e42e0d..00000000000 --- a/src/org/rascalmpl/library/lang/box/util/End.html +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/src/org/rascalmpl/library/lang/box/util/End.tex b/src/org/rascalmpl/library/lang/box/util/End.tex deleted file mode 100644 index 8fe671bf975..00000000000 --- a/src/org/rascalmpl/library/lang/box/util/End.tex +++ /dev/null @@ -1,2 +0,0 @@ -\end{alltt} -\end{document} \ No newline at end of file diff --git a/src/org/rascalmpl/library/lang/box/util/Start.html b/src/org/rascalmpl/library/lang/box/util/Start.html deleted file mode 100644 index d8aa36826de..00000000000 --- a/src/org/rascalmpl/library/lang/box/util/Start.html +++ /dev/null @@ -1,4 +0,0 @@ - - - 
- \ No newline at end of file diff --git a/src/org/rascalmpl/library/lang/box/util/Start.tex b/src/org/rascalmpl/library/lang/box/util/Start.tex deleted file mode 100644 index bd668def5e1..00000000000 --- a/src/org/rascalmpl/library/lang/box/util/Start.tex +++ /dev/null @@ -1,23 +0,0 @@ -\documentclass[ps]{article} -\usepackage{alltt} -\usepackage{txfonts} -\usepackage[svgnames]{xcolor} -\usepackage{mathcomp} -%\usepackage{lmodern} -\newcommand{\KW}[1] {{\textcolor{DarkMagenta}{\textbf{#1}}}} -\newcommand{\SG}[1] {{\textcolor{DarkBlue}{\textit{#1}}}} -\newcommand{\SC}[1] {{\textcolor{DarkBlue}{\textit{#1}}}} -\newcommand{\NM}[1] {{\textcolor{DarkBlue}{\textit{#1}}}} -\newcommand{\VR}[1] {{\it{#1}}} -\newcommand{\CT}[1] {{\it{#1}}} -\newcommand{\MT}[1] {{\it{#1}}} - - -\makeatletter -%\renewcommand{\verbatim@font}{\ttfamily} -\renewcommand{\ttdefault}{txtt} -\makeatother -\begin{document} -\begin{alltt} - - diff --git a/src/org/rascalmpl/library/lang/box/util/Tree2Box.rsc b/src/org/rascalmpl/library/lang/box/util/Tree2Box.rsc new file mode 100644 index 00000000000..81db09fd950 --- /dev/null +++ b/src/org/rascalmpl/library/lang/box/util/Tree2Box.rsc @@ -0,0 +1,241 @@ +@synopsis{The default formatting rules for _any_ parsetree.} +@description{ +This module is meant to be extended to include rules specific for a language. + +The main goal of this module is to minimize the number of necessary specializations +for any specific programming language. + +This module is a port of the original default formatting rules, implemented in C + ATerm library + APIgen, +of the "Pandora" in The ASF+SDF Meta-Environment, as described +in +> M.G.J. van den Brand, A.T. Kooiker, Jurgen J. Vinju, and N.P. Veerman. A Language Independent Framework for +> Context-sensitive Formatting. In CSMR '06: Proceedings of the Conference on Software Maintenance and +> Reengineering, pages 103-112, Washington, DC, USA, 2006. IEEE Computer Society Press. 
+ +However, due to the more powerful pattern matching available in Rascal, than in C with the ATerm library, +we can specialize for more cases more easily than in the original paper. For example, single and multi-line +comment styles are automatically recognized. + +The current algorithm, not extended, additionally guarantees that no comments are lost as long as their grammar +rules have been tagged with `@category="Comment"` + +Another new feature is the normalization of case-insensitive literals. By providing ((toUpper)) or ((toLower)) +the mapping algorithm will change every instance of a case-insensitive literal accordingly before translating +it to an L box expression. In case of ((asIs)), the literal will be printed as it occurred in the source code. +} +@examples{ +```rascal-shell +import lang::box::\syntax::Box; +extend lang::box::util::Tree2Box; +// Notice how we used `extend` and not `import`, which will be important in the following. +import lang::pico::\syntax::Main; +// First, let's get an example program text +example = "begin + '%% this is an example Pico program + ' declare + ' a : %inline comment% natural, + ' b : natural; + ' a := a + b; + ' b := a - b; + ' a := a - b + 'end"; +// Now we parse it: +program = [start[Program]] example; +// Then we can convert it to a Box tree: +b = toBox(program); +// Finally, we can format the box tree to get a prettier format: +import lang::box::util::Box2Text; +format(b) +// If you are not happy, then you should produce a specialization: +Box toBox((Program) `begin <{Statement ";"}* body> end`, FormatOptions opts=formatOptions()) + = V([ + L("begin"), + I([ + toBox(decls) + ], is=2), + I([ + toBox(body) + ], is=4), + L("end") + ]); +// and we see the result here: +format(toBox(program)); +``` +} +module lang::box::util::Tree2Box + +import ParseTree; +import lang::box::\syntax::Box; +import String; + +@synopsis{Configuration options for toBox} +data FormatOptions = formatOptions( + CaseInsensitivity ci = asIs() 
+); + +@synopsis{Normalization choices for case-insensitive literals.} +data CaseInsensitivity + = toLower() + | toUpper() + | asIs() + ; + +@synopsis{This is the generic default formatter} +@description{ +This generic formatter is to be overridden by someone constructig a formatter tools +for a specific language. The goal is that this `toBox` default rule maps +syntax trees to plausible Box expressions, and that only a minimal amount of specialization +by the user is necessary. +} +default Box toBox(t:appl(Production p, list[Tree] args), FO opts = fo()) { + // the big workhorse switch identifies all kinds of special cases for shapes of + // grammar rules, and accidental instances (emptiness, only whitespace, etc.) + Symbol nl = #[\n].symbol; + Symbol notNl = #![\n].symbol; + + switch () { + // nothing should not produce additional spaces + case <_, []>: + return NULL(); + + // literals are printed as-is + case : + return L(""); + + // case-insensitive literals are optionally normalized + case : + return L(ci("", opts.ci)); + + // non-existing content should not generate accidental spaces + case : + return NULL(); + + // non-separated lists should stick without spacing (probably lexical) + case : + return H([toBox(e, opts=opts) | e <- elements], hs=0); + + case : + return H([toBox(e, opts=opts) | e <- elements], hs=0); + + case : + return HV([G([toBox(e, opts=opts) | e <- elements], gs=4, hs=0, op=H)], hs=1); + + case : + return V([G([toBox(e, opts=opts) | e <- elements], gs=4, hs=0, op=H)], hs=1); + + case : + return V([G([toBox(e, opts=opts) | e <- elements], gs=4, hs=0, op=H)], hs=1); + + // with only one separator it's probably a lexical + case : + return V([G([toBox(e, opts=opts) | e <- elements], gs=2, hs=0, op=H)], hs=0); + + case : + return V([G([toBox(e, opts=opts) | e <- elements], gs=2, hs=0, op=H)], hs=0); + + // if comments are found in layout trees, then we include them here + // and splice them into our context. 
If the deep match does not find any + // comments, then layout positions are reduced to U([]) which dissappears + // by splicing the empty list. + case : + return U([toBox(u, opts=opts) | /u:appl(prod(_, _, {*_,\tag("category"("Comment"))}), _) <- content]); + + // single line comments are special, since they have the newline in a literal + // we must guarantee that the formatter will print the newline, but we don't + // want an additional newline due to the formatter. We do remove any unnecessary + // spaces + case : + return V([ + H([toBox(elements[0], opts=opts), + H([L(e) | e <- words("")], hs=1) + ], hs=1) + ]); + + case : + return V([ + H([toBox(elements[0], opts=opts), + H([L(w) | e <- elements[1..], w <- words("")], hs=1) + ], hs=1) + ]); + + // multiline comments are rewrapped for the sake of readability and fitting on the page + case : + return HV([toBox(elements[0], opts=opts), // recurse in case its a ci literal + *[L(w) | e <- elements[1..-1], w <- words("")], // wrap a nice paragraph + toBox(elements[-1], opts=opts) // recurse in case its a ci literal + ], hs=1); + + // lexicals are never split in pieces, unless it's comments but those are handled above. + case : + return L(""); + + // Now we will deal with a lot of cases for expressions and block-structured statements. + // Those kinds of structures appear again and again as many languages share inspiration + // from their pre-decessors. Watching out not to loose any comments... + + // we flatten binary operators into their context for better flow of deeply nested + // operators. The effect will be somewhat like a separated list of expressions where + // the operators are the separators. 
+ case : + return U([toBox(e) | e <- elements]); + + // postfix operators stick + case : + return H([toBox(e) | e <- elements], hs=0); + + // prefix operators stick + case : + return H([toBox(e) | e <- elements], hs=0); + + // brackets stick + case : + return H([toBox(e) | e <- elements], hs=0); + + // if the sort name is statement-like and the structure block-like, we go for + // vertical with indentation + // program: "begin" Declarations decls {Statement ";"}* body "end" ; + case : + return V([ + toBox(elements[0], opts=opts), + I([V([toBox(e, opts=opts) | e <- elements[1..-1]])]), + toBox(elements[-1], opts=opts) + ]); + } + + return HV([toBox(a, opts=opts) | a <- args]); +} + +@synopsis{For ambiguity clusters an arbitrary choice is made.} +default Box toBox(amb({Tree t, *Tree _}), FO opts=fo()) = toBox(t); + +@synopsis{When we end up here we simply render the unicode codepoint back.} +default Box toBox(c:char(_), FormatOptions opts=fo() ) = L(""); + +@synopsis{Cycles are invisible and zero length} +default Box toBox(cycle(_, _), FO opts=fo()) = NULL(); + +@synopsis{Private type alias for legibility's sake} +private alias FO = FormatOptions; + +@synopsis{Removing production labels removes similar patterns in the main toBox function.} +private Production delabel(prod(label(_, Symbol s), list[Symbol] syms, set[Attr] attrs)) + = prod(s, delabel(syms), attrs); + +private default Production delabel(Production p) = p; + +private list[Symbol] delabel(list[Symbol] syms) = [delabel(s) | s <- syms]; + +private Symbol delabel(label(_, Symbol s)) = s; +private default Symbol delabel(Symbol s) = s; + +@synopsis{This is a short-hand for legibility's sake} +private FO fo() = formatOptions(); + +@synopsis{Implements normalization of case-insensitive literals} +private str ci(str word, toLower()) = toLowerCase(word); +private str ci(str word, toUpper()) = toUpperCase(word); +private str ci(str word, asIs()) = word; + +@synopsis{Split a text by the supported whitespace 
characters} +private list[str] words(str text) + = [ x | // := text]; \ No newline at end of file diff --git a/src/org/rascalmpl/library/util/Highlight.rsc b/src/org/rascalmpl/library/util/Highlight.rsc index 11c3d178af5..65f8d530690 100644 --- a/src/org/rascalmpl/library/util/Highlight.rsc +++ b/src/org/rascalmpl/library/util/Highlight.rsc @@ -1,84 +1,155 @@ + +@license{ + Copyright (c) 2013-2024 CWI + All rights reserved. This program and the accompanying materials + are made available under the terms of the Eclipse Public License v1.0 + which accompanies this distribution, and is available at + http://www.eclipse.org/legal/epl-v10.html +} +@contributor{Tijs.van.der.Storm@cwi.nl} +@contributor{Jurgen.Vinju@cwi.nl} +@synopsis{Maps parse trees to highlighting markup in ANSI, HTML or LaTeX format.} module util::Highlight import ParseTree; import String; -// A comment +@synopsis{Yields the characters of a parse tree as the original input sentence in a ... block, but with spans for highlighted segments in HTML} +public str ToHTML(Tree t) { + htmlEscapes = ( + "\<": "<", + "\>": ">", + "&" : "&" + ); + + str rec(t:appl(prod(lit(str l), _, _), _)) + = span("Keyword", l) when isKeyword(l); + + str rec(t:appl(prod(cilit(str l), _, _), _)) + = span("Keyword", l) when isKeyword(l); + + str rec(t:appl(prod(_, _, {*_, \tag("category"(str cat))}), list[Tree] as)) + = span(cat, "<}>"); + + default str rec(appl(_, list[Tree] as)) + = "<}>"; + + str rec(amb({k, *_})) = rec(k); + + default str rec(Tree t:char(_)) = escape("", htmlEscapes); + + str span(str class, str src) = "\\"\>\"; + + return "\
\\\";
+}
+
+@synopsis{Yields the characters of a parse tree as the original input sentence but using macros to wrap to-be-highlighted areas.}
+public str toLaTeX(Tree t) {
+  texEscapes = ( // one entry per character that LaTeX would otherwise interpret
+    "\\": "\\textbackslash{}",
+    "\<": "\\textless{}",
+    "\>": "\\textgreater{}",
+    "%" : "\\%{}",
+    "&" : "\\&{}",
+    "_" : "\\_{}",
+    "^" : "\\^{}",
+    "{" : "\\{{}",
+    "}" : "\\}{}",
+    "$" : "\\${}",  "#" : "\\#{}",               // `#` is the macro-parameter character
+    "[" : "{}[",    "~" : "\\textasciitilde{}",  // `~` is an unbreakable space; `{}[` keeps `[` from opening an optional argument
+    "\t" : "    "
+  );
+
+  // keywords may contain `_`, which LaTeX rejects outside math mode, so the text is escaped too
+  str rec(appl(prod(lit(str l), _, _), _)) = cat("Keyword", escape(l, texEscapes)) when isKeyword(l);
+
+  // case-insensitive literals: print the input's own spelling, not the grammar's
+  str rec(Tree x:appl(prod(cilit(str l), _, _), _)) = cat("Keyword", escape(unparse(x), texEscapes)) when isKeyword(l);
+
+  str rec(appl(prod(_, _, {*_, \tag("category"(str category))}), list[Tree] as))
+    = cat(category, ("" | it + rec(a) | Tree a <- as)); // render the children first, then wrap the whole yield
+
+  default str rec(appl(_, list[Tree] as))
+    = ("" | it + rec(a) | Tree a <- as); // no category: just concatenate what the children produce
+
+  str rec(amb({k, *_})) = rec(k); // ambiguity cluster: render one arbitrarily chosen alternative
 
-public map[str, str] htmlEscapes = (
-	"\<": "<",
-	"\>": ">",
-	"&" : "&"
-);
+  default str rec(Tree t:char(_)) = escape(unparse(t), texEscapes); // a single character: emit it LaTeX-escaped
 
+  str cat(str class, str src) = "\\CAT{<class>}{<src>}"; // \CAT{category}{text}; src must already be LaTeX-escaped
 
-str highlight2html(Tree t) 
-  = "\
\\\";
+  return rec(t);
+} 
 
-bool isKeyword(str s) = /^[a-zA-Z0-9_\-]*$/ := s;
+@synopsis{Yields the characters of a parse tree as the original input sentence in a ... block, but with spans for highlighted segments in HTML}
+public str toHTML(Tree t) {
+  htmlEscapes = ( // the three characters that must never appear raw in HTML text
+    "\<": "&lt;",
+    "\>": "&gt;",
+    "&" : "&amp;"
+  );
 
-str highlight2htmlRec(t:appl(prod(lit(str l), _, _), _)) 
-  = wrapLink(span("Keyword", l), t)
-  when isKeyword(l);
+  // case-sensitive literal shaped like a keyword: a "Keyword" span, passed through wrapLink
+  str rec(Tree kw:appl(prod(lit(str word), _, _), _))
+    = wrapLink(span("Keyword", word), kw) when isKeyword(word);
 
-str highlight2htmlRec(t:appl(prod(cilit(str l), _, _), _)) 
-  = wrapLink(span("Keyword", l), t)
-  when isKeyword(l);
+  // case-insensitive literal: show the spelling found in the input, not the grammar's
+  str rec(Tree kw:appl(prod(cilit(str l), _, _), _))
+    = wrapLink(span("Keyword", unparse(kw)), kw) when isKeyword(l);
 
-str highlight2htmlRec(t:appl(prod(_, _, {*_, \tag("category"(str cat))}), list[Tree] as))
-  = wrapLink(span(cat, ( "" | it + highlight2htmlRec(a) | a <- as )), t);
+  str rec(Tree whole:appl(prod(_, _, {*_, \tag("category"(str category))}), list[Tree] kids))
+    = wrapLink(span(category, "<for (Tree kid <- kids) {><rec(kid)><}>"), whole); // categorized: one span around the children's markup
 
-str highlight2htmlRec(appl(prod(_, _, set[Attr] attrs), list[Tree] as))
-  = ( "" | it + highlight2htmlRec(a) | a <- as )
-  when {*_, \tag("category"(str _))} !:= attrs;
+  // productions without a category tag contribute only what their children produce
+  str rec(appl(prod(_, _, set[Attr] attributes), list[Tree] kids))
+    = "<for (Tree kid <- kids) {><rec(kid)><}>" when {*_, \tag("category"(str _))} !:= attributes;
 
-str highlight2htmlRec(appl(regular(_), list[Tree] as))
-  = ( "" | it + highlight2htmlRec(a) | a <- as );
+  // regular nodes: concatenate the markup of the children
+  str rec(appl(regular(_), list[Tree] kids)) = "<for (Tree kid <- kids) {><rec(kid)><}>";
 
-str highlight2htmlRec(amb({k, *_})) = highlight2htmlRec(k);
+  str rec(amb({Tree first, *Tree _})) = rec(first); // ambiguity cluster: render one arbitrarily chosen alternative
 
-default str highlight2htmlRec(Tree t) 
-  = wrapLink(escape(unparse(t), htmlEscapes), t);
+  // anything else: the HTML-escaped yield of the tree, passed through wrapLink
+  default str rec(Tree other) = wrapLink(escape(unparse(other), htmlEscapes), other);
 
-str span(str class, str src) = "\\"\>\";
+  str span(str class, str src) = "\<span class=\"<class>\"\><src>\</span\>"; // src must already be HTML-escaped
 
-default str wrapLink(str text, Tree _) = text;
+  default str wrapLink(str markup, Tree _) = markup; // default: no link, the markup passes through unchanged
 
-// Latex
+  return "\
\\\";
+}
 
-public map[str, str] texEscapes = (
-	"\\": "\\textbackslash{}",
-	"\<": "\\textless{}",
-	"\>": "\\textgreater{}",
-	"%": "\\%{}",
-	"&" : "\\&{}",
-	"_" : "\\_{}",
-	"^" : "\\^{}",
-	"{" : "\\{{}",
-	"}" : "\\}{}",
-	"$" : "\\${}",
-	"[" : "{}[",
-	"\t" : "    "
-);
+@synopsis{Unparse a parse tree to unicode characters, wrapping certain substrings with ANSI codes for highlighting.}
+public str toANSI(Tree t, bool underlineAmbiguity=false, int tabSize=4) {
+  str rec(Tree x:appl(prod(lit(str l), _, _), _))   = isKeyword(l) ? bold(unparse(x)) : unparse(x); // emit the yield of x, bold when keyword-shaped
+  str rec(Tree x:appl(prod(cilit(str l), _, _), _)) = isKeyword(l) ? bold(unparse(x)) : unparse(x); // unparse keeps the casing used in the input
 
-str highlight2latex(appl(prod(lit(str l), _, _), _)) = catCmd("Keyword", l)
-  when isKeyword(l);
+  str rec(Tree x:appl(prod(_, _, {*_, \tag("category"(str cat))}), list[Tree] as))
+    = \map(cat, unparse(x)); // style the whole yield at once: a nested reset code would end the outer style early
 
-str highlight2latex(appl(prod(cilit(str l), _, _), _)) = catCmd("Keyword", l)
-  when isKeyword(l);
+  default str rec(appl(_, list[Tree] as))
+    = ("" | it + rec(a) | Tree a <- as); // recursing keeps the highlighting of nested keywords and comments
 
-str highlight2latex(appl(prod(_, _, {*_, \tag("category"(str cat))}), list[Tree] as))
-  = catCmd(cat, ( "" | it + highlight2latex(a) | a <- as ));
+  str rec(amb({Tree pick, *Tree _})) = underlineAmbiguity ? underline(rec(pick)) : rec(pick); // one arbitrary alternative, optionally underlined
 
-str highlight2latex(appl(prod(_, _, set[Attr] attrs), list[Tree] as))
-  = ( "" | it + highlight2latex(a) | a <- as )
-  when {*_, \tag("category"(str _))} !:= attrs;
+  str rec (char(9)) = right("", tabSize);           // a tab becomes tabSize spaces
+  default str rec(Tree t:char(_)) = unparse(t);     // every other character is emitted as-is
 
-str highlight2latex(appl(regular(_), list[Tree] as))
-  = ( "" | it + highlight2latex(a) | a <- as );
+  str ESC               = "\a1b[";   // control sequence introducer: ESC followed by `[`
+  str Bold              = "1m";
+  str Underline         = "4m";
+  str Normal            = "0m";      // resets all attributes
+  str Comment           = "3;2m";    // italic + faint in a single SGR sequence
+  str bold(str s)       = "<ESC><Bold><s><ESC><Normal>";
+  str underline(str s)  = "<ESC><Underline><s><ESC><Normal>";
+  str comment(str s)    = "<ESC><Comment><s><ESC><Normal>";
 
-str highlight2latex(amb({k, *_})) = highlight2latex(k);
+  str \map("Comment", str styled)     = comment(styled); // comments: the comment style
+  str \map("Keyword", str styled)     = bold(styled);    // keywords: bold
+  default str \map(str _, str styled) = styled;          // any other category: no styling
 
-default str highlight2latex(Tree t) = escape(unparse(t), texEscapes);
+  return rec(t);
+} 
 
-str catCmd(str class, str src) = "\\CAT{}{}";
+@synopsis{Encodes when to highlight a literal as a keyword category: one or more letters, digits, underscores or dashes.}
+private bool isKeyword(str s) = /^[a-zA-Z0-9_\-]+$/ := s;
\ No newline at end of file