Skip to content

Commit

Permalink
Merge pull request #1936 from usethesource/m3-jls-13-and-higher
Browse files Browse the repository at this point in the history
Support Java JLS 13 and JLS14 preview in M3 models and Java ASTs; also make ASTs jump through the correctness specification in lang::analysis::AST
  • Loading branch information
jurgenvinju authored May 9, 2024
2 parents 6bb94a2 + efa6401 commit 2690e1f
Show file tree
Hide file tree
Showing 34 changed files with 2,748 additions and 1,201 deletions.
9 changes: 7 additions & 2 deletions src/org/rascalmpl/interpreter/TestEvaluator.java
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,13 @@ public static int readIntTag(AbstractFunction test, String key, int defaultVal)
}

private void runTests(ModuleEnvironment env, List<AbstractFunction> tests) {
testResultListener.start(env.getName(), tests.size());
if (tests.size() <= 0) {
return;
}

eval.job("Testing " + env.getName(), tests.size(), (String jn) -> {
testResultListener.start(env.getName(), tests.size());

// first, let's shuffle the tests
var theTests = new ArrayList<>(tests); // just to be sure, clone the list
Collections.shuffle(theTests);
Expand Down Expand Up @@ -146,8 +151,8 @@ private void runTests(ModuleEnvironment env, List<AbstractFunction> tests) {
eval.getErrorPrinter().flush();
}

testResultListener.done();
return true;
});
testResultListener.done();
}
}
30 changes: 30 additions & 0 deletions src/org/rascalmpl/library/IO.rsc
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,36 @@ is a module name, then `lang/x/myLanguage/examples/myExampleFile.mL` is an examp
java set[loc] findResources(str fileName);
set[loc] findResources(loc path) = findResources(path.path) when path.scheme == "relative";

@synopsis{Search for a single resource instance, and fail if no or multiple instances are found}
@description{
This is a utility wrapper around ((findResources)).
It processes the result set of ((findResources)) to:
* return a singleton location if the `fileName` was found.
* throw an IO exception if no instance of `fileName` was found.
* throw an IO exception if multiple instances of `fileName` were found.
}
@benefits{
* Saves some code to unpack the set that ((findResources)) produces.
}
@pitfalls{
* ((getResource)) searches for all instances in the entire run-time context of the current
module. So if the search path (classpath) grows, new similar files may be added that match and this
function will start throwing IO exceptions. If you can influence the `fileName`, then make sure
to pick a name that is always going to be unique for the current project.
}
loc getResource(str fileName) throws IO {
    result = findResources(fileName);

    // exactly one match: this is the only successful outcome
    if ({loc singleton} := result) {
        return singleton;
    }

    // no match at all
    if ({} := result) {
        throw IO("<fileName> not found");
    }

    // anything else means two or more matches; report all of them
    throw IO("<fileName> found more than once: <result>");
}

@synopsis{Append a value to a file.}
@description{
Append a textual representation of some values to an existing or a newly created file:
Expand Down
12 changes: 12 additions & 0 deletions src/org/rascalmpl/library/Node.rsc
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,18 @@ setKeywordParameters("f"(10, "abc"), ("height":0));
@javaClass{org.rascalmpl.library.Prelude}
public java &T <: node setKeywordParameters(&T <: node x, map[str,value] keywordParameters);

// @synopsis{Adds new keyword parameters to a node, keeping the existing ones unless there is an entry in the new map.}
// @examples{
// ```rascal-shell
// import Node;
// mergeKeywordParameters("f"(10, "abc", width=10), ("height":0));
// ```
// }
// @javaClass{org.rascalmpl.library.Prelude}
// TODO: uncomment after bootstrap
// public java &T <: node mergeKeywordParameters(&T <: node x, map[str,value] keywordParameters);


@deprecated{
Use setKeywordParameters(x, keywordParameters)
}
Expand Down
6 changes: 6 additions & 0 deletions src/org/rascalmpl/library/Prelude.java
Original file line number Diff line number Diff line change
Expand Up @@ -2201,6 +2201,12 @@ public INode setKeywordParameters(INode node, IMap kwargs) {
kwargs.entryIterator().forEachRemaining((kv) -> map.put(((IString)kv.getKey()).getValue(), kv.getValue()));
return node.asWithKeywordParameters().setParameters(map);
}

/**
 * Adds keyword parameters to a node, keeping the existing ones unless
 * {@code kwargs} contains an entry with the same name, in which case the
 * entry from {@code kwargs} wins.
 *
 * @param node   the node whose keyword parameters are extended
 * @param kwargs the parameters to add; keys are cast to {@code IString}
 * @return the node returned by {@code setParameters} for the merged map
 */
public INode mergeKeywordParameters(INode node, IMap kwargs) {
    // Copy before merging: the map handed out by getParameters() belongs to the
    // node (it may be an unmodifiable view or shared internal state), so calling
    // put() on it directly would either throw or mutate an immutable value.
    Map<String, IValue> merged = new java.util.HashMap<>(node.asWithKeywordParameters().getParameters());
    kwargs.entryIterator().forEachRemaining((kv) -> merged.put(((IString) kv.getKey()).getValue(), kv.getValue()));
    return node.asWithKeywordParameters().setParameters(merged);
}

public INode unset(INode node, IString label) {
return node.mayHaveKeywordParameters() ? node.asWithKeywordParameters().unsetParameter(label.getValue()) : node;
Expand Down
172 changes: 139 additions & 33 deletions src/org/rascalmpl/library/analysis/m3/AST.rsc
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ The concept of _declaration_ is also relevant. A `decl` annotation points from a
Finally, the concept of a _type_ is relevant for ASTs. In particular an `Expression` may have a `typ` annotation, or a variable declaration, etc.
}
@benefits{
* Symbolic abstract syntax trees can be analyzed and transformed easily using Rascal primitives such as patterns, comprehensions and visit.
* Symbolic abstract syntax trees can be analyzed and transformed easily using Rascal primitives such as patterns, comprehensions and visit.
* By re-using recognizable names for different programming languages, it's easier to switch between languages to analyze.
* Some algorithms may be reusable on different programming languages, but please be aware of the _pitfalls_.
}
@pitfalls{
* Even though different languages may map to the same syntactic construct, this does not mean that the semantics is the same. Downstream
Expand All @@ -26,43 +28,93 @@ module analysis::m3::AST

import Message;
import Node;
import IO;
import Set;
import util::Monitor;
import analysis::m3::TypeSymbol;

@synopsis{For metric purposes we can use a true AST declaration tree, a simple list of lines for generic metrics, or the reason why we do not have an AST.}
data \AST(loc file = |unknown:///|)
= declaration(Declaration declaration)
| lines(list[str] contents)
| noAST(Message msg)
;

loc unknownSource = |unknown:///|;
loc unresolvedDecl = |unresolved:///|;
loc unresolvedType = |unresolved:///|;

@synopsis{Uniform name for everything that is declared in programming languages: variables, functions, classes, etc.}
@description{
Instances of the Declaration type represent the _syntax_ of declarations in programming languages.
| field name | description |
| ---------- | ----------- |
| `src` | the exact source location of the declaration in a source file |
| `decl` | the resolved fully qualified name of the artefact that is being declared here |
| `typ` | a symbolic representation of the static type of the declared artefact here (not the syntax of the type) |
}
data Declaration(
loc src = |unknown:///|,
loc decl = |unresolved:///|, //unresolvedDecl
TypeSymbol typ = \any(),
list[Modifier] modifiers = [],
list[Message] messages = []
loc decl = |unresolved:///|,
TypeSymbol typ = unresolved()
);

@synopsis{Uniform name for everything that is typically a _statement_ in programming languages: assignment, loops, conditionals, jumps}
@description{
Instances of the Statement type represent the _syntax_ of statements in programming languages.
| field name | description |
| ---------- | ----------- |
| `src` | the exact source location of the statement in a source file |
| `decl` | if the statement directly represents a usage of a declared artefact, then this points to the fully qualified name of the used artefact |
}
data Statement(
loc src = |unknown:///|,
loc decl = |unresolved:///| //unresolvedDecl
loc decl = |unresolved:///|
);

@synopsis{Uniform name for everything that is an _expression_ in programming languages: arithmetic, comparisons, function invocations, ...}
@description{
Instances of the Expression type represent the _syntax_ of expressions in programming languages.
| field name | description |
| ---------- | ----------- |
| `src` | the exact source location of the expression in a source file |
| `decl` | if this expression represents a usage, decl is the resolved fully qualified name of the artefact that is being used here |
| `typ` | a symbolic representation of the static type of the _result_ of the expression |
}
data Expression(
loc src = |unknown:///|,
loc decl = |unresolved:///|, //unresolvedDecl,
TypeSymbol typ = \any()
loc decl = |unresolved:///|,
TypeSymbol typ = \unresolved()
);

@synopsis{Uniform name for everything that is a _type_ in programming languages syntax: int, void, List<Expression>, ...}
@description{
Instances of the Type type represent the _syntax_ of types in programming languages.
| field name | description |
| ---------- | ----------- |
| `src` | the exact source location of the expression in a source file |
| `decl` | the fully qualified name of the type, if resolved and if well-defined |
| `typ` | a symbolic representation of the static type that is the meaning of this type expression |
}
data Type(
loc name = |unresolved:///|, //unresolvedType,
TypeSymbol typ = \any()
loc src = |unknown:///|,
loc decl = |unresolved:///|,
TypeSymbol typ = \unresolved()
);

data Modifier;
@synopsis{Uniform name for everything that is a _modifier_ in programming languages syntax: public, static, final, etc.}
@description{
Instances of the Modifier type represent the _syntax_ of modifiers in programming languages.
| field name | description |
| ---------- | ----------- |
| `src` | the exact source location of the modifier in a source file |
}
data Modifier(
loc src = |unknown:///|
);

data Bound;

@synopsis{Test for the consistency characteristics of an M3 annotated abstract syntax tree}
bool astNodeSpecification(node n, str language = "java", bool checkNameResolution=false, bool checkSourceLocation=true) {
Expand All @@ -75,35 +127,89 @@ bool astNodeSpecification(node n, str language = "java", bool checkNameResolutio
int end(loc l) = l.offset + l.length;
bool leftToRight(loc l, loc r) = end(l) <= begin(r);
bool leftToRight(node a, node b) = leftToRight(pos(a), pos(b));
bool included(node parent, node child) = begin(parent) <= begin(child) && end(child) <= end(parent);

if (checkSourceLocation) {
// all nodes have src annotations
assert all(/node x := n, x.src?);
// siblings are sorted in the input, even if some of them are lists
assert all(/node x := n, [*_, node a, node b, *_] := getChildren(x), leftToRight(a,b));
assert all(/node x := n, [*_, node a, [node b, *_], *_] := getChildren(x), leftToRight(a,b));
assert all(/node x := n, [*_, [*_, node a], node b, *_] := getChildren(x), leftToRight(a,b));
assert all(/node x := n, [*_, [*_, node a], [node b, *_], *_] := getChildren(x), leftToRight(a,b));
assert all(/[*_, node a, node b, *_] := n, leftToRight(a,b));

// children positions are included in the parent input scope
assert all(/node parent := n, /node child := parent, begin(parent) <= begin(child), end(child) <= end(parent));
// all AST nodes have src annotations
for (/node x := n, TypeSymbol _ !:= x, Message _ !:= x, Bound _ !:= x) {
if (!(x.src?)) {
println("No .src annotation on:
' <x>");
return false;
}
// Note that by removing all the (unannotated) empty lists here, we cover many more complex situations
// below in detecting adjacent nodes in syntax trees.
children = [ e | e <- getChildren(x), e != []];
// Here we collect all the possible ways nodes can be direct siblings in an abstract syntax tree:
siblings = [
*[<a,b> | [*_, node a, node b, *_] := children], // adjacent nodes
*[<a,b> | [*_, node a, [node b, *_], *_] := children], // node followed by non-empty list
*[<a,b> | [*_, [*_, node a], node b, *_] := children], // non-empty list followed by node
*[<a,b> | [*_, [*_, node a], [node b, *_], *_] := children], // adjacent non-empty lists
*[<a,b> | [*_, [*_, node a, node b, *_], *_] := children] // nodes inside a list (elements can not be lists again)
];
// Note that by induction: if all the pairwise adjacent siblings are in-order, then all siblings are in order
// siblings are sorted in the input, even if some of them are lists
for (<a,b> <- siblings) {
if (!leftToRight(a, b)) {
println("Siblings are out of order:
'a : <a.src> is <a>
'b : <b.src> is <b>");
return false;
}
if (ab <- [a,b], !included(n, ab)) {
println("Child location not is not covered by the parent location:
' parent: <n.src>
' child : <ab.src>, is <ab>");
return false;
}
}
// if ([*_, [*_, [*_], *_], *_] := getChildren(x)) {
// println("Node contains a directly nested list:
// ' <n.src> : <n>");
// return false;
// }
// if ([_, *_, str _, *_] := children || [*_, str _, *_, _] := children) {
// println("Literals and identifiers must be singletons:
// ' <n>");
// return false;
// }
}
}
if (checkNameResolution) {
// all resolved names have the language as schema prefix
//TODO: for the benefit of the compiler, changed
// assert all(/node m := n, m.decl?, /^<language>/ := decl(m).scheme);
//to:
for(/node m := n){
assert m.decl? && /^<language>/ := decl(m).scheme;
for (/node m := n, m.decl?) {
if (decl(m).scheme == "unresolved") {
println("Use decl has remained unresolved at <m.src>.");
}
else if (/^<language>/ !:= decl(m).scheme) {
println("<m.decl> has a strange loc scheme at <m.src>");
return false;
}
}
}

return true;
}

@synopsis{Check the AST node specification on a (large) set of ASTs and monitor the progress.}
bool astNodeSpecification(set[node] toCheck, str language = "java", bool checkNameResolution=false, bool checkSourceLocation=true)
= job("AST specification checker", bool (void (str, int) step) {
// Check every AST in the set with the single-node astNodeSpecification,
// forwarding the language and both check flags unchanged.
for (node ast <- toCheck) {
// Report one unit of work per AST; the label is the path of the AST's src
// location, or a fixed placeholder text when `ast.src` does not match a loc.
step(loc l := ast.src ? l.path : "AST without src location", 1);
if (!astNodeSpecification(ast, language=language, checkNameResolution=checkNameResolution, checkSourceLocation=checkSourceLocation)) {
// Stop at the first AST that violates the specification.
return false;
}
}

// No AST failed the check (also the result for an empty set).
return true;
}, totalWork=size(toCheck)); // total work equals the number of ASTs, matching the one step per AST above



Loading

0 comments on commit 2690e1f

Please sign in to comment.