From 668be7fe7748c4b6670bd90c9676ced7454d5569 Mon Sep 17 00:00:00 2001 From: paulklint Date: Sat, 9 Nov 2024 00:51:26 +0100 Subject: [PATCH 1/7] Experimenting with string similarity --- src/analysis/typepal/StringSimilarity.rsc | 56 +++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 src/analysis/typepal/StringSimilarity.rsc diff --git a/src/analysis/typepal/StringSimilarity.rsc b/src/analysis/typepal/StringSimilarity.rsc new file mode 100644 index 0000000..12de519 --- /dev/null +++ b/src/analysis/typepal/StringSimilarity.rsc @@ -0,0 +1,56 @@ +module analysis::typepal::StringSimilarity + +import String; +import List; + +int min(int a, int b, int c) += a < b ? (a < c ? a : c) : (b < c ? b : c); + +@synopsis{Calculate the Levenshtein distance of 2 strings} +@memo +int lev(str a, str b) + = lev(a, 0, b, 0); + +@memo +private int lev(str a, int ia, str b, int ib){ + if(ib == size(b)) return size(a) - ia; + if(ia == size(a)) return size(b) - ib; + if(a[ia] == b[ib]) return lev(a, ia+1, b, ib+1); + + return 1 + min(lev(a, ia+1, b, ib), + lev(a, ia, b, ib+1), + lev(a, ia+1, b, ib+1)); +} + +// @memo +// private int lev(str a, int ia, int lena, str b, int ib, int lenb){ +// if(lenb == 0) return lena; +// if(lena == 0) return lenb; +// if(a[ia] == b[ib]) return lev(a, ia+1, lena-1, b, ib+1, lenb-1); + +// return 1 + min(lev(a, ia+1, lena-1, b, ib, lenb), +// lev(a, ia, lena, b, ib+1, lenb-1), +// lev(a, ia+1, lena-1, b, ib+1, lenb-1)); +// } + +test bool lev0(str a, str b) = lev(a, b) == lev(b, a); + +test bool levx(str a, str b, str c) = lev(a, b) == lev(c + a, c + b); + +test bool lev1() = lev("kitten", "sitting") == 3; +test bool lev2() = lev("kelm", "hello") == 3; +test bool lev3() = lev("hello", "hella") == 1; +test bool lev4() = lev("hello", "") == 5; +test bool lev5() = lev("", "hello") == 5; +test bool lev6() = lev("aap", "noot") == 4; +test bool lev7() = lev("page", "pope") == 2; +test bool lev8() = lev("december", "january") == 8; +test 
bool lev9() = lev("march", "may") == 3; + + +alias WordSim = tuple[str word, int sim]; + +list[str] similarWords(str w, list[str] vocabulary, int maxDistance) += sort([ | str v <- vocabulary, d := lev(w, v), d <= maxDistance ], bool (WordSim x, WordSim y){ return x.sim < y.sim;}).word; + +value main() = similarWords("ac", ["a", "ab", "ac", "x"], 10); \ No newline at end of file From b5806dacc5abef1669090fa1deca426dc79a9736 Mon Sep 17 00:00:00 2001 From: paulklint Date: Sat, 9 Nov 2024 20:33:28 +0100 Subject: [PATCH 2/7] First experiment to add CodeActions to typepal - For every undefined name, suggestions are generated for similar names in similar roles. TODO: maybe type could also be used to filter suggestions. - An elegant/efficient implementation of Levenshtein distance has been made to measure edit distance. - When this approach works, the other errors generated directly by TypePal will be reviewed for possible additions of fixes. --- src/analysis/typepal/Solver.rsc | 36 +++++++++----- src/analysis/typepal/StringSimilarity.rsc | 57 ++++++++++++----------- 2 files changed, 54 insertions(+), 39 deletions(-) diff --git a/src/analysis/typepal/Solver.rsc b/src/analysis/typepal/Solver.rsc index e5bfe24..fe35ba4 100644 --- a/src/analysis/typepal/Solver.rsc +++ b/src/analysis/typepal/Solver.rsc @@ -3,21 +3,22 @@ module analysis::typepal::Solver /* Implementation of the ISolver interface; this is the API of TypePal's constraint solver */ -import Set; -import Node; -import Map; +extend analysis::typepal::Collector; +extend analysis::typepal::Messenger; + +import Exception; import IO; import List; import Location; +import Map; +import Message; +import Node; import ParseTree; -import Type; +import Set; import String; -import Message; -import Exception; -//import util::Benchmark; - -extend analysis::typepal::Collector; -extend analysis::typepal::Messenger; +import Type; +import analysis::typepal::StringSimilarity; +import util::IDEServices; void 
checkAllTypesAvailable(TModel tm){ for(tup: <- tm.defines){ @@ -1517,13 +1518,13 @@ Solver newSolver(map[str,Tree] namedTrees, TModel tm){ foundDefs = scopeGraph.lookup(u); } catch NoBinding(): { roles = size(u.idRoles) > 5 ? "" : intercalateOr([prettyRole(idRole) | idRole <- u.idRoles]); - messages += error("Undefined ``", u.occ); + messages += error("Undefined ``", u.occ, fixes=undefinedNameProposals(u, tm)); } } for(u <- notYetDefinedUses){ roles = size(u.idRoles) > 5 ? "" : intercalateOr([prettyRole(idRole) | idRole <- u.idRoles]); - messages += error("Undefined ``", u.occ); + messages += error("Undefined ``", u.occ, fixes=undefinedNameProposals(u, tm)); } error_locations = { src | error(_,loc src) <- messages }; @@ -1704,3 +1705,14 @@ Solver newSolver(map[str,Tree] namedTrees, TModel tm){ return thisSolver; } + +// CodeActions for errors generated by Solver + +list[CodeAction] undefinedNameProposals(Use u, TModel tm) + = + [ action( + title="Replace undefined ``", + edits=[changed([replace(u.occ, prop)])] + ) + | str prop <- similarNames(getOrgId(u), u.idRoles, tm, 3) + ]; \ No newline at end of file diff --git a/src/analysis/typepal/StringSimilarity.rsc b/src/analysis/typepal/StringSimilarity.rsc index 12de519..afd3866 100644 --- a/src/analysis/typepal/StringSimilarity.rsc +++ b/src/analysis/typepal/StringSimilarity.rsc @@ -1,41 +1,37 @@ module analysis::typepal::StringSimilarity -import String; import List; +import String; +import analysis::typepal::TModel; +@synopsis{Tryadic minimum function on integers} int min(int a, int b, int c) = a < b ? (a < c ? a : c) : (b < c ? 
b : c); @synopsis{Calculate the Levenshtein distance of 2 strings} -@memo -int lev(str a, str b) - = lev(a, 0, b, 0); - -@memo -private int lev(str a, int ia, str b, int ib){ - if(ib == size(b)) return size(a) - ia; - if(ia == size(a)) return size(b) - ib; - if(a[ia] == b[ib]) return lev(a, ia+1, b, ib+1); - - return 1 + min(lev(a, ia+1, b, ib), - lev(a, ia, b, ib+1), - lev(a, ia+1, b, ib+1)); +int lev(str a, str b){ + int sizea = size(a); + int sizeb = size(b); + + @memo + int lev(int ia, int ib){ + if(ib == sizeb) return sizea - ia; + if(ia == sizea) return sizeb - ib; + if(a[ia] == b[ib]) return lev(ia+1, ib+1); + + return 1 + min(lev(ia+1, ib), + lev(ia, ib+1), + lev(ia+1, ib+1)); + } + + return lev(0, 0); } -// @memo -// private int lev(str a, int ia, int lena, str b, int ib, int lenb){ -// if(lenb == 0) return lena; -// if(lena == 0) return lenb; -// if(a[ia] == b[ib]) return lev(a, ia+1, lena-1, b, ib+1, lenb-1); +// Tests for `lev` -// return 1 + min(lev(a, ia+1, lena-1, b, ib, lenb), -// lev(a, ia, lena, b, ib+1, lenb-1), -// lev(a, ia+1, lena-1, b, ib+1, lenb-1)); -// } +test bool levCommutative(str a, str b) = lev(a, b) == lev(b, a); -test bool lev0(str a, str b) = lev(a, b) == lev(b, a); - -test bool levx(str a, str b, str c) = lev(a, b) == lev(c + a, c + b); +test bool levLeftAdditive(str a, str b, str c) = lev(a, b) == lev(c + a, c + b); test bool lev1() = lev("kitten", "sitting") == 3; test bool lev2() = lev("kelm", "hello") == 3; @@ -47,10 +43,17 @@ test bool lev7() = lev("page", "pope") == 2; test bool lev8() = lev("december", "january") == 8; test bool lev9() = lev("march", "may") == 3; +// Similarity functions to be used by TypePal +@synopsis{WordSim represents one word from the vocabulary and its similariy to the original word} alias WordSim = tuple[str word, int sim]; +@synopsis{Compute list of words from vocabulary, that are similar to give word w with at most maxDistance edits} list[str] similarWords(str w, list[str] vocabulary, int 
maxDistance) = sort([ | str v <- vocabulary, d := lev(w, v), d <= maxDistance ], bool (WordSim x, WordSim y){ return x.sim < y.sim;}).word; -value main() = similarWords("ac", ["a", "ab", "ac", "x"], 10); \ No newline at end of file +@synopsis{Find in TModel tm, names similar to w, in gives roles, with at most maxDistance edits} +list[str] similarNames(str w, set[IdRole] idRoles, TModel tm, int maxDistance){ + vocabulary = [ d.orgId | d <- tm.defines, d.idRole in idRoles ]; + return similarWords(w, vocabulary, maxDistance); +} From d142d91db1d1e0dacebcf6ac984d4daed1156906 Mon Sep 17 00:00:00 2001 From: paulklint Date: Wed, 13 Nov 2024 15:23:52 +0100 Subject: [PATCH 3/7] Added configuration options for error fixes --- src/analysis/typepal/ConfigurableScopeGraph.rsc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/analysis/typepal/ConfigurableScopeGraph.rsc b/src/analysis/typepal/ConfigurableScopeGraph.rsc index 02f6636..0d8d923 100644 --- a/src/analysis/typepal/ConfigurableScopeGraph.rsc +++ b/src/analysis/typepal/ConfigurableScopeGraph.rsc @@ -137,7 +137,11 @@ data TypePalConfig( bool(loc def, TModel tm) reportUnused = defaultReportUnused, - loc (Define def, str modelName, PathConfig pcfg) createLogicalLoc = defaultLogicalLoc + loc (Define def, str modelName, PathConfig pcfg) createLogicalLoc = defaultLogicalLoc, + + bool enableErrorFixes = true, + + int cutoffForNameSimilarity = 3 ); From e138af43aad002413be1d5cdebfa09d5ae2dc141 Mon Sep 17 00:00:00 2001 From: paulklint Date: Wed, 13 Nov 2024 15:25:17 +0100 Subject: [PATCH 4/7] Added parameters to @memo; use configuration parameters --- src/analysis/typepal/StringSimilarity.rsc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/analysis/typepal/StringSimilarity.rsc b/src/analysis/typepal/StringSimilarity.rsc index afd3866..973528d 100644 --- a/src/analysis/typepal/StringSimilarity.rsc +++ b/src/analysis/typepal/StringSimilarity.rsc @@ -13,7 +13,7 @@ int lev(str a, 
str b){ int sizea = size(a); int sizeb = size(b); - @memo + @memo{expireAfter(minutes=1),maximumSize(50)} int lev(int ia, int ib){ if(ib == sizeb) return sizea - ia; if(ia == sizea) return sizeb - ib; @@ -52,8 +52,8 @@ alias WordSim = tuple[str word, int sim]; list[str] similarWords(str w, list[str] vocabulary, int maxDistance) = sort([ | str v <- vocabulary, d := lev(w, v), d <= maxDistance ], bool (WordSim x, WordSim y){ return x.sim < y.sim;}).word; -@synopsis{Find in TModel tm, names similar to w, in gives roles, with at most maxDistance edits} -list[str] similarNames(str w, set[IdRole] idRoles, TModel tm, int maxDistance){ +@synopsis{Find in TModel tm, names similar to w, in gives roles. Max edit distance comes from TypePal Configuration.} +list[str] similarNames(str w, set[IdRole] idRoles, TModel tm){ vocabulary = [ d.orgId | d <- tm.defines, d.idRole in idRoles ]; - return similarWords(w, vocabulary, maxDistance); + return similarWords(w, vocabulary, maxDistance,tm.config.cutoffForNameSimilarity); } From fa9a53be199dd588e77ac61cbd900bcc2b478b7c Mon Sep 17 00:00:00 2001 From: paulklint Date: Wed, 13 Nov 2024 15:26:27 +0100 Subject: [PATCH 5/7] Error fixes now depend on configuration settings --- src/analysis/typepal/Solver.rsc | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/analysis/typepal/Solver.rsc b/src/analysis/typepal/Solver.rsc index fe35ba4..1b3d52d 100644 --- a/src/analysis/typepal/Solver.rsc +++ b/src/analysis/typepal/Solver.rsc @@ -1518,13 +1518,21 @@ Solver newSolver(map[str,Tree] namedTrees, TModel tm){ foundDefs = scopeGraph.lookup(u); } catch NoBinding(): { roles = size(u.idRoles) > 5 ? 
"" : intercalateOr([prettyRole(idRole) | idRole <- u.idRoles]); - messages += error("Undefined ``", u.occ, fixes=undefinedNameProposals(u, tm)); + msg = error("Undefined ``", u.occ); + if(config.enableErrorFixes){ + msg.fixes = undefinedNameProposals(u, tm); + } + messages += msg; } } for(u <- notYetDefinedUses){ roles = size(u.idRoles) > 5 ? "" : intercalateOr([prettyRole(idRole) | idRole <- u.idRoles]); - messages += error("Undefined ``", u.occ, fixes=undefinedNameProposals(u, tm)); + msg = error("Undefined ``", u.occ); + if(config.enableErrorFixes){ + msg.fixes = undefinedNameProposals(u, tm); + } + messages += msg; } error_locations = { src | error(_,loc src) <- messages }; @@ -1714,5 +1722,5 @@ list[CodeAction] undefinedNameProposals(Use u, TModel tm) title="Replace undefined ``", edits=[changed([replace(u.occ, prop)])] ) - | str prop <- similarNames(getOrgId(u), u.idRoles, tm, 3) + | str prop <- similarNames(getOrgId(u), u.idRoles, tm) ]; \ No newline at end of file From d6c21a21f6bb49e908b245d2cb7a48dae3581cf2 Mon Sep 17 00:00:00 2001 From: paulklint Date: Wed, 13 Nov 2024 22:05:43 +0100 Subject: [PATCH 6/7] Adapted to new configuration settings --- src/analysis/typepal/Collector.rsc | 2 +- src/analysis/typepal/Solver.rsc | 4 ++-- src/analysis/typepal/StringSimilarity.rsc | 6 ++++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/analysis/typepal/Collector.rsc b/src/analysis/typepal/Collector.rsc index 7b50d40..e3635f0 100644 --- a/src/analysis/typepal/Collector.rsc +++ b/src/analysis/typepal/Collector.rsc @@ -1016,7 +1016,7 @@ Collector newCollector(str modelName, map[str,Tree] namedTrees, TypePalConfig co throw TypePalUsage("Missing `leaveScope`(s): unclosed scopes "); } - tm = tmodel(); + tm = tmodel()[usesPhysicalLocs=true]; tm.modelName = modelName; tm.moduleLocs = (nm : getLoc(namedTrees[nm]) | nm <- namedTrees); diff --git a/src/analysis/typepal/Solver.rsc b/src/analysis/typepal/Solver.rsc index 1b3d52d..037dada 100644 --- 
a/src/analysis/typepal/Solver.rsc +++ b/src/analysis/typepal/Solver.rsc @@ -1519,7 +1519,7 @@ Solver newSolver(map[str,Tree] namedTrees, TModel tm){ } catch NoBinding(): { roles = size(u.idRoles) > 5 ? "" : intercalateOr([prettyRole(idRole) | idRole <- u.idRoles]); msg = error("Undefined ``", u.occ); - if(config.enableErrorFixes){ + if(tm.config.enableErrorFixes){ msg.fixes = undefinedNameProposals(u, tm); } messages += msg; @@ -1529,7 +1529,7 @@ Solver newSolver(map[str,Tree] namedTrees, TModel tm){ for(u <- notYetDefinedUses){ roles = size(u.idRoles) > 5 ? "" : intercalateOr([prettyRole(idRole) | idRole <- u.idRoles]); msg = error("Undefined ``", u.occ); - if(config.enableErrorFixes){ + if(tm.config.enableErrorFixes){ msg.fixes = undefinedNameProposals(u, tm); } messages += msg; diff --git a/src/analysis/typepal/StringSimilarity.rsc b/src/analysis/typepal/StringSimilarity.rsc index 973528d..25717ca 100644 --- a/src/analysis/typepal/StringSimilarity.rsc +++ b/src/analysis/typepal/StringSimilarity.rsc @@ -1,8 +1,10 @@ module analysis::typepal::StringSimilarity import List; +import Set; import String; import analysis::typepal::TModel; +import analysis::typepal::ConfigurableScopeGraph; @synopsis{Tryadic minimum function on integers} int min(int a, int b, int c) @@ -50,10 +52,10 @@ alias WordSim = tuple[str word, int sim]; @synopsis{Compute list of words from vocabulary, that are similar to give word w with at most maxDistance edits} list[str] similarWords(str w, list[str] vocabulary, int maxDistance) -= sort([ | str v <- vocabulary, d := lev(w, v), d <= maxDistance ], bool (WordSim x, WordSim y){ return x.sim < y.sim;}).word; += sort({ | str v <- vocabulary, d := lev(w, v), d <= maxDistance }, bool (WordSim x, WordSim y){ return x.sim < y.sim;}).word; @synopsis{Find in TModel tm, names similar to w, in gives roles. 
Max edit distance comes from TypePal Configuration.} list[str] similarNames(str w, set[IdRole] idRoles, TModel tm){ vocabulary = [ d.orgId | d <- tm.defines, d.idRole in idRoles ]; - return similarWords(w, vocabulary, maxDistance,tm.config.cutoffForNameSimilarity); + return similarWords(w, vocabulary, tm.config.cutoffForNameSimilarity); } From 0a07fe6b276d1277e5ff79983a84d446e92b5f74 Mon Sep 17 00:00:00 2001 From: paulklint Date: Mon, 25 Nov 2024 22:27:53 +0100 Subject: [PATCH 7/7] Added a similarNames field to TypePalConfig A user of TypePal can now control the way similar names are generated. --- src/analysis/typepal/ConfigurableScopeGraph.rsc | 5 +++++ src/analysis/typepal/Solver.rsc | 2 +- src/analysis/typepal/StringSimilarity.rsc | 15 ++++++++++----- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/analysis/typepal/ConfigurableScopeGraph.rsc b/src/analysis/typepal/ConfigurableScopeGraph.rsc index 0d8d923..3204020 100644 --- a/src/analysis/typepal/ConfigurableScopeGraph.rsc +++ b/src/analysis/typepal/ConfigurableScopeGraph.rsc @@ -15,6 +15,7 @@ import Map; import util::Reflective; import String; extend ParseTree; +import analysis::typepal::StringSimilarity; public loc anonymousOccurrence = |rascal-typepal:///anonymous_occurrence|(0,1,<2,3>,<2,4>); @@ -79,6 +80,8 @@ loc defaultLogicalLoc(Define def, str _modelName, PathConfig _pcfg){ return def.defined; // return original and don't create logical location } +list[str] defaultSimilarNames(Use u, TModel tm) = similarNames(u, tm); + // Extends TypePalConfig defined in analysis::typepal::ScopeGraph data TypePalConfig( @@ -139,6 +142,8 @@ data TypePalConfig( loc (Define def, str modelName, PathConfig pcfg) createLogicalLoc = defaultLogicalLoc, + list[str] (Use u, TModel tm) similarNames = defaultSimilarNames, + bool enableErrorFixes = true, int cutoffForNameSimilarity = 3 diff --git a/src/analysis/typepal/Solver.rsc b/src/analysis/typepal/Solver.rsc index 037dada..813d220 100644 --- 
a/src/analysis/typepal/Solver.rsc +++ b/src/analysis/typepal/Solver.rsc @@ -1722,5 +1722,5 @@ list[CodeAction] undefinedNameProposals(Use u, TModel tm) title="Replace undefined ``", edits=[changed([replace(u.occ, prop)])] ) - | str prop <- similarNames(getOrgId(u), u.idRoles, tm) + | str prop <- tm.config.similarNames(u, tm) ]; \ No newline at end of file diff --git a/src/analysis/typepal/StringSimilarity.rsc b/src/analysis/typepal/StringSimilarity.rsc index 25717ca..af44681 100644 --- a/src/analysis/typepal/StringSimilarity.rsc +++ b/src/analysis/typepal/StringSimilarity.rsc @@ -1,10 +1,12 @@ module analysis::typepal::StringSimilarity import List; +import IO; +import Location; import Set; import String; import analysis::typepal::TModel; -import analysis::typepal::ConfigurableScopeGraph; +import analysis::typepal::ConfigurableScopeGraph; @synopsis{Tryadic minimum function on integers} int min(int a, int b, int c) @@ -52,10 +54,13 @@ alias WordSim = tuple[str word, int sim]; @synopsis{Compute list of words from vocabulary, that are similar to give word w with at most maxDistance edits} list[str] similarWords(str w, list[str] vocabulary, int maxDistance) -= sort({ | str v <- vocabulary, d := lev(w, v), d <= maxDistance }, bool (WordSim x, WordSim y){ return x.sim < y.sim;}).word; += sort({ | str v <- vocabulary, d := lev(w, v), d <= maxDistance }, + bool (WordSim x, WordSim y){ return x.sim < y.sim;}).word; -@synopsis{Find in TModel tm, names similar to w, in gives roles. Max edit distance comes from TypePal Configuration.} -list[str] similarNames(str w, set[IdRole] idRoles, TModel tm){ - vocabulary = [ d.orgId | d <- tm.defines, d.idRole in idRoles ]; +@synopsis{Find in TModel tm, names similar to Use u. 
Max edit distance comes from TypePal Configuration.} +list[str] similarNames(Use u, TModel tm){ + w = getOrgId(u); + idRoles = u.idRoles; + vocabulary = [ d.orgId | d <- tm.defines, d.idRole in idRoles, isContainedIn(u.occ, d.scope) ]; return similarWords(w, vocabulary, tm.config.cutoffForNameSimilarity); }