From 4c8f2e6e7ee0a0d3b49cb8c80cc9390217bfb82b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=93lafur=20P=C3=A1ll=20Geirsson?= Date: Wed, 19 May 2021 12:26:28 +0200 Subject: [PATCH 1/2] Prioritize Maven packages over the JDK when exporting monikers Previously, when a Maven package re-defined a class from the JDK then lsif-java would emit an export against the JDK instead of the Maven package. This caused problems with navigation to the JDK on Sourcegraph because there were multiple repositories that exported monikers against the `jdk/8` packageInformation node. Now, only the JDK repo should export monikers for the JDK repo. --- .../java/com/sourcegraph/lsif_semanticdb/PackageTable.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/PackageTable.java b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/PackageTable.java index 395ffcf7..7240e6f4 100644 --- a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/PackageTable.java +++ b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/PackageTable.java @@ -36,10 +36,14 @@ public class PackageTable implements Function { public PackageTable(LsifSemanticdbOptions options, LsifWriter writer) throws IOException { this.writer = writer; this.javaVersion = new JavaVersion(); + // NOTE: it's important that we index the JDK before maven packages. Some maven packages + // redefine classes from the JDK and we want those maven packages to take precedence over + // the JDK. The motivation to prioritize maven packages over the JDK is that we only want + // to exports monikers against the JDK when indexing the JDK repo. + indexJdk(); for (MavenPackage pkg : options.packages) { indexPackage(pkg); } - indexJdk(); } public void writeImportedSymbol(String symbol, int monikerId) { From 543fb44a18f17d7946691db298e796c77552a686 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=93lafur=20P=C3=A1ll=20Geirsson?= Date: Wed, 19 May 2021 13:30:22 +0200 Subject: [PATCH 2/2] Never export monikers for the JDK except when indexin the JDK Previously, there was a small risk that lsif-java would export symbols against the JDK when indexing non-JDK packages like com.google.gwt:gwt-user:2.9.0, which contain source files for `java.lang.String` but don't define `java/lang/String.class`. This commit, adds a special case for the JDK so that monikers for JDK symbols are always imported when indexing non-JDK repos. --- .../lsif_java/buildtools/BuildTool.scala | 1 + .../lsif_java/buildtools/LsifBuildTool.scala | 2 ++ .../lsif_java/commands/IndexCommand.scala | 1 + .../commands/IndexSemanticdbCommand.scala | 6 ++++- .../lsif_semanticdb/LsifSemanticdb.java | 2 +- .../LsifSemanticdbOptions.java | 5 +++- .../lsif_semanticdb/PackageTable.java | 26 +++++++++---------- .../lsif_semanticdb/ResultSets.java | 19 ++++++++++++-- 8 files changed, 44 insertions(+), 18 deletions(-) diff --git a/lsif-java/src/main/scala/com/sourcegraph/lsif_java/buildtools/BuildTool.scala b/lsif-java/src/main/scala/com/sourcegraph/lsif_java/buildtools/BuildTool.scala index b002b527..86784b82 100644 --- a/lsif-java/src/main/scala/com/sourcegraph/lsif_java/buildtools/BuildTool.scala +++ b/lsif-java/src/main/scala/com/sourcegraph/lsif_java/buildtools/BuildTool.scala @@ -13,6 +13,7 @@ abstract class BuildTool(val name: String, index: IndexCommand) { protected def defaultTargetroot: Path def isHidden: Boolean = false + def buildKind: String = "" final def sourceroot: Path = index.workingDirectory final def targetroot: Path = diff --git a/lsif-java/src/main/scala/com/sourcegraph/lsif_java/buildtools/LsifBuildTool.scala b/lsif-java/src/main/scala/com/sourcegraph/lsif_java/buildtools/LsifBuildTool.scala index 9d25c9dc..5a539ab7 100644 --- a/lsif-java/src/main/scala/com/sourcegraph/lsif_java/buildtools/LsifBuildTool.scala +++ b/lsif-java/src/main/scala/com/sourcegraph/lsif_java/buildtools/LsifBuildTool.scala @@ -54,6 +54,8 @@ class LsifBuildTool(index: IndexCommand) extends BuildTool("LSIF", index) { index.workingDirectory.resolve(LsifBuildTool.ConfigFileName) def usedInCurrentDirectory(): Boolean = Files.isRegularFile(configFile) override def isHidden: Boolean = true + override def buildKind: String = + parsedConfig.fold(_.kind, _ => super.buildKind) def generateSemanticdb(): CommandResult = { parsedConfig match { case ValueResult(value) => diff --git a/lsif-java/src/main/scala/com/sourcegraph/lsif_java/commands/IndexCommand.scala b/lsif-java/src/main/scala/com/sourcegraph/lsif_java/commands/IndexCommand.scala index 8f28d764..5dde290a 100644 --- a/lsif-java/src/main/scala/com/sourcegraph/lsif_java/commands/IndexCommand.scala +++ b/lsif-java/src/main/scala/com/sourcegraph/lsif_java/commands/IndexCommand.scala @@ -160,6 +160,7 @@ case class IndexCommand( output = finalOutput, targetroot = List(tool.targetroot), packagehub = packagehub, + buildKind = tool.buildKind, app = app ).run() } diff --git a/lsif-java/src/main/scala/com/sourcegraph/lsif_java/commands/IndexSemanticdbCommand.scala b/lsif-java/src/main/scala/com/sourcegraph/lsif_java/commands/IndexSemanticdbCommand.scala index 180ca89c..6a037301 100644 --- a/lsif-java/src/main/scala/com/sourcegraph/lsif_java/commands/IndexSemanticdbCommand.scala +++ b/lsif-java/src/main/scala/com/sourcegraph/lsif_java/commands/IndexSemanticdbCommand.scala @@ -38,6 +38,9 @@ final case class IndexSemanticdbCommand( packagehub: Option[String] = None, @Description("Directories that contain SemanticDB files.") @PositionalArguments() targetroot: List[Path] = Nil, + @Description( + "The kind of this build, one of: empty string, jdk, maven" + ) buildKind: String = "", @Inline() app: Application = Application.default ) extends Command { def sourceroot: Path = AbsolutePath.of(app.env.workingDirectory) @@ -72,7 +75,8 @@ final case class IndexSemanticdbCommand( "java", format, parallel, - packages.map(_.toPackageInformation).asJava + packages.map(_.toPackageInformation).asJava, + buildKind ) LsifSemanticdb.run(options) postPackages(packages) diff --git a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifSemanticdb.java b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifSemanticdb.java index 4b91c839..514b2ea7 100644 --- a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifSemanticdb.java +++ b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifSemanticdb.java @@ -97,7 +97,7 @@ private Integer processDocument( .collect(Collectors.toSet()); doc.id = documentId; ResultSets results = - new ResultSets(writer, globals, isExportedSymbol, localDefinitions, packages); + new ResultSets(writer, globals, isExportedSymbol, localDefinitions, packages, options); Set rangeIds = new LinkedHashSet<>(); for (SymbolOccurrence occ : doc.sortedSymbolOccurrences()) { diff --git a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifSemanticdbOptions.java b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifSemanticdbOptions.java index d3181bf7..c592cf5c 100644 --- a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifSemanticdbOptions.java +++ b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/LsifSemanticdbOptions.java @@ -16,6 +16,7 @@ public class LsifSemanticdbOptions { public final LsifOutputFormat format; public final boolean parallel; public final List packages; + public final String buildKind; public LsifSemanticdbOptions( List targetroots, @@ -26,7 +27,8 @@ public LsifSemanticdbOptions( String language, LsifOutputFormat format, boolean parallel, - List packages) { + List packages, + String buildKind) { this.targetroots = targetroots; this.output = output; this.sourceroot = sourceroot; @@ -36,5 +38,6 @@ public LsifSemanticdbOptions( this.format = format; this.parallel = parallel; this.packages = packages; + this.buildKind = buildKind; } } diff --git a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/PackageTable.java b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/PackageTable.java index 7240e6f4..77cdf151 100644 --- a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/PackageTable.java +++ b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/PackageTable.java @@ -46,23 +46,16 @@ public PackageTable(LsifSemanticdbOptions options, LsifWriter writer) throws IOE } } - public void writeImportedSymbol(String symbol, int monikerId) { - packageForSymbol(symbol) - .ifPresent( - pkg -> { - int pkgId = lsif.computeIfAbsent(pkg, this); - writer.emitPackageInformationEdge(monikerId, pkgId); - }); + public void writeMonikerPackage(int monikerId, Package pkg) { + int pkgId = lsif.computeIfAbsent(pkg, this); + writer.emitPackageInformationEdge(monikerId, pkgId); } - private Optional packageForClassfile(String classfile) { - Package result = byClassfile.get(classfile); - if (result != null) return Optional.of(result); - if (!javaVersion.isJava8 && isJrtClassfile(classfile)) return Optional.of(javaVersion.pkg); - return Optional.empty(); + public void writeImportedSymbol(String symbol, int monikerId) { + packageForSymbol(symbol).ifPresent(pkg -> writeMonikerPackage(monikerId, pkg)); } - private Optional packageForSymbol(String symbol) { + public Optional packageForSymbol(String symbol) { return SymbolDescriptor.toplevel(symbol) .flatMap( toplevel -> { @@ -71,6 +64,13 @@ private Optional packageForSymbol(String symbol) { }); } + private Optional packageForClassfile(String classfile) { + Package result = byClassfile.get(classfile); + if (result != null) return Optional.of(result); + if (!javaVersion.isJava8 && isJrtClassfile(classfile)) return Optional.of(javaVersion.pkg); + return Optional.empty(); + } + private void indexPackage(MavenPackage pkg) throws IOException { if (!JAR_PATTERN.matches(pkg.jar)) { return; diff --git a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/ResultSets.java b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/ResultSets.java index c5047b73..092b82ce 100644 --- a/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/ResultSets.java +++ b/lsif-semanticdb/src/main/java/com/sourcegraph/lsif_semanticdb/ResultSets.java @@ -3,6 +3,7 @@ import com.sourcegraph.semanticdb_javac.SemanticdbSymbols; import java.util.HashMap; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.function.Function; @@ -15,18 +16,21 @@ public class ResultSets implements Function { private final Set exportedSymbols; private final Set localDefinitions; private final PackageTable packages; + private final boolean isJdkRepo; public ResultSets( LsifWriter writer, Map globals, Set exportedSymbols, Set localDefinitions, - PackageTable packages) { + PackageTable packages, + LsifSemanticdbOptions options) { this.writer = writer; this.globals = globals; this.exportedSymbols = exportedSymbols; this.localDefinitions = localDefinitions; this.packages = packages; + this.isJdkRepo = options.buildKind.equals("jdk"); locals = new HashMap<>(); } @@ -43,9 +47,20 @@ public ResultIds apply(String symbol) { int resultSet = writer.emitResultSet(); // Moniker + Optional pkg = packages.packageForSymbol(symbol); + if (pkg.isPresent() && pkg.get() instanceof JdkPackage && !isJdkRepo) { + // Never export monikers for the JDK repo unless we're indexing the JDK repo. + // Some Maven packages contain sources that redefine symbols like `java/lang/String#` + // even if the the jar files don't contain `java/lang/String.class`. For example, + // see the package com.google.gwt:gwt-user:2.9.0. + // Related issue: https://github.com/sourcegraph/sourcegraph/issues/21058 + isExportedSymbol = false; + } int monikerId = writer.emitMonikerVertex(symbol, hasDefinitionResult); writer.emitMonikerEdge(resultSet, monikerId); - packages.writeImportedSymbol(symbol, monikerId); + if (pkg.isPresent()) { + packages.writeMonikerPackage(monikerId, pkg.get()); + } int definitionId = hasDefinitionResult ? writer.emitDefinitionResult(resultSet) : -1;