From 29cd3bdba233f355acf2bfbca77625f24abc734e Mon Sep 17 00:00:00 2001 From: Magnus Eide-Fredriksen Date: Thu, 5 Sep 2024 15:42:54 +0200 Subject: [PATCH 01/13] feat: schema documentation markdown fetching in Java --- integration/schema-language-server/.gitignore | 1 + .../language-server/pom.xml | 48 ++++--- .../documentation/ContentFetcher.java | 116 +++++++++++++++++ .../documentation/DocumentationFetcher.java | 85 +++++++++++++ .../SchemaDocumentationFetcher.java | 117 ++++++++++++++++++ .../java/ai/vespa/schemals/FetchDocsTest.java | 25 ++++ 6 files changed, 373 insertions(+), 19 deletions(-) create mode 100644 integration/schema-language-server/.gitignore create mode 100644 integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/ContentFetcher.java create mode 100644 integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/DocumentationFetcher.java create mode 100644 integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/SchemaDocumentationFetcher.java create mode 100644 integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/FetchDocsTest.java diff --git a/integration/schema-language-server/.gitignore b/integration/schema-language-server/.gitignore new file mode 100644 index 000000000000..e660fd93d319 --- /dev/null +++ b/integration/schema-language-server/.gitignore @@ -0,0 +1 @@ +bin/ diff --git a/integration/schema-language-server/language-server/pom.xml b/integration/schema-language-server/language-server/pom.xml index a1bc0e7483b8..0e4713a0a87b 100644 --- a/integration/schema-language-server/language-server/pom.xml +++ b/integration/schema-language-server/language-server/pom.xml @@ -18,6 +18,16 @@ org.eclipse.lsp4j 0.23.1 + + org.jsoup + jsoup + 1.17.2 + + + com.vladsch.flexmark + flexmark-all + 0.64.8 + com.yahoo.vespa config-model @@ -102,25 +112,25 @@ com.github.os72 protoc-jar-maven-plugin - - 
org.codehaus.mojo - exec-maven-plugin - - - - exec - - generate-sources - - ${project.basedir}/src/main/python/buildDocs.sh - - ${project.basedir}/target/generated-resources/hover - - src/main/python/ - - - - + + + + + + + + + + + + + + + + + + + org.congocc org.congocc.maven.plugin diff --git a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/ContentFetcher.java b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/ContentFetcher.java new file mode 100644 index 000000000000..846fe3ecd7aa --- /dev/null +++ b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/ContentFetcher.java @@ -0,0 +1,116 @@ +package ai.vespa.schemals.documentation; + +import java.io.IOException; +import java.util.Collections; +import java.util.Map; +import java.util.Set; + +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.jsoup.nodes.Node; + +import com.vladsch.flexmark.ext.tables.TablesExtension; +import com.vladsch.flexmark.html.renderer.ResolvedLink; +import com.vladsch.flexmark.html2md.converter.FlexmarkHtmlConverter; +import com.vladsch.flexmark.html2md.converter.HtmlLinkResolver; +import com.vladsch.flexmark.html2md.converter.HtmlLinkResolverFactory; +import com.vladsch.flexmark.html2md.converter.HtmlNodeConverterContext; +import com.vladsch.flexmark.parser.Parser; +import com.vladsch.flexmark.util.data.MutableDataHolder; +import com.vladsch.flexmark.util.data.MutableDataSet; + +/** + * ContentFetcher + */ +public abstract class ContentFetcher { + protected final static String URL_PREFIX = "https://docs.vespa.ai"; + + String fileUrl; + + ContentFetcher(String relativeFileUrl) { + this.fileUrl = relativeFileUrl; + } + + abstract Map getMarkdownContent() throws IOException; + + FlexmarkHtmlConverter getHtmlParser() { + MutableDataSet options = new MutableDataSet() + 
.set(FlexmarkHtmlConverter.OUTPUT_ATTRIBUTES_ID, false) + .set(FlexmarkHtmlConverter.SETEXT_HEADINGS, false) + .set(TablesExtension.DISCARD_EXTRA_COLUMNS, true) + .set(Parser.EXTENSIONS, Collections.singletonList(new HtmlConverterTextExtension(URL_PREFIX + fileUrl))); + + return FlexmarkHtmlConverter.builder(options).build(); + } + + static class CustomLinkResolver implements HtmlLinkResolver { + String fileUrl; + + public CustomLinkResolver(HtmlNodeConverterContext context, String fileUrl) { + this.fileUrl = fileUrl; + } + + @Override + public ResolvedLink resolveLink(Node node, HtmlNodeConverterContext context, ResolvedLink link) { + // convert all links from relative to absolute http url. + String curr = link.getUrl(); + + if (curr.startsWith("http")) + return link; + + if (curr.startsWith("#")) + return link.withUrl(fileUrl + curr); + + + return link.withUrl(URL_PREFIX + curr); + } + + static class Factory implements HtmlLinkResolverFactory { + String fileUrl; + + @Nullable + @Override + public Set> getAfterDependents() { + return null; + } + + @Nullable + @Override + public Set> getBeforeDependents() { + return null; + } + + @Override + public boolean affectsGlobalScope() { + return false; + } + + @Override + public HtmlLinkResolver apply(HtmlNodeConverterContext context) { + return new CustomLinkResolver(context, this.fileUrl); + } + + public Factory(String fileUrl) { + this.fileUrl = fileUrl; + } + } + } + + static class HtmlConverterTextExtension implements FlexmarkHtmlConverter.HtmlConverterExtension { + private String fileUrl; + + public HtmlConverterTextExtension(String fileUrl) { + this.fileUrl = fileUrl; + } + + @Override + public void rendererOptions(@NotNull MutableDataHolder options) { + + } + + @Override + public void extend(FlexmarkHtmlConverter.@NotNull Builder builder) { + builder.linkResolverFactory(new CustomLinkResolver.Factory(this.fileUrl)); + } + } +} diff --git 
a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/DocumentationFetcher.java b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/DocumentationFetcher.java new file mode 100644 index 000000000000..8c7d34fed466 --- /dev/null +++ b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/DocumentationFetcher.java @@ -0,0 +1,85 @@ +package ai.vespa.schemals.documentation; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.DataOutputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.HttpURLConnection; +import java.net.URL; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.nodes.Node; +import org.jsoup.parser.Tag; +import org.jsoup.select.Elements; + +import com.vladsch.flexmark.ext.tables.TablesExtension; +import com.vladsch.flexmark.html.renderer.ResolvedLink; +import com.vladsch.flexmark.html2md.converter.FlexmarkHtmlConverter; +import com.vladsch.flexmark.html2md.converter.HtmlLinkResolver; +import com.vladsch.flexmark.html2md.converter.HtmlLinkResolverFactory; +import com.vladsch.flexmark.html2md.converter.HtmlNodeConverterContext; +import com.vladsch.flexmark.parser.Parser; +import com.vladsch.flexmark.util.data.MutableDataHolder; +import com.vladsch.flexmark.util.data.MutableDataSet; + +/** + * DocumentationFetcher + */ +public class DocumentationFetcher { + private final static String SCHEMA_URL = "/en/reference/schema-reference.html"; + 
private final static String RANK_EXPRESSION_URL = "/en/reference/rank-features.html"; + + private final static Map> REPLACE_FILENAME_MAP = new HashMap<>(){{ + put("EXPRESSION", List.of( "EXPRESSION_SL", "EXPRESSION_ML" )); + put("RANK_FEATURES", List.of( "RANKFEATURES_SL", "RANKFEATURES_ML" )); + put("FUNCTION (INLINE)? [NAME]", List.of( "FUNCTION" )); + put("SUMMARY_FEATURES", List.of( "SUMMARYFEATURES_SL", "SUMMARYFEATURES_ML", "SUMMARYFEATURES_ML_INHERITS" )); + put("MATCH_FEATURES", List.of( "MATCHFEATURES_SL", "MATCHFEATURES_ML", "MATCHFEATURES_SL_INHERITS" )); + put("IMPORT FIELD", List.of( "IMPORT" )); + }}; + + public static String fetchDocs() throws IOException { + Path targetPath = Paths.get("").resolve("target").resolve("generated-resources").resolve("hover"); + Files.createDirectories(targetPath); + Files.createDirectories(targetPath.resolve("schema")); + Files.createDirectories(targetPath.resolve("rankExpression")); + + Path writePath = targetPath.resolve("schema"); + + Map schemaMarkdownContent = new SchemaDocumentationFetcher(SCHEMA_URL).getMarkdownContent(); + + for (var entry : schemaMarkdownContent.entrySet()) { + String fileName = convertToToken(entry.getKey()); + String content = entry.getValue(); + + if (REPLACE_FILENAME_MAP.containsKey(fileName)) { + for (String replacedFileName : REPLACE_FILENAME_MAP.get(fileName)) { + Files.write(writePath.resolve(replacedFileName + ".md"), content.getBytes(), StandardOpenOption.CREATE); + } + } else { + Files.write(writePath.resolve(fileName + ".md"), content.getBytes(), StandardOpenOption.CREATE); + } + } + + return "LGTM"; + } + + private static String convertToToken(String h2Id) { + return h2Id.toUpperCase().replaceAll("-", "_"); + } +} diff --git a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/SchemaDocumentationFetcher.java b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/SchemaDocumentationFetcher.java 
new file mode 100644 index 000000000000..ff8ece2d32a3 --- /dev/null +++ b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/SchemaDocumentationFetcher.java @@ -0,0 +1,117 @@ +package ai.vespa.schemals.documentation; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.nodes.Node; +import org.jsoup.parser.Tag; + +import com.vladsch.flexmark.html2md.converter.FlexmarkHtmlConverter; + +/** + * SchemaDocumentationFetcher + */ +public class SchemaDocumentationFetcher extends ContentFetcher { + + SchemaDocumentationFetcher(String relativeFileUrl) { + super(relativeFileUrl); + } + + @Override + Map getMarkdownContent() throws IOException { + Document schemaDoc = Jsoup.connect(ContentFetcher.URL_PREFIX + this.fileUrl).get(); + + Element prevH2 = null; + + Node nodeIterator = schemaDoc.selectFirst("h2#schema"); + + Map htmlContents = new HashMap<>(); + + for (; nodeIterator != null; nodeIterator = nodeIterator.nextSibling()) { + Element element = null; + if (nodeIterator instanceof Element) { + element = (Element)nodeIterator; + + if (element.tag().equals(Tag.valueOf("h2"))) { + if (!element.id().equals("syntax") + && !element.id().equals("elements")) { + prevH2 = element; + } else { + prevH2 = null; + } + } + } + if (prevH2 == null) continue; + + String contentKey = prevH2.text(); + + if (!htmlContents.containsKey(contentKey)) { + htmlContents.put(contentKey, new StringBuilder().append(prevH2.outerHtml())); + continue; + } + StringBuilder currentBuilder = htmlContents.get(contentKey); + + currentBuilder.append("\n"); + + if (element == null) { + if (!nodeIterator.toString().isBlank()) + currentBuilder.append(nodeIterator.toString()); + continue; + } + + if (element.tag().equals(Tag.valueOf("table"))) { + Element tbody = element.selectFirst("tbody"); + // replace all in tbody with + 
tbody.select("th").tagName("td"); + + // some tables have very big texts in td. For our purposes, only keep the first sentence. + if (prevH2.id().equals("field")) + manuallyFixFieldTable(tbody); + } + + currentBuilder.append(element.outerHtml()); + } + + Map result = new HashMap<>(); + + FlexmarkHtmlConverter converter = this.getHtmlParser(); + + for (var entry : htmlContents.entrySet()) { + String md = converter.convert(entry.getValue().toString()); + + // Edge case occuring at "bolding" html, don't know why. + md = md.replaceAll("````\n", ""); + + result.put(entry.getKey(), md); + } + return result; + } + + private static void manuallyFixFieldTable(Element tbodyElement) { + for (Element td : tbodyElement.select("tr td:nth-child(2)")) { + String curr = td.html(); + int level = 0; + int i; + for (i = 0; i < curr.length(); ++i) { + if (( + (curr.charAt(i) == '.' && !curr.substring(i-1, Math.min(curr.length(), i+3)).equals("i.e.") && !curr.substring(i-3,i+1).equals("i.e.")) + || curr.substring(i).startsWith("") + || curr.substring(i).startsWith("
") 
+                    || curr.charAt(i) == ':') && level == 0) {
+                    break;
+                }
+                if (curr.charAt(i) == '(')++level;
+                if (curr.charAt(i) == ')')--level;
+                if (curr.charAt(i) == '<')++level;
+                if (curr.charAt(i) == '>')--level;
+            }
+            String firstSentence = curr.substring(0, i) + ".";
+            td.html(firstSentence);
+        }
+    }
+    
+}
diff --git a/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/FetchDocsTest.java b/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/FetchDocsTest.java
new file mode 100644
index 000000000000..d6b865e425df
--- /dev/null
+++ b/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/FetchDocsTest.java
@@ -0,0 +1,25 @@
+package ai.vespa.schemals;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+
+import java.io.IOException;
+
+import org.junit.jupiter.api.Test;
+
+import ai.vespa.schemals.documentation.DocumentationFetcher;
+
+/**
+ * FetchDocsTest
+ */
+public class FetchDocsTest {
+    @Test
+    public void testFetchDocs() {
+        try {
+            String result = DocumentationFetcher.fetchDocs();
+            assertEquals(0, 1, result);
+        } catch(IOException ioe) {
+            assertEquals(0, 1, ioe.getMessage());
+        }
+    }
+}

From 3e8b6b31eed7f1dcac48e39c6eed02ccab9242f4 Mon Sep 17 00:00:00 2001
From: Magnus Eide-Fredriksen 
Date: Thu, 5 Sep 2024 16:38:29 +0200
Subject: [PATCH 02/13] feat: rank feature docs fetching in java

---
 .../documentation/ContentFetcher.java         | 15 +++++--
 .../documentation/DocumentationFetcher.java   | 39 ++++------------
 .../RankFeatureDocumentationFetcher.java      | 45 +++++++++++++++++++
 3 files changed, 66 insertions(+), 33 deletions(-)
 create mode 100644 integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/RankFeatureDocumentationFetcher.java

diff --git a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/ContentFetcher.java b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/ContentFetcher.java
index 846fe3ecd7aa..b430d1104c2a 100644
--- a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/ContentFetcher.java
+++ b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/ContentFetcher.java
@@ -1,6 +1,7 @@
 package ai.vespa.schemals.documentation;
 
 import java.io.IOException;
+import java.net.URI;
 import java.util.Collections;
 import java.util.Map;
 import java.util.Set;
@@ -21,9 +22,10 @@
 
 /**
  * ContentFetcher
+ * Common logic for setting options for HTML -> Markdown converter and link resolving.
  */
 public abstract class ContentFetcher {
-    protected final static String URL_PREFIX = "https://docs.vespa.ai";
+    protected final static String URL_PREFIX = "https://docs.vespa.ai/";
 
     String fileUrl;
 
@@ -38,7 +40,7 @@ FlexmarkHtmlConverter getHtmlParser() {
             .set(FlexmarkHtmlConverter.OUTPUT_ATTRIBUTES_ID, false)
             .set(FlexmarkHtmlConverter.SETEXT_HEADINGS, false)
             .set(TablesExtension.DISCARD_EXTRA_COLUMNS, true)
-            .set(Parser.EXTENSIONS, Collections.singletonList(new HtmlConverterTextExtension(URL_PREFIX + fileUrl)));
+            .set(Parser.EXTENSIONS, Collections.singletonList(new HtmlConverterTextExtension(fileUrl)));
 
         return FlexmarkHtmlConverter.builder(options).build();
     }
@@ -58,9 +60,16 @@ public ResolvedLink resolveLink(Node node, HtmlNodeConverterContext context, Res
             if (curr.startsWith("http")) 
                 return link;
 
+            try {
+                return link.withUrl(new URI(URL_PREFIX).resolve(fileUrl).resolve(curr).toString());
+            } catch(Exception e) {
+            }
+
             if (curr.startsWith("#"))
-                return link.withUrl(fileUrl + curr);
+                return link.withUrl(URL_PREFIX + fileUrl + curr);
 
+            if (curr.startsWith("."))
+                return link.withUrl(URL_PREFIX + fileUrl + curr);
 
             return link.withUrl(URL_PREFIX + curr);
         }
diff --git a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/DocumentationFetcher.java b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/DocumentationFetcher.java
index 8c7d34fed466..246e4e3ab8c7 100644
--- a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/DocumentationFetcher.java
+++ b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/DocumentationFetcher.java
@@ -1,48 +1,20 @@
 package ai.vespa.schemals.documentation;
 
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.DataOutputStream;
-import java.io.FileWriter;
 import java.io.IOException;
-import java.io.InputStreamReader;
-import java.net.HttpURLConnection;
-import java.net.URL;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.nio.file.StandardOpenOption;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
-
-import org.jetbrains.annotations.NotNull;
-import org.jetbrains.annotations.Nullable;
-import org.jsoup.Jsoup;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.nodes.Node;
-import org.jsoup.parser.Tag;
-import org.jsoup.select.Elements;
-
-import com.vladsch.flexmark.ext.tables.TablesExtension;
-import com.vladsch.flexmark.html.renderer.ResolvedLink;
-import com.vladsch.flexmark.html2md.converter.FlexmarkHtmlConverter;
-import com.vladsch.flexmark.html2md.converter.HtmlLinkResolver;
-import com.vladsch.flexmark.html2md.converter.HtmlLinkResolverFactory;
-import com.vladsch.flexmark.html2md.converter.HtmlNodeConverterContext;
-import com.vladsch.flexmark.parser.Parser;
-import com.vladsch.flexmark.util.data.MutableDataHolder;
-import com.vladsch.flexmark.util.data.MutableDataSet;
 
 /**
  * DocumentationFetcher
  */
 public class DocumentationFetcher {
-    private final static String SCHEMA_URL = "/en/reference/schema-reference.html";
-    private final static String RANK_EXPRESSION_URL = "/en/reference/rank-features.html";
+    private final static String SCHEMA_URL = "en/reference/schema-reference.html";
+    private final static String RANK_FEATURE_URL = "en/reference/rank-features.html";
 
     private final static Map> REPLACE_FILENAME_MAP = new HashMap<>(){{
         put("EXPRESSION", List.of( "EXPRESSION_SL", "EXPRESSION_ML" ));
@@ -76,6 +48,13 @@ public static String fetchDocs() throws IOException {
             }
         }
 
+        Map rankFeatureMarkdownContent = new RankFeatureDocumentationFetcher(RANK_FEATURE_URL).getMarkdownContent();
+
+        writePath = targetPath.resolve("rankExpression");
+        for (var entry : rankFeatureMarkdownContent.entrySet()) {
+            Files.write(writePath.resolve(entry.getKey() + ".md"), entry.getValue().getBytes(), StandardOpenOption.CREATE);
+        }
+
         return "LGTM";
     }
 
diff --git a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/RankFeatureDocumentationFetcher.java b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/RankFeatureDocumentationFetcher.java
new file mode 100644
index 000000000000..31e3381c6a72
--- /dev/null
+++ b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/RankFeatureDocumentationFetcher.java
@@ -0,0 +1,45 @@
+package ai.vespa.schemals.documentation;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import com.vladsch.flexmark.html2md.converter.FlexmarkHtmlConverter;
+
+/**
+ * RankFeatureDocumentationFetcher
+ */
+public class RankFeatureDocumentationFetcher extends ContentFetcher {
+
+	RankFeatureDocumentationFetcher(String relativeFileUrl) {
+		super(relativeFileUrl);
+	}
+
+	@Override
+	Map getMarkdownContent() throws IOException {
+        Document document = Jsoup.connect(ContentFetcher.URL_PREFIX + this.fileUrl).get();
+        Element tableElement = document.selectFirst("table.table");
+
+        Elements trs = tableElement.select("tr:has(> td:nth-child(3)):not(:has(> td:nth-child(4)))");
+
+        Map result = new HashMap<>();
+
+        FlexmarkHtmlConverter converter = this.getHtmlParser();
+
+        for (Element tr : trs) {
+            String name = tr.child(0).text();
+            name = name.replaceAll(", ", ",").replaceAll("input_1,input_2,...", "input,...");
+
+            String content = "## " + name + "\n" + converter.convert(tr.child(2).html());
+            content += "\nDefault: " + tr.child(1).text();
+            result.put(name, content);
+        }
+
+        return result;
+	}
+}

From 2303c554f08aba58ca4f1299b63e767c8d2282eb Mon Sep 17 00:00:00 2001
From: Magnus Eide-Fredriksen 
Date: Thu, 5 Sep 2024 17:51:56 +0200
Subject: [PATCH 03/13] feat: fetch documentation in the background upon server
 start

---
 .../clients/vscode/.vscodeignore              |  1 +
 .../clients/vscode/package.json               |  2 +-
 .../language-server/.gitignore                |  3 +-
 .../language-server/pom.xml                   |  7 ----
 .../vespa/schemals/SchemaLanguageServer.java  | 33 ++++++++++++++++++-
 .../documentation/ContentFetcher.java         |  1 +
 ...onFetcher.java => FetchDocumentation.java} |  7 ++--
 .../RankFeatureDocumentationFetcher.java      |  2 +-
 .../SchemaDocumentationFetcher.java           | 31 +++++++++++++----
 .../java/ai/vespa/schemals/FetchDocsTest.java |  6 ++--
 10 files changed, 68 insertions(+), 25 deletions(-)
 rename integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/{DocumentationFetcher.java => FetchDocumentation.java} (91%)

diff --git a/integration/schema-language-server/clients/vscode/.vscodeignore b/integration/schema-language-server/clients/vscode/.vscodeignore
index 8624a5cf3e62..d462e3b38116 100644
--- a/integration/schema-language-server/clients/vscode/.vscodeignore
+++ b/integration/schema-language-server/clients/vscode/.vscodeignore
@@ -9,3 +9,4 @@ src/**
 **/*.ts
 node_modules
 esbuild.js
+out/
diff --git a/integration/schema-language-server/clients/vscode/package.json b/integration/schema-language-server/clients/vscode/package.json
index a6d2ec47abbb..2fb1240dac05 100644
--- a/integration/schema-language-server/clients/vscode/package.json
+++ b/integration/schema-language-server/clients/vscode/package.json
@@ -23,7 +23,7 @@
   },
   "icon": "images/icon.png",
   "activationEvents": [],
-  "main": "./out/extension.js",
+  "main": "./dist/extension.js",
   "contributes": {
     "languages": [
       {
diff --git a/integration/schema-language-server/language-server/.gitignore b/integration/schema-language-server/language-server/.gitignore
index 83dd0a1c2e68..241492c9ac21 100644
--- a/integration/schema-language-server/language-server/.gitignore
+++ b/integration/schema-language-server/language-server/.gitignore
@@ -1,4 +1,5 @@
 effective-pom.xml
 debug.log
 .settings/
-__pycache__
\ No newline at end of file
+__pycache__
+tmp/
diff --git a/integration/schema-language-server/language-server/pom.xml b/integration/schema-language-server/language-server/pom.xml
index 0e4713a0a87b..4b8a19ecf017 100644
--- a/integration/schema-language-server/language-server/pom.xml
+++ b/integration/schema-language-server/language-server/pom.xml
@@ -70,13 +70,6 @@
     
   
   
-
-    
-      
-        target/generated-resources
-      
-    
-
     
       
         org.apache.maven.plugins
diff --git a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/SchemaLanguageServer.java b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/SchemaLanguageServer.java
index d628d2611466..c9958aa70d67 100644
--- a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/SchemaLanguageServer.java
+++ b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/SchemaLanguageServer.java
@@ -1,5 +1,8 @@
 package ai.vespa.schemals;
 
+import java.io.File;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 import java.util.List;
 import java.util.concurrent.CompletableFuture;
 
@@ -24,6 +27,7 @@
 import org.eclipse.lsp4j.services.WorkspaceService;
 
 import ai.vespa.schemals.common.ClientLogger;
+import ai.vespa.schemals.documentation.FetchDocumentation;
 import ai.vespa.schemals.index.SchemaIndex;
 import ai.vespa.schemals.lsp.command.CommandRegistry;
 import ai.vespa.schemals.lsp.semantictokens.SchemaSemanticTokens;
@@ -35,6 +39,8 @@
  */
 public class SchemaLanguageServer implements LanguageServer, LanguageClientAware {
 
+    public static Path serverPath = null;
+
     private WorkspaceService workspaceService;
     private SchemaTextDocumentService textDocumentService;
     private SchemaDocumentScheduler schemaDocumentScheduler;
@@ -64,7 +70,7 @@ public SchemaLanguageServer() {
 
         this.textDocumentService = new SchemaTextDocumentService(this.logger, schemaDocumentScheduler, schemaIndex, schemaMessageHandler);
         this.workspaceService = new SchemaWorkspaceService(this.logger, schemaDocumentScheduler, schemaIndex, schemaMessageHandler);
-
+        serverPath = Paths.get(java.net.URI.create(SchemaLanguageServer.class.getProtectionDomain().getCodeSource().getLocation().toString())).getParent(); // resolve via URI: URL.getPath() keeps percent-escapes and yields "/C:/..." on Windows, which Paths.get(String) rejects
     }    
 
     @Override
@@ -142,5 +148,30 @@ public void connect(LanguageClient languageClient) {
         this.schemaMessageHandler.connectClient(languageClient);
         this.client.logMessage(new MessageParams(MessageType.Log, "Language Server successfully connected to client."));
 
+        if (serverPath == null) return;
+
+        // Start a document fetching job in the background.
+        Path docPath = serverPath.resolve("hover");
+
+        Thread thread = new Thread() { // runs the documentation fetch off the calling thread
+            @Override
+            public void run() {
+                try {
+                    FetchDocumentation.fetchDocs(docPath);
+                } catch(Exception e) {
+                    throw new RuntimeException(e.getMessage(), e); // rethrow unchecked, keeping the original exception as the cause
+                }
+            }
+        };
+        var logger = this.logger;
+        thread.setUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() {
+            @Override
+            public void uncaughtException(Thread th, Throwable ex) {
+                logger.error("Failed to fetch docs:" + ex.getMessage());
+            }
+        });
+        logger.info("Fetching docs to path: " + docPath.toAbsolutePath().toString());
+        thread.start();
+
     }
 }
diff --git a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/ContentFetcher.java b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/ContentFetcher.java
index b430d1104c2a..e92f7c18e1a2 100644
--- a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/ContentFetcher.java
+++ b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/ContentFetcher.java
@@ -23,6 +23,7 @@
 /**
  * ContentFetcher
  * Common logic for setting options for HTML -> Markdown converter and link resolving.
+ * Override {@link #getMarkdownContent} to return a set of key/value pairs, where key gives a markdown file name, and value is markdown content.
  */
 public abstract class ContentFetcher {
     protected final static String URL_PREFIX = "https://docs.vespa.ai/";
diff --git a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/DocumentationFetcher.java b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/FetchDocumentation.java
similarity index 91%
rename from integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/DocumentationFetcher.java
rename to integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/FetchDocumentation.java
index 246e4e3ab8c7..87082ff7234a 100644
--- a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/DocumentationFetcher.java
+++ b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/FetchDocumentation.java
@@ -12,7 +12,7 @@
 /**
  * DocumentationFetcher
  */
-public class DocumentationFetcher {
+public class FetchDocumentation {
     private final static String SCHEMA_URL = "en/reference/schema-reference.html";
     private final static String RANK_FEATURE_URL = "en/reference/rank-features.html";
 
@@ -25,8 +25,7 @@ public class DocumentationFetcher {
         put("IMPORT FIELD", List.of( "IMPORT" ));
     }};
 
-    public static String fetchDocs() throws IOException {
-        Path targetPath = Paths.get("").resolve("target").resolve("generated-resources").resolve("hover");
+    public static void fetchDocs(Path targetPath) throws IOException {
         Files.createDirectories(targetPath);
         Files.createDirectories(targetPath.resolve("schema"));
         Files.createDirectories(targetPath.resolve("rankExpression"));
@@ -54,8 +53,6 @@ public static String fetchDocs() throws IOException {
         for (var entry : rankFeatureMarkdownContent.entrySet()) {
             Files.write(writePath.resolve(entry.getKey() + ".md"), entry.getValue().getBytes(), StandardOpenOption.CREATE);
         }
-
-        return "LGTM";
     }
 
     private static String convertToToken(String h2Id) {
diff --git a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/RankFeatureDocumentationFetcher.java b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/RankFeatureDocumentationFetcher.java
index 31e3381c6a72..a08c5febf4de 100644
--- a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/RankFeatureDocumentationFetcher.java
+++ b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/RankFeatureDocumentationFetcher.java
@@ -35,7 +35,7 @@ Map getMarkdownContent() throws IOException {
             String name = tr.child(0).text();
             name = name.replaceAll(", ", ",").replaceAll("input_1,input_2,...", "input,...");
 
-            String content = "## " + name + "\n" + converter.convert(tr.child(2).html());
+            String content = converter.convert(tr.child(2).html());
             content += "\nDefault: " + tr.child(1).text();
             result.put(name, content);
         }
diff --git a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/SchemaDocumentationFetcher.java b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/SchemaDocumentationFetcher.java
index ff8ece2d32a3..c84412702381 100644
--- a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/SchemaDocumentationFetcher.java
+++ b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/SchemaDocumentationFetcher.java
@@ -1,8 +1,10 @@
 package ai.vespa.schemals.documentation;
 
 import java.io.IOException;
+import java.net.URI;
 import java.util.HashMap;
 import java.util.Map;
+import java.util.Set;
 
 import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
@@ -17,10 +19,20 @@
  */
 public class SchemaDocumentationFetcher extends ContentFetcher {
 
+    // Ids of h2 headings for which getMarkdownContent() resets prevH2 to null.
+    private final static Set<String> IGNORE_H2_IDS = Set.of(
+        "syntax", "elements",
+        "document-and-search-field-types",
+        "modifying-schemas"
+    );
+
 	SchemaDocumentationFetcher(String relativeFileUrl) {
 		super(relativeFileUrl);
 	}
 
+    // Store html content as well as id of h2 element. The id is used to append a read more link at the end.
+    private record HTMLContentEntry(StringBuilder htmlContent, String h2ID) { }
+
 	@Override
 	Map getMarkdownContent() throws IOException {
         Document schemaDoc = Jsoup.connect(ContentFetcher.URL_PREFIX + this.fileUrl).get();
@@ -29,7 +41,7 @@ Map getMarkdownContent() throws IOException {
 
         Node nodeIterator = schemaDoc.selectFirst("h2#schema");
 
-        Map htmlContents = new HashMap<>();
+        Map<String, HTMLContentEntry> htmlContents = new HashMap<>(); // h2 text -> collected HTML plus the id of that h2
 
         for (; nodeIterator != null; nodeIterator = nodeIterator.nextSibling()) {
             Element element = null;
@@ -37,8 +49,7 @@ Map getMarkdownContent() throws IOException {
                 element = (Element)nodeIterator;
 
                 if (element.tag().equals(Tag.valueOf("h2"))) {
-                    if (!element.id().equals("syntax")
-                        && !element.id().equals("elements")) {
+                    if (!IGNORE_H2_IDS.contains(element.id())) {
                         prevH2 = element;
                     } else {
                         prevH2 = null;
@@ -50,10 +61,12 @@ Map getMarkdownContent() throws IOException {
             String contentKey = prevH2.text();
 
             if (!htmlContents.containsKey(contentKey)) {
-                htmlContents.put(contentKey, new StringBuilder().append(prevH2.outerHtml()));
+                htmlContents.put(contentKey,
+                    new HTMLContentEntry(new StringBuilder().append(prevH2.outerHtml()), prevH2.id())
+                );
                 continue;
             }
-            StringBuilder currentBuilder = htmlContents.get(contentKey);
+            StringBuilder currentBuilder = htmlContents.get(contentKey).htmlContent();
 
             currentBuilder.append("\n");
 
@@ -81,7 +94,13 @@ Map getMarkdownContent() throws IOException {
         FlexmarkHtmlConverter converter = this.getHtmlParser();
 
         for (var entry : htmlContents.entrySet()) {
-            String md = converter.convert(entry.getValue().toString());
+            StringBuilder htmlContent = entry.getValue().htmlContent();
+            String h2id = entry.getValue().h2ID();
+
+            // Append the link as an HTML anchor so it is converted to markdown together with the section body.
+            URI readMoreLink = URI.create(ContentFetcher.URL_PREFIX).resolve(fileUrl).resolve("#" + h2id);
+            htmlContent.append("<a href=\"" + readMoreLink + "\">Read more</a>");
+            String md = converter.convert(htmlContent.toString());
 
-            // Edge case occuring at "bolding" html, don't know why.
+            // Edge case occurring at "bolding" html, don't know why.
             md = md.replaceAll("````\n", "");
diff --git a/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/FetchDocsTest.java b/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/FetchDocsTest.java
index d6b865e425df..cf43e51dddad 100644
--- a/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/FetchDocsTest.java
+++ b/integration/schema-language-server/language-server/src/test/java/ai/vespa/schemals/FetchDocsTest.java
@@ -4,10 +4,11 @@
 import static org.junit.jupiter.api.Assertions.assertNotEquals;
 
 import java.io.IOException;
+import java.nio.file.Paths;
 
 import org.junit.jupiter.api.Test;
 
-import ai.vespa.schemals.documentation.DocumentationFetcher;
+import ai.vespa.schemals.documentation.FetchDocumentation;
 
 /**
  * FetchDocsTest
@@ -16,8 +17,7 @@ public class FetchDocsTest {
     @Test
     public void testFetchDocs() {
         try {
-            String result = DocumentationFetcher.fetchDocs();
-            assertEquals(0, 1, result);
+            FetchDocumentation.fetchDocs(Paths.get("").resolve("tmp").resolve("generated-resources").resolve("hover"));
         } catch(IOException ioe) {
             assertEquals(0, 1, ioe.getMessage());
         }

From e9ea3fff235bb511173ea176bfeecd4e54490881 Mon Sep 17 00:00:00 2001
From: Magnus Eide-Fredriksen 
Date: Thu, 5 Sep 2024 18:03:56 +0200
Subject: [PATCH 04/13] feat: no python in build step

---
 .../vespa/schemals/lsp/hover/SchemaHover.java |  29 ++-
 .../language-server/src/main/python/Node.py   | 209 ------------------
 .../main/python/RankExpressionHTMLParser.py   | 119 ----------
 .../src/main/python/VespaDocHTMLParser.py     |  89 --------
 .../src/main/python/buildDocs.py              |  96 --------
 .../src/main/python/buildDocs.sh              |   7 -
 .../src/main/python/requirements.txt          |   2 -
 7 files changed, 18 insertions(+), 533 deletions(-)
 delete mode 100644 integration/schema-language-server/language-server/src/main/python/Node.py
 delete mode 100644 integration/schema-language-server/language-server/src/main/python/RankExpressionHTMLParser.py
 delete mode 100644 integration/schema-language-server/language-server/src/main/python/VespaDocHTMLParser.py
 delete mode 100644 integration/schema-language-server/language-server/src/main/python/buildDocs.py
 delete mode 100755 integration/schema-language-server/language-server/src/main/python/buildDocs.sh
 delete mode 100644 integration/schema-language-server/language-server/src/main/python/requirements.txt

diff --git a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/lsp/hover/SchemaHover.java b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/lsp/hover/SchemaHover.java
index 8f2819fb9185..b99ce1ffc549 100644
--- a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/lsp/hover/SchemaHover.java
+++ b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/lsp/hover/SchemaHover.java
@@ -1,8 +1,11 @@
 package ai.vespa.schemals.lsp.hover;
 
 import java.io.BufferedReader;
+import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
+import java.nio.file.Files;
+import java.nio.file.Path;
 import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.List;
@@ -15,6 +18,7 @@
 import org.eclipse.lsp4j.MarkupKind;
 import org.eclipse.lsp4j.Range;
 
+import ai.vespa.schemals.SchemaLanguageServer;
 import ai.vespa.schemals.context.EventPositionContext;
 import ai.vespa.schemals.index.Symbol;
 import ai.vespa.schemals.index.Symbol.SymbolStatus;
@@ -341,19 +345,22 @@ public static Optional getFileHoverInformation(String markdownKey, Range
             }
         }
 
-        String fileName = markdownPathRoot + markdownKey + ".md";
-        InputStream inputStream = Thread.currentThread().getContextClassLoader().getResourceAsStream(fileName);
 
-        if (inputStream == null) {
-            markdownContentCache.put(markdownKey, Optional.empty());
+        // Hover markdown is read from <serverPath>/hover; without a server path there is nothing to read.
+        if (SchemaLanguageServer.serverPath == null) return Optional.empty();
+
+        Path markdownPath = SchemaLanguageServer.serverPath.resolve("hover").resolve(markdownKey + ".md");
+
+        try {
+            MarkupContent mdContent = new MarkupContent(MarkupKind.MARKDOWN, Files.readString(markdownPath));
+
+            // Remember the content for this key before answering.
+            markdownContentCache.put(markdownKey, Optional.of(mdContent));
+            return Optional.of(new Hover(mdContent, range));
+
+        } catch(IOException ex) {
+            // File missing or unreadable: no hover, and the miss is not stored in markdownContentCache.
             return Optional.empty();
         }
-        BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
-        String markdown = reader.lines().collect(Collectors.joining(System.lineSeparator()));
-
-        MarkupContent mdContent = new MarkupContent(MarkupKind.MARKDOWN, markdown);
-        Hover hover = new Hover(mdContent, range);
-		markdownContentCache.put(markdownKey, Optional.of(mdContent));
-        return Optional.of(hover);
     }
 }
diff --git a/integration/schema-language-server/language-server/src/main/python/Node.py b/integration/schema-language-server/language-server/src/main/python/Node.py
deleted file mode 100644
index 8359f2de47eb..000000000000
--- a/integration/schema-language-server/language-server/src/main/python/Node.py
+++ /dev/null
@@ -1,209 +0,0 @@
-from urllib.parse import urljoin
-
-class Node:
-    tag: str
-    attrs: list = []
-    children: list = []
-    link: str = ""
-
-    def __init__(self, tag: str, linkPrefix: str = "", attrs = []):
-        self.tag = tag
-        self.children = []
-        
-        self.attrs = []
-
-        for attrTuple in attrs:
-            attr = list(attrTuple)
-            if attr[0] == "href":
-                attr[1] = urljoin(linkPrefix, attr[1])
-            
-            self.attrs.append(attr)
-        
-        self.link = linkPrefix
-    
-    def addChild(self, child):
-        self.children.append(child)
-
-        if type(child) == Node:
-            child.close()
-    
-    def closeNoteNode(self):
-
-        singleQuote = False
-
-        for i, child in enumerate(self.children):
-            if type(child) == str:
-
-
-                childSplitted = child.split("content=\"")
-                if (len(childSplitted) == 1):
-                    childSplitted = child.split("content='")
-                    singleQuote = True
-
-                includeSplit = childSplitted[0].split("include")
-                if len(includeSplit) <= 1:
-                    return
-                includeFile = includeSplit[1].strip().split(".")[0]
-                
-                self.children[i] = f"*{includeFile.upper()}:* " + "".join(childSplitted[1:])
-
-                break
-        
-        for i, child in reversed(list(enumerate(self.children))):
-            if type(child) == str:
-
-                splitChar = "'" if singleQuote else "\""
-                
-                self.children[i] = "".join(child.split(splitChar)[:-1])
-
-                break
-    
-    def close(self):
-
-        if (self.tag == "note"):
-            return self.closeNoteNode()
-
-        noteNode = None
-
-        newChildren = []
-        
-        for child in self.children:
-
-            if noteNode is None:
-                newChildren.append(child)
-            else:
-                noteNode.addChild(child)
-
-
-            if type(child) == str:
-                
-                if "{%" in child and noteNode is None:
-                    newChildren.pop()
-                    noteNode = Node("note")
-                    childSplitted = child.split("{%")
-
-                    if len(childSplitted[0]) > 0:
-                        newChildren.append(childSplitted[0])
-                    
-                    if len(childSplitted[1]) > 0:
-
-                        toNoteNode = childSplitted[1]
-
-                        if "%}" in childSplitted[1]:
-                            toNoteNode = childSplitted[1].split("%}")[0]
-                        
-                        if len(toNoteNode) > 0:
-                            noteNode.addChild(toNoteNode)
-                    
-                if "%}" in child and noteNode is not None:
-                    childSplitted = child.split("%}")
-                    noteNode.children.pop()
-
-                    if len(childSplitted[0]) > 0:
-                        toNoteNode = childSplitted[0] 
-                        if "{%" in childSplitted[0]:
-                            toNoteNode = childSplitted[0].split("{%")[1]
-                        
-                        if len(toNoteNode) > 0:
-                            noteNode.addChild(toNoteNode)
-
-                    newChildren.append(noteNode)
-                    noteNode.close()
-
-                    noteNode = None
-
-                    if len(childSplitted[1]) > 0:
-                        newChildren.append(childSplitted[1])
-        
-        if noteNode is not None:
-            newChildren.append(noteNode)
-            noteNode.close()
-        
-        self.children = newChildren
-    
-    def __str__(self) -> str:
-        ret = f"<{self.tag}>"
-
-        # for child in self.children:
-        #     ret += "\n\t" + str(child).replace('\n', '\n\t')
-
-        return ret
-
-    def __repr__(self) -> str:
-        return f"Node({self.tag}, {len(self.children)})"
-
-    def toHTML(self) -> str:
-        attrsStr = ""
-        for a in self.attrs:
-            attrsStr += f" {a[0]}=\"{a[1]}\""
-
-        ret = f"\n<{self.tag}{attrsStr}>\n"
-
-        for child in self.children:
-            data = ""
-            if (type(child) == Node):
-                data = child.toHTML()
-            else:
-                data = child
-            
-            ret += "\t" + data.replace('\n', '\n\t')
-
-        ret += f"\n\n"
-
-        return ret
-
-    def getContentStr(self) -> str:
-        ret = ""
-
-        for child in self.children:
-            ret += child.__str__()
-        
-        return ret
-
-    def getAttr(self, type: str) -> str:
-        for attr in self.attrs:
-            if (attr[0] == type):
-                return attr[1]
-        
-        return ""
-    
-    def toMarkdown(self, readMoreLink = False) -> str:
-        ret = ""
-
-        if (self.tag == "h2"):
-            return "## " + self.getContentStr() + "\n"
-        
-        if (self.tag == "a"):
-            return f"[{self.getContentStr()}]({self.getAttr('href')})"
-        
-        if (self.tag == "pre"):
-            ret += "```"
-        
-        if (self.tag == "code"):
-            ret += "`"
-
-        for child in self.children:
-            if (type(child) == Node):
-                ret += child.toMarkdown(readMoreLink)
-            else:
-                ret += child
-        
-        if (self.tag == "pre"):
-            ret += "``` \n"
-        
-        if (self.tag == "code"):
-            ret += "`"
-
-        if (self.tag == "parent" and len(self.link) > 0 and readMoreLink):
-            ret += f"\n[Read more]({self.link})"
-        
-        if self.tag == "note":
-            return "\n> " + ret.replace("\n", "\n> ") + "\n"
-        
-        return ret
-
-    def getName(self):
-        if (self.tag != "parent"):
-            raise Exception("The getFilename method should only be called on the parent Node")
-
-        return self.children[0].getContentStr()
\ No newline at end of file
diff --git a/integration/schema-language-server/language-server/src/main/python/RankExpressionHTMLParser.py b/integration/schema-language-server/language-server/src/main/python/RankExpressionHTMLParser.py
deleted file mode 100644
index ff9f5d38a89a..000000000000
--- a/integration/schema-language-server/language-server/src/main/python/RankExpressionHTMLParser.py
+++ /dev/null
@@ -1,119 +0,0 @@
-from html.parser import HTMLParser
-from dataclasses import dataclass
-from urllib.parse import urljoin
-from Node import Node
-
-class TDElm:
-
-    content: str
-    node: Node
-
-    def __init__(self):
-        self.content = ""
-
-    def addText(self, content):
-        self.content += content
-    
-    def getContent(self) -> str:
-        return self.content
-
-    def setNode(self, node: Node):
-        self.node = node
-    
-    def getNode(self) -> Node:
-        return self.node
-
-class TRElm:
-
-    children: list
-
-    def __init__(self):
-        self.children = []
-    
-    def addChild(self, child: TDElm):
-        self.children.append(child)
-    
-    def __str__(self):
-        return f"TR({self.children[0].getContent()}, {self.children[2].getContent()})"
-
-    def __repr__(self) -> str:
-        return self.__str__()
-
-    def getFunctionIdentifier(self) -> str:
-        return self.children[0].getContent().replace(", ", ",").replace("input_1,input_2,...", "input,...")
-
-    def getMarkdownContent(self) -> str:
-        rawContent = self.children[2].getNode().toMarkdown(False)
-        returnContent = ""
-        for line in rawContent.split("\n"):
-            returnContent += line.strip() + "\n"
-        
-        return returnContent.strip()
-
-class RankExpressionHTMLParser(HTMLParser):
-    
-    insideTable = False
-
-    tableRows: list = []
-    currentTR = None
-    currentTD = None
-
-    linkPrefix = ""
-
-    parseStack = []
-
-    def __init__(self, linkPrefix):
-        super().__init__()
-        self.linkPrefix = linkPrefix
-
-    def parse(self, input: str):
-        super().parse(input)
-        self.inputText = input
-
-    def handle_starttag(self, tag, attrs):
-        if (tag == "table"):
-            self.insideTable = True
-        
-        if (not self.insideTable):
-            return
-        
-        if (tag == "tr"):
-            self.currentTR = TRElm()
-        
-        if (tag == "td"):
-            self.currentTD = TDElm()
-            self.parseStack = [Node("parent", f"{self.linkPrefix}")]
-
-        elif (self.currentTD is not None):
-            self.parseStack.append(Node(tag, self.linkPrefix, attrs))
-
-    
-    def handle_data(self, data):
-        if self.currentTD is not None:
-            self.currentTD.addText(data)
-            self.parseStack[-1].addChild(data)
-    
-    def handle_endtag(self, tag):
-        
-        if (tag == "table"):
-            self.insideTable = False
-        
-        if (tag == "tr"):
-            if len(self.currentTR.children) == 3:
-                self.tableRows.append(self.currentTR)
-            self.currentTR = None
-        
-        while len(self.parseStack) > 1:
-            elm = self.parseStack.pop()
-            self.parseStack[-1].addChild(elm)
-            if (elm.tag == tag):
-                break
-        
-        if (tag == "td"):
-            self.currentTD.setNode(self.parseStack[0])
-            self.parseStack[0].close()
-            self.currentTR.addChild(self.currentTD)
-            self.currentTD = None
-    
-    def getRows(self):
-        return self.tableRows
diff --git a/integration/schema-language-server/language-server/src/main/python/VespaDocHTMLParser.py b/integration/schema-language-server/language-server/src/main/python/VespaDocHTMLParser.py
deleted file mode 100644
index db4408974497..000000000000
--- a/integration/schema-language-server/language-server/src/main/python/VespaDocHTMLParser.py
+++ /dev/null
@@ -1,89 +0,0 @@
-from html.parser import HTMLParser
-from dataclasses import dataclass
-
-from Node import Node
-
-EXCLUDED_IDS = [
-    "syntax",
-    "elements"
-]
-
-@dataclass
-class Tag:
-    id: str
-    content: str = ""
-    AST: Node = None
-
-class VespaDocHTMLParser(HTMLParser):
-    enconteredTags = []
-    inputText: str
-
-    currentReadingTag = None
-    parseStack = []
-
-    linkPrefix = ""
-
-    def __init__(self, linkPrefix):
-        super().__init__()
-        self.linkPrefix = linkPrefix
-
-    def parse(self, input: str):
-        super().parse(input)
-        self.inputText = input
-
-    def stopReading(self):
-        if (self.currentReadingTag is None):
-            return
-
-        self.currentReadingTag.endPos = self.getpos()
-
-        while len(self.parseStack) > 1:
-            elm = self.parseStack.pop()
-            self.parseStack[-1].addChild(elm)
-
-        self.currentReadingTag.AST = self.parseStack[0]
-        self.currentReadingTag.AST.close()
-        self.enconteredTags.append(self.currentReadingTag)
-        self.currentReadingTag = None
-    
-    def startReading(self, name):
-        self.parseStack = [Node("parent", f"{self.linkPrefix}#{name}")]
-        self.currentReadingTag = Tag(name)
-
-    def handle_starttag(self, tag, attrs):
-        if (self.currentReadingTag is not None and tag == "table"):
-            self.stopReading()
-
-        for attr in attrs:
-            if attr[0] == "id":
-
-                self.stopReading()
-
-                if tag == "h2" and attr[1] not in EXCLUDED_IDS:
-                    self.startReading(attr[1])
-        
-        if (self.currentReadingTag is not None):
-            self.currentReadingTag.content += f"<{tag}>"
-
-            self.parseStack.append(Node(tag, self.linkPrefix, attrs))
-    
-    def handle_data(self, data):
-        
-        if (self.currentReadingTag is not None):
-            self.currentReadingTag.content += data
-            self.parseStack[-1].addChild(data)
-    
-    def handle_endtag(self, tag):
-        
-        if self.currentReadingTag is not None:
-            self.currentReadingTag.content += f""
-
-            while len(self.parseStack) > 1:
-                elm = self.parseStack.pop()
-                self.parseStack[-1].addChild(elm)
-                if (elm.tag == tag):
-                    break
-
-    def getTags(self) -> list:
-
-        return self.enconteredTags
diff --git a/integration/schema-language-server/language-server/src/main/python/buildDocs.py b/integration/schema-language-server/language-server/src/main/python/buildDocs.py
deleted file mode 100644
index da71da402fe7..000000000000
--- a/integration/schema-language-server/language-server/src/main/python/buildDocs.py
+++ /dev/null
@@ -1,96 +0,0 @@
-import sys
-import os
-import requests
-import pathlib
-from VespaDocHTMLParser import VespaDocHTMLParser
-from RankExpressionHTMLParser import RankExpressionHTMLParser
-
-URL_PREFIX: str = "https://raw.githubusercontent.com/vespa-engine/documentation/master/en"
-
-SCHEMA_URL = "/reference/schema-reference.html"
-RANK_EXPRESSION_URL = "/reference/rank-features.html"
-
-LINK_BASE_URL = "https://docs.vespa.ai/en"
-
-REPLACE_FILENAME_MAP = {
-    "EXPRESSION": [ "EXPRESSION_SL", "EXPRESSION_ML" ],
-    "RANK_FEATURES": [ "RANKFEATURES_SL", "RANKFEATURES_ML" ],
-    "FUNCTION (INLINE)? [NAME]": [ "FUNCTION" ],
-    "SUMMARY_FEATURES": [ "SUMMARYFEATURES_SL", "SUMMARYFEATURES_ML", "SUMMARYFEATURES_ML_INHERITS" ],
-    "MATCH_FEATURES": [ "MATCHFEATURES_SL", "MATCHFEATURES_ML", "MATCHFEATURES_SL_INHERITS" ],
-    "IMPORT FIELD": [ "IMPORT" ]
-}
-
-# TODO: fix dictionary and attribute and index
-
-def fetchFile(file_url: str) -> str:
-    data = requests.get(f"{URL_PREFIX}{file_url}")
-    if data.status_code != 200:
-        raise Exception("Could not fetch the news documentation! Has the url paths changed?")
-
-    return data.text
-
-def main():
-    print("Downloading docs...")
-
-    targetPath: pathlib.Path = pathlib.Path()
-    subPaths = ["schema", "rankExpression"]
-
-    if (len(sys.argv) >= 2):
-        targetPath = pathlib.Path(sys.argv[1])
-    else:
-        raise Exception("No target directory specified")
-    
-    if (len(sys.argv) >= 3):
-        if (sys.argv[2] == "skip"):
-            return
-    
-    if not os.path.exists(targetPath):
-        os.makedirs(targetPath)
-
-    for subPath in subPaths:
-        absoluteSubPath = targetPath.joinpath(subPath)
-        if not os.path.exists(absoluteSubPath):
-            os.makedirs(absoluteSubPath)
-
-    shcemaDocData: str = fetchFile(SCHEMA_URL)
-
-    parser = VespaDocHTMLParser(LINK_BASE_URL + SCHEMA_URL)
-    parser.feed(shcemaDocData)
-
-    tags = parser.getTags()
-
-    for tag in tags:
-
-        filename = convertToToken(tag.AST.getName())
-        shcemaDocData = tag.AST.toMarkdown(True)
-
-        if filename in REPLACE_FILENAME_MAP:
-            for fn in REPLACE_FILENAME_MAP[filename]:
-                 writeToFile(f"{targetPath}/schema/{fn}.md", shcemaDocData)
-        else:
-            writeToFile(f"{targetPath}/schema/{filename}.md", shcemaDocData)
-    
-    rankExpressionDocData: str = fetchFile(RANK_EXPRESSION_URL)
-
-    # print(rankExpressionDocData)
-
-    parser = RankExpressionHTMLParser(LINK_BASE_URL + RANK_EXPRESSION_URL)
-    parser.feed(rankExpressionDocData)
-
-    rows = parser.getRows()
-
-    for row in rows:
-        filename = row.getFunctionIdentifier()
-
-        writeToFile(f"{targetPath}/rankExpression/{filename}.md", row.getMarkdownContent())
-
-def writeToFile(filepath: str, data: str):
-    with open(filepath, "w") as file:
-        file.write(data)
-
-def convertToToken(name):
-    return name.upper().replace("-", "_")
-
-if __name__ == "__main__":
-    main()
diff --git a/integration/schema-language-server/language-server/src/main/python/buildDocs.sh b/integration/schema-language-server/language-server/src/main/python/buildDocs.sh
deleted file mode 100755
index f0e9ebe98625..000000000000
--- a/integration/schema-language-server/language-server/src/main/python/buildDocs.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-
-python3 -m pip install -r requirements.txt --user
-
-python3 buildDocs.py $1
-
-#exit 0
\ No newline at end of file
diff --git a/integration/schema-language-server/language-server/src/main/python/requirements.txt b/integration/schema-language-server/language-server/src/main/python/requirements.txt
deleted file mode 100644
index 864a8527cd66..000000000000
--- a/integration/schema-language-server/language-server/src/main/python/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-requests
-dataclasses
\ No newline at end of file

From 0056e2e08ef8b8fdd6a126da8f2b3fc9f6496119 Mon Sep 17 00:00:00 2001
From: Magnus Eide-Fredriksen 
Date: Thu, 5 Sep 2024 18:13:10 +0200
Subject: [PATCH 05/13] fix: add flexmark dependency

---
 vespa-dependencies-enforcer/allowed-maven-dependencies.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vespa-dependencies-enforcer/allowed-maven-dependencies.txt b/vespa-dependencies-enforcer/allowed-maven-dependencies.txt
index 305b17f57e28..4f6caf322276 100644
--- a/vespa-dependencies-enforcer/allowed-maven-dependencies.txt
+++ b/vespa-dependencies-enforcer/allowed-maven-dependencies.txt
@@ -37,6 +37,7 @@ com.sun.istack:istack-commons-runtime:4.1.2
 com.sun.xml.bind:jaxb-core:${jaxb-core.vespa.version}
 com.sun.xml.bind:jaxb-impl:${jaxb-impl.vespa.version}
 com.thaiopensource:jing:20091111
+com.vladsch.flexmark:flexmark-all:0.64.8
 com.yahoo.athenz:athenz-auth-core:${athenz.vespa.version}
 com.yahoo.athenz:athenz-client-common:${athenz.vespa.version}
 com.yahoo.athenz:athenz-zms-core:${athenz.vespa.version}

From 8e314645d2d4469a41d30f28c142b093069120d0 Mon Sep 17 00:00:00 2001
From: Magnus Eide-Fredriksen 
Date: Thu, 5 Sep 2024 18:13:10 +0200
Subject: [PATCH 06/13] fix: add flexmark dependency

---
 vespa-dependencies-enforcer/allowed-maven-dependencies.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vespa-dependencies-enforcer/allowed-maven-dependencies.txt b/vespa-dependencies-enforcer/allowed-maven-dependencies.txt
index 305b17f57e28..9fc2127e417d 100644
--- a/vespa-dependencies-enforcer/allowed-maven-dependencies.txt
+++ b/vespa-dependencies-enforcer/allowed-maven-dependencies.txt
@@ -37,6 +37,7 @@ com.sun.istack:istack-commons-runtime:4.1.2
 com.sun.xml.bind:jaxb-core:${jaxb-core.vespa.version}
 com.sun.xml.bind:jaxb-impl:${jaxb-impl.vespa.version}
 com.thaiopensource:jing:20091111
+com.vladsch.flexmark:flexmark-all:0.64.8
 com.yahoo.athenz:athenz-auth-core:${athenz.vespa.version}
 com.yahoo.athenz:athenz-client-common:${athenz.vespa.version}
 com.yahoo.athenz:athenz-zms-core:${athenz.vespa.version}

From fac4b74bc5553bfaaf02a55ed6fc4367f93e3000 Mon Sep 17 00:00:00 2001
From: Magnus Eide-Fredriksen 
Date: Fri, 13 Sep 2024 10:10:03 +0200
Subject: [PATCH 07/13] fix: only required flexmark artifact dependency

---
 integration/schema-language-server/language-server/pom.xml | 2 +-
 vespa-dependencies-enforcer/allowed-maven-dependencies.txt | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/integration/schema-language-server/language-server/pom.xml b/integration/schema-language-server/language-server/pom.xml
index 4b8a19ecf017..ac05a89c8f2d 100644
--- a/integration/schema-language-server/language-server/pom.xml
+++ b/integration/schema-language-server/language-server/pom.xml
@@ -25,7 +25,7 @@
     
     
       com.vladsch.flexmark
-      flexmark-all
+      flexmark-html2md-converter
       0.64.8
     
     
diff --git a/vespa-dependencies-enforcer/allowed-maven-dependencies.txt b/vespa-dependencies-enforcer/allowed-maven-dependencies.txt
index 9fc2127e417d..e376cc1b9245 100644
--- a/vespa-dependencies-enforcer/allowed-maven-dependencies.txt
+++ b/vespa-dependencies-enforcer/allowed-maven-dependencies.txt
@@ -177,6 +177,7 @@ org.hamcrest:hamcrest-core:${hamcrest.vespa.version}
 org.hamcrest:hamcrest:${hamcrest.vespa.version}
 org.hdrhistogram:HdrHistogram:${hdrhistogram.vespa.version}
 org.json:json:${org.json.vespa.version}
+org.jsoup:jsoup:1.17.2
 org.junit.jupiter:junit-jupiter-api:${junit.vespa.tenant.version}
 org.junit.jupiter:junit-jupiter-api:${junit.vespa.version}
 org.junit.jupiter:junit-jupiter-engine:${junit.vespa.tenant.version}

From ad6b8fdcc7bf2fbb42a3c1b9f9fb8ee6d7999b0d Mon Sep 17 00:00:00 2001
From: Magnus Eide-Fredriksen 
Date: Fri, 13 Sep 2024 10:18:45 +0200
Subject: [PATCH 08/13] fix: vespa-dependencies-enforcer

---
 vespa-dependencies-enforcer/allowed-maven-dependencies.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vespa-dependencies-enforcer/allowed-maven-dependencies.txt b/vespa-dependencies-enforcer/allowed-maven-dependencies.txt
index e376cc1b9245..0b7e7a95be77 100644
--- a/vespa-dependencies-enforcer/allowed-maven-dependencies.txt
+++ b/vespa-dependencies-enforcer/allowed-maven-dependencies.txt
@@ -37,7 +37,7 @@ com.sun.istack:istack-commons-runtime:4.1.2
 com.sun.xml.bind:jaxb-core:${jaxb-core.vespa.version}
 com.sun.xml.bind:jaxb-impl:${jaxb-impl.vespa.version}
 com.thaiopensource:jing:20091111
-com.vladsch.flexmark:flexmark-all:0.64.8
+com.vladsch.flexmark:flexmark-html2md-converter:0.64.8
 com.yahoo.athenz:athenz-auth-core:${athenz.vespa.version}
 com.yahoo.athenz:athenz-client-common:${athenz.vespa.version}
 com.yahoo.athenz:athenz-zms-core:${athenz.vespa.version}
@@ -176,6 +176,7 @@ org.glassfish.jaxb:txw2:${jaxb.runtime.vespa.version}
 org.hamcrest:hamcrest-core:${hamcrest.vespa.version}
 org.hamcrest:hamcrest:${hamcrest.vespa.version}
 org.hdrhistogram:HdrHistogram:${hdrhistogram.vespa.version}
+org.jetbrains:annotations:24.0.1
 org.json:json:${org.json.vespa.version}
 org.jsoup:jsoup:1.17.2
 org.junit.jupiter:junit-jupiter-api:${junit.vespa.tenant.version}

From 8e6479ca62a2d705a973352e0bc7a9783bcea042 Mon Sep 17 00:00:00 2001
From: Magnus Eide-Fredriksen 
Date: Fri, 13 Sep 2024 10:22:30 +0200
Subject: [PATCH 09/13] fix: vespa-dependencies-enforcer

---
 vespa-dependencies-enforcer/allowed-maven-dependencies.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vespa-dependencies-enforcer/allowed-maven-dependencies.txt b/vespa-dependencies-enforcer/allowed-maven-dependencies.txt
index 0b7e7a95be77..d3526ecaf58e 100644
--- a/vespa-dependencies-enforcer/allowed-maven-dependencies.txt
+++ b/vespa-dependencies-enforcer/allowed-maven-dependencies.txt
@@ -37,7 +37,7 @@ com.sun.istack:istack-commons-runtime:4.1.2
 com.sun.xml.bind:jaxb-core:${jaxb-core.vespa.version}
 com.sun.xml.bind:jaxb-impl:${jaxb-impl.vespa.version}
 com.thaiopensource:jing:20091111
-com.vladsch.flexmark:flexmark-html2md-converter:0.64.8
+com.vladsch.flexmark:flexmark:0.64.8
 com.yahoo.athenz:athenz-auth-core:${athenz.vespa.version}
 com.yahoo.athenz:athenz-client-common:${athenz.vespa.version}
 com.yahoo.athenz:athenz-zms-core:${athenz.vespa.version}

From 63cab920112e1665366df05e838a01d5d79dd001 Mon Sep 17 00:00:00 2001
From: Magnus Eide-Fredriksen 
Date: Fri, 13 Sep 2024 10:38:25 +0200
Subject: [PATCH 10/13] fix: add all required flexmark dependencies

---
 .../allowed-maven-dependencies.txt            | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/vespa-dependencies-enforcer/allowed-maven-dependencies.txt b/vespa-dependencies-enforcer/allowed-maven-dependencies.txt
index d3526ecaf58e..186f28caad61 100644
--- a/vespa-dependencies-enforcer/allowed-maven-dependencies.txt
+++ b/vespa-dependencies-enforcer/allowed-maven-dependencies.txt
@@ -37,6 +37,26 @@ com.sun.istack:istack-commons-runtime:4.1.2
 com.sun.xml.bind:jaxb-core:${jaxb-core.vespa.version}
 com.sun.xml.bind:jaxb-impl:${jaxb-impl.vespa.version}
 com.thaiopensource:jing:20091111
+com.vladsch.flexmark:flexmark-ext-emoji:0.64.8
+com.vladsch.flexmark:flexmark-ext-gfm-strikethrough:0.64.8
+com.vladsch.flexmark:flexmark-ext-ins:0.64.8
+com.vladsch.flexmark:flexmark-ext-superscript:0.64.8
+com.vladsch.flexmark:flexmark-ext-tables:0.64.8
+com.vladsch.flexmark:flexmark-ext-wikilink:0.64.8
+com.vladsch.flexmark:flexmark-html2md-converter:0.64.8
+com.vladsch.flexmark:flexmark-jira-converter:0.64.8
+com.vladsch.flexmark:flexmark-util-ast:0.64.8
+com.vladsch.flexmark:flexmark-util-builder:0.64.8
+com.vladsch.flexmark:flexmark-util-collection:0.64.8
+com.vladsch.flexmark:flexmark-util-data:0.64.8
+com.vladsch.flexmark:flexmark-util-dependency:0.64.8
+com.vladsch.flexmark:flexmark-util-format:0.64.8
+com.vladsch.flexmark:flexmark-util-html:0.64.8
+com.vladsch.flexmark:flexmark-util-misc:0.64.8
+com.vladsch.flexmark:flexmark-util-options:0.64.8
+com.vladsch.flexmark:flexmark-util-sequence:0.64.8
+com.vladsch.flexmark:flexmark-util-visitor:0.64.8
+com.vladsch.flexmark:flexmark-util:0.64.8
 com.vladsch.flexmark:flexmark:0.64.8
 com.yahoo.athenz:athenz-auth-core:${athenz.vespa.version}
 com.yahoo.athenz:athenz-client-common:${athenz.vespa.version}

From 7ee2007def765bd7b9b4f07ead22a0273fe2a886 Mon Sep 17 00:00:00 2001
From: Magnus Eide-Fredriksen 
Date: Thu, 19 Sep 2024 09:30:43 +0200
Subject: [PATCH 11/13] fix: remove commented plugin from pom.xml

---
 .../language-server/pom.xml                   | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/integration/schema-language-server/language-server/pom.xml b/integration/schema-language-server/language-server/pom.xml
index ac05a89c8f2d..cf1322bca09c 100644
--- a/integration/schema-language-server/language-server/pom.xml
+++ b/integration/schema-language-server/language-server/pom.xml
@@ -105,25 +105,6 @@
         com.github.os72
         protoc-jar-maven-plugin
       
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
          
         org.congocc   
         org.congocc.maven.plugin   

From 5b3eb2b77904b0589478e6ecb2ebcaed224c4d03 Mon Sep 17 00:00:00 2001
From: Magnus Eide-Fredriksen 
Date: Thu, 19 Sep 2024 12:01:46 +0200
Subject: [PATCH 12/13] feat: embed markdown files in fat jar during build

---
 .gitignore                                    |  3 +-
 integration/intellij/.gitignore               |  1 +
 .../language-server/pom.xml                   | 44 +++++++++++++++++++
 .../documentation/FetchDocumentation.java     | 16 +++++++
 .../vespa/schemals/lsp/hover/SchemaHover.java |  1 -
 5 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index ebb1badec12e..a5c37784eea9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -55,4 +55,5 @@ install_manifest.txt
 *.cbp
 !/.copr/Makefile
 !/.buildkite/Makefile
-.vscode
\ No newline at end of file
+.vscode
+.metadata
diff --git a/integration/intellij/.gitignore b/integration/intellij/.gitignore
index 96a25be67256..9105b843b1ed 100644
--- a/integration/intellij/.gitignore
+++ b/integration/intellij/.gitignore
@@ -1,3 +1,4 @@
+bin/
 .gradle
 **/build/
 !src/**/build/
diff --git a/integration/schema-language-server/language-server/pom.xml b/integration/schema-language-server/language-server/pom.xml
index cf1322bca09c..dccf184ba1a0 100644
--- a/integration/schema-language-server/language-server/pom.xml
+++ b/integration/schema-language-server/language-server/pom.xml
@@ -88,6 +88,50 @@
         maven-install-plugin
         true
       
+      
+        org.codehaus.mojo
+        exec-maven-plugin
+        
+          
+            fetch-documentation
+            prepare-package
+            
+              java
+            
+            
+                ai.vespa.schemals.documentation.FetchDocumentation
+                
+                    ${project.basedir}/target/generated-resources/hover
+                
+            
+          
+        
+      
+      
+      
+      
+        org.apache.maven.plugins
+        maven-resources-plugin
+        
+          
+            prepare-package 
+            
+              copy-resources
+            
+            
+              
+              ${project.build.outputDirectory}
+              
+                
+                  ${project.basedir}/target/generated-resources
+                  false
+                
+              
+            
+          
+        
+      
       
         com.yahoo.vespa
         bundle-plugin
diff --git a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/FetchDocumentation.java b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/FetchDocumentation.java
index 87082ff7234a..906c56c7d81c 100644
--- a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/FetchDocumentation.java
+++ b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/documentation/FetchDocumentation.java
@@ -1,5 +1,6 @@
 package ai.vespa.schemals.documentation;
 
+import java.io.File;
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
@@ -58,4 +59,19 @@ public static void fetchDocs(Path targetPath) throws IOException {
     private static String convertToToken(String h2Id) {
         return h2Id.toUpperCase().replaceAll("-", "_");
     }
+
+    /** Command-line entry point: fetches the documentation into the directory given as args[0]; exits with status 1 on failure. */
+    public static void main(String[] args) {
+        if (args.length < 1) {
+            System.err.println("FetchDocumentation requires one argument: <target directory>");
+            System.exit(1);
+        }
+        try {
+            System.out.println("Fetching docs to " + args[0]);
+            fetchDocs(Paths.get(args[0]));
+        } catch (IOException ex) {
+            System.err.println("FetchDocumentation failed to download documentation: " + ex.getMessage());
+            System.exit(1);
+        }
+    }
 }
diff --git a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/lsp/hover/SchemaHover.java b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/lsp/hover/SchemaHover.java
index b99ce1ffc549..8bd7be8536ba 100644
--- a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/lsp/hover/SchemaHover.java
+++ b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/lsp/hover/SchemaHover.java
@@ -345,7 +345,6 @@ public static Optional getFileHoverInformation(String markdownKey, Range
             }
         }
 
-
         if (SchemaLanguageServer.serverPath == null)return Optional.empty();
         String fileName = markdownKey + ".md";
 

From 6814607aa1d0b46ec8da2ff8ada73a1824b0c913 Mon Sep 17 00:00:00 2001
From: Magnus Eide-Fredriksen 
Date: Thu, 19 Sep 2024 13:07:32 +0200
Subject: [PATCH 13/13] feat: use embedded documentation files as fallback for
 fetching documentation online

---
 .../clients/vscode/package.json               |  2 +-
 .../vespa/schemals/SchemaLanguageServer.java  | 84 ++++++++++++++++++-
 2 files changed, 82 insertions(+), 4 deletions(-)

diff --git a/integration/schema-language-server/clients/vscode/package.json b/integration/schema-language-server/clients/vscode/package.json
index 685cd212bb81..4644b21f123a 100644
--- a/integration/schema-language-server/clients/vscode/package.json
+++ b/integration/schema-language-server/clients/vscode/package.json
@@ -65,7 +65,7 @@
     "test": "vscode-test",
     "check-types": "tsc --noEmit",
     "copy-images": "cp ../../resources/* ./images",
-    "langserver-install": "mkdir -p server && cp ../../language-server/target/schema-language-server-jar-with-dependencies.jar ./server/",
+    "langserver-install": "mkdir -p server && rm -rf server/* && cp ../../language-server/target/schema-language-server-jar-with-dependencies.jar ./server/",
     "publish": "npm run compile && node out/publish.js"
   },
   "devDependencies": {
diff --git a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/SchemaLanguageServer.java b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/SchemaLanguageServer.java
index c9958aa70d67..12267ad3f481 100644
--- a/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/SchemaLanguageServer.java
+++ b/integration/schema-language-server/language-server/src/main/java/ai/vespa/schemals/SchemaLanguageServer.java
@@ -1,10 +1,23 @@
 package ai.vespa.schemals;
 
+import java.io.BufferedReader;
 import java.io.File;
+import java.io.FileFilter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.URL;
+import java.net.URLDecoder;
+import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
+import java.nio.file.StandardOpenOption;
+import java.util.Enumeration;
 import java.util.List;
 import java.util.concurrent.CompletableFuture;
+import java.util.jar.JarEntry;
+import java.util.jar.JarFile;
+import java.util.stream.Collectors;
 
 import org.eclipse.lsp4j.CodeActionKind;
 import org.eclipse.lsp4j.CodeActionOptions;
@@ -153,11 +166,29 @@ public void connect(LanguageClient languageClient) {
         // Start a document fetching job in the background.
         Path docPath = serverPath.resolve("hover");
 
+        try {
+            setupDocumentation(docPath);
+        } catch (IOException ioex) {
+            this.logger.error("Failed to set up documentation. Error: " + ioex.getMessage());
+        }
+    }
+
+    /**
+     * Initial setup of the documentation.
+     */
+    public void setupDocumentation(Path documentationPath) throws IOException {
+
+        Files.createDirectories(documentationPath);
+        Files.createDirectories(documentationPath.resolve("schema"));
+        Files.createDirectories(documentationPath.resolve("rankExpression"));
+
+        ensureLocalDocumentationLoaded(documentationPath);
+
         Thread thread = new Thread() {
             @Override
             public void run() {
                 try {
-                    FetchDocumentation.fetchDocs(docPath);
+                    FetchDocumentation.fetchDocs(documentationPath);
                 } catch(Exception e) {
                     throw new RuntimeException(e.getMessage());
                 }
@@ -167,11 +198,58 @@ public void run() {
         thread.setUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() {
             @Override
             public void uncaughtException(Thread th, Throwable ex) {
-                logger.error("Failed to fetch docs:" + ex.getMessage());
+                logger.warning("Failed to fetch docs: " + ex.getMessage() + " is unavailable. Locally cached documentation will be used.");
             }
         });
-        logger.info("Fetching docs to path: " + docPath.toAbsolutePath().toString());
+        logger.info("Fetching docs to path: " + documentationPath.toAbsolutePath().toString());
         thread.start();
 
     }
+
+    /**
+     * Ensures the local documentation directory contains markdown files.
+     * Documentation counts as loaded when documentationPath/schema contains at least one .md file;
+     * otherwise every file below the documentation directory name is unpacked from the current jar.
+     *
+     * @param documentationPath local directory the documentation is extracted into
+     * @throws IOException if the embedded documentation cannot be located or extracted
+     */
+    private void ensureLocalDocumentationLoaded(Path documentationPath) throws IOException {
+        File dir = documentationPath.resolve("schema").toFile();
+        File[] contents = dir.listFiles(new FileFilter() {
+            @Override
+            public boolean accept(File pathname) {
+                return pathname.getName().endsWith(".md");
+            }
+        });
+        // listFiles returns null if the directory is missing or unreadable; treat that as "not loaded".
+        if (contents != null && contents.length > 0) return;
+
+        logger.info("Extracting embedded documentation files.");
+        String resourceRoot = documentationPath.getFileName().toString();
+        ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
+        URL resourceURL = classLoader.getResource(resourceRoot);
+        if (resourceURL == null) {
+            throw new IOException("Could not find documentation in jar file!");
+        }
+        if (!resourceURL.getProtocol().equals("jar")) {
+            throw new IOException("Unhandled protocol for resource " +  resourceURL.toString());
+        }
+
+        // The jar URL path is expected to look like "file:/path/server.jar!/hover": drop "file:" and the entry part.
+        String jarPath = resourceURL.getPath().substring(5, resourceURL.getPath().indexOf('!'));
+        try (JarFile jarFile = new JarFile(URLDecoder.decode(jarPath, "UTF-8"))) {
+            Enumeration<JarEntry> entries = jarFile.entries();
+            while (entries.hasMoreElements()) {
+                JarEntry entry = entries.nextElement();
+                if (entry.isDirectory() || !entry.getName().startsWith(resourceRoot + "/")) continue;
+                Path destination = documentationPath.getParent().resolve(entry.getName());
+                Files.createDirectories(destination.getParent());
+                try (InputStream inputStream = jarFile.getInputStream(entry)) {
+                    // Copy raw bytes so the encoding and line endings of the embedded files are preserved.
+                    Files.copy(inputStream, destination, java.nio.file.StandardCopyOption.REPLACE_EXISTING);
+                }
+            }
+        }
+    }
 }