diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md index 58a79ae1412..1c5e3f2ebfa 100644 --- a/CHANGELOG_PENDING.md +++ b/CHANGELOG_PENDING.md @@ -3,3 +3,4 @@ ### Bug Fixes +- Correctly escape special characters in generated Javadocs. \ No newline at end of file diff --git a/pkg/codegen/java/testdata/TestFormatForignComments/repro_auth0_failure.golden b/pkg/codegen/java/testdata/TestFormatForeignComments/repro_auth0_failure.golden similarity index 97% rename from pkg/codegen/java/testdata/TestFormatForignComments/repro_auth0_failure.golden rename to pkg/codegen/java/testdata/TestFormatForeignComments/repro_auth0_failure.golden index c5f1bf6b2ab..f014fe09c99 100644 --- a/pkg/codegen/java/testdata/TestFormatForignComments/repro_auth0_failure.golden +++ b/pkg/codegen/java/testdata/TestFormatForeignComments/repro_auth0_failure.golden @@ -702,20 +702,20 @@ * import java.nio.file.Files; * import java.nio.file.Paths; * - * public class App { - * public static void main(String[] args) { + * public class App }{{@code + * public static void main(String[] args) }{{@code * Pulumi.run(App::stack); - * } + * }}{@code * - * public static void stack(Context ctx) { + * public static void stack(Context ctx) }{{@code * // This is an example of an Email connection. * var passwordlessEmail = new Connection("passwordlessEmail", ConnectionArgs.builder() * .strategy("email") * .name("email") * .options(ConnectionOptionsArgs.builder() * .name("email") - * .from("{{ application.name }} \u003croot{@literal @}auth0.com\u003e") - * .subject("Welcome to {{ application.name }}") + * .from("}{{{@code application.name }}}{@code \u003croot}{@literal @}{@code auth0.com\u003e") + * .subject("Welcome to }{{{@code application.name }}}{@code ") * .syntax("liquid") * .template("\u003chtml\u003eThis is the body of the email\u003c/html\u003e") * .disableSignup(false) @@ -733,8 +733,8 @@ * .build()) * .build()); * - * } - * } + * }}{@code + * }}{@code * } * * \u003c!--End PulumiCodeChooser --\u003e @@ -763,12 +763,12 @@ * import java.nio.file.Files; * import java.nio.file.Paths; * - * public class App { - * public static void main(String[] args) { + * public class App }{{@code + * public static void main(String[] args) }{{@code * Pulumi.run(App::stack); - * } + * }}{@code * - * public static void stack(Context ctx) { + * public static void stack(Context ctx) }{{@code * // This is an example of a SAML connection. * var samlp = new Connection("samlp", ConnectionArgs.builder() * .name("SAML-Connection") @@ -786,11 +786,11 @@ * .protocolBinding("urn:oasis:names:tc:SAML:2.0:bindings:HTTP-POST") * .requestTemplate(""" * \u003csamlp:AuthnRequest xmlns:samlp="urn:oasis:names:tc:SAML:2.0:protocol" - * {@literal @@}AssertServiceURLAndDestination{@literal @@} - * ID="{@literal @@}ID{@literal @@}" - * IssueInstant="{@literal @@}IssueInstant{@literal @@}" - * ProtocolBinding="{@literal @@}ProtocolBinding{@literal @@}" Version="2.0"\u003e - * \u003csaml:Issuer xmlns:saml="urn:oasis:names:tc:SAML:2.0:assertion"\u003e{@literal @@}Issuer{@literal @@}\u003c/saml:Issuer\u003e + * }{@literal @@}{@code AssertServiceURLAndDestination}{@literal @@}{@code + * ID="}{@literal @@}{@code ID}{@literal @@}{@code " + * IssueInstant="}{@literal @@}{@code IssueInstant}{@literal @@}{@code " + * ProtocolBinding="}{@literal @@}{@code ProtocolBinding}{@literal @@}{@code " Version="2.0"\u003e + * \u003csaml:Issuer xmlns:saml="urn:oasis:names:tc:SAML:2.0:assertion"\u003e}{@literal @@}{@code Issuer}{@literal @@}{@code \u003c/saml:Issuer\u003e * \u003c/samlp:AuthnRequest\u003e """) * .userIdAttribute("https://saml.provider/imi/ns/identity-200810") * .signatureAlgorithm("rsa-sha256") @@ -822,21 +822,21 @@ * .signingKey(ConnectionOptionsSigningKeyArgs.builder() * .key(""" * -----BEGIN PRIVATE KEY----- - * ...{your private key here}... + * ...}{{@code your private key here}}{@code ... * -----END PRIVATE KEY----- """) * .cert(""" * -----BEGIN CERTIFICATE----- - * ...{your public key cert here}... + * ...}{{@code your public key cert here}}{@code ... * -----END CERTIFICATE----- """) * .build()) * .decryptionKey(ConnectionOptionsDecryptionKeyArgs.builder() * .key(""" * -----BEGIN PRIVATE KEY----- - * ...{your private key here}... + * ...}{{@code your private key here}}{@code ... * -----END PRIVATE KEY----- """) * .cert(""" * -----BEGIN CERTIFICATE----- - * ...{your public key cert here}... + * ...}{{@code your public key cert here}}{@code ... * -----END CERTIFICATE----- """) * .build()) * .idpInitiated(ConnectionOptionsIdpInitiatedArgs.builder() @@ -847,8 +847,8 @@ * .build()) * .build()); * - * } - * } + * }}{@code + * }}{@code * } * * \u003c!--End PulumiCodeChooser --\u003e diff --git a/pkg/codegen/java/utilities.go b/pkg/codegen/java/utilities.go index 78dac2315a8..00d8024f9bf 100644 --- a/pkg/codegen/java/utilities.go +++ b/pkg/codegen/java/utilities.go @@ -1,4 +1,16 @@ -// Copyright 2022, Pulumi Corporation. All rights reserved. +// Copyright 2022-2024, Pulumi Corporation. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. package java @@ -6,7 +18,6 @@ import ( "fmt" "html" "io" - "regexp" "strings" "github.com/pulumi/pulumi/pkg/v3/codegen" @@ -20,93 +31,287 @@ func formatForeignBlockComment(comment, indent string) string { return formatForeignBlockCommentFrom(comment, 0, indent) } -var replaceAtRegexp = regexp.MustCompile("( ?)(@+)") - // formatForeignBlockCommentFrom is like [formatForeignBlockComment], except that it // unconditionally accepts the first idx bytes of comment. func formatForeignBlockCommentFrom(comment string, idx int, indent string) string { comment = codegen.FilterExamples(comment, "java") + comment = comment[:idx] + mapComment(comment[idx:], escapeCode, escapeNonCode) + return formatBlockComment(comment, indent) +} - // Escape a '@' with a '{@literal @}'. - // - // This inserts a space before the new '@', so we attempt to remove a leading - // space when possible. It not possible, we do it anyway. - escapeAtLiteral := func(s string) string { - return replaceAtRegexp.ReplaceAllString(s, "{@literal $2}") +// Minimally escapes a piece of Java code for use within a Javadoc comment. +// +// Returned code is wrapped in a Javadoc `{@code ...}` block inside an HTML `
` tag. This provides the best
+// out-of-the-box experience when it comes to not having to escape a large number of characters (e.g. HTML is fine
+// as-is).
+func escapeCode(code string) string {
+	const (
+		header = "
\n{@code"
+		footer = "}\n
" + ) + + if !codeNeedsEscaping(code) { + return header + code + footer } - comment = comment[:idx] + mapCommentHelper(comment[idx:], - // Code - func(s string) string { - s = strings.TrimPrefix(s, "```java") - s = strings.TrimSuffix(s, "```") - - // Javadoc doesn't have a way to escape an arbitrarily code - // snippet. The best we have is: - // - //
-			// {@code
-			// THE CODE GOES HERE:
-			// - '@' needs to be escaped.
-			// - '&', '<', '>' does not need to be escaped.
-			// }
-			// 
- var b strings.Builder - b.WriteString("
\n{@code")
-			b.WriteString(escapeAtLiteral(s))
-			b.WriteString("}\n
") - return b.String() - }, - // Non-code - func(s string) string { return html.EscapeString(escapeAtLiteral(s)) }, - ) + w := &codeJavadocWriter{} + for _, rune := range code { + w.WriteRune(rune) + } - return formatBlockComment(comment, indent) + return "
\n{@code" + w.String() + "}\n
" } -// mapCommentHelper maps 2 functions over `comment`. It maps `code` over the section of -// `comment` that is made up of Java code (as defined by markdown code fences) and -// `nonCode` over all other parts of `comment`. -func mapCommentHelper(comment string, code, nonCode func(string) string) string { +// Returns true if and only if a piece of code needs escaping. That is: +// +// - It contains an '@' character. +// - It contains a '*/' sequence. +// - It contains unbalanced braces. +// +// All other code should be acceptable as-is within a Javadoc `{@code ...}` block. +func codeNeedsEscaping(code string) bool { + openBraces := 0 + lastSawAsterisk := false + for _, rune := range code { + if rune == '@' || (rune == '/' && lastSawAsterisk) { + return true + } else if rune == '{' { + openBraces++ + } else if rune == '}' { + openBraces-- + } + + if openBraces < 0 { + return true + } + + lastSawAsterisk = rune == '*' + } + + return openBraces != 0 +} + +// Escapes a piece of text for using within a Javadoc comment. +func escapeNonCode(nonCode string) string { + w := &plainJavadocWriter{} + for _, rune := range nonCode { + w.WriteRune(rune) + } + + return w.String() +} + +// A plainJavadocWriter can be used to build a string safe for embedding in a Javadoc comment as text. It performs the +// following escaping: +// +// * '@' characters are escaped as '{@literal @}'. +// * The pair '*/' is escaped as '*/' (the slash being HTML-encoded). +// * All other characters are HTML-encoded as-is. +type plainJavadocWriter struct { + // The underlying string builder used to build the escaped string. + b strings.Builder + // The current state of the writer. + state plainJavadocWriterState + // True if and only if the last character written was an asterisk. This is used to detect "*/" sequences that need + // to be escaped. + lastWroteAsterisk bool +} + +// A state that a plainJavadocWriter can be in. +type plainJavadocWriterState int + +const ( + // The writer is currently writing plain text. + plainJavadocText plainJavadocWriterState = iota + // The writer is currently writing a literal sequence. + plainJavadocLiteral +) + +// WriteRune writes a rune to the writer, escaping it as necessary. +func (w *plainJavadocWriter) WriteRune(r rune) { + switch w.state { + case plainJavadocText: + // If we are writing plain text and we encounter an '@', we open a literal sequence in order to write it. If we + // encounter a '*/' sequence, we escape the slash and continue. Otherwise, we HTML-encode the character. + if r == '@' { + w.b.WriteString("{@literal @") + w.state = plainJavadocLiteral + } else if r == '/' && w.lastWroteAsterisk { + w.b.WriteString("/") + } else { + w.b.WriteString(html.EscapeString(string(r))) + } + case plainJavadocLiteral: + // If we are already inside a literal and we encounter another '@', we can just write it as-is. If we see any + // other character, we need to close the literal, write the rune and return to the text state. + if r == '@' { + w.b.WriteRune('@') + } else { + w.b.WriteRune('}') + w.b.WriteRune(r) + w.state = plainJavadocText + } + } + + w.lastWroteAsterisk = r == '*' +} + +// Returns the string of escaped text that has been written to the writer so far. +func (w *plainJavadocWriter) String() string { + return w.b.String() +} + +// A codeJavadocWriter can be used to build a string safe for embedding in a Javadoc comment as code. It assumes that +// its content will be wrapped in a `
{@code ...}
` block and escapes '@', '{', '}', and '*/' sequences. In +// general, the only safe way to handle these characters is to exit the `@code` block temporarily, print them out (in +// some cases still requiring escapes) and then re-enter the code block. This generally harms the readability of the +// generated comment at the expense of it being renderable by Javadoc and the majority of IDEs that support e.g. docs on +// hover. +// +// In cases where multiple escape-requiring characters appear in a sequence, we "stay outside the `@code` block" as long +// as possible in order to avoid hampering readability even further. +type codeJavadocWriter struct { + // The underlying string builder used to build the escaped string. + b strings.Builder + // The current state of the writer. + state codeJavadocWriterState + // True if and only if the last character written was an asterisk. This is used to detect "*/" sequences that need + // to be escaped. + lastWroteAsterisk bool +} + +// A state that a codeJavadocWriter can be in. +type codeJavadocWriterState int + +const ( + // The writer is currently writing code. + codeJavadocCode codeJavadocWriterState = iota + // The writer is currently writing text. + codeJavadocText + // The writer is currently writing a literal sequence. + codeJavadocLiteral +) + +// WriteRune writes a rune to the writer, escaping it as necessary. +func (w *codeJavadocWriter) WriteRune(r rune) { + switch w.state { + case codeJavadocCode: + // If we are writing code and we encounter an '@', we open a literal sequence in order to write it. If we + // encounter a brace, we enter a text state and write it out. If we see a '*/' sequence, we escape the slash in + // a text state and continue. Otherwise, we write the character as-is. + if r == '@' { + w.b.WriteString("}{@literal @") + w.state = codeJavadocLiteral + } else if r == '{' || r == '}' { + w.b.WriteRune('}') + w.b.WriteRune(r) + w.state = codeJavadocText + } else if r == '/' && w.lastWroteAsterisk { + w.b.WriteString("}/") + w.state = codeJavadocText + } else { + w.b.WriteRune(r) + } + case codeJavadocText: + // If we are writing text and we encounter an '@', we open a literal sequence in order to write it. If we + // encounter a brace, we write it as-is, allowing us to write multiple braces without repeatedly entering and + // leaving the code state. All other characters return us to the code state before continuing. + if r == '@' { + w.b.WriteString("{@literal @") + w.state = codeJavadocLiteral + } else if r == '{' || r == '}' { + w.b.WriteRune(r) + } else { + w.b.WriteString("{@code") + if r != '\n' { + w.b.WriteRune(' ') + } + w.b.WriteRune(r) + w.state = codeJavadocCode + } + case codeJavadocLiteral: + // If we are already inside a literal and we encounter another '@', we can just write it as-is. If we see a + // brace, we enter a text state and write it out. If we see any other character, we need to close the literal, + // write the rune and return to the text state. + if r == '@' { + w.b.WriteRune('@') + } else if r == '{' || r == '}' { + w.b.WriteRune('}') + w.b.WriteRune(r) + w.state = codeJavadocText + } else { + w.b.WriteString("}{@code") + if r != '\n' { + w.b.WriteRune(' ') + } + w.b.WriteRune(r) + w.state = codeJavadocCode + } + } + + w.lastWroteAsterisk = r == '*' +} + +// Returns the string of escaped code that has been written to the writer so far. +func (w *codeJavadocWriter) String() string { + return w.b.String() +} + +// mapComment maps 2 functions over `comment`. It maps `codeF` over the section of `comment` that is made up of +// Java code (as defined by markdown code fences) and `nonCodeF` over all other parts of `comment`. +func mapComment(comment string, codeF, nonCodeF func(string) string) string { var dst strings.Builder dst.Grow(len(comment)) for { + // At any given point in this loop, we are looking for the next code block in the comment. We do this by hunting + // for the start marker and from there an end marker, using these to calculate the placement of the code within: + // + // ... ```java\npublic class C{}\n``` ... + // ^ ^ ^ ^ + // | | | | + // codeStartMarkerIndex | codeEndIndex + // | | + // codeStartIndex codeEndMarkerIndex + // + // The code inside (trimmed of the markers) is passed to `codeF` for processing. Once we are done, we move to + // the point after the code block (`codeEndIndex`) and go around for another iteration. Any non-code that + // appears before the block (when the `codeStartMarkerIndex` is not zero) or after the last block (outside the + // loop) is passed to `nonCodeF`. + const ( codeStartMarker = "```java" codeEndMarker = "```" ) - codeStart := strings.Index(comment, codeStartMarker) - if codeStart == -1 { + codeStartMarkerIndex := strings.Index(comment, codeStartMarker) + if codeStartMarkerIndex == -1 { break } - codeStartOffset := codeStart + len(codeStartMarker) + codeStartIndex := codeStartMarkerIndex + len(codeStartMarker) - codeEnd := strings.Index(comment[codeStartOffset:], codeEndMarker) - if codeEnd == -1 { + codeEndMarkerIndex := strings.Index(comment[codeStartIndex:], codeEndMarker) + if codeEndMarkerIndex == -1 { break } - codeEnd += codeStartOffset // Make codeEnd relative to comment - codeEndOffset := codeEnd + len(codeEndMarker) - // We have now found a code block: + codeEndMarkerIndex += codeStartIndex + codeEndIndex := codeEndMarkerIndex + len(codeEndMarker) + + code := comment[codeStartIndex:codeEndMarkerIndex] - // Write all non-copied text proceeding the code block - if codeStart != 0 { - dst.WriteString(nonCode(comment[:codeStart])) + if codeStartMarkerIndex != 0 { + dst.WriteString(nonCodeF(comment[:codeStartMarkerIndex])) } - // Then write the code block itself - dst.WriteString(code(comment[codeStart:codeEndOffset])) - // Then adjust copiedTo to start after the code block - comment = comment[codeEndOffset:] + dst.WriteString(codeF(code)) + comment = comment[codeEndIndex:] } // Copy any remaining non-code into the dst buffer. if comment != "" { - dst.WriteString(nonCode(comment)) + dst.WriteString(nonCodeF(comment)) } return dst.String() @@ -114,7 +319,6 @@ func mapCommentHelper(comment string, code, nonCode func(string) string) string func formatBlockComment(comment, indent string) string { prefix := fmt.Sprintf("%s * ", indent) - comment = strings.ReplaceAll(comment, "*/", "*{@literal /}") lines := strings.Split(comment, "\n") if nth := len(lines) - 1; nth >= 0 && lines[nth] != "" { diff --git a/pkg/codegen/java/utilities_test.go b/pkg/codegen/java/utilities_test.go index 83965c0eda0..5586ebb7872 100644 --- a/pkg/codegen/java/utilities_test.go +++ b/pkg/codegen/java/utilities_test.go @@ -1,5 +1,18 @@ -// Copyright 2022, Pulumi Corporation. All rights reserved. +// Copyright 2022-2024, Pulumi Corporation. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//nolint:goconst package java import ( @@ -10,7 +23,7 @@ import ( "github.com/stretchr/testify/require" ) -func TestFormatForignComments(t *testing.T) { +func TestFormatForeignComments(t *testing.T) { tests := []struct { name string input string @@ -52,18 +65,18 @@ func TestFormatForignComments(t *testing.T) { name: "@ escapes", input: `@foo ` + "```java" + ` -@ does not need to be escaped -& must be escaped -< and > must be escaped +@ needs to be escaped +& doesn't need to be escaped +< and > don't need to be escaped ` + "```" + ` @foo `, expected: autogold.Expect(` * {@literal @}foo *
  * {@code
- * {@literal @} does not need to be escaped
- * & must be escaped
- * < and > must be escaped
+ * }{@literal @}{@code  needs to be escaped
+ * & doesn't need to be escaped
+ * < and > don't need to be escaped
  * }
  * 
* {@literal @}foo @@ -80,14 +93,154 @@ func TestFormatForignComments(t *testing.T) { { name: "escape @ variations", input: ` --@foo- -- @foo- The space here should go away since it will *always* be inserted by the {@literal} escape -- @@@foo - +- @Foo @ bar +- Foo @ bar +- Foo bar @@ `, - //nolint:lll - expected: autogold.Expect(` * -{@literal @}foo- - * -{@literal @}foo- The space here should go away since it will *always* be inserted by the {{@literal @}literal} escape - * -{@literal @@@}foo - + expected: autogold.Expect(` * - {@literal @}Foo {@literal @} bar + * - Foo {@literal @} bar + * - Foo bar {@literal @@} + * `), + }, + { + name: "balanced braces text", + input: "This is some text with {balanced} braces", + expected: autogold.Expect(` * This is some text with {balanced} braces + * `), + }, + { + name: "unbalanced braces text", + input: "This is some text with {unbalanced braces", + expected: autogold.Expect(` * This is some text with {unbalanced braces + * `), + }, + { + name: "balanced braces code", + input: "```java" + ` +This is some code with {balanced} braces +` + "```", + expected: autogold.Expect(` *
+ * {@code
+ * This is some code with {balanced} braces
+ * }
+ * 
+ * `), + }, + { + name: "unbalanced braces code", + input: "```java" + ` +This is some code with {unbalanced braces +` + "```", + expected: autogold.Expect(` *
+ * {@code
+ * This is some code with }{{@code unbalanced braces
+ * }
+ * 
+ * `), + }, + { + name: "balanced braces code with @", + input: "```java" + ` +This is some code with {balanced} braces and @ +` + "```", + expected: autogold.Expect(` *
+ * {@code
+ * This is some code with }{{@code balanced}}{@code  braces and }{@literal @}{@code
+ * }
+ * 
+ * `), + }, + { + name: "unbalanced braces code with @", + input: "```java" + ` +This is some @@ code with {unbalanced braces and @ +` + "```", + expected: autogold.Expect(` *
+ * {@code
+ * This is some }{@literal @@}{@code  code with }{{@code unbalanced braces and }{@literal @}{@code
+ * }
+ * 
+ * `), + }, + { + name: "code with Javadoc-looking contents", + input: "```java" + ` +This is some code with {@code stuff} in it +` + "```", + expected: autogold.Expect(` *
+ * {@code
+ * This is some code with }{{@literal @}{@code code stuff}}{@code  in it
+ * }
+ * 
+ * `), + }, + { + name: "code with repeated escapes", + input: "```java" + ` +This is some @@ code with {{ repeated escapes }} that should be @@ chained together +` + "```", + // nolint:lll + expected: autogold.Expect(` *
+ * {@code
+ * This is some }{@literal @@}{@code  code with }{{{@code  repeated escapes }}}{@code  that should be }{@literal @@}{@code  chained together
+ * }
+ * 
+ * `), + }, + { + name: "code with comment terminators", + input: "```java" + ` +This is some code with */ in it, as well as {@code stuff} +` + "```", + expected: autogold.Expect(` *
+ * {@code
+ * This is some code with *}/{@code  in it, as well as }{{@literal @}{@code code stuff}}{@code
+ * }
+ * 
+ * `), + }, + { + name: "messy code and text", + input: ` +Foo {} @@bar */ + +This is */ @*/ + +Should be escaped + +` + "```java" + ` +This is some @code with */*/ in it, as well as {{@code stuff} + +xx @SuppressWarnings +public static void main(String[] args) {} + +public static bar(String s) { + baz(s, s, s); +} + +x @ foo +`, + // nolint:lll + expected: autogold.Expect(` * Foo {} {@literal @@}bar */ + * + * This is */ {@literal @}*/ + * + * <html>Should be escaped</html> + * + *
+ * {@code
+ * This is some }{@literal @}{@code code with *}/{@code *}/{@code  in it, as well as }{{{@literal @}{@code code stuff}}{@code
+ *
+ * xx }{@literal @}{@code SuppressWarnings
+ * public static void main(String[] args) }{}{@code
+ *
+ * public static bar(String s) }{{@code
+ *   baz(s, s, s);
+ * }}{@code
+ *
+ * x }{@literal @}{@code  foo
+ * }
+ * 
* `), }, }