-
Notifications
You must be signed in to change notification settings - Fork 100
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
web: escape + as %2B in file path templates
When constructing a shard we specify templates for constructing a URL. Those URLs eventually pass through html/template, which has pretty strict escaping rules. This commit makes two changes: (1) URL construction via text/template. We still get the safety benefits later on when finally rendering the output, but given that we are constructing URLs it makes more sense to use text/template. (2) Special escaping of + in URLs. I couldn't convince html/template not to break URLs containing a +, so instead we escape + as %2B. I tested Gerrit, GitHub and Sourcegraph with %2B in filenames and they all worked. To do the above I introduced a template function called URLJoinPath, which is a wrapper around url.JoinPath with the additional behaviour around + escaping. Test Plan: Did lots of updates in tests. See diff.
- Loading branch information
1 parent
6501360
commit 09c5343
Showing
6 changed files
with
191 additions
and
31 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,11 +19,13 @@ import ( | |
"encoding/binary" | ||
"fmt" | ||
"hash/crc64" | ||
"html/template" | ||
"log" | ||
"net/url" | ||
"os" | ||
"path/filepath" | ||
"sort" | ||
"strings" | ||
"text/template" | ||
"time" | ||
"unicode/utf8" | ||
|
||
|
@@ -216,13 +218,43 @@ type IndexBuilder struct { | |
|
||
func (d *Repository) verify() error { | ||
for _, t := range []string{d.FileURLTemplate, d.LineFragmentTemplate, d.CommitURLTemplate} { | ||
if _, err := template.New("").Parse(t); err != nil { | ||
if _, err := ParseTemplate(t); err != nil { | ||
return err | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
// urlJoinPath joins base and the path elements elem with url.JoinPath and
// returns the resulting URL as a string.
//
// Every "+" in an element is rewritten to "%2B", and an element that is not a
// validly percent-encoded path has its "%" characters rewritten to "%25".
// base is passed through untouched. The caller's elem slice is never mutated.
//
// If url.JoinPath fails (for example because base does not parse), the
// returned string is "#!error: " followed by the error text instead of a URL.
func urlJoinPath(base string, elem ...string) string {
	// Go's html/template always escapes "+" appearing in an HTML attribute
	// [1]. We may even want to treat more characters differently, but this at
	// least makes it possible to visit URLs like [2].
	//
	// We only do this to elem since base will normally be a hardcoded string.
	//
	// [1]: https://sourcegraph.com/github.com/golang/go/-/blob/src/html/template/html.go
	// [2]: https://github.com/apple/swift-system/blob/main/Sources/System/Util+StringArray.swift
	escaped := make([]string, len(elem)) // fresh slice: leave the caller's untouched
	for i, e := range elem {
		// url.JoinPath interprets elements as percent-encoded path text. An
		// element such as "100%.txt" is not decodable, and JoinPath does not
		// surface that as an error, so the link would come back without the
		// file path. Such an element must be a raw name, so encode its "%"
		// characters. Elements that already decode cleanly are left as they
		// are, preserving existing escapes.
		if _, err := url.PathUnescape(e); err != nil {
			e = strings.ReplaceAll(e, "%", "%25")
		}
		escaped[i] = strings.ReplaceAll(e, "+", "%2B")
	}

	u, err := url.JoinPath(base, escaped...)
	if err != nil {
		return "#!error: " + err.Error()
	}
	return u
}
|
||
// ParseTemplate will parse the templates for FileURLTemplate, | ||
// LineFragmentTemplate and CommitURLTemplate. | ||
// | ||
// It makes available the extra function UrlJoinPath. | ||
func ParseTemplate(text string) (*template.Template, error) { | ||
return template.New("").Funcs(template.FuncMap{ | ||
"URLJoinPath": urlJoinPath, | ||
}).Parse(text) | ||
} | ||
|
||
// ContentSize returns the number of content bytes so far ingested. | ||
func (b *IndexBuilder) ContentSize() uint32 { | ||
// Add the name too so we don't skip building index if we have | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.