Skip to content

Commit

Permalink
web: escape + as %2B in file path templates
Browse files Browse the repository at this point in the history
When constructing a shard we specify templates for constructing a URL.
Finally those URLs end up going via html/template which has pretty
strict escaping rules. This commit makes two changes:

URL construction via text/template. We still get the safety benefits
later on when finally rendering the output, but given we are
constructing URLs it makes more sense to use text/template.

Special escaping of + in URLs. I couldn't convince html/template to not
break URls containing + in it. So instead we use + escaped to %2B. I
tested gerrit, github and sourcegraph with %2B in filenames and they all
worked.

To do the above I introduced a template function called URLJoinPath
which is a wrapper around url.JoinPath with the additional behaviour
around + escaping.

Test Plan: Did lots of updates in tests. See diff.
  • Loading branch information
keegancsmith committed Oct 7, 2024
1 parent 6501360 commit 09c5343
Show file tree
Hide file tree
Showing 6 changed files with 191 additions and 31 deletions.
44 changes: 30 additions & 14 deletions gitindex/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,22 +90,38 @@ func setTemplates(repo *zoekt.Repository, u *url.URL, typ string) error {
u.User = nil
}

// helper to generate u.JoinPath as a template
varVersion := ".Version"
varPath := ".Path"
urlJoinPath := func(elem ...string) string {
elem = append([]string{u.String()}, elem...)
var parts []string
for _, e := range elem {
if e == varVersion || e == varPath {
parts = append(parts, e)
} else {
parts = append(parts, strconv.Quote(e))
}
}
return fmt.Sprintf("{{URLJoinPath %s}}", strings.Join(parts, " "))
}

repo.URL = u.String()
switch typ {
case "gitiles":
// eg. https://gerrit.googlesource.com/gitiles/+/master/tools/run_dev.sh#20
repo.CommitURLTemplate = u.String() + "/+/{{.Version}}"
repo.FileURLTemplate = u.String() + "/+/{{.Version}}/{{.Path}}"
repo.CommitURLTemplate = urlJoinPath("+", varVersion)
repo.FileURLTemplate = urlJoinPath("+", varVersion, varPath)
repo.LineFragmentTemplate = "#{{.LineNumber}}"
case "github":
// eg. https://github.com/hanwen/go-fuse/blob/notify/genversion.sh#L10
repo.CommitURLTemplate = u.String() + "/commit/{{.Version}}"
repo.FileURLTemplate = u.String() + "/blob/{{.Version}}/{{.Path}}"
repo.CommitURLTemplate = urlJoinPath("commit", varVersion)
repo.FileURLTemplate = urlJoinPath("blob", varVersion, varPath)
repo.LineFragmentTemplate = "#L{{.LineNumber}}"
case "cgit":
// http://git.savannah.gnu.org/cgit/lilypond.git/tree/elisp/lilypond-mode.el?h=dev/philh&id=b2ca0fefe3018477aaca23b6f672c7199ba5238e#n100
repo.CommitURLTemplate = u.String() + "/commit/?id={{.Version}}"
repo.FileURLTemplate = u.String() + "/tree/{{.Path}}/?id={{.Version}}"
repo.CommitURLTemplate = urlJoinPath("commit") + "/?id={{.Version}}"
repo.FileURLTemplate = urlJoinPath("tree", varPath) + "/?id={{.Version}}"
repo.LineFragmentTemplate = "#n{{.LineNumber}}"
case "gitweb":
// https://gerrit.libreoffice.org/gitweb?p=online.git;a=blob;f=Makefile.am;h=cfcfd7c36fbae10e269653dc57a9b68c92d4c10b;hb=848145503bf7b98ce4a4aa0a858a0d71dd0dbb26#l10
Expand All @@ -115,29 +131,29 @@ func setTemplates(repo *zoekt.Repository, u *url.URL, typ string) error {
case "source.bazel.build":
// https://source.bazel.build/bazel/+/57bc201346e61c62a921c1cbf32ad24f185c10c9
// https://source.bazel.build/bazel/+/57bc201346e61c62a921c1cbf32ad24f185c10c9:tools/cpp/BUILD.empty;l=10
repo.CommitURLTemplate = u.String() + "/+/{{.Version}}"
repo.FileURLTemplate = u.String() + "/+/{{.Version}}:{{.Path}}"
repo.CommitURLTemplate = u.String() + "/%2B/{{.Version}}"
repo.FileURLTemplate = u.String() + "/%2B/{{.Version}}:{{.Path}}"
repo.LineFragmentTemplate = ";l={{.LineNumber}}"
case "bitbucket-server":
// https://<bitbucketserver-host>/projects/<project>/repos/<repo>/commits/5be7ca73b898bf17a08e607918accfdeafe1e0bc
// https://<bitbucketserver-host>/projects/<project>/repos/<repo>/browse/<file>?at=5be7ca73b898bf17a08e607918accfdeafe1e0bc
repo.CommitURLTemplate = u.String() + "/commits/{{.Version}}"
repo.FileURLTemplate = u.String() + "/{{.Path}}?at={{.Version}}"
repo.CommitURLTemplate = urlJoinPath("commits", varVersion)
repo.FileURLTemplate = urlJoinPath(varPath) + "?at={{.Version}}"
repo.LineFragmentTemplate = "#{{.LineNumber}}"
case "gitlab":
// https://gitlab.com/gitlab-org/omnibus-gitlab/-/commit/b152c864303dae0e55377a1e2c53c9592380ffed
// https://gitlab.com/gitlab-org/omnibus-gitlab/-/blob/aad04155b3f6fc50ede88aedaee7fc624d481149/files/gitlab-config-template/gitlab.rb.template
repo.CommitURLTemplate = u.String() + "/-/commit/{{.Version}}"
repo.FileURLTemplate = u.String() + "/-/blob/{{.Version}}/{{.Path}}"
repo.CommitURLTemplate = urlJoinPath("-/commit", varVersion)
repo.FileURLTemplate = urlJoinPath("-/blob", varVersion, varPath)
repo.LineFragmentTemplate = "#L{{.LineNumber}}"
case "gitea":
repo.CommitURLTemplate = u.String() + "/commit/{{.Version}}"
repo.CommitURLTemplate = urlJoinPath("commit", varVersion)
// NOTE The `display=source` query parameter is required to disable file rendering.
// Since line numbers are disabled in rendered files, you wouldn't be able to jump to
// a line without `display=source`. This is supported since gitea 1.17.0.
// When /src/{{.Version}} is used it will redirect to /src/commit/{{.Version}},
// but the query parameters are obmitted.
repo.FileURLTemplate = u.String() + "/src/commit/{{.Version}}/{{.Path}}?display=source"
repo.FileURLTemplate = urlJoinPath("src/commit", varVersion, varPath) + "?display=source"
repo.LineFragmentTemplate = "#L{{.LineNumber}}"
default:
return fmt.Errorf("URL scheme type %q unknown", typ)
Expand Down
95 changes: 93 additions & 2 deletions gitindex/index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@ import (
"bytes"
"context"
"fmt"
"net/url"
"os"
"os/exec"
"path/filepath"
"runtime"
"sort"
"strings"
"testing"

"github.com/go-git/go-git/v5"
Expand Down Expand Up @@ -770,7 +772,7 @@ func runScript(t *testing.T, cwd string, script string) {
}
}

func TestSetTemplate(t *testing.T) {
func TestSetTemplates_e2e(t *testing.T) {
repositoryDir := t.TempDir()

// setup: initialize the repository and all of its branches
Expand All @@ -781,11 +783,100 @@ func TestSetTemplate(t *testing.T) {
t.Fatalf("setTemplatesFromConfig: %v", err)
}

if got, want := desc.FileURLTemplate, "https://github.com/sourcegraph/zoekt/blob/{{.Version}}/{{.Path}}"; got != want {
if got, want := desc.FileURLTemplate, `{{URLJoinPath "https://github.com/sourcegraph/zoekt" "blob" .Version .Path}}`; got != want {
t.Errorf("got %q, want %q", got, want)
}
}

func TestSetTemplates(t *testing.T) {
base := "https://example.com/repo/name"
version := "VERSION"
path := "dir/name.txt"
lineNumber := 10
cases := []struct {
typ string
commit string
file string
line string
}{{
typ: "gitiles",
commit: "https://example.com/repo/name/%2B/VERSION",
file: "https://example.com/repo/name/%2B/VERSION/dir/name.txt",
line: "#10",
}, {
typ: "github",
commit: "https://example.com/repo/name/commit/VERSION",
file: "https://example.com/repo/name/blob/VERSION/dir/name.txt",
line: "#L10",
}, {
typ: "cgit",
commit: "https://example.com/repo/name/commit/?id=VERSION",
file: "https://example.com/repo/name/tree/dir/name.txt/?id=VERSION",
line: "#n10",
}, {
typ: "gitweb",
commit: "https://example.com/repo/name;a=commit;h=VERSION",
file: "https://example.com/repo/name;a=blob;f=dir/name.txt;hb=VERSION",
line: "#l10",
}, {
typ: "source.bazel.build",
commit: "https://example.com/repo/name/%2B/VERSION",
file: "https://example.com/repo/name/%2B/VERSION:dir/name.txt",
line: ";l=10",
}, {
typ: "bitbucket-server",
commit: "https://example.com/repo/name/commits/VERSION",
file: "https://example.com/repo/name/dir/name.txt?at=VERSION",
line: "#10",
}, {
typ: "gitlab",
commit: "https://example.com/repo/name/-/commit/VERSION",
file: "https://example.com/repo/name/-/blob/VERSION/dir/name.txt",
line: "#L10",
}, {
typ: "gitea",
commit: "https://example.com/repo/name/commit/VERSION",
file: "https://example.com/repo/name/src/commit/VERSION/dir/name.txt?display=source",
line: "#L10",
}}

for _, tc := range cases {
t.Run(tc.typ, func(t *testing.T) {
assertOutput := func(templateText string, want string) {
t.Helper()

tt, err := zoekt.ParseTemplate(templateText)
if err != nil {
t.Fatal(err)
}

var sb strings.Builder
err = tt.Execute(&sb, map[string]any{
"Version": version,
"Path": path,
"LineNumber": lineNumber,
})
if err != nil {
t.Fatal(err)
}
if got := sb.String(); got != want {
t.Fatalf("want: %q\ngot: %q", want, got)
}
}

var repo zoekt.Repository
u, _ := url.Parse(base)
err := setTemplates(&repo, u, tc.typ)
if err != nil {
t.Fatal(err)
}
assertOutput(repo.CommitURLTemplate, tc.commit)
assertOutput(repo.FileURLTemplate, tc.file)
assertOutput(repo.LineFragmentTemplate, tc.line)
})
}
}

func BenchmarkPrepareNormalBuild(b *testing.B) {
// NOTE: To run the benchmark, download a large repo (like github.com/chromium/chromium/) and change this to its path.
repoDir := "/path/to/your/repo"
Expand Down
36 changes: 34 additions & 2 deletions indexbuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,13 @@ import (
"encoding/binary"
"fmt"
"hash/crc64"
"html/template"
"log"
"net/url"
"os"
"path/filepath"
"sort"
"strings"
"text/template"
"time"
"unicode/utf8"

Expand Down Expand Up @@ -216,13 +218,43 @@ type IndexBuilder struct {

func (d *Repository) verify() error {
for _, t := range []string{d.FileURLTemplate, d.LineFragmentTemplate, d.CommitURLTemplate} {
if _, err := template.New("").Parse(t); err != nil {
if _, err := ParseTemplate(t); err != nil {
return err
}
}
return nil
}

func urlJoinPath(base string, elem ...string) string {
// golangs html/template always escapes "+" appearing in an HTML attribute
// [1]. We may even want to treat more characters, differently but this
// atleast makes it possible to visit URLs like [2].
//
// We only do this to elem since base will normally be a hardcoded string.
//
// [1]: https://sourcegraph.com/github.com/golang/[email protected]/-/blob/src/html/template/html.go?L71-80
// [2]: https://github.com/apple/swift-system/blob/main/Sources/System/Util+StringArray.swift
elem = append([]string{}, elem...) // copy to mutate
for i := range elem {
elem[i] = strings.ReplaceAll(elem[i], "+", "%2B")
}
u, err := url.JoinPath(base, elem...)
if err != nil {
return "#!error: " + err.Error()
}
return u
}

// ParseTemplate will parse the templates for FileURLTemplate,
// LineFragmentTemplate and CommitURLTemplate.
//
// It makes available the extra function UrlJoinPath.
func ParseTemplate(text string) (*template.Template, error) {
return template.New("").Funcs(template.FuncMap{
"URLJoinPath": urlJoinPath,
}).Parse(text)
}

// ContentSize returns the number of content bytes so far ingested.
func (b *IndexBuilder) ContentSize() uint32 {
// Add the name too so we don't skip building index if we have
Expand Down
13 changes: 7 additions & 6 deletions web/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,16 +86,17 @@ func TestBasic(t *testing.T) {
b, err := zoekt.NewIndexBuilder(&zoekt.Repository{
Name: "name",
URL: "repo-url",
CommitURLTemplate: "{{.Version}}",
FileURLTemplate: "file-url",
LineFragmentTemplate: "#line",
CommitURLTemplate: `{{ URLJoinPath "https://github.com/org/repo/commit/" .Version}}`,
FileURLTemplate: `{{ URLJoinPath "https://github.com/org/repo/blob/" .Version .Path}}`,
LineFragmentTemplate: "#L{{.LineNumber}}",
Branches: []zoekt.RepositoryBranch{{Name: "master", Version: "1234"}},
})
if err != nil {
t.Fatalf("NewIndexBuilder: %v", err)
}
if err := b.Add(zoekt.Document{
Name: "f2",
// use a name which requires correct escaping. https://github.com/sourcegraph/zoekt/issues/807
Name: "foo/bar+baz",
Content: []byte("to carry water in the no later bla"),
// --------------0123456789012345678901234567890123
// --------------0 1 2 3
Expand Down Expand Up @@ -123,15 +124,15 @@ func TestBasic(t *testing.T) {
for req, needles := range map[string][]string{
"/": {"from 1 repositories"},
"/search?q=water": {
"href=\"file-url#line",
`href="https://github.com/org/repo/blob/1234/foo/bar%2Bbaz"`,
"carry <b>water</b>",
},
"/search?q=r:": {
"1234\">master",
"Found 1 repositories",
nowStr,
"repo-url\">name",
"1 files (36B)",
"1 files (45B)",
},
"/search?q=magic": {
`value=magic`,
Expand Down
28 changes: 24 additions & 4 deletions web/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"strconv"
"strings"
"sync"
texttemplate "text/template"
"time"

"github.com/grafana/regexp"
Expand Down Expand Up @@ -116,8 +117,9 @@ type Server struct {

startTime time.Time

templateMu sync.Mutex
templateCache map[string]*template.Template
templateMu sync.Mutex
templateCache map[string]*template.Template
textTemplateCache map[string]*texttemplate.Template

lastStatsMu sync.Mutex
lastStats *zoekt.RepoStats
Expand All @@ -134,13 +136,30 @@ func (s *Server) getTemplate(str string) *template.Template {

t, err := template.New("cache").Parse(str)
if err != nil {
log.Printf("template parse error: %v", err)
log.Printf("html template parse error: %v", err)
t = template.Must(template.New("empty").Parse(""))
}
s.templateCache[str] = t
return t
}

func (s *Server) getTextTemplate(str string) *texttemplate.Template {
s.templateMu.Lock()
defer s.templateMu.Unlock()
t := s.textTemplateCache[str]
if t != nil {
return t
}

t, err := zoekt.ParseTemplate(str)
if err != nil {
log.Printf("text template parse error: %v", err)
t = texttemplate.Must(texttemplate.New("empty").Parse(""))
}
s.textTemplateCache[str] = t
return t
}

func NewMux(s *Server) (*http.ServeMux, error) {
s.print = s.Top.Lookup("print")
if s.print == nil {
Expand All @@ -162,6 +181,7 @@ func NewMux(s *Server) (*http.ServeMux, error) {
}

s.templateCache = map[string]*template.Template{}
s.textTemplateCache = map[string]*texttemplate.Template{}
s.startTime = time.Now()

mux := http.NewServeMux()
Expand Down Expand Up @@ -510,7 +530,7 @@ func (s *Server) serveListReposErr(q query.Q, qStr string, r *http.Request) (*Re
}

for _, r := range repos.Repos {
t := s.getTemplate(r.Repository.CommitURLTemplate)
t := s.getTextTemplate(r.Repository.CommitURLTemplate)

repo := Repository{
Name: r.Repository.Name,
Expand Down
Loading

0 comments on commit 09c5343

Please sign in to comment.