Skip to content

Commit

Permalink
refactor shard name logic
Browse files Browse the repository at this point in the history
We had 2 copies of this logic in the code and this bit me when I
introduced id-based shards

indexbuilder.go seems an "ok" place to put. We cannot put it in the
shards package because of circular dependencies.

Test plan:
refactor, so relying on CI
  • Loading branch information
stefanhengl committed Dec 13, 2024
1 parent c03b77f commit 8d28d72
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 32 deletions.
14 changes: 1 addition & 13 deletions build/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import (
"crypto/sha1"
"flag"
"fmt"
"io"
"log"
"net/url"
"os"
Expand Down Expand Up @@ -334,25 +333,14 @@ func (o *Options) SetDefaults() {
}
}

func hashString(s string) string {
h := sha1.New()
_, _ = io.WriteString(h, s)
return fmt.Sprintf("%x", h.Sum(nil))
}

// ShardName returns the name the given index shard.
func (o *Options) shardName(n int) string {
return o.shardNameVersion(zoekt.IndexFormatVersion, n)
}

func (o *Options) shardNameVersion(version, n int) string {
prefix := url.QueryEscape(cmp.Or(o.ShardPrefix, o.RepositoryDescription.Name))
if len(prefix) > 200 {
prefix = prefix[:200] + hashString(prefix)[:8]
}
shardName := filepath.Join(o.IndexDir,
fmt.Sprintf("%s_v%d.%05d.zoekt", prefix, version, n))
return shardName
return zoekt.ShardName(o.IndexDir, prefix, version, n)
}

type IndexState string
Expand Down
3 changes: 1 addition & 2 deletions cmd/zoekt-sourcegraph-indexserver/cleanup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package main

import (
"fmt"
"net/url"
"os"
"path/filepath"
"reflect"
Expand All @@ -23,7 +22,7 @@ func TestCleanup(t *testing.T) {
return shard{
RepoID: fakeID(name),
RepoName: name,
Path: fmt.Sprintf("%s_v%d.%05d.zoekt", url.QueryEscape(name), 15, n),
Path: zoekt.ShardName("", name, 15, n),
ModTime: mtime,
RepoTombstone: false,
}
Expand Down
10 changes: 10 additions & 0 deletions indexbuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -583,3 +583,13 @@ func (t *DocChecker) clearTrigrams(maxTrigramCount int) {
delete(t.trigrams, key)
}
}

// ShardName returns the name of the shard for the given prefix, version, and
// shard number.
func ShardName(indexDir string, prefix string, version, n int) string {
prefix = url.QueryEscape(prefix)
if len(prefix) > 200 {
prefix = prefix[:200] + hashString(prefix)[:8]
}
return filepath.Join(indexDir, fmt.Sprintf("%s_v%d.%05d.zoekt", prefix, version, n))
}
49 changes: 49 additions & 0 deletions indexbuilder_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package zoekt

import (
"strings"
"testing"
)

func TestShardName(t *testing.T) {
tests := []struct {
name string
indexDir string
prefix string
version int
shardNum int
expected string
}{
{
name: "short prefix",
indexDir: "index",
prefix: "short",
version: 1,
shardNum: 42,
expected: "index/short_v1.00042.zoekt",
},
{
name: "long prefix truncated",
indexDir: "index",
prefix: strings.Repeat("a", 300),
version: 2,
shardNum: 1,
expected: "index/" + strings.Repeat("a", 200) + "003ef1ba" + "_v2.00001.zoekt",
},
{
name: "empty indexDir",
prefix: "short",
version: 1,
expected: "short_v1.00000.zoekt",
},
}

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
actual := ShardName(test.indexDir, test.prefix, test.version, test.shardNum)
if actual != test.expected {
t.Errorf("expected %q, got %q", test.expected, actual)
}
})
}
}
18 changes: 4 additions & 14 deletions merge.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import (
"fmt"
"io"
"log"
"net/url"
"os"
"path/filepath"
"runtime"
Expand Down Expand Up @@ -170,10 +169,10 @@ func explode(dstDir string, f IndexFile, ibFuncs ...indexBuilderFunc) (map[strin
prefix = ib.repoList[0].Name
}

fn := filepath.Join(dstDir, shardName(prefix, ib.indexFormatVersion, 0))
fnTmp := fn + ".tmp"
shardNames[fnTmp] = fn
return builderWriteAll(fnTmp, ib)
shardName := ShardName(dstDir, prefix, ib.indexFormatVersion, 0)
shardNameTmp := shardName + ".tmp"
shardNames[shardNameTmp] = shardName
return builderWriteAll(shardNameTmp, ib)
}

var ib *IndexBuilder
Expand Down Expand Up @@ -264,12 +263,3 @@ func hashString(s string) string {
_, _ = io.WriteString(h, s)
return fmt.Sprintf("%x", h.Sum(nil))
}

// copied from builder package to avoid circular imports.
func shardName(prefix string, version, n int) string {
abs := url.QueryEscape(prefix)
if len(abs) > 200 {
abs = abs[:200] + hashString(abs)[:8]
}
return fmt.Sprintf("%s_v%d.%05d.zoekt", abs, version, n)
}
6 changes: 3 additions & 3 deletions read_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -370,10 +370,10 @@ func TestBackwardsCompat(t *testing.T) {
t.Fatal(err)
}

outname := fmt.Sprintf("testdata/backcompat/new_v%d.%05d.zoekt", IndexFormatVersion, 0)
t.Log("writing new file", outname)
outName := ShardName("testdata/backcompat", "new", IndexFormatVersion, 0)
t.Log("writing new file", outName)

err = os.WriteFile(outname, buf.Bytes(), 0o644)
err = os.WriteFile(outName, buf.Bytes(), 0o644)
if err != nil {
t.Fatalf("Creating output file: %v", err)
}
Expand Down

0 comments on commit 8d28d72

Please sign in to comment.