Skip to content

Commit

Permalink
Debug: make indexing timeout configurable (#676)
Browse files Browse the repository at this point in the history
On large repos, indexing might take quite a while and hit the indexing timeout.
This change helps debug these situations:
* Make the indexing timeout configurable through an env variable
`INDEXING_TIMEOUT`
* Add more info to progress logging: log the total number of files being
indexed, plus the file count per shard
  • Loading branch information
jtibshirani authored and keegancsmith committed Nov 1, 2023
1 parent ebf3aed commit 379ed5a
Show file tree
Hide file tree
Showing 6 changed files with 31 additions and 9 deletions.
7 changes: 5 additions & 2 deletions build/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -1088,8 +1088,11 @@ func (b *Builder) writeShard(fn string, ib *zoekt.IndexBuilder) (*finishedShard,
return nil, err
}

log.Printf("finished %s: %d index bytes (overhead %3.1f)", fn, fi.Size(),
float64(fi.Size())/float64(ib.ContentSize()+1))
log.Printf("finished shard %s: %d index bytes (overhead %3.1f), %d files processed \n",
fn,
fi.Size(),
float64(fi.Size())/float64(ib.ContentSize()+1),
ib.NumFiles())

return &finishedShard{f.Name(), fn}, nil
}
Expand Down
10 changes: 5 additions & 5 deletions cmd/zoekt-sourcegraph-indexserver/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,7 @@ import (
sglog "github.com/sourcegraph/log"
)

// indexTimeout defines how long the indexserver waits before
// killing an indexing job.
const indexTimeout = 1*time.Hour + 30*time.Minute // an index should never take longer than an hour and a half
// defaultIndexingTimeout is the fallback indexing timeout (an hour and a
// half), used when the INDEXING_TIMEOUT environment variable does not
// supply a different value.
const defaultIndexingTimeout = 90 * time.Minute

// IndexOptions are the options that Sourcegraph can set via its search
// configuration endpoint.
Expand Down Expand Up @@ -163,6 +161,9 @@ type gitIndexConfig struct {
// The primary purpose of this configuration option is to be able to provide a stub
// implementation for this in our test suite. All other callers should use build.Options.FindRepositoryMetadata().
findRepositoryMetadata func(args *indexArgs) (repository *zoekt.Repository, metadata *zoekt.IndexMetadata, ok bool, err error)

// timeout defines how long the index server waits before killing an indexing job.
timeout time.Duration
}

func gitIndex(c gitIndexConfig, o *indexArgs, sourcegraph Sourcegraph, l sglog.Logger) error {
Expand All @@ -182,8 +183,7 @@ func gitIndex(c gitIndexConfig, o *indexArgs, sourcegraph Sourcegraph, l sglog.L
}

buildOptions := o.BuildOptions()

ctx, cancel := context.WithTimeout(context.Background(), indexTimeout)
ctx, cancel := context.WithTimeout(context.Background(), c.timeout)
defer cancel()

gitDir, err := tmpGitDir(o.Name)
Expand Down
12 changes: 11 additions & 1 deletion cmd/zoekt-sourcegraph-indexserver/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ var (
Help: "A histogram of latencies for indexing a repository.",
Buckets: prometheus.ExponentialBucketsRange(
(100 * time.Millisecond).Seconds(),
(40*time.Minute + indexTimeout).Seconds(), // add an extra 40 minutes to account for the time it takes to clone the repo
(40*time.Minute + defaultIndexingTimeout).Seconds(), // add an extra 40 minutes to account for the time it takes to clone the repo
20),
}, []string{
"state", // state is an indexState
Expand Down Expand Up @@ -188,6 +188,9 @@ type Server struct {
hostname string

mergeOpts mergeOpts

// timeout defines how long the index server waits before killing an indexing job.
timeout time.Duration
}

var debug = log.New(io.Discard, "", log.LstdFlags)
Expand Down Expand Up @@ -585,6 +588,7 @@ func (s *Server) Index(args *indexArgs) (state indexState, err error) {
findRepositoryMetadata: func(args *indexArgs) (repository *zoekt.Repository, metadata *zoekt.IndexMetadata, ok bool, err error) {
return args.BuildOptions().FindRepositoryMetadata()
},
timeout: s.timeout,
}

err = gitIndex(c, args, s.Sourcegraph, s.logger)
Expand Down Expand Up @@ -1369,6 +1373,11 @@ func newServer(conf rootConfig) (*Server, error) {
debug.Printf("skipping generating symbols metadata for: %s", joinStringSet(reposShouldSkipSymbolsCalculation, ", "))
}

indexingTimeout := getEnvWithDefaultDuration("INDEXING_TIMEOUT", defaultIndexingTimeout)
if indexingTimeout != defaultIndexingTimeout {
debug.Printf("using configured indexing timeout: %s", indexingTimeout)
}

var sg Sourcegraph
if rootURL.IsAbs() {
var batchSize int
Expand Down Expand Up @@ -1432,6 +1441,7 @@ func newServer(conf rootConfig) (*Server, error) {
minAgeDays: conf.minAgeDays,
maxPriority: conf.maxPriority,
},
timeout: indexingTimeout,
}, err
}

Expand Down
4 changes: 4 additions & 0 deletions gitindex/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -529,15 +529,19 @@ func indexGitRepo(opts Options, config gitIndexConfig) error {

var names []string
fileKeys := map[string][]fileKey{}
totalFiles := 0

for key := range repos {
n := key.FullPath()
fileKeys[n] = append(fileKeys[n], key)
names = append(names, n)
totalFiles++
}

sort.Strings(names)
names = uniq(names)

log.Printf("attempting to index %d total files", totalFiles)
for _, name := range names {
keys := fileKeys[name]

Expand Down
5 changes: 5 additions & 0 deletions indexbuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,11 @@ func (b *IndexBuilder) ContentSize() uint32 {
return b.contentPostings.endByte + b.namePostings.endByte
}

// NumFiles reports how many files have been added to this builder,
// counted as the number of entries currently held in b.contentStrings.
func (b *IndexBuilder) NumFiles() int {
	count := len(b.contentStrings)
	return count
}

// NewIndexBuilder creates a fresh IndexBuilder. The passed in
// Repository contains repo metadata, and may be set to nil.
func NewIndexBuilder(r *Repository) (*IndexBuilder, error) {
Expand Down
2 changes: 1 addition & 1 deletion merge.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ func builderWriteAll(fn string, ib *IndexBuilder) error {
return err
}

log.Printf("finished %s: %d index bytes (overhead %3.1f)", fn, fi.Size(),
log.Printf("finished shard %s: %d index bytes (overhead %3.1f)", fn, fi.Size(),
float64(fi.Size())/float64(ib.ContentSize()+1))

return nil
Expand Down

0 comments on commit 379ed5a

Please sign in to comment.