From 379ed5a0bb34915f4f9b322f0dceced05946df90 Mon Sep 17 00:00:00 2001 From: Julie Tibshirani Date: Tue, 31 Oct 2023 10:34:51 -0700 Subject: [PATCH] Debug: make indexing timeout configurable (#676) On large repos, indexing might take quite a while and hit the indexing timeout. This change helps debug these situations: * Make the indexing timeout configurable through an env variable `INDEXING_TIMEOUT` * Add more info to progress logging: log the total number of files being indexed, plus the file count per shard --- build/builder.go | 7 +++++-- cmd/zoekt-sourcegraph-indexserver/index.go | 10 +++++----- cmd/zoekt-sourcegraph-indexserver/main.go | 12 +++++++++++- gitindex/index.go | 4 ++++ indexbuilder.go | 5 +++++ merge.go | 2 +- 6 files changed, 31 insertions(+), 9 deletions(-) diff --git a/build/builder.go b/build/builder.go index c5c823916..b4c1a5d86 100644 --- a/build/builder.go +++ b/build/builder.go @@ -1088,8 +1088,11 @@ func (b *Builder) writeShard(fn string, ib *zoekt.IndexBuilder) (*finishedShard, return nil, err } - log.Printf("finished %s: %d index bytes (overhead %3.1f)", fn, fi.Size(), - float64(fi.Size())/float64(ib.ContentSize()+1)) + log.Printf("finished shard %s: %d index bytes (overhead %3.1f), %d files processed \n", + fn, + fi.Size(), + float64(fi.Size())/float64(ib.ContentSize()+1), + ib.NumFiles()) return &finishedShard{f.Name(), fn}, nil } diff --git a/cmd/zoekt-sourcegraph-indexserver/index.go b/cmd/zoekt-sourcegraph-indexserver/index.go index 2bbfadbee..553f93dfd 100644 --- a/cmd/zoekt-sourcegraph-indexserver/index.go +++ b/cmd/zoekt-sourcegraph-indexserver/index.go @@ -25,9 +25,7 @@ import ( sglog "github.com/sourcegraph/log" ) -// indexTimeout defines how long the indexserver waits before -// killing an indexing job. -const indexTimeout = 1*time.Hour + 30*time.Minute // an index should never take longer than an hour and a half +const defaultIndexingTimeout = 1*time.Hour + 30*time.Minute // IndexOptions are the options that Sourcegraph can set via it's search // configuration endpoint. @@ -163,6 +161,9 @@ type gitIndexConfig struct { // The primary purpose of this configuration option is to be able to provide a stub // implementation for this in our test suite. All other callers should use build.Options.FindRepositoryMetadata(). findRepositoryMetadata func(args *indexArgs) (repository *zoekt.Repository, metadata *zoekt.IndexMetadata, ok bool, err error) + + // timeout defines how long the index server waits before killing an indexing job. + timeout time.Duration } func gitIndex(c gitIndexConfig, o *indexArgs, sourcegraph Sourcegraph, l sglog.Logger) error { @@ -182,8 +183,7 @@ func gitIndex(c gitIndexConfig, o *indexArgs, sourcegraph Sourcegraph, l sglog.L } buildOptions := o.BuildOptions() - - ctx, cancel := context.WithTimeout(context.Background(), indexTimeout) + ctx, cancel := context.WithTimeout(context.Background(), c.timeout) defer cancel() gitDir, err := tmpGitDir(o.Name) diff --git a/cmd/zoekt-sourcegraph-indexserver/main.go b/cmd/zoekt-sourcegraph-indexserver/main.go index 24412d821..f491d6157 100644 --- a/cmd/zoekt-sourcegraph-indexserver/main.go +++ b/cmd/zoekt-sourcegraph-indexserver/main.go @@ -82,7 +82,7 @@ var ( Help: "A histogram of latencies for indexing a repository.", Buckets: prometheus.ExponentialBucketsRange( (100 * time.Millisecond).Seconds(), - (40*time.Minute + indexTimeout).Seconds(), // add an extra 40 minutes to account for the time it takes to clone the repo + (40*time.Minute + defaultIndexingTimeout).Seconds(), // add an extra 40 minutes to account for the time it takes to clone the repo 20), }, []string{ "state", // state is an indexState @@ -188,6 +188,9 @@ type Server struct { hostname string mergeOpts mergeOpts + + // timeout defines how long the index server waits before killing an indexing job. + timeout time.Duration } var debug = log.New(io.Discard, "", log.LstdFlags) @@ -585,6 +588,7 @@ func (s *Server) Index(args *indexArgs) (state indexState, err error) { findRepositoryMetadata: func(args *indexArgs) (repository *zoekt.Repository, metadata *zoekt.IndexMetadata, ok bool, err error) { return args.BuildOptions().FindRepositoryMetadata() }, + timeout: s.timeout, } err = gitIndex(c, args, s.Sourcegraph, s.logger) @@ -1369,6 +1373,11 @@ func newServer(conf rootConfig) (*Server, error) { debug.Printf("skipping generating symbols metadata for: %s", joinStringSet(reposShouldSkipSymbolsCalculation, ", ")) } + indexingTimeout := getEnvWithDefaultDuration("INDEXING_TIMEOUT", defaultIndexingTimeout) + if indexingTimeout != defaultIndexingTimeout { + debug.Printf("using configured indexing timeout: %s", indexingTimeout) + } + var sg Sourcegraph if rootURL.IsAbs() { var batchSize int @@ -1432,6 +1441,7 @@ func newServer(conf rootConfig) (*Server, error) { minAgeDays: conf.minAgeDays, maxPriority: conf.maxPriority, }, + timeout: indexingTimeout, }, err } diff --git a/gitindex/index.go b/gitindex/index.go index 2016bd503..e98cb0125 100644 --- a/gitindex/index.go +++ b/gitindex/index.go @@ -529,15 +529,19 @@ func indexGitRepo(opts Options, config gitIndexConfig) error { var names []string fileKeys := map[string][]fileKey{} + totalFiles := 0 + for key := range repos { n := key.FullPath() fileKeys[n] = append(fileKeys[n], key) names = append(names, n) + totalFiles++ } sort.Strings(names) names = uniq(names) + log.Printf("attempting to index %d total files", totalFiles) for _, name := range names { keys := fileKeys[name] diff --git a/indexbuilder.go b/indexbuilder.go index 11f2aaea1..9e65f25c6 100644 --- a/indexbuilder.go +++ b/indexbuilder.go @@ -230,6 +230,11 @@ func (b *IndexBuilder) ContentSize() uint32 { return b.contentPostings.endByte + b.namePostings.endByte } +// NumFiles returns the number of files added to this builder +func (b *IndexBuilder) NumFiles() int { + return len(b.contentStrings) +} + // NewIndexBuilder creates a fresh IndexBuilder. The passed in // Repository contains repo metadata, and may be set to nil. func NewIndexBuilder(r *Repository) (*IndexBuilder, error) { diff --git a/merge.go b/merge.go index 6140f37df..0483fc26d 100644 --- a/merge.go +++ b/merge.go @@ -82,7 +82,7 @@ func builderWriteAll(fn string, ib *IndexBuilder) error { return err } - log.Printf("finished %s: %d index bytes (overhead %3.1f)", fn, fi.Size(), + log.Printf("finished shard %s: %d index bytes (overhead %3.1f)", fn, fi.Size(), float64(fi.Size())/float64(ib.ContentSize()+1)) return nil