From b72cddc6fcc630cfbbf5ba416a9cf99d8ee61267 Mon Sep 17 00:00:00 2001 From: Julie Tibshirani Date: Mon, 30 Oct 2023 15:00:46 -0700 Subject: [PATCH 1/3] Debug: make timeout configurable --- build/builder.go | 7 +++++-- cmd/zoekt-sourcegraph-indexserver/index.go | 12 ++++++++---- cmd/zoekt-sourcegraph-indexserver/main.go | 2 +- gitindex/index.go | 4 ++++ indexbuilder.go | 5 +++++ merge.go | 2 +- 6 files changed, 24 insertions(+), 8 deletions(-) diff --git a/build/builder.go b/build/builder.go index c5c82391..b4c1a5d8 100644 --- a/build/builder.go +++ b/build/builder.go @@ -1088,8 +1088,11 @@ func (b *Builder) writeShard(fn string, ib *zoekt.IndexBuilder) (*finishedShard, return nil, err } - log.Printf("finished %s: %d index bytes (overhead %3.1f)", fn, fi.Size(), - float64(fi.Size())/float64(ib.ContentSize()+1)) + log.Printf("finished shard %s: %d index bytes (overhead %3.1f), %d files processed \n", + fn, + fi.Size(), + float64(fi.Size())/float64(ib.ContentSize()+1), + ib.NumFiles()) return &finishedShard{f.Name(), fn}, nil } diff --git a/cmd/zoekt-sourcegraph-indexserver/index.go b/cmd/zoekt-sourcegraph-indexserver/index.go index ee295956..4233204e 100644 --- a/cmd/zoekt-sourcegraph-indexserver/index.go +++ b/cmd/zoekt-sourcegraph-indexserver/index.go @@ -25,9 +25,7 @@ import ( sglog "github.com/sourcegraph/log" ) -// indexTimeout defines how long the indexserver waits before -// killing an indexing job. -const indexTimeout = 1*time.Hour + 30*time.Minute // an index should never take longer than an hour and a half +const defaultIndexingTimeout = 1*time.Hour + 30*time.Minute // IndexOptions are the options that Sourcegraph can set via it's search // configuration endpoint. @@ -183,7 +181,13 @@ func gitIndex(c gitIndexConfig, o *indexArgs, sourcegraph Sourcegraph, l sglog.L buildOptions := o.BuildOptions() - ctx, cancel := context.WithTimeout(context.Background(), indexTimeout) + // indexingTimeout defines how long the indexserver waits before killing an indexing job. + indexingTimeout := getEnvWithDefaultDuration("INDEXING_TIMEOUT", defaultIndexingTimeout) + if indexingTimeout != defaultIndexingTimeout { + debug.Printf("using configured indexing timeout: %s", indexingTimeout) + } + + ctx, cancel := context.WithTimeout(context.Background(), indexingTimeout) defer cancel() gitDir, err := tmpGitDir(o.Name) diff --git a/cmd/zoekt-sourcegraph-indexserver/main.go b/cmd/zoekt-sourcegraph-indexserver/main.go index 80319912..c34c6f66 100644 --- a/cmd/zoekt-sourcegraph-indexserver/main.go +++ b/cmd/zoekt-sourcegraph-indexserver/main.go @@ -82,7 +82,7 @@ var ( Help: "A histogram of latencies for indexing a repository.", Buckets: prometheus.ExponentialBucketsRange( (100 * time.Millisecond).Seconds(), - (40*time.Minute + indexTimeout).Seconds(), // add an extra 40 minutes to account for the time it takes to clone the repo + (40*time.Minute + defaultIndexingTimeout).Seconds(), // add an extra 40 minutes to account for the time it takes to clone the repo 20), }, []string{ "state", // state is an indexState diff --git a/gitindex/index.go b/gitindex/index.go index 2016bd50..e98cb012 100644 --- a/gitindex/index.go +++ b/gitindex/index.go @@ -529,15 +529,19 @@ func indexGitRepo(opts Options, config gitIndexConfig) error { var names []string fileKeys := map[string][]fileKey{} + totalFiles := 0 + for key := range repos { n := key.FullPath() fileKeys[n] = append(fileKeys[n], key) names = append(names, n) + totalFiles++ } sort.Strings(names) names = uniq(names) + log.Printf("attempting to index %d total files", totalFiles) for _, name := range names { keys := fileKeys[name] diff --git a/indexbuilder.go b/indexbuilder.go index 11f2aaea..9e65f25c 100644 --- a/indexbuilder.go +++ b/indexbuilder.go @@ -230,6 +230,11 @@ func (b *IndexBuilder) ContentSize() uint32 { return b.contentPostings.endByte + b.namePostings.endByte } +// NumFiles returns the number of files added to this builder +func (b *IndexBuilder) NumFiles() int { + return len(b.contentStrings) +} + // NewIndexBuilder creates a fresh IndexBuilder. The passed in // Repository contains repo metadata, and may be set to nil. func NewIndexBuilder(r *Repository) (*IndexBuilder, error) { diff --git a/merge.go b/merge.go index 6140f37d..0483fc26 100644 --- a/merge.go +++ b/merge.go @@ -82,7 +82,7 @@ func builderWriteAll(fn string, ib *IndexBuilder) error { return err } - log.Printf("finished %s: %d index bytes (overhead %3.1f)", fn, fi.Size(), + log.Printf("finished shard %s: %d index bytes (overhead %3.1f)", fn, fi.Size(), float64(fi.Size())/float64(ib.ContentSize()+1)) return nil From 433ceab06a3b7ada5c3d4dff9bf4583430c411ba Mon Sep 17 00:00:00 2001 From: Julie Tibshirani Date: Tue, 31 Oct 2023 10:07:40 -0700 Subject: [PATCH 2/3] Read indexing timeout when starting server --- cmd/zoekt-sourcegraph-indexserver/index.go | 12 ++++-------- cmd/zoekt-sourcegraph-indexserver/main.go | 11 +++++++++++ 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/cmd/zoekt-sourcegraph-indexserver/index.go b/cmd/zoekt-sourcegraph-indexserver/index.go index 4233204e..59588693 100644 --- a/cmd/zoekt-sourcegraph-indexserver/index.go +++ b/cmd/zoekt-sourcegraph-indexserver/index.go @@ -94,6 +94,9 @@ type indexArgs struct { // DeltaShardNumberFallbackThreshold is an upper limit on the number of preexisting shards that can exist // before attempting a delta build. DeltaShardNumberFallbackThreshold uint64 + + // Timeout defines how long the indexserver waits before killing an indexing job. + Timeout time.Duration } // BuildOptions returns a build.Options represented by indexArgs. Note: it @@ -180,14 +183,7 @@ func gitIndex(c gitIndexConfig, o *indexArgs, sourcegraph Sourcegraph, l sglog.L } buildOptions := o.BuildOptions() - - // indexingTimeout defines how long the indexserver waits before killing an indexing job. - indexingTimeout := getEnvWithDefaultDuration("INDEXING_TIMEOUT", defaultIndexingTimeout) - if indexingTimeout != defaultIndexingTimeout { - debug.Printf("using configured indexing timeout: %s", indexingTimeout) - } - - ctx, cancel := context.WithTimeout(context.Background(), indexingTimeout) + ctx, cancel := context.WithTimeout(context.Background(), o.Timeout) defer cancel() gitDir, err := tmpGitDir(o.Name) diff --git a/cmd/zoekt-sourcegraph-indexserver/main.go b/cmd/zoekt-sourcegraph-indexserver/main.go index c34c6f66..c0824830 100644 --- a/cmd/zoekt-sourcegraph-indexserver/main.go +++ b/cmd/zoekt-sourcegraph-indexserver/main.go @@ -188,6 +188,9 @@ type Server struct { hostname string mergeOpts mergeOpts + + // timeout defines how long the indexserver waits before killing an indexing job. + timeout time.Duration } var debug = log.New(io.Discard, "", log.LstdFlags) @@ -546,6 +549,8 @@ func (s *Server) Index(args *indexArgs) (state indexState, err error) { args.Symbols = false } + args.Timeout = s.timeout + reason := "forced" if args.Incremental { @@ -1369,6 +1374,11 @@ func newServer(conf rootConfig) (*Server, error) { debug.Printf("skipping generating symbols metadata for: %s", joinStringSet(reposShouldSkipSymbolsCalculation, ", ")) } + indexingTimeout := getEnvWithDefaultDuration("INDEXING_TIMEOUT", defaultIndexingTimeout) + if indexingTimeout != defaultIndexingTimeout { + debug.Printf("using configured indexing timeout: %s", indexingTimeout) + } + var sg Sourcegraph if rootURL.IsAbs() { var batchSize int @@ -1432,6 +1442,7 @@ func newServer(conf rootConfig) (*Server, error) { minAgeDays: conf.minAgeDays, maxPriority: conf.maxPriority, }, + timeout: indexingTimeout, }, err } From 250b6d3c37ccbb8629130412768c500d238b76d9 Mon Sep 17 00:00:00 2001 From: Julie Tibshirani Date: Tue, 31 Oct 2023 10:22:25 -0700 Subject: [PATCH 3/3] Use gitIndexConfig instead of indexArgs --- cmd/zoekt-sourcegraph-indexserver/index.go | 8 ++++---- cmd/zoekt-sourcegraph-indexserver/main.go | 5 ++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/cmd/zoekt-sourcegraph-indexserver/index.go b/cmd/zoekt-sourcegraph-indexserver/index.go index 59588693..aafd17d4 100644 --- a/cmd/zoekt-sourcegraph-indexserver/index.go +++ b/cmd/zoekt-sourcegraph-indexserver/index.go @@ -94,9 +94,6 @@ type indexArgs struct { // DeltaShardNumberFallbackThreshold is an upper limit on the number of preexisting shards that can exist // before attempting a delta build. DeltaShardNumberFallbackThreshold uint64 - - // Timeout defines how long the indexserver waits before killing an indexing job. - Timeout time.Duration } // BuildOptions returns a build.Options represented by indexArgs. Note: it @@ -164,6 +161,9 @@ type gitIndexConfig struct { // The primary purpose of this configuration option is to be able to provide a stub // implementation for this in our test suite. All other callers should use build.Options.FindRepositoryMetadata(). findRepositoryMetadata func(args *indexArgs) (repository *zoekt.Repository, metadata *zoekt.IndexMetadata, ok bool, err error) + + // timeout defines how long the index server waits before killing an indexing job. + timeout time.Duration } func gitIndex(c gitIndexConfig, o *indexArgs, sourcegraph Sourcegraph, l sglog.Logger) error { @@ -183,7 +183,7 @@ func gitIndex(c gitIndexConfig, o *indexArgs, sourcegraph Sourcegraph, l sglog.L } buildOptions := o.BuildOptions() - ctx, cancel := context.WithTimeout(context.Background(), o.Timeout) + ctx, cancel := context.WithTimeout(context.Background(), c.timeout) defer cancel() gitDir, err := tmpGitDir(o.Name) diff --git a/cmd/zoekt-sourcegraph-indexserver/main.go b/cmd/zoekt-sourcegraph-indexserver/main.go index c0824830..80539c4f 100644 --- a/cmd/zoekt-sourcegraph-indexserver/main.go +++ b/cmd/zoekt-sourcegraph-indexserver/main.go @@ -189,7 +189,7 @@ type Server struct { mergeOpts mergeOpts - // timeout defines how long the indexserver waits before killing an indexing job. + // timeout defines how long the index server waits before killing an indexing job. timeout time.Duration } @@ -549,8 +549,6 @@ func (s *Server) Index(args *indexArgs) (state indexState, err error) { args.Symbols = false } - args.Timeout = s.timeout - reason := "forced" if args.Incremental { @@ -590,6 +588,7 @@ func (s *Server) Index(args *indexArgs) (state indexState, err error) { findRepositoryMetadata: func(args *indexArgs) (repository *zoekt.Repository, metadata *zoekt.IndexMetadata, ok bool, err error) { return args.BuildOptions().FindRepositoryMetadata() }, + timeout: s.timeout, } err = gitIndex(c, args, s.Sourcegraph, s.logger)