Skip to content

Commit

Permalink
ctags: monitor symbol analysis and report stuck documents (#678)
Browse files Browse the repository at this point in the history
This adds a monitor which reports the progress of symbol analysis every
minute. Additionally, if a document takes too long to analyse (10s), we
report it.

For now this only reports via the standard logger. However, once we are
comfortable with the thresholds, we can likely also add a way to kill
analysis for a file.

Test Plan: Adjusted monitorReportStatus to 1s then indexed the
sourcegraph repo and inspected the output

  $ go run ./cmd/zoekt-git-index -require_ctags ../sourcegraph/
  2023/11/03 16:03:10 attempting to index 14533 total files
  2023/11/03 16:03:13 DEBUG: symbol analysis still running for shard statistics: duration=1s symbols=15805 bytes=44288971
  2023/11/03 16:03:14 DEBUG: symbol analysis still running for shard statistics: duration=2s symbols=26189 bytes=51564417
  2023/11/03 16:03:15 DEBUG: symbol analysis still running for shard statistics: duration=3s symbols=55613 bytes=64748084
  2023/11/03 16:03:16 DEBUG: symbol analysis still running for shard statistics: duration=4s symbols=86557 bytes=93771404
  2023/11/03 16:03:17 DEBUG: symbol analysis still running for shard statistics: duration=5s symbols=125352 bytes=116319453
  2023/11/03 16:03:18 symbol analysis finished for shard statistics: duration=5s symbols=142951 bytes=129180023
  2023/11/03 16:03:22 finished shard github.com%2Fsourcegraph%2Fsourcegraph_v16.00000.zoekt: 283983298 index bytes (overhead 2.8), 14533 files processed

I then added a random sleep for a minute in a file to see the stuck
reporting:

  $ go run ./cmd/zoekt-git-index -require_ctags ../sourcegraph/
  2023/11/03 16:14:57 attempting to index 14533 total files
  2023/11/03 16:15:15 WARN: symbol analysis for README.md (3485 bytes) has been running for 14s
  2023/11/03 16:15:25 WARN: symbol analysis for README.md (3485 bytes) has been running for 24s
  2023/11/03 16:15:45 WARN: symbol analysis for README.md (3485 bytes) has been running for 44s
  2023/11/03 16:16:00 DEBUG: symbol analysis still running for shard statistics: duration=1m0s symbols=958 bytes=624329
  2023/11/03 16:16:00 symbol analysis for README.md (size 3485 bytes) is done and found 4 symbols
  2023/11/03 16:16:06 symbol analysis finished for shard statistics: duration=1m5s symbols=142951 bytes=129180023
  2023/11/03 16:16:10 finished shard github.com%2Fsourcegraph%2Fsourcegraph_v16.00000.zoekt: 283983299 index bytes (overhead 2.8), 14533 files processed
  • Loading branch information
keegancsmith authored Nov 6, 2023
1 parent c7e066e commit 0ff0dd5
Showing 1 changed file with 117 additions and 0 deletions.
117 changes: 117 additions & 0 deletions build/ctags.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@ package build
import (
"bytes"
"fmt"
"log"
"strings"
"sync"
"time"

"github.com/sourcegraph/zoekt"
"github.com/sourcegraph/zoekt/ctags"
Expand All @@ -38,6 +41,9 @@ func normalizeLanguage(filetype string) string {
}

func ctagsAddSymbolsParserMap(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserMap ctags.ParserMap) error {
monitor := newMonitor()
defer monitor.Stop()

for _, doc := range todo {
if doc.Symbols != nil {
continue
Expand All @@ -59,7 +65,10 @@ func ctagsAddSymbolsParserMap(todo []*zoekt.Document, languageMap ctags.Language
}
}

monitor.BeginParsing(doc)
es, err := parser.Parse(doc.Name, doc.Content)
monitor.EndParsing(es)

if err != nil {
return err
}
Expand Down Expand Up @@ -167,3 +176,111 @@ func newLinesIndices(in []byte) []uint32 {
}
return out
}

const (
	// monitorReportStuck is the minimum time a single document must have
	// been under analysis before a warning about it is logged.
	monitorReportStuck = 10 * time.Second

	// monitorReportStatus is the interval between periodic status updates.
	monitorReportStatus = time.Minute
)

// monitor keeps bookkeeping about an ongoing symbol analysis run: aggregate
// totals for the run plus details of the document currently being parsed.
type monitor struct {
	// mu is held while reading or updating the statistics and
	// current-document fields.
	mu sync.Mutex

	// lastUpdate is the time of the most recent begin/end parsing event.
	lastUpdate time.Time

	// start is when the monitor was created.
	start time.Time
	// totalSize is the sum of the sizes (in bytes) of documents whose
	// parsing has ended.
	totalSize int
	// totalSymbols is the number of symbol entries reported so far.
	totalSymbols int

	// currentDocName is the name of the document being parsed, or "" when
	// no document is in flight.
	currentDocName string
	// currentDocSize is the content length in bytes of the current document.
	currentDocSize int
	// currentDocStuckCount counts how many stuck warnings have been logged
	// for the current document.
	currentDocStuckCount int

	// done is closed to ask the reporting goroutine to exit.
	done chan struct{}
}

// newMonitor returns a monitor whose start time and last-update time are both
// set to the current time, and launches its reporting loop in a new
// goroutine. That goroutine keeps running until the done channel is closed.
func newMonitor() *monitor {
	now := time.Now()

	m := new(monitor)
	m.start = now
	m.lastUpdate = now
	m.done = make(chan struct{})

	go m.run()
	return m
}

// BeginParsing records that analysis of doc is starting: it stamps the
// last-update time and remembers the document's name and content length as
// the current document.
func (m *monitor) BeginParsing(doc *zoekt.Document) {
	// Sample the clock before contending for the lock so that lock wait
	// time is not hidden from the recorded timestamp.
	started := time.Now()

	m.mu.Lock()
	defer m.mu.Unlock()

	m.lastUpdate = started
	m.currentDocName, m.currentDocSize = doc.Name, len(doc.Content)
}

// EndParsing records that analysis of the current document has ended and
// produced entries. It folds the document into the aggregate totals, logs a
// completion message if stuck warnings were emitted for the document, and
// clears the current-document state.
func (m *monitor) EndParsing(entries []*ctags.Entry) {
	finished := time.Now()
	found := len(entries)

	m.mu.Lock()
	defer m.mu.Unlock()

	m.lastUpdate = finished

	// Fold this document into the running totals.
	m.totalSize += m.currentDocSize
	m.totalSymbols += found

	// Only announce completion for documents we previously warned about.
	if m.currentDocStuckCount > 0 {
		log.Printf("symbol analysis for %s (size %d bytes) is done and found %d symbols", m.currentDocName, m.currentDocSize, found)
		m.currentDocStuckCount = 0
	}

	// No document is in flight any more.
	m.currentDocName, m.currentDocSize = "", 0
}

// Stop asks the reporting goroutine to log its final statistics and exit by
// closing the done channel.
//
// Stop is idempotent: calls after the first are no-ops instead of panicking
// on a double close. It does not wait for the goroutine to exit, so the final
// log line is emitted asynchronously.
func (m *monitor) Stop() {
	// The mutex serialises concurrent Stop calls so that the
	// check-then-close below cannot race with itself.
	m.mu.Lock()
	defer m.mu.Unlock()

	select {
	case <-m.done:
		// Already stopped.
	default:
		close(m.done)
	}
}

// run is the monitor's reporting loop. Until the done channel is closed it
// periodically logs aggregate progress and warns about a document that has
// been under analysis for too long. When done is closed it logs the final
// statistics and returns.
func (m *monitor) run() {
	// Check for stuck documents at twice the reporting frequency, so a
	// document crossing the threshold is noticed at most half a threshold
	// late.
	stuckTicker := time.NewTicker(monitorReportStuck / 2)
	statusTicker := time.NewTicker(monitorReportStatus)
	defer func() {
		statusTicker.Stop()
		stuckTicker.Stop()
	}()

	// sinceStart is the whole-second duration between monitor creation and at.
	sinceStart := func(at time.Time) time.Duration {
		return at.Sub(m.start).Truncate(time.Second)
	}

	for {
		select {
		case <-statusTicker.C:
			at := time.Now()
			m.mu.Lock()
			log.Printf("DEBUG: symbol analysis still running for shard statistics: duration=%v symbols=%d bytes=%d", sinceStart(at), m.totalSymbols, m.totalSize)
			m.mu.Unlock()

		case <-stuckTicker.C:
			at := time.Now()
			m.mu.Lock()
			elapsed := at.Sub(m.lastUpdate).Truncate(time.Second)
			// The threshold doubles after every warning for the same
			// document, so repeated warnings back off exponentially.
			threshold := monitorReportStuck << m.currentDocStuckCount
			stuck := m.currentDocName != "" && elapsed >= threshold
			if stuck {
				m.currentDocStuckCount++
				log.Printf("WARN: symbol analysis for %s (%d bytes) has been running for %v", m.currentDocName, m.currentDocSize, elapsed)
			}
			m.mu.Unlock()

		case <-m.done:
			at := time.Now()
			m.mu.Lock()
			log.Printf("symbol analysis finished for shard statistics: duration=%v symbols=%d bytes=%d", sinceStart(at), m.totalSymbols, m.totalSize)
			m.mu.Unlock()
			return
		}
	}
}

0 comments on commit 0ff0dd5

Please sign in to comment.