Skip to content

Commit

Permalink
Merge pull request #9 from gadget-inc/mtime_optimization
Browse files Browse the repository at this point in the history
Skip hashing files if the mod time is before the last summary
  • Loading branch information
angelini authored Dec 9, 2021
2 parents 9ddfa79 + ef83b3f commit 4906fd3
Show file tree
Hide file tree
Showing 9 changed files with 143 additions and 87 deletions.
2 changes: 1 addition & 1 deletion cmd/fsdiff/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ func main() {
log.Fatalf("read summary from disk: %v", err)
}

d, s, err := diff.Diff(diff.WalkChan(args.dir, args.ignores), diff.SummaryChan(summary))
d, s, err := diff.Diff(diff.WalkChan(args.dir, args.ignores, summary.LatestModTime), diff.SummaryChan(summary))
if err != nil {
log.Fatalf("execute diff: %v", err)
}
Expand Down
3 changes: 1 addition & 2 deletions pkg/debug/debug.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,11 @@ import (

// PrintSummarySummary writes a short, human-readable overview of summary to
// standard output: a header line, the summary-level fields printed below, and
// the number of entries it contains.
func PrintSummarySummary(summary *pb.Summary) {
fmt.Println("=== Summary ===")
fmt.Printf("created at: %v\n", summary.CreatedAt)
fmt.Printf("latest mod time: %v\n", summary.LatestModTime)
fmt.Printf("total entries: %v\n", len(summary.Entries))
}

// PrintDiffSummary writes a short, human-readable overview of diff to standard
// output: a header line, the diff-level fields printed below, and the number
// of updates it contains.
func PrintDiffSummary(diff *pb.Diff) {
fmt.Println("=== Diff ===")
fmt.Printf("created at: %v\n", diff.CreatedAt)
fmt.Printf("total updates: %v\n", len(diff.Updates))
}
83 changes: 52 additions & 31 deletions pkg/diff/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,21 +51,23 @@ func hashEmptyDir() []byte {
}

type Entry struct {
path string
mode fs.FileMode
hash []byte
err error
path string
mode fs.FileMode
modTime int64
hash []byte
err error
}

func (e *Entry) toPb() *pb.Entry {
return &pb.Entry{
Path: e.path,
Mode: uint32(e.mode),
Hash: e.hash,
Path: e.path,
Mode: uint32(e.mode),
ModTime: e.modTime,
Hash: e.hash,
}
}

func WalkChan(dir string, ignores []string) <-chan *Entry {
func WalkChan(dir string, ignores []string, latestModTime int64) <-chan *Entry {
entryChan := make(chan *Entry, 100)

pushErr := func(err error) error {
Expand Down Expand Up @@ -138,23 +140,26 @@ func WalkChan(dir string, ignores []string) <-chan *Entry {

var hash []byte

if info.Mode()&os.ModeSymlink == os.ModeSymlink {
hash, err = hashLink(path)
} else {
hash, err = hashFile(path)
}
if errors.Is(err, fs.ErrNotExist) {
return nil
}
if err != nil {
return pushErr(fmt.Errorf("hash file: %w", err))
if info.ModTime().UnixNano() >= latestModTime {
if info.Mode()&os.ModeSymlink == os.ModeSymlink {
hash, err = hashLink(path)
} else {
hash, err = hashFile(path)
}
if errors.Is(err, fs.ErrNotExist) {
return nil
}
if err != nil {
return pushErr(fmt.Errorf("hash file: %w", err))
}
}

entryChan <- &Entry{
path: relativePath,
mode: info.Mode(),
hash: hash[:],
err: nil,
path: relativePath,
mode: info.Mode(),
modTime: info.ModTime().UnixNano(),
hash: hash[:],
err: nil,
}

return nil
Expand All @@ -176,10 +181,11 @@ func SummaryChan(summary *pb.Summary) <-chan *Entry {

for _, entry := range summary.Entries {
entryChan <- &Entry{
path: entry.Path,
mode: fs.FileMode(entry.Mode),
hash: entry.Hash,
err: nil,
path: entry.Path,
mode: fs.FileMode(entry.Mode),
modTime: entry.ModTime,
hash: entry.Hash,
err: nil,
}
}
}()
Expand Down Expand Up @@ -233,7 +239,6 @@ func readFromChan(info *channelInfo) (*Entry, bool) {
} else {
log.Printf("single file timeout elapsed from the %v channel", info.name)
}

}
}

Expand All @@ -242,10 +247,21 @@ func readFromChan(info *channelInfo) (*Entry, bool) {
}, false
}

// latestModTime returns the largest ModTime found among summary's entries.
// The result is 0 when the summary has no entries or when no entry has a
// ModTime greater than zero.
func latestModTime(summary *pb.Summary) int64 {
	var newest int64

	for i := range summary.Entries {
		if t := summary.Entries[i].ModTime; t > newest {
			newest = t
		}
	}

	return newest
}

func Diff(walkC, sumC <-chan *Entry) (*pb.Diff, *pb.Summary, error) {
start := time.Now().Unix()
diff := &pb.Diff{CreatedAt: start}
summary := &pb.Summary{CreatedAt: start}
diff := &pb.Diff{}
summary := &pb.Summary{}

walk := channelInfo{name: "walk", channel: walkC}
sum := channelInfo{name: "sum", channel: sumC}
Expand All @@ -262,6 +278,7 @@ func Diff(walkC, sumC <-chan *Entry) (*pb.Diff, *pb.Summary, error) {
}

if !walkOpen && !sumOpen {
summary.LatestModTime = latestModTime(summary)
return diff, summary, nil
}

Expand All @@ -287,13 +304,17 @@ func Diff(walkC, sumC <-chan *Entry) (*pb.Diff, *pb.Summary, error) {
}

if walkEntry.path == sumEntry.path {
if walkEntry.mode != sumEntry.mode || !bytes.Equal(walkEntry.hash, sumEntry.hash) {
if walkEntry.mode != sumEntry.mode || (len(walkEntry.hash) > 0 && !bytes.Equal(walkEntry.hash, sumEntry.hash)) {
diff.Updates = append(diff.Updates, &pb.Update{
Path: walkEntry.path,
Action: pb.Update_CHANGE,
})
}

if len(walkEntry.hash) == 0 {
walkEntry.hash = sumEntry.hash
}

summary.Entries = append(summary.Entries, walkEntry.toPb())

walkEntry, walkOpen = readFromChan(&walk)
Expand Down
26 changes: 8 additions & 18 deletions pkg/pb/diff.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions pkg/pb/diff.proto
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,5 @@ message Update {
}

message Diff {
int64 created_at = 1;
repeated Update updates = 2;
repeated Update updates = 1;
}
47 changes: 29 additions & 18 deletions pkg/pb/summary.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions pkg/pb/summary.proto
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@ option go_package = "github.com/gadget-inc/fsdiff/pkg/pb";
message Entry {
string path = 1;
uint32 mode = 2;
bytes hash = 3;
int64 mod_time = 3;
bytes hash = 4;
}

message Summary {
int64 created_at = 1;
int64 latest_mod_time = 1;
repeated Entry entries = 2;
}
8 changes: 4 additions & 4 deletions test/benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ func BenchmarkSimpleInitialDiff(b *testing.B) {
dir := getFixturesDir("simple")

for n := 0; n < b.N; n++ {
_, _, err := diff.Diff(diff.WalkChan(dir, []string{}), diff.SummaryChan(&pb.Summary{}))
_, _, err := diff.Diff(diff.WalkChan(dir, []string{}, 0), diff.SummaryChan(&pb.Summary{}))
if err != nil {
b.Fatalf("failed to run diff: %v", err)
}
Expand All @@ -33,7 +33,7 @@ func BenchmarkReactInitialDiff(b *testing.B) {
dir := getFixturesDir("example-react-app")

for n := 0; n < b.N; n++ {
_, _, err := diff.Diff(diff.WalkChan(dir, []string{}), diff.SummaryChan(&pb.Summary{}))
_, _, err := diff.Diff(diff.WalkChan(dir, []string{}, 0), diff.SummaryChan(&pb.Summary{}))
if err != nil {
b.Fatalf("failed to run diff: %v", err)
}
Expand All @@ -42,15 +42,15 @@ func BenchmarkReactInitialDiff(b *testing.B) {

func BenchmarkReactChangedDiff(b *testing.B) {
initialDir := getFixturesDir("example-react-app")
_, summary, err := diff.Diff(diff.WalkChan(initialDir, []string{}), diff.SummaryChan(&pb.Summary{}))
_, summary, err := diff.Diff(diff.WalkChan(initialDir, []string{}, 0), diff.SummaryChan(&pb.Summary{}))
if err != nil {
b.Fatalf("failed to run diff: %v", err)
}

changedDir := getFixturesDir("example-react-app-libraries")

for n := 0; n < b.N; n++ {
_, _, err := diff.Diff(diff.WalkChan(changedDir, []string{}), diff.SummaryChan(summary))
_, _, err := diff.Diff(diff.WalkChan(changedDir, []string{}, summary.LatestModTime), diff.SummaryChan(summary))
if err != nil {
b.Fatalf("failed to run diff: %v", err)
}
Expand Down
Loading

0 comments on commit 4906fd3

Please sign in to comment.