diff --git a/cmd/bee/cmd/cmd.go b/cmd/bee/cmd/cmd.go
index 77a5d4bd6f7..e7c571f157d 100644
--- a/cmd/bee/cmd/cmd.go
+++ b/cmd/bee/cmd/cmd.go
@@ -156,6 +156,9 @@ func newCommand(opts ...option) (c *command, err error) {
 
 	c.initVersionCmd()
 	c.initDBCmd()
+	if err := c.initSplitCmd(); err != nil {
+		return nil, err
+	}
 
 	if err := c.initConfigurateOptionsCmd(); err != nil {
 		return nil, err
diff --git a/cmd/bee/cmd/split.go b/cmd/bee/cmd/split.go
new file mode 100644
index 00000000000..c57a9e02bd9
--- /dev/null
+++ b/cmd/bee/cmd/split.go
@@ -0,0 +1,124 @@
+// Copyright 2023 The Swarm Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cmd
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/ethersphere/bee/pkg/file"
+	"github.com/ethersphere/bee/pkg/file/splitter"
+	"github.com/ethersphere/bee/pkg/storage"
+	"github.com/ethersphere/bee/pkg/swarm"
+	"github.com/spf13/cobra"
+)
+
+// putter is a storage.Putter that stores nothing: it only records the
+// address of every chunk handed to it, in the order the chunks arrive.
+type putter struct {
+	rootHash       string
+	chunkAddresses []string
+}
+
+// Put appends the address of chunk to the recorded list. It never fails.
+func (s *putter) Put(ctx context.Context, chunk swarm.Chunk) error {
+	s.chunkAddresses = append(s.chunkAddresses, chunk.Address().String())
+	return nil
+}
+
+var _ storage.Putter = (*putter)(nil)
+
+// initSplitCmd registers the "split" subcommand on the root command. The
+// subcommand splits the file named by --input-file into chunks and writes the
+// root hash followed by every chunk address, one per line, to --output-file.
+func (c *command) initSplitCmd() error {
+	optionNameInputFile := "input-file"
+	optionNameOutputFile := "output-file"
+	cmd := &cobra.Command{
+		Use:   "split",
+		Short: "Split a file into a list of chunks. The 1st line is the root hash",
+		RunE: func(cmd *cobra.Command, args []string) (err error) {
+			inputFileName, err := cmd.Flags().GetString(optionNameInputFile)
+			if err != nil {
+				return fmt.Errorf("get input file name: %w", err)
+			}
+			outputFileName, err := cmd.Flags().GetString(optionNameOutputFile)
+			if err != nil {
+				return fmt.Errorf("get output file name: %w", err)
+			}
+
+			v, err := cmd.Flags().GetString(optionNameVerbosity)
+			if err != nil {
+				return fmt.Errorf("get verbosity: %w", err)
+			}
+			v = strings.ToLower(v)
+			logger, err := newLogger(cmd, v)
+			if err != nil {
+				return fmt.Errorf("new logger: %w", err)
+			}
+
+			reader, err := os.Open(inputFileName)
+			if err != nil {
+				return fmt.Errorf("open input file: %w", err)
+			}
+			defer reader.Close()
+
+			logger.Info("splitting", "file", inputFileName)
+			store := new(putter)
+			s := splitter.NewSimpleSplitter(store)
+			stat, err := reader.Stat()
+			if err != nil {
+				return fmt.Errorf("stat file: %w", err)
+			}
+			rootHash, err := file.SplitWriteAll(context.Background(), s, reader, stat.Size(), false)
+			if err != nil {
+				return fmt.Errorf("split write: %w", err)
+			}
+			store.rootHash = rootHash.String()
+
+			logger.Info("writing output", "file", outputFileName)
+			writer, err := os.OpenFile(outputFileName, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
+			if err != nil {
+				return fmt.Errorf("open output file: %w", err)
+			}
+			defer func() {
+				// A failed close can mean the last writes never reached the
+				// disk, so report it unless an earlier error is already set.
+				if cerr := writer.Close(); cerr != nil && err == nil {
+					err = fmt.Errorf("close output file: %w", cerr)
+				}
+			}()
+
+			logger.Debug("write root", "hash", store.rootHash)
+			if _, err = fmt.Fprintln(writer, store.rootHash); err != nil {
+				return fmt.Errorf("write root hash: %w", err)
+			}
+			for _, chunkAddress := range store.chunkAddresses {
+				logger.Debug("write chunk", "hash", chunkAddress)
+				if _, err = fmt.Fprintln(writer, chunkAddress); err != nil {
+					return fmt.Errorf("write chunk address: %w", err)
+				}
+			}
+			logger.Info("done", "hashes", len(store.chunkAddresses))
+			return nil
+		},
+	}
+
+	cmd.Flags().String(optionNameVerbosity, "info", "verbosity level")
+	cmd.Flags().String(optionNameInputFile, "", "input file")
+	cmd.Flags().String(optionNameOutputFile, "", "output file")
+	// Both flags are mandatory: without them the command would try to open
+	// a file with an empty name.
+	for _, name := range []string{optionNameInputFile, optionNameOutputFile} {
+		if err := cmd.MarkFlagRequired(name); err != nil {
+			return fmt.Errorf("mark flag %q required: %w", name, err)
+		}
+	}
+
+	c.root.AddCommand(cmd)
+	return nil
+}
diff --git a/cmd/bee/cmd/split_test.go b/cmd/bee/cmd/split_test.go
new file mode 100644
index 00000000000..4151a69b5b7
--- /dev/null
+++ b/cmd/bee/cmd/split_test.go
@@ -0,0 +1,68 @@
+// Copyright 2023 The Swarm Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cmd_test
+
+import (
+	"bufio"
+	crand "crypto/rand"
+	"math/rand"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/ethersphere/bee/cmd/bee/cmd"
+	"github.com/ethersphere/bee/pkg/api"
+)
+
+// TestDBSplit runs the split command on a random file of 10 to 19 KiB and
+// checks that the output has one line per chunk plus one for the root hash.
+func TestDBSplit(t *testing.T) {
+	t.Parallel()
+
+	s := (rand.Intn(10) + 10) * 1024 // random size in [10, 20) KiB
+	buf := make([]byte, s)
+	_, err := crand.Read(buf)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	inputFileName := filepath.Join(t.TempDir(), "input")
+	err = os.WriteFile(inputFileName, buf, 0644)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	outputFileName := filepath.Join(t.TempDir(), "output")
+
+	err = newCommand(t, cmd.WithArgs("split", "--input-file", inputFileName, "--output-file", outputFileName)).Execute()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	stat, err := os.Stat(inputFileName)
+	if err != nil {
+		t.Fatal(err)
+	}
+	wantHashes := api.CalculateNumberOfChunks(stat.Size(), false) + 1 // +1 for the root hash
+	var gotHashes int64
+
+	f, err := os.Open(outputFileName)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer f.Close()
+	scanner := bufio.NewScanner(f)
+	for scanner.Scan() {
+		gotHashes++
+	}
+	// Scan also stops on a read error; do not mistake that for end of file.
+	if err := scanner.Err(); err != nil {
+		t.Fatal(err)
+	}
+
+	if gotHashes != wantHashes {
+		t.Fatalf("got %d hashes, want %d", gotHashes, wantHashes)
+	}
+}
diff --git a/pkg/api/api.go b/pkg/api/api.go
index 49331689a0f..ef680fd3ea5 100644
--- a/pkg/api/api.go
+++ b/pkg/api/api.go
@@ -878,9 +878,9 @@ func (r *cleanupOnErrWriter) WriteHeader(statusCode int) {
 	r.ResponseWriter.WriteHeader(statusCode)
 }
 
-// calculateNumberOfChunks calculates the number of chunks in an arbitrary
+// CalculateNumberOfChunks calculates the number of chunks in an arbitrary
 // content length.
-func calculateNumberOfChunks(contentLength int64, isEncrypted bool) int64 {
+func CalculateNumberOfChunks(contentLength int64, isEncrypted bool) int64 {
 	if contentLength <= swarm.ChunkSize {
 		return 1
 	}
diff --git a/pkg/api/export_test.go b/pkg/api/export_test.go
index 3fca2526fcd..030a27d9001 100644
--- a/pkg/api/export_test.go
+++ b/pkg/api/export_test.go
@@ -57,10 +57,6 @@ func (s *Service) ResolveNameOrAddress(str string) (swarm.Address, error) {
 	return s.resolveNameOrAddress(str)
 }
 
-func CalculateNumberOfChunks(contentLength int64, isEncrypted bool) int64 {
-	return calculateNumberOfChunks(contentLength, isEncrypted)
-}
-
 type (
 	HealthStatusResponse = healthStatusResponse
 	NodeResponse         = nodeResponse