Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: split cmd #4484

Merged
merged 5 commits into from
Nov 30, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cmd/bee/cmd/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,9 @@ func newCommand(opts ...option) (c *command, err error) {

c.initVersionCmd()
c.initDBCmd()
if err := c.initSplitCmd(); err != nil {
return nil, err
}

if err := c.initConfigurateOptionsCmd(); err != nil {
return nil, err
Expand Down
130 changes: 130 additions & 0 deletions cmd/bee/cmd/split.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
// Copyright 2023 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package cmd

import (
"context"
"fmt"
"os"
"strings"

"github.com/ethersphere/bee/pkg/file"
"github.com/ethersphere/bee/pkg/file/splitter"
"github.com/ethersphere/bee/pkg/storage"
"github.com/ethersphere/bee/pkg/swarm"

acha-bill marked this conversation as resolved.
Show resolved Hide resolved
"github.com/spf13/cobra"
)

// splitterStore is a store that stores all the split chunk addresses of a file
type splitterStore struct {
rootHash string
chunkAddresses []string
}

func (s *splitterStore) Iterate(ctx context.Context, fn storage.IterateChunkFn) error {
return nil
}

func (s *splitterStore) Close() error {
return nil
}

func (s *splitterStore) Get(ctx context.Context, address swarm.Address) (swarm.Chunk, error) {
return nil, nil
}

func (s *splitterStore) Put(ctx context.Context, chunk swarm.Chunk) error {
s.chunkAddresses = append(s.chunkAddresses, chunk.Address().String())
return nil
}

func (s *splitterStore) Delete(ctx context.Context, address swarm.Address) error {
return nil
}

func (s *splitterStore) Has(ctx context.Context, address swarm.Address) (bool, error) {
return false, nil
}

var _ storage.ChunkStore = (*splitterStore)(nil)

func (c *command) initSplitCmd() error {
optionNameInputFile := "input-file"
optionNameOutputFile := "output-file"
cmd := &cobra.Command{
Use: "split",
Short: "Split a file into a list chunks. The 1st line is the root hash",
RunE: func(cmd *cobra.Command, args []string) error {
inputFileName, err := cmd.Flags().GetString(optionNameInputFile)
if err != nil {
return fmt.Errorf("get input file name: %w", err)
}
outputFileName, err := cmd.Flags().GetString(optionNameOutputFile)
if err != nil {
return fmt.Errorf("get output file name: %w", err)
}

v, err := cmd.Flags().GetString(optionNameVerbosity)
if err != nil {
return fmt.Errorf("get verbosity: %w", err)
}
v = strings.ToLower(v)
logger, err := newLogger(cmd, v)
if err != nil {
return fmt.Errorf("new logger: %w", err)
}

reader, err := os.Open(inputFileName)
if err != nil {
return fmt.Errorf("open input file: %w", err)
}
defer reader.Close()

logger.Info("splitting", "file", inputFileName)
store := new(splitterStore)
s := splitter.NewSimpleSplitter(store)
stat, err := reader.Stat()
if err != nil {
return fmt.Errorf("stat file: %w", err)
}
rootHash, err := file.SplitWriteAll(context.Background(), s, reader, stat.Size(), false)
if err != nil {
return fmt.Errorf("split write: %w", err)
}
store.rootHash = rootHash.String()

logger.Info("writing output", "file", outputFileName)
writer, err := os.OpenFile(outputFileName, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
if err != nil {
return fmt.Errorf("open output file: %w", err)
}
defer writer.Close()

logger.Debug("write root", "hash", store.rootHash)
_, err = writer.WriteString(fmt.Sprintf("%s\n", store.rootHash))
if err != nil {
return fmt.Errorf("write root hash: %w", err)
}
for _, chunkAddress := range store.chunkAddresses {
logger.Debug("write chunk", "hash", chunkAddress)
_, err = writer.WriteString(fmt.Sprintf("%s\n", chunkAddress))
if err != nil {
return fmt.Errorf("write chunk address: %w", err)
}
}
logger.Info("done", "hashes", len(store.chunkAddresses))
return nil
},
}

cmd.Flags().String(optionNameVerbosity, "info", "verbosity level")
cmd.Flags().String(optionNameInputFile, "", "input file")
cmd.Flags().String(optionNameOutputFile, "", "output file")
cmd.MarkFlagsRequiredTogether(optionNameInputFile, optionNameOutputFile)

c.root.AddCommand(cmd)
return nil
}
70 changes: 70 additions & 0 deletions cmd/bee/cmd/split_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Copyright 2023 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package cmd_test

import (
"bufio"
crand "crypto/rand"
"errors"
"io"
"math/rand"
"os"
"testing"

"github.com/ethersphere/bee/cmd/bee/cmd"
"github.com/ethersphere/bee/pkg/api"
)

func TestDBSplit(t *testing.T) {
t.Parallel()

s := (rand.Intn(10) + 10) * 1024 // rand between 10 and 20 KB
buf := make([]byte, s)
_, err := crand.Read(buf)
if err != nil {
t.Fatal(err)
}

inputFileName := t.TempDir() + "/input"
acha-bill marked this conversation as resolved.
Show resolved Hide resolved
err = os.WriteFile(inputFileName, buf, 0644)
if err != nil {
t.Fatal(err)
}

outputFileName := t.TempDir() + "/output"
acha-bill marked this conversation as resolved.
Show resolved Hide resolved

err = newCommand(t, cmd.WithArgs("split", "--input-file", inputFileName, "--output-file", outputFileName)).Execute()
if err != nil {
t.Fatal(err)
}

stat, err := os.Stat(inputFileName)
if err != nil {
t.Fatal(err)
}
wantHashes := api.CalculateNumberOfChunks(stat.Size(), false) + 1 // +1 for the root hash
var gotHashes int64

f, err := os.Open(outputFileName)
if err != nil {
t.Fatal(err)
}
defer f.Close()
reader := bufio.NewReader(f)
acha-bill marked this conversation as resolved.
Show resolved Hide resolved
for {
_, err = reader.ReadString('\n')
if err != nil {
if errors.Is(err, io.EOF) {
break
}
t.Fatal(err)
}
gotHashes++
}

if gotHashes != wantHashes {
t.Fatalf("got %d hashes, want %d", gotHashes, wantHashes)
}
}
4 changes: 2 additions & 2 deletions pkg/api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -878,9 +878,9 @@ func (r *cleanupOnErrWriter) WriteHeader(statusCode int) {
r.ResponseWriter.WriteHeader(statusCode)
}

// calculateNumberOfChunks calculates the number of chunks in an arbitrary
// CalculateNumberOfChunks calculates the number of chunks in an arbitrary
// content length.
func calculateNumberOfChunks(contentLength int64, isEncrypted bool) int64 {
func CalculateNumberOfChunks(contentLength int64, isEncrypted bool) int64 {
if contentLength <= swarm.ChunkSize {
return 1
}
Expand Down
4 changes: 0 additions & 4 deletions pkg/api/export_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,6 @@ func (s *Service) ResolveNameOrAddress(str string) (swarm.Address, error) {
return s.resolveNameOrAddress(str)
}

func CalculateNumberOfChunks(contentLength int64, isEncrypted bool) int64 {
return calculateNumberOfChunks(contentLength, isEncrypted)
}

type (
HealthStatusResponse = healthStatusResponse
NodeResponse = nodeResponse
Expand Down
Loading