Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validation optimization - reducing the number of HEAD calls during cp and mv operations #4710

Merged
merged 10 commits into from
Oct 12, 2023
12 changes: 6 additions & 6 deletions cmd/common-methods.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,20 +108,20 @@ func getEncKeys(ctx *cli.Context) (map[string][]prefixSSEPair, *probe.Error) {
// Check if the passed URL represents a folder. It may or may not exist yet.
// If it exists, we can easily check if it is a folder, if it doesn't exist,
// we can guess if the url is a folder from how it looks.
func isAliasURLDir(ctx context.Context, aliasURL string, keys map[string][]prefixSSEPair, timeRef time.Time) bool {
func isAliasURLDir(ctx context.Context, aliasURL string, keys map[string][]prefixSSEPair, timeRef time.Time) (bool, *ClientContent) {
// If the target url exists, check if it is a directory
// and return immediately.
_, targetContent, err := url2Stat(ctx, aliasURL, "", false, keys, timeRef, false)
if err == nil {
return targetContent.Type.IsDir()
return targetContent.Type.IsDir(), targetContent
}

_, expandedURL, _ := mustExpandAlias(aliasURL)

// Check if targetURL is an FS or S3 aliased url
if expandedURL == aliasURL {
// This is an FS url, check if the url has a separator at the end
return strings.HasSuffix(aliasURL, string(filepath.Separator))
return strings.HasSuffix(aliasURL, string(filepath.Separator)), targetContent
}

// This is an S3 url, then:
Expand All @@ -134,14 +134,14 @@ func isAliasURLDir(ctx context.Context, aliasURL string, keys map[string][]prefi
switch len(fields) {
// Nothing or alias format
case 0, 1:
return false
return false, targetContent
// alias/bucket format
case 2:
return true
return true, targetContent
} // default case..

// alias/bucket/prefix format
return strings.HasSuffix(pathURL, "/")
return strings.HasSuffix(pathURL, "/"), targetContent
}

// getSourceStreamMetadataFromURL gets a reader from URL.
Expand Down
71 changes: 42 additions & 29 deletions cmd/cp-main.go
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ func doCopyFake(cpURLs URLs, pg Progress) URLs {
}

// doPrepareCopyURLs scans the source URL and prepares a list of objects for copying.
func doPrepareCopyURLs(ctx context.Context, session *sessionV8, cancelCopy context.CancelFunc) (totalBytes, totalObjects int64) {
func doPrepareCopyURLs(ctx context.Context, session *sessionV8, cancelCopy context.CancelFunc) (totalBytes, totalObjects int64, errSeen bool) {
zveinn marked this conversation as resolved.
Show resolved Hide resolved
// Separate source and target. 'cp' can take only one target,
// but any number of sources.
sourceURLs := session.Header.CommandArgs[:len(session.Header.CommandArgs)-1]
Expand Down Expand Up @@ -333,16 +333,10 @@ func doPrepareCopyURLs(ctx context.Context, session *sessionV8, cancelCopy conte
done = true
break
}

if cpURLs.Error != nil {
// Print in new line and adjust to top so that we don't print over the ongoing scan bar
if !globalQuiet && !globalJSON {
console.Eraseline()
}
if strings.Contains(cpURLs.Error.ToGoError().Error(), " is a folder.") {
errorIf(cpURLs.Error.Trace(), "Folder cannot be copied. Please use `...` suffix.")
} else {
errorIf(cpURLs.Error.Trace(), "Unable to prepare URL for copying.")
}
printCopyURLsError(&cpURLs)
errSeen = true
break
}

Expand Down Expand Up @@ -377,11 +371,29 @@ func doPrepareCopyURLs(ctx context.Context, session *sessionV8, cancelCopy conte
return
}

func printCopyURLsError(cpURLs *URLs) {
// Print in new line and adjust to top so that we
// don't print over the ongoing scan bar
if !globalQuiet && !globalJSON {
console.Eraseline()
}

if strings.Contains(cpURLs.Error.ToGoError().Error(),
" is a folder.") {
errorIf(cpURLs.Error.Trace(),
"Folder cannot be copied. Please use `...` suffix.")
} else {
zveinn marked this conversation as resolved.
Show resolved Hide resolved
errorIf(cpURLs.Error.Trace(),
"Unable to prepare URL for copying.")
}
}

func doCopySession(ctx context.Context, cancelCopy context.CancelFunc, cli *cli.Context, session *sessionV8, encKeyDB map[string][]prefixSSEPair, isMvCmd bool) error {
var isCopied func(string) bool
var totalObjects, totalBytes int64

cpURLsCh := make(chan URLs, 10000)
errSeen := false

// Store a progress bar or an accounter
var pg ProgressReader
Expand All @@ -405,7 +417,7 @@ func doCopySession(ctx context.Context, cancelCopy context.CancelFunc, cli *cli.
isCopied = isLastFactory(session.Header.LastCopied)

if !session.HasData() {
totalBytes, totalObjects = doPrepareCopyURLs(ctx, session, cancelCopy)
totalBytes, totalObjects, errSeen = doPrepareCopyURLs(ctx, session, cancelCopy)
} else {
totalBytes, totalObjects = session.Header.TotalBytes, session.Header.TotalObjects
}
Expand All @@ -431,6 +443,7 @@ func doCopySession(ctx context.Context, cancelCopy context.CancelFunc, cli *cli.
cpURLsCh <- cpURLs
}
}()

} else {
// Access recursive flag inside the session header.
isRecursive := cli.Bool("recursive")
Expand All @@ -452,23 +465,14 @@ func doCopySession(ctx context.Context, cancelCopy context.CancelFunc, cli *cli.
versionID: versionID,
isZip: cli.Bool("zip"),
}

for cpURLs := range prepareCopyURLs(ctx, opts) {
if cpURLs.Error != nil {
// Print in new line and adjust to top so that we
// don't print over the ongoing scan bar
if !globalQuiet && !globalJSON {
console.Eraseline()
}
if strings.Contains(cpURLs.Error.ToGoError().Error(),
" is a folder.") {
errorIf(cpURLs.Error.Trace(),
"Folder cannot be copied. Please use `...` suffix.")
} else {
errorIf(cpURLs.Error.Trace(),
"Unable to start copying.")
}
errSeen = true
printCopyURLsError(&cpURLs)
break
}

totalBytes += cpURLs.SourceContent.Size
pg.SetTotal(totalBytes)
totalObjects++
Expand Down Expand Up @@ -570,7 +574,6 @@ func doCopySession(ctx context.Context, cancelCopy context.CancelFunc, cli *cli.
}()

var retErr error
errSeen := false
cpAllFilesErr := true

loop:
Expand Down Expand Up @@ -639,14 +642,24 @@ loop:
}

if progressReader, ok := pg.(*progressBar); ok {
if (errSeen && totalObjects == 1) || (cpAllFilesErr && totalObjects > 1) {
console.Eraseline()
if errSeen || (cpAllFilesErr && totalObjects > 0) {
// We only erase a line if we are displaying a progress bar
if !globalQuiet && !globalJSON {
console.Eraseline()
}
} else if progressReader.ProgressBar.Get() > 0 {
progressReader.ProgressBar.Finish()
}
} else {
if accntReader, ok := pg.(*accounter); ok {
printMsg(accntReader.Stat())
if errSeen || (cpAllFilesErr && totalObjects > 0) {
// We only erase a line if we are displaying a progress bar
if !globalQuiet && !globalJSON {
console.Eraseline()
}
} else {
printMsg(accntReader.Stat())
}
}
}

Expand All @@ -670,7 +683,7 @@ func mainCopy(cliCtx *cli.Context) error {
}

// check 'copy' cli arguments.
checkCopySyntax(ctx, cliCtx, encKeyDB, false)
checkCopySyntax(cliCtx)
// Additional command specific theme customization.
console.SetColor("Copy", color.New(color.FgGreen, color.Bold))

Expand Down
150 changes: 1 addition & 149 deletions cmd/cp-url-syntax.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,14 @@
package cmd

import (
"context"
"fmt"
"runtime"
"time"

"github.com/minio/cli"
"github.com/minio/mc/pkg/probe"
"github.com/minio/pkg/v2/console"
)

func checkCopySyntax(ctx context.Context, cliCtx *cli.Context, encKeyDB map[string][]prefixSSEPair, isMvCmd bool) {
func checkCopySyntax(cliCtx *cli.Context) {
if len(cliCtx.Args()) < 2 {
if isMvCmd {
showCommandHelpAndExit(cliCtx, 1) // last argument is exit code.
}
showCommandHelpAndExit(cliCtx, 1) // last argument is exit code.
}

Expand All @@ -44,9 +37,7 @@ func checkCopySyntax(ctx context.Context, cliCtx *cli.Context, encKeyDB map[stri

srcURLs := URLs[:len(URLs)-1]
tgtURL := URLs[len(URLs)-1]
isRecursive := cliCtx.Bool("recursive")
isZip := cliCtx.Bool("zip")
timeRef := parseRewindFlag(cliCtx.String("rewind"))
versionID := cliCtx.String("version-id")

if versionID != "" && len(srcURLs) > 1 {
Expand All @@ -57,24 +48,6 @@ func checkCopySyntax(ctx context.Context, cliCtx *cli.Context, encKeyDB map[stri
fatalIf(errDummy().Trace(cliCtx.Args()...), "--zip and --rewind cannot be used together")
}

// Verify if source(s) exists.
for _, srcURL := range srcURLs {
var err *probe.Error
if !isRecursive {
_, _, err = url2Stat(ctx, srcURL, versionID, false, encKeyDB, timeRef, isZip)
} else {
_, _, err = firstURL2Stat(ctx, srcURL, timeRef, isZip)
}
if err != nil {
msg := "Unable to validate source `" + srcURL + "`"
if versionID != "" {
msg += " (" + versionID + ")"
}
msg += ": " + err.ToGoError().Error()
console.Fatalln(msg)
}
}

// Check if bucket name is passed for URL type arguments.
url := newClientURL(tgtURL)
if url.Host != "" {
Expand All @@ -91,129 +64,8 @@ func checkCopySyntax(ctx context.Context, cliCtx *cli.Context, encKeyDB map[stri
fatalIf(errInvalidArgument().Trace(), fmt.Sprintf("Both object retention flags `--%s` and `--%s` are required.\n", rdFlag, rmFlag))
}

operation := "copy"
if isMvCmd {
operation = "move"
}

// Guess CopyURLsType based on source and target URLs.
opts := prepareCopyURLsOpts{
sourceURLs: srcURLs,
targetURL: tgtURL,
isRecursive: isRecursive,
encKeyDB: encKeyDB,
olderThan: "",
newerThan: "",
timeRef: timeRef,
versionID: versionID,
isZip: isZip,
}
copyURLsType, _, err := guessCopyURLType(ctx, opts)
if err != nil {
fatalIf(errInvalidArgument().Trace(), "Unable to guess the type of "+operation+" operation.")
}

switch copyURLsType {
case copyURLsTypeA: // File -> File.
// Check source.
if len(srcURLs) != 1 {
fatalIf(errInvalidArgument().Trace(), "Invalid number of source arguments.")
}
checkCopySyntaxTypeA(ctx, srcURLs[0], versionID, encKeyDB, isZip, timeRef)
case copyURLsTypeB: // File -> Folder.
// Check source.
if len(srcURLs) != 1 {
fatalIf(errInvalidArgument().Trace(), "Invalid number of source arguments.")
}
checkCopySyntaxTypeB(ctx, srcURLs[0], versionID, tgtURL, encKeyDB, isZip, timeRef)
case copyURLsTypeC: // Folder... -> Folder.
checkCopySyntaxTypeC(ctx, srcURLs, tgtURL, isRecursive, isZip, encKeyDB, isMvCmd, timeRef)
case copyURLsTypeD: // File1...FileN -> Folder.
checkCopySyntaxTypeD(ctx, tgtURL, encKeyDB, timeRef)
default:
fatalIf(errInvalidArgument().Trace(), "Unable to guess the type of "+operation+" operation.")
}

// Preserve functionality not supported for windows
if cliCtx.Bool("preserve") && runtime.GOOS == "windows" {
fatalIf(errInvalidArgument().Trace(), "Permissions are not preserved on windows platform.")
}
}

// checkCopySyntaxTypeA verifies if the source and target are valid file arguments.
func checkCopySyntaxTypeA(ctx context.Context, srcURL, versionID string, keys map[string][]prefixSSEPair, isZip bool, timeRef time.Time) {
_, srcContent, err := url2Stat(ctx, srcURL, versionID, false, keys, timeRef, isZip)
fatalIf(err.Trace(srcURL), "Unable to stat source `"+srcURL+"`.")

if !srcContent.Type.IsRegular() {
fatalIf(errInvalidArgument().Trace(), "Source `"+srcURL+"` is not a file.")
}
}

// checkCopySyntaxTypeB verifies if the source is a valid file and target is a valid folder.
func checkCopySyntaxTypeB(ctx context.Context, srcURL, versionID, tgtURL string, keys map[string][]prefixSSEPair, isZip bool, timeRef time.Time) {
_, srcContent, err := url2Stat(ctx, srcURL, versionID, false, keys, timeRef, isZip)
fatalIf(err.Trace(srcURL), "Unable to stat source `"+srcURL+"`.")

if !srcContent.Type.IsRegular() {
fatalIf(errInvalidArgument().Trace(srcURL), "Source `"+srcURL+"` is not a file.")
}

// Check target.
if _, tgtContent, err := url2Stat(ctx, tgtURL, "", false, keys, timeRef, false); err == nil {
if !tgtContent.Type.IsDir() {
fatalIf(errInvalidArgument().Trace(tgtURL), "Target `"+tgtURL+"` is not a folder.")
}
}
}

// checkCopySyntaxTypeC verifies if the source is a valid recursive dir and target is a valid folder.
func checkCopySyntaxTypeC(ctx context.Context, srcURLs []string, tgtURL string, isRecursive, isZip bool, keys map[string][]prefixSSEPair, isMvCmd bool, timeRef time.Time) {
// Check source.
if len(srcURLs) != 1 {
fatalIf(errInvalidArgument().Trace(), "Invalid number of source arguments.")
}

// Check target.
if _, tgtContent, err := url2Stat(ctx, tgtURL, "", false, keys, timeRef, false); err == nil {
if !tgtContent.Type.IsDir() {
fatalIf(errInvalidArgument().Trace(tgtURL), "Target `"+tgtURL+"` is not a folder.")
}
}

for _, srcURL := range srcURLs {
c, srcContent, err := url2Stat(ctx, srcURL, "", false, keys, timeRef, isZip)
fatalIf(err.Trace(srcURL), "Unable to stat source `"+srcURL+"`.")

if srcContent.Type.IsDir() {
// Require --recursive flag if we are copying a directory
if !isRecursive {
operation := "copy"
if isMvCmd {
operation = "move"
}
fatalIf(errInvalidArgument().Trace(srcURL), fmt.Sprintf("To %v a folder requires --recursive flag.", operation))
}

// Check if we are going to copy a directory into itself
if isURLContains(srcURL, tgtURL, string(c.GetURL().Separator)) {
operation := "Copying"
if isMvCmd {
operation = "Moving"
}
fatalIf(errInvalidArgument().Trace(), fmt.Sprintf("%v a folder into itself is not allowed.", operation))
}
}
}
}

// checkCopySyntaxTypeD verifies if the source is a valid list of files and target is a valid folder.
func checkCopySyntaxTypeD(ctx context.Context, tgtURL string, keys map[string][]prefixSSEPair, timeRef time.Time) {
// Source can be anything: file, dir, dir...
// Check target if it is a dir
if _, tgtContent, err := url2Stat(ctx, tgtURL, "", false, keys, timeRef, false); err == nil {
if !tgtContent.Type.IsDir() {
fatalIf(errInvalidArgument().Trace(tgtURL), "Target `"+tgtURL+"` is not a folder.")
}
}
}
Loading