diff --git a/command/cp.go b/command/cp.go index 488f079c5..d556558f1 100644 --- a/command/cp.go +++ b/command/cp.go @@ -104,8 +104,14 @@ Examples: 22. Upload a file to S3 preserving the timestamp on disk > s5cmd --preserve-timestamp myfile.css.br s3://bucket/ - 22. Download a file from S3 preserving the timestamp it was originally uplaoded with + 23. Download a file from S3 preserving the timestamp it was originally uploaded with > s5cmd --preserve-timestamp s3://bucket/myfile.css.br myfile.css.br + + 24. Upload a file to S3 preserving the ownership of files + > s5cmd --preserve-ownership myfile.css.br s3://bucket/ + + 25. Download a file from S3 preserving the ownership it was originally uploaded with + > s5cmd --preserve-ownership s3://bucket/myfile.css.br myfile.css.br ` func NewSharedFlags() []cli.Flag { @@ -192,6 +198,10 @@ func NewSharedFlags() []cli.Flag { Name: "preserve-timestamp", Usage: "preserve the timestamp on disk while uploading and set the timestamp from s3 while downloading.", }, + &cli.BoolFlag{ + Name: "preserve-ownership", + Usage: "preserve the ownership (owner/group) on disk while uploading and set the ownership from s3 while downloading.", + }, } } @@ -276,6 +286,7 @@ type Copy struct { contentType string contentEncoding string preserveTimestamp bool + preserveOwnership bool // region settings srcRegion string @@ -316,6 +327,7 @@ func NewCopy(c *cli.Context, deleteSource bool) Copy { contentType: c.String("content-type"), contentEncoding: c.String("content-encoding"), preserveTimestamp: c.Bool("preserve-timestamp"), + preserveOwnership: c.Bool("preserve-ownership"), // region settings srcRegion: c.String("source-region"), dstRegion: c.String("destination-region"), @@ -549,6 +561,31 @@ func (c Copy) doDownload(ctx context.Context, srcurl *url.URL, dsturl *url.URL) _ = srcClient.Delete(ctx, srcurl) } + if c.preserveOwnership { + obj, err := srcClient.Stat(ctx, srcurl) + if err != nil { + return err + } + // SetFileUserGroup may return an 
InvalidOwnershipFormatError which signifies that it cannot + // understand the UserId or GroupId format. + // This is most common when a file is moved between Windows and Linux. + // We do not translate the IDs here; we only log that they cannot be applied and return the error. + err = storage.SetFileUserGroup(dsturl.Absolute(), obj.UserId, obj.GroupId) + if err != nil { + invalidOwnershipFormat := &storage.InvalidOwnershipFormatError{} + if errors.As(err, &invalidOwnershipFormat) { + msg := log.ErrorMessage{ + Operation: c.op, + Command: c.fullCommand, + Err: fmt.Sprintf("UserId: %s or GroupId: %s are not valid on this operating system.", obj.UserId, obj.GroupId), + } + log.Debug(msg) + } + + return err + } + } + if c.preserveTimestamp { obj, err := srcClient.Stat(ctx, srcurl) if err != nil { @@ -616,6 +653,14 @@ func (c Copy) doUpload(ctx context.Context, srcurl *url.URL, dsturl *url.URL) er metadata.SetPreserveTimestamp(aTime, mTime, cTime) } + if c.preserveOwnership { + userId, groupId, err := storage.GetFileUserGroup(srcurl.Absolute()) + if err != nil { + return err + } + metadata.SetPreserveOwnership(userId, groupId) + } + if c.contentType != "" { metadata.SetContentType(c.contentType) } else { diff --git a/command/sync.go b/command/sync.go index 097ad0ef2..b3eed57be 100644 --- a/command/sync.go +++ b/command/sync.go @@ -59,6 +59,12 @@ Examples: 10. Sync all files to S3 bucket but exclude the ones with txt and gz extension > s5cmd {{.HelpName}} --exclude "*.txt" --exclude "*.gz" dir/ s3://bucket + + 11. Upload a folder to S3 preserving the ownership of files + > s5cmd {{.HelpName}} --preserve-ownership dir/ s3://bucket + + 12. 
Download a folder from S3 preserving the ownership it was originally uploaded with + > s5cmd {{.HelpName}} --preserve-ownership s3://bucket/ dir ` func NewSyncCommandFlags() []cli.Flag { @@ -117,6 +123,7 @@ type Sync struct { delete bool sizeOnly bool preserveTimestamp bool + preserveOwnership bool // s3 options storageOpts storage.Options @@ -141,6 +148,7 @@ func NewSync(c *cli.Context) Sync { delete: c.Bool("delete"), sizeOnly: c.Bool("size-only"), preserveTimestamp: c.Bool("preserve-timestamp"), + preserveOwnership: c.Bool("preserve-ownership"), // flags followSymlinks: !c.Bool("no-follow-symlinks"), @@ -360,6 +368,10 @@ func (s Sync) planRun( defaultFlags := map[string]interface{}{ "raw": true, } + // Pass preserve-ownership on only when it was requested, independently of preserve-timestamp. + if s.preserveOwnership { + defaultFlags["preserve-ownership"] = s.preserveOwnership + } if s.preserveTimestamp { defaultFlags["preserve-timestamp"] = s.preserveTimestamp } diff --git a/go.mod b/go.mod index 369e0fbfb..0fa7dc04f 100644 --- a/go.mod +++ b/go.mod @@ -30,7 +30,7 @@ require ( github.com/stretchr/objx v0.1.0 // indirect github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect go.etcd.io/bbolt v1.3.6 // indirect - golang.org/x/sys v0.0.0-20220405210540-1e041c57c461 // indirect + golang.org/x/sys v0.1.0 // indirect golang.org/x/tools v0.0.0-20190624222133-a101b041ded4 // indirect golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 // indirect gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 // indirect diff --git a/go.sum b/go.sum index 2393b5928..7d2f945b3 100644 --- a/go.sum +++ b/go.sum @@ -71,6 +71,8 @@ golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20220405210540-1e041c57c461 h1:kHVeDEnfKn3T238CvrUcz6KeEsFHVaKh4kMTt6Wsysg= golang.org/x/sys v0.0.0-20220405210540-1e041c57c461/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.1.0 
h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U= +golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= diff --git a/storage/fs.go b/storage/fs.go index 3626414e6..a30b0781b 100644 --- a/storage/fs.go +++ b/storage/fs.go @@ -12,6 +12,22 @@ import ( "github.com/peak/s5cmd/storage/url" ) +// InvalidOwnershipFormatError reports that a user or group ID could not be +// interpreted in the format the current platform expects. +type InvalidOwnershipFormatError struct { + Err error +} + +// Error implements the error interface. +func (e *InvalidOwnershipFormatError) Error() string { + return fmt.Sprintf("InvalidOwnershipFormatError: %v", e.Err) +} + +// Unwrap returns the underlying error so errors.Is and errors.As can inspect it. +func (e *InvalidOwnershipFormatError) Unwrap() error { + return e.Err +} + // Filesystem is the Storage implementation of a local filesystem. type Filesystem struct { dryRun bool diff --git a/storage/fs_darwin.go b/storage/fs_darwin.go index b781e24a9..8172a5ee9 100644 --- a/storage/fs_darwin.go +++ b/storage/fs_darwin.go @@ -4,6 +4,7 @@ package storage import ( "os" + "strconv" "syscall" "time" ) @@ -45,3 +46,38 @@ func SetFileTime(filename string, accessTime, modificationTime, creationTime tim } return nil } + +// GetFileUserGroup will take a filename and return the userId and groupId associated with it. +// On windows this is in the format of a SID, on linux/darwin this is in the format of a UID/GID. +func GetFileUserGroup(filename string) (userId, groupId string, err error) { + info, err := os.Stat(filename) + if err != nil { + return "", "", err + } + + stat := info.Sys().(*syscall.Stat_t) + + userId = strconv.Itoa(int(stat.Uid)) + groupId = strconv.Itoa(int(stat.Gid)) + return userId, groupId, nil +} + +// SetFileUserGroup will set the UserId and GroupId on a filename. +// If the UserId/GroupId format does not match the platform, it will return an InvalidOwnershipFormatError. +// Windows expects the UserId/GroupId to be in SID format, Linux and Darwin expect it in UID/GID format. 
+func SetFileUserGroup(filename, userId, groupId string) error { + // An empty ID means there is nothing to restore; chown treats -1 as "leave unchanged". + uid, gid := -1, -1 + var err error + if userId != "" { + if uid, err = strconv.Atoi(userId); err != nil { + return &InvalidOwnershipFormatError{Err: err} + } + } + if groupId != "" { + if gid, err = strconv.Atoi(groupId); err != nil { + return &InvalidOwnershipFormatError{Err: err} + } + } + return os.Lchown(filename, uid, gid) +} diff --git a/storage/fs_linux.go b/storage/fs_linux.go index c23e3d8ff..f72a75b7f 100644 --- a/storage/fs_linux.go +++ b/storage/fs_linux.go @@ -4,6 +4,7 @@ package storage import ( "os" + "strconv" "syscall" "time" ) @@ -47,3 +48,38 @@ func SetFileTime(filename string, accessTime, modificationTime, creationTime tim } return nil } + +// GetFileUserGroup will take a filename and return the userId and groupId associated with it. +// On windows this is in the format of a SID, on linux/darwin this is in the format of a UID/GID. +func GetFileUserGroup(filename string) (userId, groupId string, err error) { + info, err := os.Stat(filename) + if err != nil { + return "", "", err + } + + stat := info.Sys().(*syscall.Stat_t) + + userId = strconv.Itoa(int(stat.Uid)) + groupId = strconv.Itoa(int(stat.Gid)) + return userId, groupId, nil +} + +// SetFileUserGroup will set the UserId and GroupId on a filename. +// If the UserId/GroupId format does not match the platform, it will return an InvalidOwnershipFormatError. +// Windows expects the UserId/GroupId to be in SID format, Linux and Darwin expect it in UID/GID format. 
+func SetFileUserGroup(filename, userId, groupId string) error { + // An empty ID means there is nothing to restore; chown treats -1 as "leave unchanged". + uid, gid := -1, -1 + var err error + if userId != "" { + if uid, err = strconv.Atoi(userId); err != nil { + return &InvalidOwnershipFormatError{Err: err} + } + } + if groupId != "" { + if gid, err = strconv.Atoi(groupId); err != nil { + return &InvalidOwnershipFormatError{Err: err} + } + } + return os.Lchown(filename, uid, gid) +} diff --git a/storage/fs_windows.go b/storage/fs_windows.go index 376a86223..60e0b5b76 100644 --- a/storage/fs_windows.go +++ b/storage/fs_windows.go @@ -3,9 +3,13 @@ package storage import ( "os" + "strings" "syscall" "time" + + "github.com/Microsoft/go-winio" + "golang.org/x/sys/windows" ) func GetFileTime(filename string) (time.Time, time.Time, time.Time, error) { @@ -62,3 +66,86 @@ func SetFileTime(filename string, accessTime, modificationTime, creationTime tim } return nil } + +// GetFileUserGroup will take a filename and return the userId and groupId associated with it. +// On windows this is in the format of a SID, on linux/darwin this is in the format of a UID/GID. +func GetFileUserGroup(filename string) (userId, groupId string, err error) { + sd, err := windows.GetNamedSecurityInfo(filename, windows.SE_FILE_OBJECT, windows.OWNER_SECURITY_INFORMATION|windows.GROUP_SECURITY_INFORMATION) + if err != nil { + return "", "", err + } + + // Report lookup failures to the caller instead of silently returning empty IDs. + userSID, _, err := sd.Owner() + if err != nil { + return "", "", err + } + groupSID, _, err := sd.Group() + if err != nil { + return "", "", err + } + + return userSID.String(), groupSID.String(), nil +} + +// SetFileUserGroup will set the UserId and GroupId on a filename. +// If the UserId/GroupId format does not match the platform, it will return an InvalidOwnershipFormatError. +// Windows expects the UserId/GroupId to be in SID format, Linux and Darwin expect it in UID/GID format. 
+func SetFileUserGroup(filename, userId, groupId string) error { + // With no IDs there is nothing to restore. + if userId == "" && groupId == "" { + return nil + } + privileges := []string{"SeRestorePrivilege", "SeTakeOwnershipPrivilege"} + if err := winio.EnableProcessPrivileges(privileges); err != nil { + return err + } + defer winio.DisableProcessPrivileges(privileges) + + // SetNamedSecurityInfo only applies the parts selected in secInfo, so select each SID that is provided. + var secInfo windows.SECURITY_INFORMATION + var uidSid, gidSid *windows.SID + var err error + if userId != "" { + secInfo |= windows.OWNER_SECURITY_INFORMATION + uidSid, err = StringAsSid(userId) + if err != nil { + return err + } + } + if groupId != "" { + secInfo |= windows.GROUP_SECURITY_INFORMATION + gidSid, err = StringAsSid(groupId) + if err != nil { + return err + } + } + return windows.SetNamedSecurityInfo(filename, windows.SE_FILE_OBJECT, secInfo, uidSid, gidSid, nil, nil) +} + +func StringAsSid(principal string) (*windows.SID, error) { + sid, err := windows.StringToSid(principal) + if err != nil { + if strings.Contains(err.Error(), "The security ID structure is invalid.") { + sid, _, _, err = windows.LookupSID("", principal) + if err != nil { + return nil, &InvalidOwnershipFormatError{Err: err} + } + } else { + return nil, &InvalidOwnershipFormatError{Err: err} + } + } + return sid, nil +} + +func StringSidAsName(strSID string) (name string, err error) { + sid, err := StringAsSid(strSID) + if err != nil { + return "", err + } + name, _, _, err = sid.LookupAccount("") + if err != nil { + return "", err + } + return name, nil +} diff --git a/storage/s3.go b/storage/s3.go index 829fc9878..a508799ea 100644 --- a/storage/s3.go +++ b/storage/s3.go @@ -129,10 +129,14 @@ func (s *S3) Stat(ctx context.Context, url *url.URL) (*Object, error) { etag := aws.StringValue(output.ETag) mod := aws.TimeValue(output.LastModified) + userId := aws.StringValue(output.Metadata["file-owner"]) + groupId := aws.StringValue(output.Metadata["file-group"]) obj := &Object{ URL: url, Etag: strings.Trim(etag, `"`), + UserId: userId, + GroupId: groupId, ModTime: &mod, Size: aws.Int64Value(output.ContentLength), CreateTime: &time.Time{}, @@ -406,6 +410,7 
@@ func (s *S3) Copy(ctx context.Context, from, to *url.URL, metadata Metadata) err Key: aws.String(to.Path), CopySource: aws.String(copySource), RequestPayer: s.RequestPayer(), + Metadata: make(map[string]*string), } storageClass := metadata.StorageClass() @@ -440,6 +445,13 @@ func (s *S3) Copy(ctx context.Context, from, to *url.URL, metadata Metadata) err } input.Expires = aws.Time(t) } + // Only attach ownership metadata when it was actually provided. + if owner := metadata.userId(); owner != "" { + input.Metadata["file-owner"] = aws.String(owner) + } + if group := metadata.groupId(); group != "" { + input.Metadata["file-group"] = aws.String(group) + } ctime := metadata.cTime() if ctime != "" { diff --git a/storage/storage.go b/storage/storage.go index b6c220b7b..a4e703a90 100644 --- a/storage/storage.go +++ b/storage/storage.go @@ -109,6 +109,8 @@ type Object struct { AccessTime *time.Time `json:"accessed,omitempty"` ModTime *time.Time `json:"last_modified,omitempty"` CreateTime *time.Time `json:"created,omitempty"` + UserId string `json:"uid,omitempty"` + GroupId string `json:"gid,omitempty"` Type ObjectType `json:"type,omitempty"` Size int64 `json:"size,omitempty"` StorageClass StorageClass `json:"storage_class,omitempty"` @@ -267,6 +269,21 @@ func (m Metadata) SetStorageClass(class string) Metadata { return m } +func (m Metadata) userId() string { + return m["file-owner"] +} + +func (m Metadata) groupId() string { + return m["file-group"] +} + +// SetPreserveOwnership stores userId and groupId under the "file-owner" and "file-group" keys and returns m. +func (m Metadata) SetPreserveOwnership(userId, groupId string) Metadata { + m["file-owner"] = userId + m["file-group"] = groupId + return m +} + func (m Metadata) ContentType() string { return m["ContentType"] }