Skip to content

Commit

Permalink
Implement a user:group ownership preserving flag
Browse files Browse the repository at this point in the history
The `--preserve-ownership` flag will query the user and group and store that in S3 as metadata.

On Windows this stores the SID; on Linux and macOS it stores the numeric UID/GID.
  • Loading branch information
Ahuge committed Nov 7, 2022
1 parent f13420f commit a4de8d6
Show file tree
Hide file tree
Showing 10 changed files with 246 additions and 2 deletions.
47 changes: 46 additions & 1 deletion command/cp.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,14 @@ Examples:
22. Upload a file to S3 preserving the timestamp on disk
> s5cmd --preserve-timestamp myfile.css.br s3://bucket/
22. Download a file from S3 preserving the timestamp it was originally uplaoded with
23. Download a file from S3 preserving the timestamp it was originally uploaded with
> s5cmd --preserve-timestamp s3://bucket/myfile.css.br myfile.css.br
24. Upload a file to S3 preserving the ownership of files
> s5cmd --preserve-ownership myfile.css.br s3://bucket/
25. Download a file from S3 preserving the ownership it was originally uploaded with
> s5cmd --preserve-ownership s3://bucket/myfile.css.br myfile.css.br
`

func NewSharedFlags() []cli.Flag {
Expand Down Expand Up @@ -192,6 +198,10 @@ func NewSharedFlags() []cli.Flag {
Name: "preserve-timestamp",
Usage: "preserve the timestamp on disk while uploading and set the timestamp from s3 while downloading.",
},
&cli.BoolFlag{
Name: "preserve-ownership",
Usage: "preserve the ownership (owner/group) on disk while uploading and set the ownership from s3 while downloading.",
},
}
}

Expand Down Expand Up @@ -276,6 +286,7 @@ type Copy struct {
contentType string
contentEncoding string
preserveTimestamp bool
preserveOwnership bool

// region settings
srcRegion string
Expand Down Expand Up @@ -316,6 +327,7 @@ func NewCopy(c *cli.Context, deleteSource bool) Copy {
contentType: c.String("content-type"),
contentEncoding: c.String("content-encoding"),
preserveTimestamp: c.Bool("preserve-timestamp"),
preserveOwnership: c.Bool("preserve-ownership"),
// region settings
srcRegion: c.String("source-region"),
dstRegion: c.String("destination-region"),
Expand Down Expand Up @@ -549,6 +561,31 @@ func (c Copy) doDownload(ctx context.Context, srcurl *url.URL, dsturl *url.URL)
_ = srcClient.Delete(ctx, srcurl)
}

if c.preserveOwnership {
obj, err := srcClient.Stat(ctx, srcurl)
if err != nil {
return err
}
// SetFileUserGroup may return an InvalidOwnershipFormatError which signifies that it cannot
// understand the UserId or GroupId format.
// This is most common when a file is being ported across windows/linux.
// We aren't implementing a fix for it here, just a note that it cannot be resolved.
err = storage.SetFileUserGroup(dsturl.Absolute(), obj.UserId, obj.GroupId)
if err != nil {
invalidOwnershipFormat := &storage.InvalidOwnershipFormatError{}
if errors.As(err, &invalidOwnershipFormat) {
msg := log.ErrorMessage{
Operation: c.op,
Command: c.fullCommand,
Err: fmt.Sprintf("UserId: %s or GroupId: %s are not valid on this operating system.", obj.UserId, obj.GroupId),
}
log.Debug(msg)
}

return err
}
}

if c.preserveTimestamp {
obj, err := srcClient.Stat(ctx, srcurl)
if err != nil {
Expand Down Expand Up @@ -616,6 +653,14 @@ func (c Copy) doUpload(ctx context.Context, srcurl *url.URL, dsturl *url.URL) er
metadata.SetPreserveTimestamp(aTime, mTime, cTime)
}

if c.preserveOwnership {
userId, groupId, err := storage.GetFileUserGroup(srcurl.Absolute())
if err != nil {
return err
}
metadata.SetPreserveOwnership(userId, groupId)
}

if c.contentType != "" {
metadata.SetContentType(c.contentType)
} else {
Expand Down
11 changes: 11 additions & 0 deletions command/sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,12 @@ Examples:
10. Sync all files to S3 bucket but exclude the ones with txt and gz extension
> s5cmd {{.HelpName}} --exclude "*.txt" --exclude "*.gz" dir/ s3://bucket
11. Upload a folder to S3 preserving the ownership of files
> s5cmd {{.HelpName}} --preserve-ownership dir/ s3://bucket
12. Download a folder from S3 preserving the ownership it was originally uploaded with
> s5cmd {{.HelpName}} --preserve-ownership s3://bucket/ dir
`

func NewSyncCommandFlags() []cli.Flag {
Expand Down Expand Up @@ -117,6 +123,7 @@ type Sync struct {
delete bool
sizeOnly bool
preserveTimestamp bool
preserveOwnership bool

// s3 options
storageOpts storage.Options
Expand All @@ -141,6 +148,7 @@ func NewSync(c *cli.Context) Sync {
delete: c.Bool("delete"),
sizeOnly: c.Bool("size-only"),
preserveTimestamp: c.Bool("preserve-timestamp"),
preserveOwnership: c.Bool("preserve-ownership"),

// flags
followSymlinks: !c.Bool("no-follow-symlinks"),
Expand Down Expand Up @@ -360,6 +368,9 @@ func (s Sync) planRun(
// Flags forwarded to every generated sub-command. "raw" is always set;
// the preserve-* flags are forwarded only when the user asked for them.
defaultFlags := map[string]interface{}{
	"raw": true,
}
// Each flag is guarded by its own setting. Guarding preserve-ownership with
// preserveTimestamp would drop the ownership flag whenever it is used
// without --preserve-timestamp.
if s.preserveOwnership {
	defaultFlags["preserve-ownership"] = s.preserveOwnership
}
if s.preserveTimestamp {
	defaultFlags["preserve-timestamp"] = s.preserveTimestamp
}
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ require (
github.com/stretchr/objx v0.1.0 // indirect
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
go.etcd.io/bbolt v1.3.6 // indirect
golang.org/x/sys v0.0.0-20220405210540-1e041c57c461 // indirect
golang.org/x/sys v0.1.0 // indirect
golang.org/x/tools v0.0.0-20190624222133-a101b041ded4 // indirect
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 // indirect
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20220405210540-1e041c57c461 h1:kHVeDEnfKn3T238CvrUcz6KeEsFHVaKh4kMTt6Wsysg=
golang.org/x/sys v0.0.0-20220405210540-1e041c57c461/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U=
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
Expand Down
8 changes: 8 additions & 0 deletions storage/fs.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@ import (
"github.com/peak/s5cmd/storage/url"
)

// InvalidOwnershipFormatError reports that a stored user or group identifier
// could not be interpreted on the current platform, for example a Windows SID
// handed to a POSIX system or a numeric UID handed to Windows. Err holds the
// underlying parse or lookup failure.
type InvalidOwnershipFormatError struct {
	Err error
}

// Error implements the error interface. The message carries no trailing
// newline so it composes cleanly when the error is wrapped or logged.
func (e *InvalidOwnershipFormatError) Error() string {
	return fmt.Sprintf("InvalidOwnershipFormatError: %v", e.Err)
}

// Unwrap returns the underlying cause so that errors.Is and errors.As can
// inspect it through this wrapper.
func (e *InvalidOwnershipFormatError) Unwrap() error {
	return e.Err
}

// Filesystem is the Storage implementation of a local filesystem.
type Filesystem struct {
dryRun bool
Expand Down
36 changes: 36 additions & 0 deletions storage/fs_darwin.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package storage

import (
"os"
"strconv"
"syscall"
"time"
)
Expand Down Expand Up @@ -45,3 +46,38 @@ func SetFileTime(filename string, accessTime, modificationTime, creationTime tim
}
return nil
}

// GetFileUserGroup returns the owner and group of filename.
// On darwin (and linux) the identifiers are the numeric UID and GID rendered
// as base-10 strings; the Windows implementation returns SID strings instead.
//
// The file is inspected with os.Stat. On failure both identifiers are empty
// and the error is returned unchanged. If the platform-specific stat data is
// not a *syscall.Stat_t, an *os.PathError wrapping syscall.ENOTSUP is
// returned instead of panicking.
func GetFileUserGroup(filename string) (userId, groupId string, err error) {
	info, err := os.Stat(filename)
	if err != nil {
		return "", "", err
	}

	// Sys() returns an implementation-specific value, so use the checked form
	// of the type assertion rather than risking a panic.
	stat, ok := info.Sys().(*syscall.Stat_t)
	if !ok || stat == nil {
		return "", "", &os.PathError{Op: "stat", Path: filename, Err: syscall.ENOTSUP}
	}

	// Uid and Gid are unsigned; format them as unsigned values so large IDs
	// are never routed through a signed int conversion.
	userId = strconv.FormatUint(uint64(stat.Uid), 10)
	groupId = strconv.FormatUint(uint64(stat.Gid), 10)
	return userId, groupId, nil
}

// SetFileUserGroup sets the owner and group of filename using os.Lchown.
//
// userId and groupId must be base-10, non-negative numeric IDs (UID/GID).
// Anything else, such as a Windows SID string or a negative number, yields
// an *InvalidOwnershipFormatError. An empty identifier means "leave this
// attribute unchanged", matching the Windows implementation, which also
// skips empty identifiers. That is the case for objects stored without
// ownership metadata. Errors from os.Lchown are returned unchanged.
func SetFileUserGroup(filename, userId, groupId string) error {
	uid, err := parseOwnershipID(userId)
	if err != nil {
		return err
	}
	gid, err := parseOwnershipID(groupId)
	if err != nil {
		return err
	}

	return os.Lchown(filename, uid, gid)
}

// parseOwnershipID converts a stored UID/GID string into the int expected by
// os.Lchown. An empty string maps to -1, which chown treats as "do not
// change". Non-numeric, negative or out-of-range input is reported as an
// *InvalidOwnershipFormatError.
func parseOwnershipID(id string) (int, error) {
	if id == "" {
		return -1, nil
	}
	// ParseUint rejects signs, so a stored "-1" cannot silently turn into
	// the "do not change" sentinel.
	n, err := strconv.ParseUint(id, 10, 32)
	if err != nil {
		return 0, &InvalidOwnershipFormatError{Err: err}
	}
	return int(n), nil
}
36 changes: 36 additions & 0 deletions storage/fs_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package storage

import (
"os"
"strconv"
"syscall"
"time"
)
Expand Down Expand Up @@ -47,3 +48,38 @@ func SetFileTime(filename string, accessTime, modificationTime, creationTime tim
}
return nil
}

// GetFileUserGroup returns the owner and group of filename.
// On linux (and darwin) the identifiers are the numeric UID and GID rendered
// as base-10 strings; the Windows implementation returns SID strings instead.
//
// The file is inspected with os.Stat. On failure both identifiers are empty
// and the error is returned unchanged. If the platform-specific stat data is
// not a *syscall.Stat_t, an *os.PathError wrapping syscall.ENOTSUP is
// returned instead of panicking.
func GetFileUserGroup(filename string) (userId, groupId string, err error) {
	info, err := os.Stat(filename)
	if err != nil {
		return "", "", err
	}

	// Sys() returns an implementation-specific value, so use the checked form
	// of the type assertion rather than risking a panic.
	stat, ok := info.Sys().(*syscall.Stat_t)
	if !ok || stat == nil {
		return "", "", &os.PathError{Op: "stat", Path: filename, Err: syscall.ENOTSUP}
	}

	// Uid and Gid are unsigned; format them as unsigned values so large IDs
	// are never routed through a signed int conversion.
	userId = strconv.FormatUint(uint64(stat.Uid), 10)
	groupId = strconv.FormatUint(uint64(stat.Gid), 10)
	return userId, groupId, nil
}

// SetFileUserGroup sets the owner and group of filename using os.Lchown.
//
// userId and groupId must be base-10, non-negative numeric IDs (UID/GID).
// Anything else, such as a Windows SID string or a negative number, yields
// an *InvalidOwnershipFormatError. An empty identifier means "leave this
// attribute unchanged", matching the Windows implementation, which also
// skips empty identifiers. That is the case for objects stored without
// ownership metadata. Errors from os.Lchown are returned unchanged.
func SetFileUserGroup(filename, userId, groupId string) error {
	uid, err := parseOwnershipID(userId)
	if err != nil {
		return err
	}
	gid, err := parseOwnershipID(groupId)
	if err != nil {
		return err
	}

	return os.Lchown(filename, uid, gid)
}

// parseOwnershipID converts a stored UID/GID string into the int expected by
// os.Lchown. An empty string maps to -1, which chown treats as "do not
// change". Non-numeric, negative or out-of-range input is reported as an
// *InvalidOwnershipFormatError.
func parseOwnershipID(id string) (int, error) {
	if id == "" {
		return -1, nil
	}
	// ParseUint rejects signs, so a stored "-1" cannot silently turn into
	// the "do not change" sentinel.
	n, err := strconv.ParseUint(id, 10, 32)
	if err != nil {
		return 0, &InvalidOwnershipFormatError{Err: err}
	}
	return int(n), nil
}
83 changes: 83 additions & 0 deletions storage/fs_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,13 @@
package storage

import (
"golang.org/x/sys/windows"
"os"
"strings"
"syscall"
"time"

"github.com/Microsoft/go-winio"
)

func GetFileTime(filename string) (time.Time, time.Time, time.Time, error) {
Expand Down Expand Up @@ -62,3 +66,82 @@ func SetFileTime(filename string, accessTime, modificationTime, creationTime tim
}
return nil
}

// GetFileUserGroup returns the owner and primary group of filename.
// On Windows both identifiers are SID strings; the linux/darwin
// implementations return numeric UID/GID strings instead.
//
// Any failure to read the security descriptor, or to extract the owner or
// group from it, is returned to the caller with empty identifiers.
func GetFileUserGroup(filename string) (userId, groupId string, err error) {
	sd, err := windows.GetNamedSecurityInfo(
		filename,
		windows.SE_FILE_OBJECT,
		windows.OWNER_SECURITY_INFORMATION|windows.GROUP_SECURITY_INFORMATION,
	)
	if err != nil {
		return "", "", err
	}

	// Check each lookup on its own: previously the owner error was
	// overwritten by the group lookup and neither was ever inspected.
	ownerSID, _, err := sd.Owner()
	if err != nil {
		return "", "", err
	}
	groupSID, _, err := sd.Group()
	if err != nil {
		return "", "", err
	}

	// NOTE(review): a descriptor may presumably carry no owner/group, in which
	// case the SID is nil; report that as an empty identifier.
	if ownerSID != nil {
		userId = ownerSID.String()
	}
	if groupSID != nil {
		groupId = groupSID.String()
	}
	return userId, groupId, nil
}

// SetFileUserGroup will set the UserId and GroupId on a filename.
// If the UserId/GroupId format does not match the platform, it will return an InvalidOwnershipFormatError.
// Windows expects the UserId/GroupId to be in SID format, Linux and Darwin expect it in UID/GID format.
func SetFileUserGroup(filename, userId, groupId string) error {
var err error
privileges := []string{"SeRestorePrivilege", "SeTakeOwnershipPrivilege"}
if err := winio.EnableProcessPrivileges(privileges); err != nil {
return err
}
defer winio.DisableProcessPrivileges(privileges)

var uidSid *windows.SID
var gidSid *windows.SID
if userId != "" {
uidSid, err = StringAsSid(userId)
if err != nil {
return err
}
}

if groupId != "" {
gidSid, err = StringAsSid(groupId)
if err != nil {
return err
}
}

err = windows.SetNamedSecurityInfo(filename, windows.SE_FILE_OBJECT, windows.OWNER_SECURITY_INFORMATION, uidSid, gidSid, nil, nil)
if err != nil {
return err
}

return nil
}

// StringAsSid resolves principal to a SID.
//
// principal is first parsed as a SID string. If that fails because the text
// is not a valid SID, it is looked up as an account name on the local system
// instead. Every failure is reported as an *InvalidOwnershipFormatError
// wrapping the underlying error.
func StringAsSid(principal string) (*windows.SID, error) {
	sid, err := windows.StringToSid(principal)
	if err == nil {
		return sid, nil
	}

	// Prefer the error code over the message text, which depends on the
	// message tables installed on the system. The text match is kept as a
	// fallback so existing behaviour is preserved.
	notASID := err == windows.ERROR_INVALID_SID ||
		strings.Contains(err.Error(), "The security ID structure is invalid.")
	if !notASID {
		return nil, &InvalidOwnershipFormatError{Err: err}
	}

	// Not a SID string: treat it as an account name.
	sid, _, _, err = windows.LookupSID("", principal)
	if err != nil {
		return nil, &InvalidOwnershipFormatError{Err: err}
	}
	return sid, nil
}

func StringSidAsName(strSID string) (name string, err error) {
sid, err := StringAsSid(strSID)
if err != nil {
return "", err
}
name, _, _, err = sid.LookupAccount("")
if err != nil {
return "", err
}
return name, nil
}
7 changes: 7 additions & 0 deletions storage/s3.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,10 +129,14 @@ func (s *S3) Stat(ctx context.Context, url *url.URL) (*Object, error) {

etag := aws.StringValue(output.ETag)
mod := aws.TimeValue(output.LastModified)
userId := aws.StringValue(output.Metadata["file-owner"])
groupId := aws.StringValue(output.Metadata["file-group"])

obj := &Object{
URL: url,
Etag: strings.Trim(etag, `"`),
UserId: userId,
GroupId: groupId,
ModTime: &mod,
Size: aws.Int64Value(output.ContentLength),
CreateTime: &time.Time{},
Expand Down Expand Up @@ -406,6 +410,7 @@ func (s *S3) Copy(ctx context.Context, from, to *url.URL, metadata Metadata) err
Key: aws.String(to.Path),
CopySource: aws.String(copySource),
RequestPayer: s.RequestPayer(),
Metadata: make(map[string]*string),
}

storageClass := metadata.StorageClass()
Expand Down Expand Up @@ -440,6 +445,8 @@ func (s *S3) Copy(ctx context.Context, from, to *url.URL, metadata Metadata) err
}
input.Expires = aws.Time(t)
}
input.Metadata["file-owner"] = aws.String(metadata.userId())
input.Metadata["file-group"] = aws.String(metadata.groupId())

ctime := metadata.cTime()
if ctime != "" {
Expand Down
Loading

0 comments on commit a4de8d6

Please sign in to comment.