From f6e7bab14fb3160da8f0c25b7283402f8c295e8a Mon Sep 17 00:00:00 2001 From: John Garbutt Date: Wed, 11 Dec 2019 11:25:11 +0000 Subject: [PATCH 1/3] Capture user input weakness in tests --- .../dacctl/actions_impl/parsers/job_test.go | 30 +++++++++++++++++++ internal/pkg/filesystem_impl/copy_test.go | 8 +++++ 2 files changed, 38 insertions(+) diff --git a/internal/pkg/dacctl/actions_impl/parsers/job_test.go b/internal/pkg/dacctl/actions_impl/parsers/job_test.go index d58e623a..af7c3dce 100644 --- a/internal/pkg/dacctl/actions_impl/parsers/job_test.go +++ b/internal/pkg/dacctl/actions_impl/parsers/job_test.go @@ -124,3 +124,33 @@ func TestGetJobSummary_Errors(t *testing.T) { assert.Equal(t, "only one per job buffer allowed", err.Error()) assert.Nil(t, result.PerJobBuffer) } + +func TestGetJobSummary_AvoidsBadInput(t *testing.T) { + // TODO: all these test should fail! + lines1 := []string{ + `#DW persistentdw name=myBBname1;doevil`, + } + result, err := getJobSummary(lines1) + assert.Nil(t, err) + assert.Equal(t,"myBBname1;doevil", string(result.Attachments[0])) + + lines2 := []string{ + `#DW jobdw capacity=4MiB access_mode=private type=scratch;asdf`, + `#DW stage_in source=/global/cscratch1/filename1;doevil destination=$DW_JOB_STRIPED/filename1 type=file`, + `#DW stage_in source=/global/cscratch1/filename2 destination=$DW_JOB_STRIPED/filename2&doevil type=file`, + } + result, err = getJobSummary(lines2) + assert.Nil(t, err) + assert.Contains(t, result.DataIn[0].Source, "doevil") + assert.Contains(t, result.DataIn[1].Destination, "doevil") + + lines3 := []string{ + `#DW jobdw capacity=4MiB access_mode=private type=scratch`, + `#DW stage_out source=$DW_JOB_STRIPED/outdir||doevil destination=$HOME../../scratch1/outdir&&doevil type=directory`, + `skipping other lines that we`, + } + result, err = getJobSummary(lines3) + assert.Nil(t, err) + assert.Contains(t, result.DataOut[0].Source, "doevil") + assert.Contains(t, result.DataOut[0].Destination, "doevil") +} \ No newline at end of file diff --git a/internal/pkg/filesystem_impl/copy_test.go b/internal/pkg/filesystem_impl/copy_test.go index f3240579..aea70411 100644 --- a/internal/pkg/filesystem_impl/copy_test.go +++ b/internal/pkg/filesystem_impl/copy_test.go @@ -77,4 +77,12 @@ func Test_GenerateRsyncCmd(t *testing.T) { cmd, err = generateRsyncCmd(testVolume, request) assert.Nil(t, err) assert.Equal(t, "rsync -ospgu --stats \\$DW_test dest", cmd) + + // TODO: all this test should fail! + request.SourceType = datamodel.Directory + request.Source = "source;doevil" + request.Destination = "dest" + cmd, err = generateRsyncCmd(testVolume, request) + assert.Nil(t, err) + assert.Equal(t, "rsync -r -ospgu --stats source;doevil dest", cmd) } From 954c28b98db98ed27725d5027997a81637ae51bf Mon Sep 17 00:00:00 2001 From: John Garbutt Date: Wed, 11 Dec 2019 13:47:20 +0000 Subject: [PATCH 2/3] Don't run the rync as root We already sudo as the user who requested the buffer, don't also run the whole thing as root. --- .../actions_impl/parsers/capacity_test.go | 5 +++ .../dacctl/actions_impl/parsers/job_test.go | 4 +- internal/pkg/filesystem_impl/copy.go | 2 +- internal/pkg/filesystem_impl/mount.go | 44 ++++++++++--------- internal/pkg/filesystem_impl/mount_test.go | 2 +- 5 files changed, 33 insertions(+), 24 deletions(-) diff --git a/internal/pkg/dacctl/actions_impl/parsers/capacity_test.go b/internal/pkg/dacctl/actions_impl/parsers/capacity_test.go index b130621f..cca6c151 100644 --- a/internal/pkg/dacctl/actions_impl/parsers/capacity_test.go +++ b/internal/pkg/dacctl/actions_impl/parsers/capacity_test.go @@ -62,4 +62,9 @@ func TestParseCapacityBytes(t *testing.T) { assert.Equal(t, "unable to parse size: 1B", err.Error()) assert.Equal(t, "", pool) assert.Equal(t, 0, size) + + pool, size, err = ParseCapacityBytes("foo:2.1TiB") + assert.Nil(t, err) + assert.Equal(t, "foo", pool) + assert.Equal(t, 2308974418330, size) } diff --git a/internal/pkg/dacctl/actions_impl/parsers/job_test.go b/internal/pkg/dacctl/actions_impl/parsers/job_test.go index af7c3dce..5cfbd68c 100644 --- a/internal/pkg/dacctl/actions_impl/parsers/job_test.go +++ b/internal/pkg/dacctl/actions_impl/parsers/job_test.go @@ -132,7 +132,7 @@ func TestGetJobSummary_AvoidsBadInput(t *testing.T) { } result, err := getJobSummary(lines1) assert.Nil(t, err) - assert.Equal(t,"myBBname1;doevil", string(result.Attachments[0])) + assert.Equal(t, "myBBname1;doevil", string(result.Attachments[0])) lines2 := []string{ `#DW jobdw capacity=4MiB access_mode=private type=scratch;asdf`, @@ -153,4 +153,4 @@ func TestGetJobSummary_AvoidsBadInput(t *testing.T) { assert.Nil(t, err) assert.Contains(t, result.DataOut[0].Source, "doevil") assert.Contains(t, result.DataOut[0].Destination, "doevil") -} \ No newline at end of file +} diff --git a/internal/pkg/filesystem_impl/copy.go b/internal/pkg/filesystem_impl/copy.go index 868ae0e4..85e72429 100644 --- a/internal/pkg/filesystem_impl/copy.go +++ b/internal/pkg/filesystem_impl/copy.go @@ -18,7 +18,7 @@ func processDataCopy(session datamodel.Session, request datamodel.DataCopyReques } log.Printf("Doing copy: %s", cmd) - return runner.Execute("localhost", cmd) + return runner.Execute("localhost", false, cmd) } func generateDataCopyCmd(session datamodel.Session, request datamodel.DataCopyRequest) (string, error) { diff --git a/internal/pkg/filesystem_impl/mount.go b/internal/pkg/filesystem_impl/mount.go index 371ca53f..a6dd738e 100644 --- a/internal/pkg/filesystem_impl/mount.go +++ b/internal/pkg/filesystem_impl/mount.go @@ -157,44 +157,44 @@ func unmount(fsType FSType, sessionName datamodel.SessionName, isMultiJob bool, func createSwap(hostname string, swapMB int, filename string, loopback string) error { file := fmt.Sprintf("dd if=/dev/zero of=%s bs=1024 count=%d", filename, swapMB*1024) - if err := runner.Execute(hostname, file); err != nil { + if err := runner.Execute(hostname, true, file); err != nil { return err } - if err := runner.Execute(hostname, fmt.Sprintf("chmod 0600 %s", filename)); err != nil { + if err := runner.Execute(hostname, true, fmt.Sprintf("chmod 0600 %s", filename)); err != nil { return err } device := fmt.Sprintf("losetup %s %s", loopback, filename) - if err := runner.Execute(hostname, device); err != nil { + if err := runner.Execute(hostname, true, device); err != nil { return err } swap := fmt.Sprintf("mkswap %s", loopback) - return runner.Execute(hostname, swap) + return runner.Execute(hostname, true, swap) } func swapOn(hostname string, loopback string) error { - return runner.Execute(hostname, fmt.Sprintf("swapon %s", loopback)) + return runner.Execute(hostname, true, fmt.Sprintf("swapon %s", loopback)) } func swapOff(hostname string, loopback string) error { - return runner.Execute(hostname, fmt.Sprintf("swapoff %s", loopback)) + return runner.Execute(hostname, true, fmt.Sprintf("swapoff %s", loopback)) } func detachLoopback(hostname string, loopback string) error { - return runner.Execute(hostname, fmt.Sprintf("losetup -d %s", loopback)) + return runner.Execute(hostname, true, fmt.Sprintf("losetup -d %s", loopback)) } func fixUpOwnership(hostname string, owner uint, group uint, directory string) error { - if err := runner.Execute(hostname, fmt.Sprintf("chown %d:%d %s", owner, group, directory)); err != nil { + if err := runner.Execute(hostname, true, fmt.Sprintf("chown %d:%d %s", owner, group, directory)); err != nil { return err } - return runner.Execute(hostname, fmt.Sprintf("chmod 700 %s", directory)) + return runner.Execute(hostname, true, fmt.Sprintf("chmod 700 %s", directory)) } func umountLustre(hostname string, directory string) error { // only unmount if already mounted - if err := runner.Execute(hostname, fmt.Sprintf("grep %s /etc/mtab", directory)); err == nil { + if err := runner.Execute(hostname, true, fmt.Sprintf("grep %s /etc/mtab", directory)); err == nil { // Don't add -l so we can spot when this fails - if err := runner.Execute(hostname, fmt.Sprintf("umount %s", directory)); err != nil { + if err := runner.Execute(hostname, true, fmt.Sprintf("umount %s", directory)); err != nil { return err } } else { @@ -205,11 +205,11 @@ func umountLustre(hostname string, directory string) error { } func removeSubtree(hostname string, directory string) error { - return runner.Execute(hostname, fmt.Sprintf("rm -df %s", directory)) + return runner.Execute(hostname, true, fmt.Sprintf("rm -df %s", directory)) } func createSymbolicLink(hostname string, src string, dest string) error { - return runner.Execute(hostname, fmt.Sprintf("ln -s %s %s", src, dest)) + return runner.Execute(hostname, true, fmt.Sprintf("ln -s %s %s", src, dest)) } func mountRemoteFilesystem(fsType FSType, hostname string, lnetSuffix string, mgtHost string, fsname string, directory string) error { @@ -224,8 +224,8 @@ func mountRemoteFilesystem(fsType FSType, hostname string, lnetSuffix string, mg func mountLustre(hostname string, lnetSuffix string, mgtHost string, fsname string, directory string) error { // We assume modprobe -v lustre is already done // First check if we are mounted already - if err := runner.Execute(hostname, fmt.Sprintf("grep %s /etc/mtab", directory)); err != nil || conf.SkipAnsible { - if err := runner.Execute(hostname, fmt.Sprintf( + if err := runner.Execute(hostname, true, fmt.Sprintf("grep %s /etc/mtab", directory)); err != nil || conf.SkipAnsible { + if err := runner.Execute(hostname, true, fmt.Sprintf( "mount -t lustre -o flock,nodev,nosuid %s%s:/%s %s", mgtHost, lnetSuffix, fsname, directory)); err != nil { return err @@ -240,22 +240,22 @@ func mountBeegFS(hostname string, mgtHost string, fsname string, directory strin if err := removeSubtree(hostname, directory); err != nil { return err } - return runner.Execute(hostname, fmt.Sprintf("ln -s /mnt/beegfs/%s %s", fsname, directory)) + return runner.Execute(hostname, true, fmt.Sprintf("ln -s /mnt/beegfs/%s %s", fsname, directory)) } func mkdir(hostname string, directory string) error { - return runner.Execute(hostname, fmt.Sprintf("mkdir -p %s", directory)) + return runner.Execute(hostname, true, fmt.Sprintf("mkdir -p %s", directory)) } type Run interface { - Execute(name string, cmd string) error + Execute(name string, asRoot bool, cmd string) error } type run struct { } // TODO: need some code sharing here!!! -func (*run) Execute(hostname string, cmdStr string) error { +func (*run) Execute(hostname string, asRoot bool, cmdStr string) error { log.Println("SSH to:", hostname, "with command:", cmdStr) if conf.SkipAnsible { @@ -265,7 +265,11 @@ func (*run) Execute(hostname string, cmdStr string) error { } cmd := exec.Command("ssh", "-o", "StrictHostKeyChecking=no", - "-o", "UserKnownHostsFile=/dev/null", hostname, "sudo", cmdStr) + "-o", "UserKnownHostsFile=/dev/null", hostname, cmdStr) + if asRoot { + cmd = exec.Command("ssh", "-o", "StrictHostKeyChecking=no", + "-o", "UserKnownHostsFile=/dev/null", hostname, "sudo", cmdStr) + } timer := time.AfterFunc(time.Minute, func() { log.Println("Time up, waited more than 5 mins to complete.") diff --git a/internal/pkg/filesystem_impl/mount_test.go b/internal/pkg/filesystem_impl/mount_test.go index 8adff553..658f3f3b 100644 --- a/internal/pkg/filesystem_impl/mount_test.go +++ b/internal/pkg/filesystem_impl/mount_test.go @@ -14,7 +14,7 @@ type fakeRunner struct { cmdStrs []string } -func (f *fakeRunner) Execute(hostname string, cmdStr string) error { +func (f *fakeRunner) Execute(hostname string, asRoot bool, cmdStr string) error { f.calls += 1 f.hostnames = append(f.hostnames, hostname) f.cmdStrs = append(f.cmdStrs, cmdStr) From 55628a553f18600963c2f349a2181fd8d65690b3 Mon Sep 17 00:00:00 2001 From: John Garbutt Date: Thu, 12 Dec 2019 06:45:39 +0000 Subject: [PATCH 3/3] Filter out bad input Ensure bad input for paths and keys in the job file doesn't get into the system. Ensure if we missing anything we fail to run a command that includes bad input. --- .../dacctl/actions_impl/parsers/hostnames.go | 12 ++++++++ .../pkg/dacctl/actions_impl/parsers/job.go | 10 ++++++- .../dacctl/actions_impl/parsers/job_test.go | 12 ++++---- internal/pkg/filesystem_impl/copy.go | 5 ++++ internal/pkg/filesystem_impl/copy_test.go | 29 ++++++++++++++----- 5 files changed, 53 insertions(+), 15 deletions(-) diff --git a/internal/pkg/dacctl/actions_impl/parsers/hostnames.go b/internal/pkg/dacctl/actions_impl/parsers/hostnames.go index 86e8fa84..ad02e4bf 100644 --- a/internal/pkg/dacctl/actions_impl/parsers/hostnames.go +++ b/internal/pkg/dacctl/actions_impl/parsers/hostnames.go @@ -13,6 +13,18 @@ func IsValidName(name string) bool { return nameRegex.Match([]byte(name)) } +var keyRegex = regexp.MustCompile("^[a-zA-Z0-9._-]+$") + +func IsValidKey(name string) bool { + return keyRegex.Match([]byte(name)) +} + +var pathRegex = regexp.MustCompile("^[a-zA-Z0-9.,_/$-]+$") + +func IsValidPath(value string) bool { + return pathRegex.Match([]byte(value)) +} + func GetHostnamesFromFile(disk fileio.Disk, filename string) ([]string, error) { computeHosts, err := disk.Lines(filename) if err != nil { diff --git a/internal/pkg/dacctl/actions_impl/parsers/job.go b/internal/pkg/dacctl/actions_impl/parsers/job.go index a6526f40..6ca41fc4 100644 --- a/internal/pkg/dacctl/actions_impl/parsers/job.go +++ b/internal/pkg/dacctl/actions_impl/parsers/job.go @@ -134,7 +134,15 @@ func parseArgs(rawArgs []string) (map[string]string, error) { if len(parts) != 2 { return args, fmt.Errorf("unable to parse arg: %s", arg) } - args[strings.ToLower(parts[0])] = parts[1] + key := parts[0] + value := parts[1] + if !IsValidKey(key) { + return args, fmt.Errorf("invalid key: %s", key) + } + if !IsValidPath(value) { + return args, fmt.Errorf("invalid value: %s", value) + } + args[strings.ToLower(key)] = value } return args, nil } diff --git a/internal/pkg/dacctl/actions_impl/parsers/job_test.go b/internal/pkg/dacctl/actions_impl/parsers/job_test.go index 5cfbd68c..c121c6a1 100644 --- a/internal/pkg/dacctl/actions_impl/parsers/job_test.go +++ b/internal/pkg/dacctl/actions_impl/parsers/job_test.go @@ -126,13 +126,13 @@ func TestGetJobSummary_Errors(t *testing.T) { } func TestGetJobSummary_AvoidsBadInput(t *testing.T) { - // TODO: all these test should fail! + // TODO: probably should see more errors here lines1 := []string{ `#DW persistentdw name=myBBname1;doevil`, } result, err := getJobSummary(lines1) assert.Nil(t, err) - assert.Equal(t, "myBBname1;doevil", string(result.Attachments[0])) + assert.Equal(t, "", string(result.Attachments[0])) lines2 := []string{ `#DW jobdw capacity=4MiB access_mode=private type=scratch;asdf`, @@ -141,8 +141,8 @@ func TestGetJobSummary_AvoidsBadInput(t *testing.T) { } result, err = getJobSummary(lines2) assert.Nil(t, err) - assert.Contains(t, result.DataIn[0].Source, "doevil") - assert.Contains(t, result.DataIn[1].Destination, "doevil") + assert.NotContains(t, result.DataIn[0].Source, "doevil") + assert.NotContains(t, result.DataIn[1].Destination, "doevil") lines3 := []string{ `#DW jobdw capacity=4MiB access_mode=private type=scratch`, @@ -151,6 +151,6 @@ func TestGetJobSummary_AvoidsBadInput(t *testing.T) { } result, err = getJobSummary(lines3) assert.Nil(t, err) - assert.Contains(t, result.DataOut[0].Source, "doevil") - assert.Contains(t, result.DataOut[0].Destination, "doevil") + assert.NotContains(t, result.DataOut[0].Source, "doevil") + assert.NotContains(t, result.DataOut[0].Destination, "doevil") } diff --git a/internal/pkg/filesystem_impl/copy.go b/internal/pkg/filesystem_impl/copy.go index 85e72429..a4e0413b 100644 --- a/internal/pkg/filesystem_impl/copy.go +++ b/internal/pkg/filesystem_impl/copy.go @@ -2,6 +2,7 @@ package filesystem_impl import ( "fmt" + "github.com/RSE-Cambridge/data-acc/internal/pkg/dacctl/actions_impl/parsers" "github.com/RSE-Cambridge/data-acc/internal/pkg/datamodel" "log" "strings" @@ -46,6 +47,10 @@ func generateRsyncCmd(session datamodel.Session, request datamodel.DataCopyReque return "", nil } + if !parsers.IsValidPath(request.Source) || !parsers.IsValidPath(request.Destination) { + return "", fmt.Errorf("invalid path: %+v", request) + } + var flags string if request.SourceType == datamodel.Directory { flags = "-r -ospgu --stats" diff --git a/internal/pkg/filesystem_impl/copy_test.go b/internal/pkg/filesystem_impl/copy_test.go index aea70411..98a5f633 100644 --- a/internal/pkg/filesystem_impl/copy_test.go +++ b/internal/pkg/filesystem_impl/copy_test.go @@ -59,30 +59,43 @@ func Test_GenerateRsyncCmd(t *testing.T) { assert.Equal(t, "rsync -ospgu --stats source dest", cmd) request.SourceType = datamodel.Directory - request.Source = "source" - request.Destination = "dest" + request.Source = "source-1_2/asdf" + request.Destination = "dest/asdf" cmd, err = generateRsyncCmd(testVolume, request) assert.Nil(t, err) - assert.Equal(t, "rsync -r -ospgu --stats source dest", cmd) + assert.Equal(t, "rsync -r -ospgu --stats source-1_2/asdf dest/asdf", cmd) request.SourceType = datamodel.List request.Source = "list_filename" cmd, err = generateRsyncCmd(testVolume, request) + assert.NotNil(t, err) assert.Equal(t, "", cmd) assert.Equal(t, "unsupported source type list for volume: asdf", err.Error()) request.SourceType = datamodel.File - request.Source = "$DW_test" + request.Source = "$DW_test/abc" request.Destination = "dest" cmd, err = generateRsyncCmd(testVolume, request) assert.Nil(t, err) - assert.Equal(t, "rsync -ospgu --stats \\$DW_test dest", cmd) + assert.Equal(t, "rsync -ospgu --stats \\$DW_test/abc dest", cmd) - // TODO: all this test should fail! request.SourceType = datamodel.Directory request.Source = "source;doevil" request.Destination = "dest" cmd, err = generateRsyncCmd(testVolume, request) - assert.Nil(t, err) - assert.Equal(t, "rsync -r -ospgu --stats source;doevil dest", cmd) + assert.NotNil(t, err) + assert.Equal(t, "invalid path: "+ + "{SourceType:directory Source:source;doevil Destination:dest RequestCopyIn:false CopyCompleted:false Error:}", + err.Error()) + assert.Equal(t, "", cmd) + + request.SourceType = datamodel.Directory + request.Source = "source\\doevil" + request.Destination = "dest" + cmd, err = generateRsyncCmd(testVolume, request) + assert.NotNil(t, err) + assert.Equal(t, "invalid path: "+ + "{SourceType:directory Source:source\\doevil Destination:dest RequestCopyIn:false CopyCompleted:false Error:}", + err.Error()) + assert.Equal(t, "", cmd) }