Skip to content

Commit

Permalink
Merge pull request #171 from NeowayLabs/fixDownloadDir
Browse files Browse the repository at this point in the history
Fix bug on download batch
  • Loading branch information
katcipis authored Apr 23, 2018
2 parents 1f95d08 + 5b6b358 commit ca9fdf1
Show file tree
Hide file tree
Showing 5 changed files with 162 additions and 62 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,12 @@ image:
docker build . -t neowaylabs/klb:$(version)

credentials: image guard-sh guard-subscription guard-service-principal guard-service-secret
docker run -ti --rm -v `pwd`:/credentials -w /credentials neowaylabs/klbdev:$(version) \
docker run -ti --rm -v `pwd`:/credentials -w /credentials neowaylabs/klb:$(version) \
/credentials/tools/azure/getcredentials.sh \
$(sh) "$(subscription)" "$(service-principal)" "$(service-secret)"

createsp: image guard-subscription-id guard-service-principal guard-service-secret
docker run -ti --rm -v `pwd`:/createsp -w /createsp neowaylabs/klbdev:$(version) \
docker run -ti --rm -v `pwd`:/createsp -w /createsp neowaylabs/klb:$(version) \
/createsp/tools/azure/createsp.sh \
"$(subscription-id)" "$(service-principal)" "$(service-secret)"

Expand Down
71 changes: 60 additions & 11 deletions azure/blob/fs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@ import klb/azure/storage
# on the azure container will be a file named "1".
#
# This was the moment when we started to realize that we had been screwed
# by Azure (again). We never tested the download-batch command supposing that
# it will behave equally bad (maintaining only the basename of files).
# by Azure (again). The download-batch command has other quirks and stupidities too.
#
# AWS S3 is also flat, but the aws tools and APIs provide a proper directory
# illusion, you can copy directories pretty much the same way you work with
Expand Down Expand Up @@ -118,18 +117,42 @@ fn azure_blob_fs_download(fs, localpath, remotepath) {

# Downloads a remote dir to a local dir.
# It will not create the remote dir on the local dir, only its
# contents (including other dirs, recursively).
# contents (including other dirs inside the remote dir, recursively).
#
# For example, downloading the remote dir /klb to the local dir /test
# will copy all contents of /klb to /test, like /test/file1 and /test/dir1/file1
# but it will not create a "klb" directory inside the local dir.
# For example, let's say you have these files at azure blob storage:
#
# /foo/bar/file1
# /foo/bar/file2
# /foo/bar/goo/file1
#
# Calling azure_blob_fs_download_dir($fs, "/test", "/foo/bar") will create
# these files on your filesystem:
#
# /test/file1
# /test/file2
# /test/goo/file1
#
# This is the behavior of Plan9 dircp (http://man.cat-v.org/plan_9/1/tar)
# and it seems more intuitive than the cp -r behavior of creating only the
# base dir of the source path at the target path if it does not exists
# and to copy only the contents you need the hack of regex expansion:
# base dir of the source path (this case the remote path)
# at the target path if it does not exist
# and to copy only the contents you need the hack of regex expansion like:
#
# cp -r /srcdir/* /targetdir.
#
# A curiosity is that the az tool actually introduces a third behavior with
# download-batch which is to reproduce the entire directory path of the remote
# path on the local path. The example above according to azure would result in:
#
# /test/foo/bar/file1
# /test/foo/bar/file2
# /test/foo/bar/goo/file1
#
# Which is the only approach that seems to have no upside AT ALL =D.
# This fs layer does its best to shield you from this kind of nonsense.
#
# If localdir does not exist, this function will attempt to create it for you.
#
# This function returns an error string if it fails and "" if it succeeds.
fn azure_blob_fs_download_dir(fs, localdir, remotedir) {
remotedir <= _azure_storage_fix_remote_path($remotedir)
Expand All @@ -141,19 +164,45 @@ fn azure_blob_fs_download_dir(fs, localdir, remotedir) {
}

# TODO: use timeout, right now az cli does not provide timeout for download-batch
remotedir = $remotedir + "*"
pattern = $remotedir + "*"
container <= azure_blob_fs_container($fs)

mkdir -p $localdir
tmplocal <= mktemp -d

fn cleanup() {
rm -rf $tmplocal
}

out, status <= (
az storage blob download-batch
--destination $localdir
--destination $tmplocal
--source $container
--account-name $account
--account-key $accountkey
--pattern $remotedir
--pattern $pattern
)

if $status != "0" {
cleanup()
return format("error[%s] downloading dir[%s] to[%s]", $out, $remotedir, $localdir)
}

# WHY GOD WHY ?
srcdir <= format("%s/%s", $tmplocal, $remotedir)
srcpaths <= ls $srcdir
srcpaths <= split($srcpaths, "\n")

for srcpath in $srcpaths {
srcpath <= format("%s/%s", $srcdir, $srcpath)
out, status <= cp -r $srcpath $localdir
if $status != "0" {
cleanup()
return format("error copying: %s", $out)
}
}

cleanup()
return ""
}

Expand Down
4 changes: 4 additions & 0 deletions azure/login.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,11 @@ fn azure_login() {
clientID = $AZURE_CLIENT_ID
secretID = $AZURE_CLIENT_SECRET
username = $AZURE_SERVICE_PRINCIPAL

azure_login_credentials($subscriptionID, $tenantID, $clientID, $secretID, $username)
}

fn azure_login_credentials(subscriptionID, tenantID, clientID, secretID, username) {
# azure cli 2.0
az account clear
az login --service-principal -u $username -p $secretID --tenant $tenantID --output table
Expand Down
128 changes: 80 additions & 48 deletions tests/azure/blobfs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ func TestBlobFS(t *testing.T) {
location,
testBlobFSUploadsWhenAccountAndContainerExists,
)

testBlobFSDownloadDir(t, timeout, location)
testBlobFSUploadDir(t, timeout, location)
testBlobFSListFiles(t, timeout, location)
Expand Down Expand Up @@ -643,6 +642,16 @@ func testBlobFSDownloadDir(
}

tests := []TestCase{
{
name: "OneFile",
remoteFiles: []string{
"/one/test1",
},
downloadDir: "/one",
wantedFiles: []string{
"/test1",
},
},
{
name: "Root",
remoteFiles: []string{
Expand All @@ -667,8 +676,8 @@ func testBlobFSDownloadDir(
},
downloadDir: "/dir",
wantedFiles: []string{
"/dir/test1",
"/dir/test2",
"/test1",
"/test2",
},
},
{
Expand All @@ -683,9 +692,27 @@ func testBlobFSDownloadDir(
},
downloadDir: "/dir/dir2",
wantedFiles: []string{
"/test1",
"/test2",
},
},
{
name: "SubdirsAreDownloaded",
remoteFiles: []string{
"/test1",
"/test2",
"/dir/test1",
"/dir/test2",
"/dir/dir2/test1",
"/dir/dir2/test2",
},
downloadDir: "/dir",
wantedFiles: []string{
"/test1",
"/test2",
"/dir2/test1",
"/dir2/test2",
},
},
}

Expand All @@ -702,38 +729,10 @@ func testBlobFSDownloadDir(
checkStorageBlobAccount(t, f, fs.account, fs.sku, fs.tier)
createStorageAccountContainer(f, fs.account, fs.container)

wantedFilesContents := map[string]string{}

for _, remoteFile := range remoteFiles {
expectedContent := fixture.NewUniqueName("random-content")
localFile, cleanup := setupTestFile(t, expectedContent)
localFile, cleanup := setupTestFile(t, remoteFile)
fs.Upload(t, f, remoteFile, localFile)
cleanup()

for _, wantedFile := range wantedFiles {
if wantedFile == remoteFile {
wantedFilesContents[wantedFile] = expectedContent
}
}
}

if len(wantedFilesContents) != len(wantedFiles) {
t.Fatalf(
"wanted files [%s] is not a subset of remote files [%s]",
wantedFiles,
remoteFiles,
)
}

for _, wantedFile := range wantedFiles {
if _, ok := wantedFilesContents[wantedFile]; !ok {
t.Errorf("wanted file [%s] was not uploaded", wantedFile)
t.Fatalf(
"wanted files [%s] is not a subset of remote files [%s]",
wantedFiles,
remoteFiles,
)
}
}

tmpdir, err := ioutil.TempDir("", "az-download-dir")
Expand All @@ -759,32 +758,65 @@ func testBlobFSDownloadDir(
assert.NoError(t, err)

if len(wantedFiles) != len(gotFiles) {
t.Errorf("wanted files and got files len don't match")
t.Fatalf("want files[%s] got[%s]", wantedFiles, gotFiles)
}

removeFilePrefix := func(path string) string {
return strings.TrimPrefix(path, tmpdir)
}

removeFilesPrefix := func(paths []string) []string {
trimmed := make([]string, len(paths))
for i, path := range paths {
trimmed[i] = removeFilePrefix(path)
}
return trimmed
}

expectedContent := func(path string) string {
// WHY: we use the original remote file path as
// the content itself (this way the content is unique for each file).
return filepath.Join(downloadDir, path)
}

for wantedFile, wantedFileContents := range wantedFilesContents {
for _, wantedFile := range wantedFiles {

got := false
for _, gotFile := range gotFiles {

gotFileRelative := strings.TrimPrefix(gotFile, tmpdir)
if gotFileRelative == wantedFile {
got = true
gotContentRaw, err := ioutil.ReadFile(gotFile)
assert.NoError(t, err)
gotContent := string(gotContentRaw)
if wantedFileContents != gotContent {
t.Fatalf(
"found wanted file[%s] but expected content[%s] != [%s]",
wantedFile,
wantedFileContents,
gotContent,
)
}
gotFileRelative := removeFilePrefix(gotFile)
if gotFileRelative != wantedFile {
continue
}

got = true
f, err := os.Open(gotFile)
assert.NoError(t, err, "opening file")
defer f.Close()

contentsr, err := ioutil.ReadAll(f)
assert.NoError(t, err, "reading file")

contents := string(contentsr)
wantedContents := expectedContent(wantedFile)

if contents != wantedContents {
t.Fatalf(
"wanted file[%s] contents[%s] got[%s]",
wantedFile,
wantedContents,
contents,
)
}

}
if !got {
t.Fatalf("wanted files[%s] got[%s]", wantedFiles, gotFiles)
t.Fatalf(
"wanted files[%s] != got[%s]",
wantedFiles,
removeFilesPrefix(gotFiles),
)
}
}
})
Expand Down
17 changes: 16 additions & 1 deletion tools/azure/getcredentials.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
#!/usr/bin/env nash

import klb/azure/login


if len($ARGS) != "5" {
echo "Usage: " $ARGS[0] "<(sh|nash)> <subscription name> <service principal name> <service secret>"

Expand Down Expand Up @@ -48,6 +51,18 @@ AZURE_CLIENT_ID <= (
tr -d "\n"
)

echo "testing the credentials"

AZURE_SERVICE_PRINCIPAL = "http://" + $SPNAME

azure_login_credentials(
$AZURE_SUBSCRIPTION_ID,
$AZURE_TENANT_ID,
$AZURE_CLIENT_ID,
$SPSECRET,
$AZURE_SERVICE_PRINCIPAL,
)

echo
echo "environment variables (copy them to a file):"
echo
Expand All @@ -57,6 +72,6 @@ printvar("AZURE_SUBSCRIPTION_NAME", $AZURE_SUBSCRIPTION_NAME)
printvar("AZURE_TENANT_ID", $AZURE_TENANT_ID)
printvar("AZURE_CLIENT_ID", $AZURE_CLIENT_ID)
printvar("AZURE_CLIENT_SECRET", $SPSECRET)
printvar("AZURE_SERVICE_PRINCIPAL", "http://"+$SPNAME)
printvar("AZURE_SERVICE_PRINCIPAL", $AZURE_SERVICE_PRINCIPAL)

echo

0 comments on commit ca9fdf1

Please sign in to comment.