Skip to content

Commit

Permalink
Parses gs:// urls correctly now. (#6)
Browse files Browse the repository at this point in the history
Parses gs:// urls correctly now; Also eliminates a bug where the md5 might not encode to a valid UTF8 string to use as a prometheus label
  • Loading branch information
pboothe authored Feb 13, 2020
1 parent e3708c3 commit 3465709
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 3 deletions.
12 changes: 9 additions & 3 deletions zipfile/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@ import (
"archive/zip"
"bytes"
"context"
"encoding/hex"
"errors"
"io/ioutil"
"net/url"
"strings"

"cloud.google.com/go/storage"
"github.com/googleapis/google-cloud-go-testing/storage/stiface"
Expand Down Expand Up @@ -58,10 +60,10 @@ func (g *gcsProvider) Get(ctx context.Context) (*zip.Reader, error) {
}
g.cachedReader = zr
if g.md5 != nil {
metrics.GCSFilesLoaded.WithLabelValues(string(g.md5)).Set(0)
metrics.GCSFilesLoaded.WithLabelValues(hex.EncodeToString(g.md5)).Set(0)
}
g.md5 = oa.MD5
metrics.GCSFilesLoaded.WithLabelValues(string(g.md5)).Set(1)
metrics.GCSFilesLoaded.WithLabelValues(hex.EncodeToString(g.md5)).Set(1)
}
return g.cachedReader, nil
}
Expand Down Expand Up @@ -93,10 +95,14 @@ func FromURL(ctx context.Context, u *url.URL) (Provider, error) {
switch u.Scheme {
case "gs":
client, err := storage.NewClient(ctx)
filename := strings.TrimPrefix(u.Path, "/")
if len(filename) == 0 {
return nil, errors.New("Bad GS url, no filename detected")
}
return &gcsProvider{
client: stiface.AdaptClient(client),
bucket: u.Host,
filename: u.Path,
filename: filename,
}, err
case "file":
return &fileProvider{
Expand Down
59 changes: 59 additions & 0 deletions zipfile/provider_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,65 @@ func TestFromURL(t *testing.T) {
}
}

func TestFromGSURL(t *testing.T) {
tests := []struct {
name string
url string
want *gcsProvider
wantErr bool
}{
// Some of these endpoints do not exist, but since we never call .Get(),
// the provider can still be created successfully.
{
name: "Good file",
url: "gs://downloader-mlab-sandbox/Maxmind/current/GeoLite2-City-CSV.zip",
want: &gcsProvider{
bucket: "downloader-mlab-sandbox",
filename: "Maxmind/current/GeoLite2-City-CSV.zip",
},
},
{
name: "GCS nonexistent file",
url: "gs://mlab-nonexistent-bucket/nonexistent-object.zip",
want: &gcsProvider{
bucket: "mlab-nonexistent-bucket",
filename: "nonexistent-object.zip",
},
},
{
name: "GCS no file",
url: "gs://mlab-nonexistent-bucket/",
wantErr: true,
},
{
name: "GCS no file no slash",
url: "gs://mlab-nonexistent-bucket",
wantErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
u, err := url.Parse(tt.url)
rtx.Must(err, "Could not parse URL")
p, err := FromURL(context.Background(), u)
if (err != nil) != tt.wantErr {
t.Errorf("FromURL() error=%v (which should be or wrap ErrUnsupportedURLScheme=%v), wantErr=%v", err, ErrUnsupportedURLScheme, tt.wantErr)
return
}
if err != nil {
// Don't verify the output in the error case. The API makes no promises on error cases.
return
}
gcsp := p.(*gcsProvider)
if gcsp.bucket != tt.want.bucket || gcsp.filename != tt.want.filename {
t.Errorf(
"Bucket and filename should be (%q,%q), but are (%q,%q)",
tt.want.bucket, tt.want.filename, gcsp.bucket, gcsp.filename)
}
})
}
}

type stifaceReaderThatsJustAnIOReader struct {
stiface.Reader
r io.Reader
Expand Down

0 comments on commit 3465709

Please sign in to comment.