diff --git a/CHANGELOG.md b/CHANGELOG.md index 3a85da58..02d1abdc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +## [6.14.3] - 2024-05-30 +### Fixed +- Fixed #189 - Update utils authority package to handle proper encoding/decoding of uri with reserved characters. + ## [6.14.2] - 2024-05-30 ### Fixed - Fixed #187 - Update to latest jlaffaye/ftp library to fix issue where FTPS connections were failing due to a bug in the library. Also updated dataconn to continue even if it fails to MakeDir. diff --git a/backend/ftp/file.go b/backend/ftp/file.go index 92eaa451..45b69377 100644 --- a/backend/ftp/file.go +++ b/backend/ftp/file.go @@ -180,7 +180,7 @@ func (f *File) MoveToFile(t vfs.File) error { // ftp rename if vfs is ftp and for the same user/host if f.fileSystem.Scheme() == t.Location().FileSystem().Scheme() && f.authority.UserInfo().Username() == t.(*File).authority.UserInfo().Username() && - f.authority.Host() == t.(*File).authority.Host() { + f.authority.HostPortStr() == t.(*File).authority.HostPortStr() { // ensure destination exists before moving exists, err := t.Location().Exists() @@ -246,7 +246,7 @@ func (f *File) CopyToFile(file vfs.File) (err error) { //nolint:gocyclo if f.fileSystem.Scheme() == file.Location().FileSystem().Scheme() && f.authority.UserInfo().Username() == file.(*File).authority.UserInfo().Username() && - f.authority.Host() == file.(*File).authority.Host() { + f.authority.HostPortStr() == file.(*File).authority.HostPortStr() { // in the case that both files have the same authority we'll copy by writing a temporary // file to mem and then writing it back to the ftp server tempFile, err := f.createLocalTempFile() @@ -436,7 +436,13 @@ func (f *File) Write(data []byte) (res int, err error) { // URI returns the File's URI as a string. func (f *File) URI() string { - return utils.GetFileURI(f) + loc := f.Location().(*Location) + return utils.EncodeURI( + f.fileSystem.Scheme(), + loc.Authority.UserInfo().Username(), + loc.Authority.HostPortStr(), + f.Path(), + ) } // String implement fmt.Stringer, returning the file's URI as the default string. diff --git a/backend/ftp/file_test.go b/backend/ftp/file_test.go index bc82230d..9b928dc2 100644 --- a/backend/ftp/file_test.go +++ b/backend/ftp/file_test.go @@ -1125,6 +1125,12 @@ func (ts *fileTestSuite) TestPath() { func (ts *fileTestSuite) TestURI() { expected := "ftp://user@host.com:22/some/path/to/file.txt" ts.Equal(expected, ts.testFile.URI(), "URI test") + + expected = "ftp://domain.com%5Cuser@host.com:22/some/path/to/file.txt" + fs := NewFileSystem() + f, err := fs.NewFile("domain.com%5Cuser@host.com:22", "/some/path/to/file.txt") + ts.NoError(err) + ts.Equal(expected, f.URI(), "URI test") } func (ts *fileTestSuite) TestStringer() { diff --git a/backend/ftp/location.go b/backend/ftp/location.go index 2e1ec0ef..572714dc 100644 --- a/backend/ftp/location.go +++ b/backend/ftp/location.go @@ -223,7 +223,7 @@ func (l *Location) FileSystem() vfs.FileSystem { // URI returns the Location's URI as a string. func (l *Location) URI() string { - return utils.GetLocationURI(l) + return utils.EncodeURI(l.FileSystem().Scheme(), l.Authority.UserInfo().Username(), l.Authority.HostPortStr(), l.Path()) } // String implement fmt.Stringer, returning the location's URI as the default string. diff --git a/backend/ftp/location_test.go b/backend/ftp/location_test.go index 1c9f8df1..1a33d8ec 100644 --- a/backend/ftp/location_test.go +++ b/backend/ftp/location_test.go @@ -322,6 +322,16 @@ func (lt *locationTestSuite) TestURI() { file, err := lt.ftpfs.NewFile(authority, "/blah/file.txt") lt.NoError(err) lt.Equal("ftp://user@host.com/blah/file.txt", file.URI(), "file uri with user, pass, host") + + authority = `domain.com\user@host.com` + file, err = lt.ftpfs.NewFile(authority, "/blah/file.txt") + lt.Error(err) + lt.ErrorContains(err, "net/url: invalid userinfo", "file uri with bad user") + + authority = `domain.com%5Cuser@host.com` + file, err = lt.ftpfs.NewFile(authority, "/blah/file.txt") + lt.NoError(err) + lt.Equal(`ftp://domain.com%5Cuser@host.com/blah/file.txt`, file.URI(), "file uri with percent-encoded character in user") } func (lt *locationTestSuite) TestString() { diff --git a/backend/sftp/file.go b/backend/sftp/file.go index 11b61a3c..7665f011 100644 --- a/backend/sftp/file.go +++ b/backend/sftp/file.go @@ -148,7 +148,7 @@ func (f *File) MoveToFile(t vfs.File) error { // sftp rename if vfs is sftp and for the same user/host if f.fileSystem.Scheme() == t.Location().FileSystem().Scheme() && f.Authority.UserInfo().Username() == t.(*File).Authority.UserInfo().Username() && - f.Authority.Host() == t.(*File).Authority.Host() { + f.Authority.HostPortStr() == t.(*File).Authority.HostPortStr() { // ensure destination exists before moving exists, err := t.Location().Exists() if err != nil { @@ -364,7 +364,13 @@ func (f *File) Write(data []byte) (res int, err error) { // URI returns the File's URI as a string. func (f *File) URI() string { - return utils.GetFileURI(f) + loc := f.Location().(*Location) + return utils.EncodeURI( + f.fileSystem.Scheme(), + loc.Authority.UserInfo().Username(), + loc.Authority.HostPortStr(), + f.Path(), + ) } // String implement fmt.Stringer, returning the file's URI as the default string. diff --git a/backend/sftp/file_test.go b/backend/sftp/file_test.go index 7f88327f..676073a0 100644 --- a/backend/sftp/file_test.go +++ b/backend/sftp/file_test.go @@ -835,6 +835,12 @@ func (ts *fileTestSuite) TestPath() { func (ts *fileTestSuite) TestURI() { expected := "sftp://user@host.com:22/some/path/to/file.txt" ts.Equal(expected, ts.testFile.URI(), "URI test") + + expected = "sftp://domain.com%5Cuser@host.com:22/some/path/to/file.txt" + fs := NewFileSystem() + f, err := fs.NewFile("domain.com%5Cuser@host.com:22", "/some/path/to/file.txt") + ts.NoError(err) + ts.Equal(expected, f.URI(), "URI test") } func (ts *fileTestSuite) TestStringer() { diff --git a/backend/sftp/location.go b/backend/sftp/location.go index c0e6424a..f3c20603 100644 --- a/backend/sftp/location.go +++ b/backend/sftp/location.go @@ -213,7 +213,7 @@ func (l *Location) FileSystem() vfs.FileSystem { // URI returns the Location's URI as a string. func (l *Location) URI() string { - return utils.GetLocationURI(l) + return utils.EncodeURI(l.FileSystem().Scheme(), l.Authority.UserInfo().Username(), l.Authority.HostPortStr(), l.Path()) } // String implement fmt.Stringer, returning the location's URI as the default string. diff --git a/backend/sftp/location_test.go b/backend/sftp/location_test.go index d96e0628..c6f97d78 100644 --- a/backend/sftp/location_test.go +++ b/backend/sftp/location_test.go @@ -155,6 +155,16 @@ func (lt *locationTestSuite) TestURI() { file, err := lt.sftpfs.NewFile(authority, "/blah/file.txt") lt.NoError(err) lt.Equal("sftp://user@host.com/blah/file.txt", file.URI(), "file uri with user, pass, host") + + authority = `domain.com\user@host.com` + file, err = lt.sftpfs.NewFile(authority, "/blah/file.txt") + lt.Error(err) + lt.ErrorContains(err, "net/url: invalid userinfo", "file uri with bad user") + + authority = `domain.com%5Cuser@host.com` + file, err = lt.sftpfs.NewFile(authority, "/blah/file.txt") + lt.NoError(err) + lt.Equal(`sftp://domain.com%5Cuser@host.com/blah/file.txt`, file.URI(), "file uri with percent-encoded character in user") } func (lt *locationTestSuite) TestString() { diff --git a/utils/authority.go b/utils/authority.go index 15feef92..bd1005d5 100644 --- a/utils/authority.go +++ b/utils/authority.go @@ -3,6 +3,7 @@ package utils import ( "errors" "fmt" + "net/url" "regexp" "strconv" "strings" @@ -18,8 +19,13 @@ import ( Where: authority = [ userinfo "@" ] host [ ":" port ] + userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) host = IP-literal / IPv4address / reg-name port = *DIGIT + reg-name = *( unreserved / pct-encoded / sub-delims ) + unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" + pct-encoded = "%" HEXDIG HEXDIG */ // ------------------------------------ host :port @@ -30,21 +36,23 @@ type Authority struct { userinfo UserInfo host string port uint16 + url *url.URL } // UserInfo represents user/pass portion of a URI type UserInfo struct { - username, password string + url *url.URL } // Username returns the username of a URI UserInfo. May be an empty string. func (u UserInfo) Username() string { - return u.username + return u.url.User.Username() } // Password returns the password of a URI UserInfo. May be an empty string. func (u UserInfo) Password() string { - return u.password + p, _ := u.url.User.Password() + return p } // String() returns a string representation of authority. It does not include password per @@ -54,20 +62,22 @@ func (u UserInfo) Password() string { // subcomponent unless the data after the colon is the empty string (indicating no password). func (a Authority) String() string { authority := a.HostPortStr() - if a.userinfo.Username() != "" { - authority = fmt.Sprintf("%s@%s", a.userinfo.Username(), authority) + if a.UserInfo().Username() != "" { + authority = fmt.Sprintf("%s@%s", a.UserInfo().Username(), authority) } return authority } // UserInfo returns the userinfo section of authority. userinfo is username and password(deprecated). func (a Authority) UserInfo() UserInfo { - return a.userinfo + return UserInfo{ + url: a.url, + } } // Host returns the host portion of an authority func (a Authority) Host() string { - return a.host + return a.url.Hostname() } // Port returns the port portion of an authority @@ -83,23 +93,29 @@ func (a Authority) HostPortStr() string { return a.Host() } +var schemeRE = regexp.MustCompile("^[A-Za-z][A-Za-z0-9+.-]*://") + // NewAuthority initializes Authority struct by parsing authority string. func NewAuthority(authority string) (Authority, error) { if authority == "" { return Authority{}, errors.New("authority string may not be empty") } - u, p, h, err := parseAuthority(authority) + + var err error + matched := schemeRE.MatchString(authority) + if !matched { + authority = "scheme://" + authority + } + + u, err := url.Parse(authority) if err != nil { return Authority{}, err } - matches := portRegex.FindAllStringSubmatch(h, -1) - if len(matches) == 0 { - return Authority{}, errors.New("could not determine port") - } + host, portStr := splitHostPort(u.Host) var port uint16 - if matches[0][2] != "" { - val, err := strconv.ParseUint(matches[0][2], 10, 16) + if portStr != "" { + val, err := strconv.ParseUint(portStr, 10, 16) if err != nil { return Authority{}, err } @@ -107,118 +123,32 @@ func NewAuthority(authority string) (Authority, error) { } return Authority{ - userinfo: UserInfo{ - username: u, - password: p, - }, - host: matches[0][1], + host: host, port: port, + url: u, }, nil } -/* - NOTE: Below was mostly taken line-for-line from the "url" package (https://github.com/golang/go/blob/master/src/net/url/url.go), - minus unencoding and some unused split logic. Unfortunately none of it was exposed in a way that could be used for parsing authority. - - Copyright (c) 2009 The Go Authors. All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - * Neither the name of Google Inc. nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -func parseAuthority(authority string) (username, password, host string, err error) { - i := strings.LastIndex(authority, "@") - if i < 0 { - host, err = parseHost(authority) - } else { - host, err = parseHost(authority[i+1:]) - } - if err != nil { - return "", "", "", err - } - if i < 0 { - return "", "", host, nil - } - userinfo := authority[:i] - if !validUserinfo(userinfo) { - return "", "", host, errors.New("invalid userinfo") - } - if !strings.Contains(userinfo, ":") { - username = userinfo - } else { - username, password = split(userinfo, ":") - } - - return -} - -func split(s, c string) (string, string) { - i := strings.Index(s, c) - return s[:i], s[i+len(c):] -} +// splitHostPort separates host and port. If the port is not valid, it returns +// the entire input as host, and it doesn't check the validity of the host. +// Unlike net.SplitHostPort, but per RFC 3986, it requires ports to be numeric. +func splitHostPort(hostPort string) (host, port string) { + host = hostPort -func validUserinfo(s string) bool { - for _, r := range s { - if r >= 'A' && r <= 'Z' { - continue - } - if r >= 'a' && r <= 'z' { - continue - } - if r >= '0' && r <= '9' { - continue - } - switch r { - case '-', '.', '_', ':', '~', '!', '$', '&', '\'', - '(', ')', '*', '+', ',', ';', '=', '%', '@': - continue - default: - return false - } + colon := strings.LastIndexByte(host, ':') + if colon != -1 && validOptionalPort(host[colon:]) { + host, port = host[:colon], host[colon+1:] } - return true -} -func parseHost(host string) (string, error) { - if strings.HasPrefix(host, "[") { - // Parse an IP-Literal in RFC 3986 and RFC 6874. - // E.g., "[fe80::1]", "[fe80::1%25en0]", "[fe80::1]:80". - i := strings.LastIndex(host, "]") - if i < 0 { - return "", errors.New("missing ']' in host") - } - colonPort := host[i+1:] - if !validOptionalPort(colonPort) { - return "", fmt.Errorf("invalid port %q after host", colonPort) - } + if strings.HasPrefix(host, "[") && strings.HasSuffix(host, "]") { + host = host[1 : len(host)-1] } - return host, nil + return } +// validOptionalPort reports whether port is either an empty string +// or matches /^:\d*$/ func validOptionalPort(port string) bool { if port == "" { return true diff --git a/utils/authority_test.go b/utils/authority_test.go index 513f7288..ac32af11 100644 --- a/utils/authority_test.go +++ b/utils/authority_test.go @@ -134,7 +134,7 @@ func (a *authoritySuite) TestAuthority() { message: "user has upper and numeric", }, { - authorityString: "#blah@some.host.com", + authorityString: "{blah@some.host.com", host: "", port: 0, user: "", @@ -145,6 +145,54 @@ func (a *authoritySuite) TestAuthority() { errMessage: "invalid userinfo", message: "user has bad character", }, + { + authorityString: "!c2fo@some.host.com", + host: "some.host.com", + port: 0, + user: "!c2fo", + pass: "", + str: "!c2fo@some.host.com", + hostPortStr: "some.host.com", + hasError: false, + errMessage: "", + message: "user has raw ! character", + }, + { + authorityString: "%21c2fo@some.host.com", + host: "some.host.com", + port: 0, + user: "!c2fo", + pass: "", + str: "!c2fo@some.host.com", + hostPortStr: "some.host.com", + hasError: false, + errMessage: "", + message: "user has pct-encoded ! character", + }, + { + authorityString: `domain.com%5cuser@some.host.com`, + host: "some.host.com", + port: 0, + user: `domain.com\user`, + pass: "", + str: `domain.com\user@some.host.com`, + hostPortStr: "some.host.com", + hasError: false, + errMessage: "", + message: `user has pct-encoded \ character`, + }, + { + authorityString: `domain.com\user@some.host.com`, + host: "", + port: 0, + user: "", + pass: "", + str: "", + hostPortStr: "", + hasError: true, + errMessage: "invalid userinfo", + message: "raw backslash not allowed", + }, { authorityString: "127.0.0.1", host: "127.0.0.1", @@ -171,11 +219,11 @@ func (a *authoritySuite) TestAuthority() { }, { authorityString: "[0:0:0:0:0:0:0:1]", - host: "[0:0:0:0:0:0:0:1]", + host: "0:0:0:0:0:0:0:1", port: 0, user: "", pass: "", - str: "[0:0:0:0:0:0:0:1]", + str: "0:0:0:0:0:0:0:1", hostPortStr: "[0:0:0:0:0:0:0:1]", hasError: false, errMessage: "", @@ -195,11 +243,11 @@ func (a *authoritySuite) TestAuthority() { }, { authorityString: "[:::::::1]", - host: "[:::::::1]", + host: ":::::::1", port: 0, user: "", pass: "", - str: "[:::::::1]", + str: ":::::::1", hostPortStr: "[:::::::1]", hasError: false, errMessage: "", @@ -207,11 +255,11 @@ func (a *authoritySuite) TestAuthority() { }, { authorityString: "[:::::::1]:3022", - host: "[:::::::1]", + host: ":::::::1", port: 3022, user: "", pass: "", - str: "[:::::::1]:3022", + str: ":::::::1:3022", hostPortStr: "[:::::::1]:3022", hasError: false, errMessage: "", @@ -243,18 +291,20 @@ func (a *authoritySuite) TestAuthority() { }, } - for i := range tests { - actual, err := NewAuthority(tests[i].authorityString) - if tests[i].hasError { - a.EqualError(err, tests[i].errMessage, tests[i].message) - } else { - a.NoError(err, tests[i].message) - a.Equal(tests[i].host, actual.Host(), tests[i].message) - a.Equal(int(tests[i].port), int(actual.Port()), tests[i].message) - a.Equal(tests[i].user, actual.UserInfo().Username(), tests[i].message) - a.Equal(tests[i].pass, actual.UserInfo().Password(), tests[i].message) - a.Equal(tests[i].str, actual.String(), tests[i].message) - } + for _, t := range tests { + a.Run(t.message, func() { + actual, err := NewAuthority(t.authorityString) + if t.hasError { + a.ErrorContains(err, t.errMessage, t.message) + } else { + a.NoError(err, t.message) + a.Equal(t.host, actual.Host(), t.message) + a.Equal(int(t.port), int(actual.Port()), t.message) + a.Equal(t.user, actual.UserInfo().Username(), t.message) + a.Equal(t.pass, actual.UserInfo().Password(), t.message) + a.Equal(t.str, actual.String(), t.message) + } + }) } } diff --git a/utils/utils.go b/utils/utils.go index 662d0392..87cef302 100644 --- a/utils/utils.go +++ b/utils/utils.go @@ -92,6 +92,7 @@ func GetFileURI(f vfs.File) string { // GetLocationURI returns a Location URI func GetLocationURI(l vfs.Location) string { + return fmt.Sprintf("%s://%s%s", l.FileSystem().Scheme(), l.Volume(), l.Path()) } @@ -244,3 +245,15 @@ func SeekTo(length, position, offset int64, whence int) (int64, error) { return offset, nil } + +// EncodeURI ensure that a uri is properly percent-encoded +func EncodeURI(scheme, username, hostport, path string) string { + u := &url.URL{ + Scheme: scheme, + User: url.User(username), + Host: hostport, + Path: path, + } + + return u.String() +} diff --git a/vfssimple/vfssimple_test.go b/vfssimple/vfssimple_test.go index 74eaef77..04555a67 100644 --- a/vfssimple/vfssimple_test.go +++ b/vfssimple/vfssimple_test.go @@ -152,24 +152,39 @@ func (s *vfssimplesuite) TestParseURI() { authority: "user@host.com:22", path: "/path/to/file.txt", }, + { + uri: `sftp://doamin.com%5Cuser@host.com:22/path/to/file.txt`, + err: nil, + message: "valid sftp uri, with percent-encoded char", + scheme: "sftp", + authority: `doamin.com%5Cuser@host.com:22`, + path: "/path/to/file.txt", + }, + { + uri: `sftp://doamin.com\user@host.com:22/path/to/file.txt`, + err: errors.New("net/url: invalid userinfo"), + message: `invalid sftp uri, with raw reserved char \`, + }, } for _, test := range tests { - scheme, authority, path, err := parseURI(test.uri) - if test.err != nil { - s.Error(err, test.message) - if errors.Is(err, test.err) { - s.True(errors.Is(err, test.err), test.message) + s.Run(test.message, func() { + scheme, authority, path, err := parseURI(test.uri) + if test.err != nil { + s.Error(err, test.message) + if errors.Is(err, test.err) { + s.True(errors.Is(err, test.err), test.message) + } else { + // this is necessary since we can't recreate sentinel errors from url.Parse() to do errors.Is() comparison + s.Contains(err.Error(), test.err.Error(), test.message) + } } else { - // this is necessary since we can't recreate sentinel errors from url.Parse() to do errors.Is() comparison - s.Contains(err.Error(), test.err.Error(), test.message) + s.NoError(err, test.message) + s.Equal(test.scheme, scheme, test.message) + s.Equal(test.authority, authority, test.message) + s.Equal(test.path, path, test.message) } - } else { - s.NoError(err, test.message) - s.Equal(test.scheme, scheme, test.message) - s.Equal(test.authority, authority, test.message) - s.Equal(test.path, path, test.message) - } + }) } }