From e1075985bc0bcaa94ddde077e99d3974ac5c27b8 Mon Sep 17 00:00:00 2001 From: Tom Wiesing Date: Mon, 7 Oct 2024 19:29:57 +0200 Subject: [PATCH] internal/url: Improve scheme splitting --- env/url.go | 10 ++----- internal/split/split.go | 15 ---------- internal/split/split_test.go | 36 ----------------------- internal/url/url.go | 43 +++++++++++++++++++++------ internal/url/url_test.go | 57 +++++++++++++++++++----------------- 5 files changed, 66 insertions(+), 95 deletions(-) diff --git a/env/url.go b/env/url.go index 5c98480..8da5602 100644 --- a/env/url.go +++ b/env/url.go @@ -66,14 +66,8 @@ func ParseURL(s string) (repo URL) { // normalize for windows s = windowsReplacer.Replace(s) - // Trim off a leading scheme (as separated by '://') and (if it is valid) store it. - { - scheme, rest := split.Before(s, "://") - if url.IsValidURLScheme(scheme) { - repo.Scheme = scheme - s = rest - } - } + // split off the url scheme + repo.Scheme, s = url.SplitURLScheme(s) // Next, we split of the authentication if we have an '@' sign. // Technically the if { } clause isn't required, the code will work fine without it. diff --git a/internal/split/split.go b/internal/split/split.go index 186f037..5c21807 100644 --- a/internal/split/split.go +++ b/internal/split/split.go @@ -9,24 +9,9 @@ import ( "unicode/utf8" ) -// Before splits the string s into a part before a separator, called the prefix, and a part after the separator, called the suffix. -// If separator is not contained in the source string, prefix is empty and suffix is equal to the input string. -// -// See also AfterRune. -func Before(s, sep string) (prefix, suffix string) { - if i := strings.Index(s, sep); i >= 0 { - return s[:i], s[i+len(sep):] - } - return "", s -} - // AfterRune splits the string s into a part before a separator, called the prefix, and a part after the separator, called the suffix. // If separator is not contained in the source string, suffix is empty and prefix is equal to the input string. -// -// See also Before. func AfterRune(s string, sep rune) (prefix, suffix string) { - // NOTE: This uses sep as a rune, because nothing else is required. - // And that turns out to be the most efficient variant. if i := strings.IndexRune(s, sep); i >= 0 { return s[:i], s[i+utf8.RuneLen(sep):] } diff --git a/internal/split/split_test.go b/internal/split/split_test.go index 9486ec2..f5c84a1 100644 --- a/internal/split/split_test.go +++ b/internal/split/split_test.go @@ -6,42 +6,6 @@ import ( "testing" ) -func TestBefore(t *testing.T) { - type args struct { - s string - sep string - } - tests := []struct { - name string - args args - wantPrefix string - wantSuffix string - }{ - {"splitFoundOnce", args{"a;b", ";"}, "a", "b"}, - {"splitFoundMultiple", args{"a;b;c", ";"}, "a", "b;c"}, - {"splitNotFound", args{"aaa", ";"}, "", "aaa"}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - gotPrefix, gotSuffix := Before(tt.args.s, tt.args.sep) - if gotPrefix != tt.wantPrefix { - t.Errorf("Before() gotPrefix = %v, want %v", gotPrefix, tt.wantPrefix) - } - if gotSuffix != tt.wantSuffix { - t.Errorf("Before() gotSuffix = %v, want %v", gotSuffix, tt.wantSuffix) - } - }) - } -} - -func Benchmark_Before(b *testing.B) { - for i := 0; i < b.N; i++ { - Before("a;b", ";") - Before("a;b;c", ";") - Before("aaa", ";") - } -} - func TestAfterRune(t *testing.T) { type args struct { s string diff --git a/internal/url/url.go b/internal/url/url.go index 4bdf46b..53d5980 100644 --- a/internal/url/url.go +++ b/internal/url/url.go @@ -62,20 +62,23 @@ func ParsePort(s string) (port uint16, err error) { return port, nil } -// IsValidURLScheme checks if a string represents a valid URL scheme. +// SplitURLScheme splits off the URL scheme from s, returning the rest of the string in rest. +// If it does not contain a valid scheme, returns "", s. // -// A valid url scheme is one that matches the regex [a-zA-Z][a-zA-Z0-9+\-\.]*. -func IsValidURLScheme(s string) bool { - // An obvious implementation of this function would be: - // regexp.MustCompile(`^[a-zA-Z][a-zA-Z0-9+\-\.]*$`).MatchString(s) +// A scheme is of the form 'scheme://rest'. +// Scheme must match the regular expression `^[a-zA-Z][a-zA-Z0-9+\-\.]*$`. +func SplitURLScheme(s string) (scheme string, rest string) { + // An obvious implementation of this function would simply match against the + // regular expression. + // // However such an implementation would be a lot slower when called repeatedly. - // Instead we directly build code that implements this regex. + // Instead we directly build code that directly implements the trimming. // Iterate over the bytes in this string. // We can do this safely, because any valid scheme must be ascii. bytes := []byte(s) if len(bytes) == 0 { - return false + return "", s } var ( @@ -92,12 +95,18 @@ nextLetter: // or be done idx++ if idx >= len(bytes) { - return true + goto noScheme } // get the current letter c = bytes[idx] + // reached end of the scheme + // we can make use of this because no valid scheme has a ':'. + if c == ':' { + goto scheme + } + if '0' <= c && c <= '9' { goto nextLetter } @@ -115,5 +124,21 @@ start: goto nextLetter } - return false +noScheme: + // not a valid letter + return "", s + +scheme: + // split into scheme and rest + scheme = string(bytes[:idx]) + rest = string(bytes[idx+1:]) + + // check that the rest starts with "//" + if len(rest) < 2 || rest[0] != '/' || rest[1] != '/' { + goto noScheme + } + + // and trim off the valid prefix + rest = rest[2:] + return } diff --git a/internal/url/url_test.go b/internal/url/url_test.go index 274e5c8..4f852dc 100644 --- a/internal/url/url_test.go +++ b/internal/url/url_test.go @@ -78,42 +78,45 @@ func Benchmark_ParsePort(b *testing.B) { } } -func Test_IsValidURLScheme(t *testing.T) { +func Test_SplitURLScheme(t *testing.T) { type args struct { - scheme string + input string } tests := []struct { - name string - args args - want bool + name string + args args + wantScheme string + wantRest string }{ - {"http scheme is valid", args{"http"}, true}, - {"https scheme is valid", args{"https"}, true}, - {"git scheme is valid", args{"git"}, true}, - {"ssh scheme is valid", args{"ssh"}, true}, - {"ssh2 scheme is valid", args{"ssh2"}, true}, - {"file scheme is valid", args{"file"}, true}, - {"combined with + scheme is valid", args{"ssh+git"}, true}, - {"combined with - scheme is valid", args{"ssh-git"}, true}, - {"combined with . scheme is valid", args{"ssh.git"}, true}, + {name: "http scheme is valid", args: args{"http://rest"}, wantScheme: "http", wantRest: "rest"}, + {name: "https scheme is valid", args: args{"https://rest"}, wantScheme: "https", wantRest: "rest"}, + {name: "git scheme is valid", args: args{"git://rest"}, wantScheme: "git", wantRest: "rest"}, + {name: "ssh scheme is valid", args: args{"ssh://rest"}, wantScheme: "ssh", wantRest: "rest"}, + {name: "ssh2 scheme is valid", args: args{"ssh2://rest"}, wantScheme: "ssh2", wantRest: "rest"}, + {name: "file scheme is valid", args: args{"file://rest"}, wantScheme: "file", wantRest: "rest"}, + {name: "combined with + scheme is valid", args: args{"ssh+git://rest"}, wantScheme: "ssh+git", wantRest: "rest"}, + {name: "combined with - scheme is valid", args: args{"ssh-git://rest"}, wantScheme: "ssh-git", wantRest: "rest"}, + {name: "combined with . scheme is valid", args: args{"ssh.git://rest"}, wantScheme: "ssh.git", wantRest: "rest"}, - {"empty scheme is invalid", args{""}, false}, - {"numerical scheme is invalid", args{"01234"}, false}, - {"numerical starting scheme is invalid", args{"01git"}, false}, - {"non-scheme is invalid", args{"://"}, false}, + {name: "valid scheme without valid ending", args: args{"valid"}, wantScheme: "", wantRest: "valid"}, + {name: "valid scheme with only :", args: args{"valid:"}, wantScheme: "", wantRest: "valid:"}, + {name: "valid scheme with only one /", args: args{"valid:/"}, wantScheme: "", wantRest: "valid:/"}, + {name: "valid scheme without rest", args: args{"valid://"}, wantScheme: "valid", wantRest: ""}, + + {name: "empty input passed through", args: args{""}, wantScheme: "", wantRest: ""}, + {name: "numerical scheme is invalid", args: args{"01234"}, wantScheme: "", wantRest: "01234"}, + {name: "numerical starting scheme is invalid", args: args{"01git"}, wantScheme: "", wantRest: "01git"}, + {name: "non-scheme is invalid", args: args{"://"}, wantScheme: "", wantRest: "://"}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := IsValidURLScheme(tt.args.scheme); got != tt.want { - t.Errorf("validateScheme() = %v, want %v", got, tt.want) + gotScheme, gotRest := SplitURLScheme(tt.args.input) + if gotScheme != tt.wantScheme { + t.Errorf("SplitURLScheme() scheme = %v, want %v", gotScheme, tt.wantScheme) + } + if gotRest != tt.wantRest { + t.Errorf("SplitURLScheme() rest = %v, want %v", gotRest, tt.wantRest) } }) } } - -func Benchmark_IsValidURLScheme(b *testing.B) { - for i := 0; i < b.N; i++ { - IsValidURLScheme("0http") - IsValidURLScheme("http") - } -}