-
-
Notifications
You must be signed in to change notification settings - Fork 101
/
arguments.go
145 lines (119 loc) · 5.45 KB
/
arguments.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
package main
import (
"bytes"
"errors"
"net/http"
"regexp"
"strings"
"github.com/jessevdk/go-flags"
)
// arguments holds the command line configuration of the program.
//
// The fields carrying flag tags are populated by the go-flags parser that
// getArguments builds around this struct. The untagged fields at the bottom
// are filled in by getArguments itself once parsing has succeeded.
type arguments struct {
RawAcceptedStatusCodes string `long:"accepted-status-codes" value-name:"<codes>" default:"200..300" description:"Accepted HTTP response status codes (e.g. '200..300,403')"`
BufferSize int `short:"b" long:"buffer-size" value-name:"<size>" default:"4096" description:"HTTP response buffer size in bytes"`
MaxConnections int `short:"c" long:"max-connections" value-name:"<count>" default:"512" description:"Maximum number of HTTP connections"`
MaxConnectionsPerHost int `long:"max-connections-per-host" value-name:"<count>" default:"512" description:"Maximum number of HTTP connections per host"`
MaxResponseBodySize int `long:"max-response-body-size" value-name:"<size>" default:"10000000" description:"Maximum response body size to read"`
RawExcludedPatterns []string `short:"e" long:"exclude" value-name:"<pattern>..." description:"Exclude URLs matched with given regular expressions"`
RawIncludedPatterns []string `short:"i" long:"include" value-name:"<pattern>..." description:"Include URLs matched with given regular expressions"`
FollowRobotsTxt bool `long:"follow-robots-txt" description:"Follow robots.txt when scraping pages"`
FollowSitemapXML bool `long:"follow-sitemap-xml" description:"Scrape only pages listed in sitemap.xml (deprecated)"`
RawHeaders []string `long:"header" value-name:"<header>..." description:"Custom headers"`
// TODO Remove a short option.
IgnoreFragments bool `short:"f" long:"ignore-fragments" description:"Ignore URL fragments"`
DnsResolver string `long:"dns-resolver" value-name:"<address>" description:"Custom DNS resolver"`
Format string `long:"format" description:"Output format" default:"text" choice:"text" choice:"json" choice:"junit"`
// JSONOutput, VerboseJSON and JUnitOutput are deprecated flags;
// reconcileDeprecatedArguments folds them into Format and Verbose.
// TODO Remove this option.
JSONOutput bool `long:"json" description:"Output results in JSON (deprecated)"`
// TODO Remove this option.
VerboseJSON bool `long:"experimental-verbose-json" description:"Include successful results in JSON (deprecated)"`
// TODO Remove this option.
JUnitOutput bool `long:"junit" description:"Output results as JUnit XML file (deprecated)"`
MaxRedirections int `short:"r" long:"max-redirections" value-name:"<count>" default:"64" description:"Maximum number of redirections"`
RateLimit int `long:"rate-limit" value-name:"<rate>" description:"Max requests per second"`
Timeout int `short:"t" long:"timeout" value-name:"<seconds>" default:"10" description:"Timeout for HTTP requests in seconds"`
Verbose bool `short:"v" long:"verbose" description:"Show successful results too"`
Proxy string `long:"proxy" value-name:"<host>" description:"HTTP proxy host"`
SkipTLSVerification bool `long:"skip-tls-verification" description:"Skip TLS certificate verification"`
OnePageOnly bool `long:"one-page-only" description:"Only check links found in the given URL"`
Color color `long:"color" description:"Color output" choice:"auto" choice:"always" choice:"never" default:"auto"`
Help bool `short:"h" long:"help" description:"Show this help"`
Version bool `long:"version" description:"Show version"`
// The fields below carry no flag tags. getArguments sets them after parsing.
// URL is the single positional argument.
URL string
// AcceptedStatusCodes is the parsed form of RawAcceptedStatusCodes.
AcceptedStatusCodes statusCodeSet
// ExcludedPatterns is the compiled form of RawExcludedPatterns.
ExcludedPatterns []*regexp.Regexp
// IncludePatterns is the compiled form of RawIncludedPatterns.
IncludePatterns []*regexp.Regexp
// Header is the parsed form of RawHeaders.
Header http.Header
}
// getArguments parses the command line arguments ss into an arguments value.
//
// When the version or help flag is set, the parsed flags are returned right
// away without any further validation. Otherwise exactly one positional
// argument (the URL) must remain after flag parsing, deprecated flags are
// reconciled, and the raw pattern, header and status code options are
// converted into their parsed forms. The first failing step aborts with its
// error and a nil result.
func getArguments(ss []string) (*arguments, error) {
	var args arguments

	rest, err := flags.NewParser(&args, flags.PassDoubleDash).ParseArgs(ss)
	if err != nil {
		return nil, err
	}

	// Version and help requests need neither a URL nor validated options.
	if args.Version || args.Help {
		return &args, nil
	}

	if len(rest) != 1 {
		return nil, errors.New("invalid number of arguments")
	}

	reconcileDeprecatedArguments(&args)
	args.URL = rest[0]

	if args.ExcludedPatterns, err = compileRegexps(args.RawExcludedPatterns); err != nil {
		return nil, err
	}

	if args.IncludePatterns, err = compileRegexps(args.RawIncludedPatterns); err != nil {
		return nil, err
	}

	if args.Header, err = parseHeaders(args.RawHeaders); err != nil {
		return nil, err
	}

	if args.AcceptedStatusCodes, err = parseStatusCodeSet(args.RawAcceptedStatusCodes); err != nil {
		return nil, err
	}

	// Checked after reconciliation so the deprecated junit flag is covered too.
	if args.Verbose && args.Format == "junit" {
		return nil, errors.New("verbose option not supported for JUnit output")
	}

	return &args, nil
}
// help renders the command line usage text for the program's options.
func help() string {
	parser := flags.NewParser(&arguments{}, flags.PassDoubleDash)
	parser.Usage = "[options] <url>"

	// Default values only show up in the help output after a parse has run,
	// which seems to be a go-flags bug. Parse first and ignore the outcome.
	parser.Parse() // nolint:errcheck

	var out bytes.Buffer
	parser.WriteHelp(&out)

	return out.String()
}
// compileRegexps compiles every pattern in regexps, keeping the input order.
//
// Compilation stops at the first pattern that fails; that error is returned
// together with a nil slice. An empty input yields an empty, non-nil slice.
func compileRegexps(regexps []string) ([]*regexp.Regexp, error) {
	compiled := make([]*regexp.Regexp, len(regexps))

	for i, pattern := range regexps {
		re, err := regexp.Compile(pattern)
		if err != nil {
			return nil, err
		}

		compiled[i] = re
	}

	return compiled, nil
}
// parseHeaders converts raw "Name: value" strings into an http.Header.
//
// Each entry is split at its first colon, and whitespace around both the name
// and the value is trimmed. Entries that repeat a name accumulate their values
// in input order. An entry that has no colon, or whose name is empty after
// trimming, is rejected with an "invalid header format" error and a nil
// header.
func parseHeaders(headers []string) (http.Header, error) {
	h := make(http.Header, len(headers))

	for _, s := range headers {
		i := strings.IndexRune(s, ':')
		if i < 0 {
			return nil, errors.New("invalid header format")
		}

		// A header field name must be a non-empty token with no surrounding
		// whitespace, so normalize it and refuse blank names up front.
		name := strings.TrimSpace(s[:i])
		if name == "" {
			return nil, errors.New("invalid header format")
		}

		h.Add(name, strings.TrimSpace(s[i+1:]))
	}

	return h, nil
}
// reconcileDeprecatedArguments maps the deprecated output flags onto the
// current Format and Verbose fields of args, modifying args in place.
//
// The JSON flag takes precedence over the JUnit flag. The verbose JSON flag
// only has an effect together with the JSON flag, where it turns on Verbose.
func reconcileDeprecatedArguments(args *arguments) {
	switch {
	case args.JSONOutput:
		args.Format = "json"

		if args.VerboseJSON {
			args.Verbose = true
		}
	case args.JUnitOutput:
		args.Format = "junit"
	}
}