diff --git a/README.md b/README.md index dfc9b2e..5e62890 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ website recursively. - High compatibility with web browsers - Different tag support (`a`, `img`, `link`, `script`, etc) - Multiple output formats (text, JSON, and JUnit XML) +- ini file support (configuration can be read from `muffet.ini` file) ## Installation diff --git a/arguments.go b/arguments.go index 52927fc..7a1040c 100644 --- a/arguments.go +++ b/arguments.go @@ -3,6 +3,7 @@ package main import ( "bytes" "errors" + "io" "net/http" "regexp" "strings" @@ -11,32 +12,32 @@ import ( ) type arguments struct { - BufferSize int `short:"b" long:"buffer-size" value-name:"" default:"4096" description:"HTTP response buffer size in bytes"` - MaxConnections int `short:"c" long:"max-connections" value-name:"" default:"512" description:"Maximum number of HTTP connections"` - MaxConnectionsPerHost int `long:"max-connections-per-host" value-name:"" default:"512" description:"Maximum number of HTTP connections per host"` - MaxResponseBodySize int `long:"max-response-body-size" value-name:"" default:"10000000" description:"Maximum response body size to read"` - RawExcludedPatterns []string `short:"e" long:"exclude" value-name:"..." description:"Exclude URLs matched with given regular expressions"` - RawIncludedPatterns []string `short:"i" long:"include" value-name:"..." 
description:"Include URLs matched with given regular expressions"` - FollowRobotsTxt bool `long:"follow-robots-txt" description:"Follow robots.txt when scraping pages"` + BufferSize int `short:"b" long:"buffer-size" value-name:"" default:"4096" description:"HTTP response buffer size in bytes" ini-name:"bufferSize"` + MaxConnections int `short:"c" long:"max-connections" value-name:"" default:"512" description:"Maximum number of HTTP connections" ini-name:"maxConnections"` + MaxConnectionsPerHost int `long:"max-connections-per-host" value-name:"" default:"512" description:"Maximum number of HTTP connections per host" ini-name:"maxConnectionsPerHost"` + MaxResponseBodySize int `long:"max-response-body-size" value-name:"" default:"10000000" description:"Maximum response body size to read" ini-name:"maxResponseBodySize"` + RawExcludedPatterns []string `short:"e" long:"exclude" value-name:"..." description:"Exclude URLs matched with given regular expressions" ini-name:"exclude"` + RawIncludedPatterns []string `short:"i" long:"include" value-name:"..." description:"Include URLs matched with given regular expressions" ini-name:"include"` + FollowRobotsTxt bool `long:"follow-robots-txt" description:"Follow robots.txt when scraping pages" ini-name:"followRobotsTxt"` FollowSitemapXML bool `long:"follow-sitemap-xml" description:"Scrape only pages listed in sitemap.xml (deprecated)"` - RawHeaders []string `long:"header" value-name:"
..." description:"Custom headers"` + RawHeaders []string `long:"header" value-name:"
..." description:"Custom headers" ini-name:"header"` // TODO Remove a short option. IgnoreFragments bool `short:"f" long:"ignore-fragments" description:"Ignore URL fragments"` - Format string `long:"format" description:"Output format" default:"text" choice:"text" choice:"json" choice:"junit"` + Format string `long:"format" description:"Output format" default:"text" choice:"text" choice:"json" choice:"junit" ini-name:"format"` // TODO Remove this option. JSONOutput bool `long:"json" description:"Output results in JSON (deprecated)"` // TODO Remove this option. VerboseJSON bool `long:"experimental-verbose-json" description:"Include successful results in JSON (deprecated)"` // TODO Remove this option. JUnitOutput bool `long:"junit" description:"Output results as JUnit XML file (deprecated)"` - MaxRedirections int `short:"r" long:"max-redirections" value-name:"" default:"64" description:"Maximum number of redirections"` - RateLimit int `long:"rate-limit" value-name:"" description:"Max requests per second"` - Timeout int `short:"t" long:"timeout" value-name:"" default:"10" description:"Timeout for HTTP requests in seconds"` - Verbose bool `short:"v" long:"verbose" description:"Show successful results too"` - Proxy string `long:"proxy" value-name:"" description:"HTTP proxy host"` - SkipTLSVerification bool `long:"skip-tls-verification" description:"Skip TLS certificate verification"` - OnePageOnly bool `long:"one-page-only" description:"Only check links found in the given URL"` - Color color `long:"color" description:"Color output" choice:"auto" choice:"always" choice:"never" default:"auto"` + MaxRedirections int `short:"r" long:"max-redirections" value-name:"" default:"64" description:"Maximum number of redirections" ini-name:"maxRedirections"` + RateLimit int `long:"rate-limit" value-name:"" description:"Max requests per second" ini-name:"rateLimit"` + Timeout int `short:"t" long:"timeout" value-name:"" default:"10" description:"Timeout for HTTP requests in seconds" 
ini-name:"timeout"` + Verbose bool `short:"v" long:"verbose" description:"Show successful results too" ini-name:"verbose"` + Proxy string `long:"proxy" value-name:"" description:"HTTP proxy host" ini-name:"proxy"` + SkipTLSVerification bool `long:"skip-tls-verification" description:"Skip TLS certificate verification" ini-name:"skipTLSVerification"` + OnePageOnly bool `long:"one-page-only" description:"Only check links found in the given URL" ini-name:"onePageOnly"` + Color color `long:"color" description:"Color output" choice:"auto" choice:"always" choice:"never" default:"auto" ini-name:"color"` Help bool `short:"h" long:"help" description:"Show this help"` Version bool `long:"version" description:"Show version"` URL string @@ -45,9 +46,17 @@ type arguments struct { Header http.Header } -func getArguments(ss []string) (*arguments, error) { +func getArguments(ss []string, iniFileReader io.Reader) (*arguments, error) { args := arguments{} - ss, err := flags.NewParser(&args, flags.PassDoubleDash).ParseArgs(ss) + parser := flags.NewParser(&args, flags.PassDoubleDash) + if iniFileReader != nil { + iniParser := flags.NewIniParser(parser) + err := iniParser.Parse(iniFileReader) + if err != nil { + return nil, err + } + } + ss, err := parser.ParseArgs(ss) if err != nil { return nil, err diff --git a/arguments_test.go b/arguments_test.go index edf120a..1f052ce 100644 --- a/arguments_test.go +++ b/arguments_test.go @@ -1,6 +1,7 @@ package main import ( + "bytes" "net/http" "testing" @@ -40,7 +41,7 @@ func TestGetArguments(t *testing.T) { {"--help"}, {"--version"}, } { - _, err := getArguments(ss) + _, err := getArguments(ss, nil) assert.Nil(t, err) } } @@ -60,11 +61,34 @@ func TestGetArgumentsError(t *testing.T) { {"-t", "foo", "https://foo.com"}, {"--timeout", "foo", "https://foo.com"}, } { - _, err := getArguments(ss) + _, err := getArguments(ss, nil) assert.NotNil(t, err) } } +func TestGetArgumentsWithIniFile(t *testing.T) { + ini := ` +bufferSize = 8192 +exclude = foo.com 
+exclude = bar.com +maxConnectionsPerHost = 122 +` + args := []string{"--header", "a:fo", "--max-connections-per-host", "123", "https://baz.com"} + outArgs, err := getArguments(args, bytes.NewBufferString(ini)) + assert.Nil(t, err) + + // Just from the ini file (the global default is overridden) + assert.Equal(t, 8192, outArgs.BufferSize) + // Not set anywhere (the global default is taken) + assert.Equal(t, 512, outArgs.MaxConnections) + // Command line takes priority over ini file + assert.Equal(t, 123, outArgs.MaxConnectionsPerHost) + // Just on command line + assert.Equal(t, []string{"a:fo"}, outArgs.RawHeaders) + // Repeated entries in the ini file lead to multiple items + assert.Equal(t, []string{"foo.com", "bar.com"}, outArgs.RawExcludedPatterns) +} + func TestHelp(t *testing.T) { cupaloy.SnapshotT(t, help()) } diff --git a/command.go b/command.go index 167d55b..7aa1882 100644 --- a/command.go +++ b/command.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "io" + "os" "strings" "time" @@ -33,7 +34,13 @@ func (c *command) Run(args []string) bool { } func (c *command) runWithError(ss []string) (bool, error) { - args, err := getArguments(ss) + var iniReader io.Reader + // muffet.ini is optional: if it cannot be opened, we just don't use it + if f, err := os.Open("muffet.ini"); err == nil { + defer f.Close() + iniReader = f + } + args, err := getArguments(ss, iniReader) if err != nil { return false, err } else if args.Help {