Skip to content

Commit

Permalink
Add support for ini file.
Browse files Browse the repository at this point in the history
Motivation:

In CI we have multiple invocations of muffet with mostly the same arguments.
Maintaining all the shared args across these invocations is cumbersome.
Thus we'd like to have support for a config file to use as defaults.

Implementation:

Always use `muffet.ini`. I thought it was likely unnecessary to support
anything different until the need comes for it.

Signed-off-by: Charly Molter <[email protected]>
  • Loading branch information
lahabana committed Feb 14, 2024
1 parent c9c0369 commit a948e22
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 22 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ website recursively.
- High compatibility with web browsers
- Different tag support (`a`, `img`, `link`, `script`, etc)
- Multiple output formats (text, JSON, and JUnit XML)
- INI file support (configuration can be read from a `muffet.ini` file)

## Installation

Expand Down
47 changes: 28 additions & 19 deletions arguments.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package main
import (
"bytes"
"errors"
"io"
"net/http"
"regexp"
"strings"
Expand All @@ -11,32 +12,32 @@ import (
)

type arguments struct {
BufferSize int `short:"b" long:"buffer-size" value-name:"<size>" default:"4096" description:"HTTP response buffer size in bytes"`
MaxConnections int `short:"c" long:"max-connections" value-name:"<count>" default:"512" description:"Maximum number of HTTP connections"`
MaxConnectionsPerHost int `long:"max-connections-per-host" value-name:"<count>" default:"512" description:"Maximum number of HTTP connections per host"`
MaxResponseBodySize int `long:"max-response-body-size" value-name:"<size>" default:"10000000" description:"Maximum response body size to read"`
RawExcludedPatterns []string `short:"e" long:"exclude" value-name:"<pattern>..." description:"Exclude URLs matched with given regular expressions"`
RawIncludedPatterns []string `short:"i" long:"include" value-name:"<pattern>..." description:"Include URLs matched with given regular expressions"`
FollowRobotsTxt bool `long:"follow-robots-txt" description:"Follow robots.txt when scraping pages"`
BufferSize int `short:"b" long:"buffer-size" value-name:"<size>" default:"4096" description:"HTTP response buffer size in bytes" ini-name:"bufferSize"`
MaxConnections int `short:"c" long:"max-connections" value-name:"<count>" default:"512" description:"Maximum number of HTTP connections" ini-name:"maxConnections"`
MaxConnectionsPerHost int `long:"max-connections-per-host" value-name:"<count>" default:"512" description:"Maximum number of HTTP connections per host" ini-name:"maxConnectionsPerHost"`
MaxResponseBodySize int `long:"max-response-body-size" value-name:"<size>" default:"10000000" description:"Maximum response body size to read" ini-name:"maxResponseBodySize"`
RawExcludedPatterns []string `short:"e" long:"exclude" value-name:"<pattern>..." description:"Exclude URLs matched with given regular expressions" ini-name:"exclude"`
RawIncludedPatterns []string `short:"i" long:"include" value-name:"<pattern>..." description:"Include URLs matched with given regular expressions" ini-name:"include"`
FollowRobotsTxt bool `long:"follow-robots-txt" description:"Follow robots.txt when scraping pages" ini-name:"followRobotsTxt"`
FollowSitemapXML bool `long:"follow-sitemap-xml" description:"Scrape only pages listed in sitemap.xml (deprecated)"`
RawHeaders []string `long:"header" value-name:"<header>..." description:"Custom headers"`
RawHeaders []string `long:"header" value-name:"<header>..." description:"Custom headers" ini-name:"header"`
// TODO Remove a short option.
IgnoreFragments bool `short:"f" long:"ignore-fragments" description:"Ignore URL fragments"`
Format string `long:"format" description:"Output format" default:"text" choice:"text" choice:"json" choice:"junit"`
Format string `long:"format" description:"Output format" default:"text" choice:"text" choice:"json" choice:"junit" ini-name:"format"`
// TODO Remove this option.
JSONOutput bool `long:"json" description:"Output results in JSON (deprecated)"`
// TODO Remove this option.
VerboseJSON bool `long:"experimental-verbose-json" description:"Include successful results in JSON (deprecated)"`
// TODO Remove this option.
JUnitOutput bool `long:"junit" description:"Output results as JUnit XML file (deprecated)"`
MaxRedirections int `short:"r" long:"max-redirections" value-name:"<count>" default:"64" description:"Maximum number of redirections"`
RateLimit int `long:"rate-limit" value-name:"<rate>" description:"Max requests per second"`
Timeout int `short:"t" long:"timeout" value-name:"<seconds>" default:"10" description:"Timeout for HTTP requests in seconds"`
Verbose bool `short:"v" long:"verbose" description:"Show successful results too"`
Proxy string `long:"proxy" value-name:"<host>" description:"HTTP proxy host"`
SkipTLSVerification bool `long:"skip-tls-verification" description:"Skip TLS certificate verification"`
OnePageOnly bool `long:"one-page-only" description:"Only check links found in the given URL"`
Color color `long:"color" description:"Color output" choice:"auto" choice:"always" choice:"never" default:"auto"`
MaxRedirections int `short:"r" long:"max-redirections" value-name:"<count>" default:"64" description:"Maximum number of redirections" ini-name:"maxRedirections"`
RateLimit int `long:"rate-limit" value-name:"<rate>" description:"Max requests per second" ini-name:"rateLimit"`
Timeout int `short:"t" long:"timeout" value-name:"<seconds>" default:"10" description:"Timeout for HTTP requests in seconds" ini-name:"timeout"`
Verbose bool `short:"v" long:"verbose" description:"Show successful results too" ini-name:"verbose"`
Proxy string `long:"proxy" value-name:"<host>" description:"HTTP proxy host" ini-name:"proxy"`
SkipTLSVerification bool `long:"skip-tls-verification" description:"Skip TLS certificate verification" ini-name:"skipTLSVerification"`
OnePageOnly bool `long:"one-page-only" description:"Only check links found in the given URL" ini-name:"onePageOnly"`
Color color `long:"color" description:"Color output" choice:"auto" choice:"always" choice:"never" default:"auto" color:"color"`
Help bool `short:"h" long:"help" description:"Show this help"`
Version bool `long:"version" description:"Show version"`
URL string
Expand All @@ -45,9 +46,17 @@ type arguments struct {
Header http.Header
}

func getArguments(ss []string) (*arguments, error) {
func getArguments(ss []string, iniFileReader io.Reader) (*arguments, error) {
args := arguments{}
ss, err := flags.NewParser(&args, flags.PassDoubleDash).ParseArgs(ss)
parser := flags.NewParser(&args, flags.PassDoubleDash)
if iniFileReader != nil {
iniParser := flags.NewIniParser(parser)
err := iniParser.Parse(iniFileReader)
if err != nil {
return nil, err

Check warning on line 56 in arguments.go

View check run for this annotation

Codecov / codecov/patch

arguments.go#L56

Added line #L56 was not covered by tests
}
}
ss, err := parser.ParseArgs(ss)

if err != nil {
return nil, err
Expand Down
28 changes: 26 additions & 2 deletions arguments_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package main

import (
"bytes"
"net/http"
"testing"

Expand Down Expand Up @@ -40,7 +41,7 @@ func TestGetArguments(t *testing.T) {
{"--help"},
{"--version"},
} {
_, err := getArguments(ss)
_, err := getArguments(ss, nil)
assert.Nil(t, err)
}
}
Expand All @@ -60,11 +61,34 @@ func TestGetArgumentsError(t *testing.T) {
{"-t", "foo", "https://foo.com"},
{"--timeout", "foo", "https://foo.com"},
} {
_, err := getArguments(ss)
_, err := getArguments(ss, nil)
assert.NotNil(t, err)
}
}

func TestGetArgumentsWithIniFile(t *testing.T) {
ini := `
bufferSize = 8192
exclude = foo.com
exclude = bar.com
maxConnectionsPerHost = 122
`
args := []string{"--header", "a:fo", "--max-connections-per-host", "123", "https://baz.com"}
outArgs, err := getArguments(args, bytes.NewBufferString(ini))
assert.Nil(t, err)

// Just from the ini file (the global default is overridden)

Check warning on line 80 in arguments_test.go

View workflow job for this annotation

GitHub Actions / spell_check

Unknown word (overriden)
assert.Equal(t, 8192, outArgs.BufferSize)
// Not set anywhere (the global default is taken)
assert.Equal(t, 512, outArgs.MaxConnections)
// Command line takes priority over ini file
assert.Equal(t, 123, outArgs.MaxConnectionsPerHost)
// Just on command line
assert.Equal(t, []string{"a:fo"}, outArgs.RawHeaders)
// Repeated entries in the ini file lead to multiple items
assert.Equal(t, []string{"foo.com", "bar.com"}, outArgs.RawExcludedPatterns)
}

func TestHelp(t *testing.T) {
cupaloy.SnapshotT(t, help())
}
Expand Down
8 changes: 7 additions & 1 deletion command.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"errors"
"fmt"
"io"
"os"
"strings"
"time"

Expand Down Expand Up @@ -33,7 +34,12 @@ func (c *command) Run(args []string) bool {
}

func (c *command) runWithError(ss []string) (bool, error) {
args, err := getArguments(ss)
var iniReader io.Reader = nil
// We try to read the ini file, if it fails, we just don't use it
if r, err := os.Open("muffet.ini"); err == nil {
iniReader = r

Check warning on line 40 in command.go

View check run for this annotation

Codecov / codecov/patch

command.go#L40

Added line #L40 was not covered by tests
}
args, err := getArguments(ss, iniReader)
if err != nil {
return false, err
} else if args.Help {
Expand Down

0 comments on commit a948e22

Please sign in to comment.