diff --git a/.gitignore b/.gitignore index 8e5c76cd..05298823 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ jobs/ Zeno *.txt *.sh -zeno.log \ No newline at end of file +zeno.log +.vscode/ \ No newline at end of file diff --git a/cmd/all/all.go b/cmd/all/all.go deleted file mode 100644 index 67598d24..00000000 --- a/cmd/all/all.go +++ /dev/null @@ -1,6 +0,0 @@ -package all - -import ( - _ "github.com/internetarchive/Zeno/cmd/get" - _ "github.com/internetarchive/Zeno/cmd/version" -) diff --git a/cmd/cmd.go b/cmd/cmd.go index 585dbc92..62cd5526 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -1,353 +1,57 @@ package cmd import ( - "os" - - "github.com/sirupsen/logrus" - "github.com/urfave/cli/v2" + "fmt" "github.com/internetarchive/Zeno/config" + "github.com/spf13/cobra" ) -var GlobalFlags = []cli.Flag{ - &cli.StringFlag{ - Name: "user-agent", - Value: "Zeno", - Usage: "User agent to use when requesting URLs.", - Destination: &config.App.Flags.UserAgent, - }, - &cli.StringFlag{ - Name: "job", - Value: "", - Usage: "Job name to use, will determine the path for the persistent queue, seencheck database, and WARC files.", - Destination: &config.App.Flags.Job, - }, - &cli.IntFlag{ - Name: "workers", - Aliases: []string{"w"}, - Value: 1, - Usage: "Number of concurrent workers to run.", - Destination: &config.App.Flags.Workers, - }, - &cli.IntFlag{ - Name: "max-concurrent-assets", - Aliases: []string{"ca"}, - Value: 8, - Usage: "Max number of concurrent assets to fetch PER worker. E.g. 
if you have 100 workers and this setting at 8, Zeno could do up to 800 concurrent requests at any time.", - Destination: &config.App.Flags.MaxConcurrentAssets, - }, - &cli.UintFlag{ - Name: "max-hops", - Aliases: []string{"hops"}, - Value: 0, - Usage: "Maximum number of hops to execute.", - Destination: &config.App.Flags.MaxHops, - }, - &cli.StringFlag{ - Name: "cookies", - Usage: "File containing cookies that will be used for requests.", - Destination: &config.App.Flags.CookieFile, - }, - &cli.BoolFlag{ - Name: "keep-cookies", - Usage: "Keep a global cookie jar", - Destination: &config.App.Flags.KeepCookies, - }, - &cli.BoolFlag{ - Name: "headless", - Usage: "Use headless browsers instead of standard GET requests.", - Destination: &config.App.Flags.Headless, - }, - &cli.BoolFlag{ - Name: "local-seencheck", - Usage: "Simple local seencheck to avoid re-crawling of URIs.", - Destination: &config.App.Flags.Seencheck, - }, - &cli.BoolFlag{ - Name: "json", - Usage: "Output logs in JSON", - Destination: &config.App.Flags.JSON, - }, - &cli.BoolFlag{ - Name: "debug", - Destination: &config.App.Flags.Debug, - }, - &cli.BoolFlag{ - Name: "live-stats", - Destination: &config.App.Flags.LiveStats, - }, - - &cli.BoolFlag{ - Name: "api", - Destination: &config.App.Flags.API, - }, - &cli.StringFlag{ - Name: "api-port", - Value: "9443", - Usage: "Port to listen on for the API.", - Destination: &config.App.Flags.APIPort, - }, - - &cli.StringFlag{ - Name: "prometheus-prefix", - Destination: &config.App.Flags.PrometheusPrefix, - Usage: "String used as a prefix for the exported Prometheus metrics.", - Value: "zeno:", - }, - - &cli.IntFlag{ - Name: "max-redirect", - Value: 20, - Usage: "Specifies the maximum number of redirections to follow for a resource.", - Destination: &config.App.Flags.MaxRedirect, - }, - &cli.IntFlag{ - Name: "max-retry", - Value: 20, - Usage: "Number of retry if error happen when executing HTTP request.", - Destination: &config.App.Flags.MaxRetry, - }, - 
&cli.IntFlag{ - Name: "http-timeout", - Value: 30, - Usage: "Number of seconds to wait before timing out a request.", - Destination: &config.App.Flags.HTTPTimeout, - }, - &cli.BoolFlag{ - Name: "domains-crawl", - Usage: "If this is turned on, seeds will be treated as domains to crawl, therefore same-domain outlinks will be added to the queue as hop=0.", - Destination: &config.App.Flags.DomainsCrawl, - }, - &cli.StringSliceFlag{ - Name: "disable-html-tag", - Usage: "Specify HTML tag to not extract assets from", - Destination: &config.App.Flags.DisabledHTMLTags, - }, - &cli.BoolFlag{ - Name: "capture-alternate-pages", - Value: false, - Usage: "If turned on, HTML tags with \"alternate\" values for their \"rel\" attribute will be archived.", - Destination: &config.App.Flags.CaptureAlternatePages, - }, - &cli.StringSliceFlag{ - Name: "exclude-host", - Usage: "Exclude a specific host from the crawl, note that it will not exclude the domain if it is encountered as an asset for another web page.", - Destination: &config.App.Flags.ExcludedHosts, - }, - &cli.StringSliceFlag{ - Name: "include-host", - Usage: "Only crawl specific hosts, note that it will not include the domain if it is encountered as an asset for another web page.", - Destination: &config.App.Flags.IncludedHosts, - }, - &cli.IntFlag{ - Name: "max-concurrent-per-domain", - Value: 16, - Usage: "Maximum number of concurrent requests per domain.", - Destination: &config.App.Flags.MaxConcurrentRequestsPerDomain, - }, - &cli.IntFlag{ - Name: "concurrent-sleep-length", - Value: 500, - Usage: "Number of milliseconds to sleep when max concurrency per domain is reached.", - Destination: &config.App.Flags.RateLimitDelay, - }, +var cfg *config.Config - &cli.IntFlag{ - Name: "crawl-time-limit", - Value: 0, - Usage: "Number of seconds until the crawl will automatically set itself into the finished state.", - Destination: &config.App.Flags.CrawlTimeLimit, - }, +var rootCmd = &cobra.Command{ + Use: "Zeno", + Short: 
"State-of-the-art web crawler 🔱", + Long: `Zeno is a web crawler designed to operate wide crawls or to simply archive one web page. +Zeno's key concepts are: portability, performance, simplicity ; with an emphasis on performance. - &cli.IntFlag{ - Name: "crawl-max-time-limit", - Value: 0, - Usage: "Number of seconds until the crawl will automatically panic itself. Default to crawl-time-limit + (crawl-time-limit / 10)", - Destination: &config.App.Flags.MaxCrawlTimeLimit, - }, - &cli.IntFlag{ - Name: "min-space-required", - Aliases: []string{"msr"}, - Value: 20, - Usage: "Minimum space (GB) required to start crawl", - Destination: &config.App.Flags.MinSpaceRequired, - }, +Authors: + Corentin Barreau + Jake LaFountain + Thomas Foubert +`, + PersistentPreRunE: func(cmd *cobra.Command, args []string) error { + // Bind the parsed flags and load the config here, after cobra has parsed the command line; the error is wrapped with %w so callers can still inspect it + config.BindFlags(cmd.Flags()) + if err := config.InitConfig(); err != nil { + return fmt.Errorf("error initializing config: %w", err) + } - // Proxy flags - &cli.StringFlag{ - Name: "proxy", - Value: "", - Usage: "Proxy to use when requesting pages.", - Destination: &config.App.Flags.Proxy, + cfg = config.GetConfig() + return nil }, - &cli.StringSliceFlag{ - Name: "bypass-proxy", - Usage: "Domains that should not be proxied.", - Destination: &config.App.Flags.BypassProxy, + Run: func(cmd *cobra.Command, args []string) { + cmd.Help() }, - - // WARC flags - &cli.StringFlag{ - Name: "warc-prefix", - Value: "ZENO", - Usage: "Prefix to use when naming the WARC files.", - Destination: &config.App.Flags.WARCPrefix, - }, - &cli.StringFlag{ - Name: "warc-operator", - Value: "", - Usage: "Contact informations of the crawl operator to write in the Warc-Info record in each WARC file.", - Destination: &config.App.Flags.WARCOperator, - }, - &cli.StringFlag{ - Name: "warc-cdx-dedupe-server", - Value: "", - Usage: "Identify the server to use CDX deduplication. 
This also activates CDX deduplication on.", - Destination: &config.App.Flags.CDXDedupeServer, - }, - &cli.BoolFlag{ - Name: "warc-on-disk", - Value: false, - Usage: "Do not use RAM to store payloads when recording traffic to WARCs, everything will happen on disk (usually used to reduce memory usage).", - Destination: &config.App.Flags.WARCFullOnDisk, - }, - &cli.IntFlag{ - Name: "warc-pool-size", - Value: 1, - Usage: "Number of concurrent WARC files to write.", - Destination: &config.App.Flags.WARCPoolSize, - }, - &cli.StringFlag{ - Name: "warc-temp-dir", - Value: "", - Usage: "Custom directory to use for WARC temporary files.", - Destination: &config.App.Flags.WARCTempDir, - }, - &cli.BoolFlag{ - Name: "disable-local-dedupe", - Usage: "Disable local URL agonistic deduplication.", - Value: false, - Destination: &config.App.Flags.DisableLocalDedupe, - }, - &cli.BoolFlag{ - Name: "cert-validation", - Usage: "Enables certificate validation on HTTPS requests.", - Value: false, - Destination: &config.App.Flags.CertValidation, - }, - &cli.BoolFlag{ - Name: "disable-assets-capture", - Usage: "Disable assets capture.", - Value: false, - Destination: &config.App.Flags.DisableAssetsCapture, - }, - &cli.IntFlag{ - Name: "warc-dedupe-size", - Value: 1024, - Usage: "Minimum size to deduplicate WARC records with revisit records.", - Destination: &config.App.Flags.WARCDedupSize, - }, - &cli.StringFlag{ - Name: "cdx-cookie", - Value: "", - Usage: "Pass custom cookie during CDX requests. 
Example: 'cdx_auth_token=test_value'", - Destination: &config.App.Flags.WARCCustomCookie, - }, - // Crawl HQ flags - &cli.BoolFlag{ - Name: "hq", - Value: false, - Usage: "Use Crawl HQ to pull URLs to process.", - Destination: &config.App.Flags.UseHQ, - }, - &cli.StringFlag{ - Name: "hq-address", - Usage: "Crawl HQ address.", - Destination: &config.App.Flags.HQAddress, - }, - &cli.StringFlag{ - Name: "hq-key", - Usage: "Crawl HQ key.", - Destination: &config.App.Flags.HQKey, - }, - &cli.StringFlag{ - Name: "hq-secret", - Usage: "Crawl HQ secret.", - Destination: &config.App.Flags.HQSecret, - }, - &cli.StringFlag{ - Name: "hq-project", - Usage: "Crawl HQ project.", - Destination: &config.App.Flags.HQProject, - }, - &cli.Int64Flag{ - Name: "hq-batch-size", - Usage: "Crawl HQ feeding batch size.", - Destination: &config.App.Flags.HQBatchSize, - }, - &cli.BoolFlag{ - Name: "hq-continuous-pull", - Usage: "If turned on, the crawler will pull URLs from Crawl HQ continuously.", - Destination: &config.App.Flags.HQContinuousPull, - }, - &cli.StringFlag{ - Name: "hq-strategy", - Usage: "Crawl HQ feeding strategy.", - Value: "lifo", - Destination: &config.App.Flags.HQStrategy, - }, - &cli.BoolFlag{ - Name: "hq-rate-limiting-send-back", - Usage: "If turned on, the crawler will send back URLs that hit a rate limit to crawl HQ.", - Destination: &config.App.Flags.HQRateLimitingSendBack, - }, - // Logging flags - &cli.StringFlag{ - Name: "log-file-output-dir", - Usage: "Directory to write log files to.", - Value: "jobs", - Destination: &config.App.Flags.LogFileOutputDir, - }, - &cli.StringFlag{ - Name: "es-url", - Usage: "comma-separated ElasticSearch URL to use for indexing crawl logs.", - Destination: &config.App.Flags.ElasticSearchURLs, - }, - &cli.StringFlag{ - Name: "es-user", - Usage: "ElasticSearch username to use for indexing crawl logs.", - Destination: &config.App.Flags.ElasticSearchUsername, - }, - &cli.StringFlag{ - Name: "es-password", - Usage: "ElasticSearch password 
to use for indexing crawl logs.", - Destination: &config.App.Flags.ElasticSearchPassword, - }, - &cli.StringFlag{ - Name: "es-index-prefix", - Usage: "ElasticSearch index prefix to use for indexing crawl logs. Default is : `zeno`, without `-`", - Value: "zeno", - Destination: &config.App.Flags.ElasticSearchIndexPrefix, - }, - &cli.StringSliceFlag{ - Name: "exclude-string", - Usage: "Discard any (discovered) URLs containing this string.", - Destination: &config.App.Flags.ExcludedStrings, - }, - &cli.BoolFlag{ - Name: "random-local-ip", - Usage: "Use random local IP for requests. (will be ignored if a proxy is set)", - Destination: &config.App.Flags.RandomLocalIP, - }, -} - -var Commands []*cli.Command - -func RegisterCommand(command cli.Command) { - Commands = append(Commands, &command) } -func CommandNotFound(c *cli.Context, command string) { - logrus.Errorf("%s: '%s' is not a %s command. See '%s --help'.", c.App.Name, command, c.App.Name, c.App.Name) - os.Exit(2) +// Run the root command +func Run() error { + rootCmd.CompletionOptions.DisableDefaultCmd = true + + // Define flags and configuration settings + rootCmd.PersistentFlags().String("log-level", "info", "stdout log level (debug, info, warn, error)") + rootCmd.PersistentFlags().String("config-file", "", "config file (default is $HOME/zeno-config.yaml)") + rootCmd.PersistentFlags().Bool("no-stdout-log", false, "disable stdout logging.") + rootCmd.PersistentFlags().Bool("consul-config", false, "Use this flag to enable consul config support") + rootCmd.PersistentFlags().String("consul-address", "", "The consul address used to retreive config") + rootCmd.PersistentFlags().String("consul-path", "", "The full Consul K/V path where the config is stored") + rootCmd.PersistentFlags().String("consul-user", "", "The Consul user used to retreive config") + rootCmd.PersistentFlags().String("consul-password", "", "The Consul password used to retreive config") + + // Add get subcommands + getCmd := getCMDs() + 
rootCmd.AddCommand(getCmd) + + return rootCmd.Execute() } diff --git a/cmd/get.go b/cmd/get.go new file mode 100644 index 00000000..fd9469b8 --- /dev/null +++ b/cmd/get.go @@ -0,0 +1,106 @@ +package cmd + +import ( + "github.com/spf13/cobra" +) + +// getCMDs builds the "get" command, registers the persistent flags of "get" +// and of its "hq" subcommand, then attaches the url, hq and list subcommands. +func getCMDs() *cobra.Command { + getCmd := &cobra.Command{ + Use: "get", + Short: "Archive the web!", + Run: func(cmd *cobra.Command, args []string) { + if len(args) == 0 { + cmd.Help() + } + }, + } + + getCMDsFlags(getCmd) + // The hq-* flags are declared by getHQCmdFlags; without this call they are + // never registered on the hq subcommand and cannot be passed on the command line. + getHQCmdFlags(getHQCmd) + + getCmd.AddCommand(getURLCmd) + getCmd.AddCommand(getHQCmd) + getCmd.AddCommand(getListCmd) + + return getCmd +} + +func getCMDsFlags(getCmd *cobra.Command) { + getCmd.PersistentFlags().String("user-agent", "Zeno", "User agent to use when requesting URLs.") + getCmd.PersistentFlags().String("job", "", "Job name to use, will determine the path for the persistent queue, seencheck database, and WARC files.") + getCmd.PersistentFlags().IntP("workers", "w", 1, "Number of concurrent workers to run.") + getCmd.PersistentFlags().Int("max-concurrent-assets", 8, "Max number of concurrent assets to fetch PER worker. E.g. 
if you have 100 workers and this setting at 8, Zeno could do up to 800 concurrent requests at any time.") + getCmd.PersistentFlags().Uint("max-hops", 0, "Maximum number of hops to execute.") + getCmd.PersistentFlags().String("cookies", "", "File containing cookies that will be used for requests.") + getCmd.PersistentFlags().Bool("keep-cookies", false, "Keep a global cookie jar") + getCmd.PersistentFlags().Bool("headless", false, "Use headless browsers instead of standard GET requests.") + getCmd.PersistentFlags().Bool("local-seencheck", false, "Simple local seencheck to avoid re-crawling of URIs.") + getCmd.PersistentFlags().Bool("json", false, "Output logs in JSON") + getCmd.PersistentFlags().Bool("debug", false, "") + getCmd.PersistentFlags().Bool("api", false, "Enable API") + getCmd.PersistentFlags().String("api-port", "9443", "Port to listen on for the API.") + getCmd.PersistentFlags().Bool("prometheus", false, "Export metrics in Prometheus format. (implies --api)") + getCmd.PersistentFlags().String("prometheus-prefix", "zeno:", "String used as a prefix for the exported Prometheus metrics.") + getCmd.PersistentFlags().Int("max-redirect", 20, "Specifies the maximum number of redirections to follow for a resource.") + getCmd.PersistentFlags().Int("max-retry", 20, "Number of retry if error happen when executing HTTP request.") + getCmd.PersistentFlags().Int("http-timeout", 30, "Number of seconds to wait before timing out a request.") + getCmd.PersistentFlags().Bool("domains-crawl", false, "If this is turned on, seeds will be treated as domains to crawl, therefore same-domain outlinks will be added to the queue as hop=0.") + getCmd.PersistentFlags().StringSlice("disable-html-tag", []string{}, "Specify HTML tag to not extract assets from") + getCmd.PersistentFlags().Bool("capture-alternate-pages", false, "If turned on, HTML tags with \"alternate\" values for their \"rel\" attribute will be archived.") + getCmd.PersistentFlags().StringSlice("exclude-host", 
[]string{}, "Exclude a specific host from the crawl, note that it will not exclude the domain if it is encountered as an asset for another web page.") + getCmd.PersistentFlags().StringSlice("include-host", []string{}, "Only crawl specific hosts, note that it will not include the domain if it is encountered as an asset for another web page.") + getCmd.PersistentFlags().Int("max-concurrent-per-domain", 16, "Maximum number of concurrent requests per domain.") + getCmd.PersistentFlags().Int("concurrent-sleep-length", 500, "Number of milliseconds to sleep when max concurrency per domain is reached.") + getCmd.PersistentFlags().Int("crawl-time-limit", 0, "Number of seconds until the crawl will automatically set itself into the finished state.") + getCmd.PersistentFlags().Int("crawl-max-time-limit", 0, "Number of seconds until the crawl will automatically panic itself. Default to crawl-time-limit + (crawl-time-limit / 10)") + getCmd.PersistentFlags().StringSlice("exclude-string", []string{}, "Discard any (discovered) URLs containing this string.") + getCmd.PersistentFlags().Bool("random-local-ip", false, "Use random local IP for requests. (will be ignored if a proxy is set)") + getCmd.PersistentFlags().Int("min-space-required", 20, "Minimum space required in GB to continue the crawl.") + + // Proxy flags + getCmd.PersistentFlags().String("proxy", "", "Proxy to use when requesting pages.") + getCmd.PersistentFlags().StringSlice("bypass-proxy", []string{}, "Domains that should not be proxied.") + + // WARC flags + getCmd.PersistentFlags().String("warc-prefix", "ZENO", "Prefix to use when naming the WARC files.") + getCmd.PersistentFlags().String("warc-operator", "", "Contact informations of the crawl operator to write in the Warc-Info record in each WARC file.") + getCmd.PersistentFlags().String("warc-cdx-dedupe-server", "", "Identify the server to use CDX deduplication. 
This also activates CDX deduplication on.") + getCmd.PersistentFlags().Bool("warc-on-disk", false, "Do not use RAM to store payloads when recording traffic to WARCs, everything will happen on disk (usually used to reduce memory usage).") + getCmd.PersistentFlags().Int("warc-pool-size", 1, "Number of concurrent WARC files to write.") + getCmd.PersistentFlags().String("warc-temp-dir", "", "Custom directory to use for WARC temporary files.") + getCmd.PersistentFlags().Bool("disable-local-dedupe", false, "Disable local URL agonistic deduplication.") + getCmd.PersistentFlags().Bool("cert-validation", false, "Enables certificate validation on HTTPS requests.") + getCmd.PersistentFlags().Bool("disable-assets-capture", false, "Disable assets capture.") + getCmd.PersistentFlags().Int("warc-dedupe-size", 1024, "Minimum size to deduplicate WARC records with revisit records.") + getCmd.PersistentFlags().String("cdx-cookie", "", "Pass custom cookie during CDX requests. Example: 'cdx_auth_token=test_value'") + + // Logging flags + getCmd.PersistentFlags().Bool("live-stats", false, "Enable live stats but disable logging. (implies --no-stdout-log)") + getCmd.PersistentFlags().String("log-file-output-dir", "./jobs/", "Directory to write log files to.") + getCmd.PersistentFlags().String("es-url", "", "comma-separated ElasticSearch URL to use for indexing crawl logs.") + getCmd.PersistentFlags().String("es-user", "", "ElasticSearch username to use for indexing crawl logs.") + getCmd.PersistentFlags().String("es-password", "", "ElasticSearch password to use for indexing crawl logs.") + getCmd.PersistentFlags().String("es-index-prefix", "zeno", "ElasticSearch index prefix to use for indexing crawl logs. 
Default is : `zeno`, without `-`") + + // Alias support + // As cobra doesn't support aliases natively (couldn't find a way to do it), we have to do it manually + // This is a workaround to allow users to use `--hops` instead of `--max-hops` for example + // Aliases shouldn't be used as proper flags nor declared in the config struct + // Aliases should be marked as deprecated to inform the user base + // Aliases values should be copied to the proper flag in the config/config.go:handleFlagsAliases() function + getCmd.PersistentFlags().Uint("hops", 0, "Maximum number of hops to execute.") + getCmd.PersistentFlags().MarkDeprecated("hops", "use --max-hops instead") + getCmd.PersistentFlags().MarkHidden("hops") + + getCmd.PersistentFlags().Uint("ca", 8, "Max number of concurrent assets to fetch PER worker. E.g. if you have 100 workers and this setting at 8, Zeno could do up to 800 concurrent requests at any time.") + getCmd.PersistentFlags().MarkDeprecated("ca", "use --max-concurrent-assets") + getCmd.PersistentFlags().MarkHidden("ca") + + getCmd.PersistentFlags().Int("msr", 20, "Minimum space required in GB to continue the crawl.") + getCmd.PersistentFlags().MarkDeprecated("msr", "use --min-space-required instead") + getCmd.PersistentFlags().MarkHidden("msr") +} diff --git a/cmd/get/get.go b/cmd/get/get.go deleted file mode 100644 index 09750d75..00000000 --- a/cmd/get/get.go +++ /dev/null @@ -1,35 +0,0 @@ -package get - -import ( - "github.com/internetarchive/Zeno/cmd" - "github.com/internetarchive/Zeno/config" - log "github.com/sirupsen/logrus" - "github.com/urfave/cli/v2" -) - -func initLogging() (err error) { - // Log as JSON instead of the default ASCII formatter. 
- if config.App.Flags.JSON { - log.SetFormatter(&log.JSONFormatter{}) - } - - // Turn on debug mode - if config.App.Flags.Debug { - log.SetLevel(log.DebugLevel) - } - - return nil -} - -func init() { - cmd.RegisterCommand( - cli.Command{ - Name: "get", - Usage: "Archive the web!", - Subcommands: []*cli.Command{ - newGetURLCmd(), - newGetListCmd(), - newGetHQCmd(), - }, - }) -} diff --git a/cmd/get/hq.go b/cmd/get/hq.go deleted file mode 100644 index 6c7dc208..00000000 --- a/cmd/get/hq.go +++ /dev/null @@ -1,42 +0,0 @@ -package get - -import ( - "github.com/internetarchive/Zeno/cmd" - "github.com/internetarchive/Zeno/config" - "github.com/sirupsen/logrus" - log "github.com/sirupsen/logrus" - "github.com/urfave/cli/v2" -) - -func newGetHQCmd() *cli.Command { - return &cli.Command{ - Name: "hq", - Usage: "Start crawling with the crawl HQ connector.", - Action: cmdGetHQ, - Flags: []cli.Flag{}, - UsageText: " [ARGUMENTS]", - } -} - -func cmdGetHQ(c *cli.Context) error { - err := initLogging() - if err != nil { - log.Error("Unable to parse arguments") - return err - } - - // init crawl using the flags provided - crawl := cmd.InitCrawlWithCMD(config.App.Flags) - - // start crawl - err = crawl.Start() - if err != nil { - logrus.WithFields(logrus.Fields{ - "crawl": crawl, - "err": err.Error(), - }).Error("Crawl exited due to error") - return err - } - - return nil -} diff --git a/cmd/get/list.go b/cmd/get/list.go deleted file mode 100644 index bcacf64f..00000000 --- a/cmd/get/list.go +++ /dev/null @@ -1,58 +0,0 @@ -package get - -import ( - "github.com/internetarchive/Zeno/cmd" - "github.com/internetarchive/Zeno/config" - "github.com/internetarchive/Zeno/internal/pkg/frontier" - "github.com/sirupsen/logrus" - log "github.com/sirupsen/logrus" - "github.com/urfave/cli/v2" -) - -func newGetListCmd() *cli.Command { - return &cli.Command{ - Name: "list", - Usage: "Start crawling with a seed list.", - Action: cmdGetList, - Flags: []cli.Flag{}, - UsageText: " [ARGUMENTS]", - } -} 
- -func cmdGetList(c *cli.Context) error { - err := initLogging() - if err != nil { - log.Error("Unable to parse arguments") - return err - } - - // Init crawl using the flags provided - crawl := cmd.InitCrawlWithCMD(config.App.Flags) - - // Initialize initial seed list - crawl.SeedList, err = frontier.IsSeedList(c.Args().Get(0)) - if err != nil || len(crawl.SeedList) <= 0 { - logrus.WithFields(logrus.Fields{ - "input": c.Args().Get(0), - "err": err.Error(), - }).Error("This is not a valid input") - return err - } - - logrus.WithFields(logrus.Fields{ - "input": c.Args().Get(0), - "seedsCount": len(crawl.SeedList), - }).Print("Seed list loaded") - - // Start crawl - err = crawl.Start() - if err != nil { - logrus.WithFields(logrus.Fields{ - "crawl": crawl, - "err": err.Error(), - }).Error("Crawl exited due to error") - return err - } - - return nil -} diff --git a/cmd/get/url.go b/cmd/get/url.go deleted file mode 100644 index 39749c59..00000000 --- a/cmd/get/url.go +++ /dev/null @@ -1,57 +0,0 @@ -package get - -import ( - "net/url" - - "github.com/internetarchive/Zeno/cmd" - "github.com/internetarchive/Zeno/config" - "github.com/internetarchive/Zeno/internal/pkg/frontier" - "github.com/sirupsen/logrus" - "github.com/urfave/cli/v2" -) - -func newGetURLCmd() *cli.Command { - return &cli.Command{ - Name: "url", - Usage: "Start crawling with a single URL.", - Action: cmdGetURL, - Flags: []cli.Flag{}, - UsageText: " [ARGUMENTS]", - } -} - -func cmdGetURL(c *cli.Context) error { - err := initLogging() - if err != nil { - logrus.Error("Unable to parse arguments") - return err - } - - // Init crawl using the flags provided - crawl := cmd.InitCrawlWithCMD(config.App.Flags) - - // Initialize initial seed list - input, err := url.Parse(c.Args().Get(0)) - if err != nil { - logrus.WithFields(logrus.Fields{ - "input": c.Args().Get(0), - "err": err.Error(), - }).Error("This is not a valid input") - return err - } - - crawl.SeedList = append(crawl.SeedList, *frontier.NewItem(input, 
nil, "seed", 0, "", false)) - - // Start crawl - err = crawl.Start() - if err != nil { - logrus.WithFields(logrus.Fields{ - "crawl": crawl, - "err": err.Error(), - }).Error("Crawl exited due to error") - return err - } - - logrus.Info("Crawl finished") - return err -} diff --git a/cmd/get_hq.go b/cmd/get_hq.go new file mode 100644 index 00000000..764a2a5c --- /dev/null +++ b/cmd/get_hq.go @@ -0,0 +1,57 @@ +package cmd + +import ( + "fmt" + + "github.com/internetarchive/Zeno/internal/pkg/crawl" + "github.com/spf13/cobra" +) + +var getHQCmd = &cobra.Command{ + Use: "hq", + Short: "Start crawling with the crawl HQ connector.", + PreRunE: func(cmd *cobra.Command, args []string) error { + if cfg == nil { + return fmt.Errorf("viper config is nil") + } + cfg.HQ = true + return nil + }, + RunE: func(cmd *cobra.Command, args []string) error { + // Init crawl using the flags provided + crawl, err := crawl.GenerateCrawlConfig(cfg) + if err != nil { + if crawl != nil && crawl.Log != nil { + crawl.Log.WithFields(map[string]interface{}{ + "crawl": crawl, + "err": err.Error(), + }).Error("'get hq' exited due to error") + } + return err + } + + // start crawl + err = crawl.Start() + if err != nil { + crawl.Log.WithFields(map[string]interface{}{ + "crawl": crawl, + "err": err.Error(), + }).Error("'get hq' Crawl() exited due to error") + return err + } + + return nil + }, +} + +func getHQCmdFlags(getHQCmd *cobra.Command) { + // Crawl HQ flags + getHQCmd.PersistentFlags().String("hq-address", "", "Crawl HQ address.") + getHQCmd.PersistentFlags().String("hq-key", "", "Crawl HQ key.") + getHQCmd.PersistentFlags().String("hq-secret", "", "Crawl HQ secret.") + getHQCmd.PersistentFlags().String("hq-project", "", "Crawl HQ project.") + getHQCmd.PersistentFlags().Int64("hq-batch-size", 0, "Crawl HQ feeding batch size.") + getHQCmd.PersistentFlags().Bool("hq-continuous-pull", false, "If turned on, the crawler will pull URLs from Crawl HQ continuously.") + 
getHQCmd.PersistentFlags().String("hq-strategy", "lifo", "Crawl HQ feeding strategy.") + getHQCmd.PersistentFlags().Bool("hq-rate-limiting-send-back", false, "If turned on, the crawler will send back URLs that hit a rate limit to crawl HQ.") +} diff --git a/cmd/get_list.go b/cmd/get_list.go new file mode 100644 index 00000000..cc3ffb3f --- /dev/null +++ b/cmd/get_list.go @@ -0,0 +1,68 @@ +package cmd + +import ( + "fmt" + + "github.com/internetarchive/Zeno/internal/pkg/crawl" + "github.com/internetarchive/Zeno/internal/pkg/frontier" + "github.com/spf13/cobra" +) + +// getListCmd implements "get list [FILE]": it builds a crawl from the loaded +// config, loads the seeds from FILE via frontier.IsSeedList and starts the crawl. +var getListCmd = &cobra.Command{ + Use: "list [FILE]", + Short: "Start crawling with a seed list", + Args: cobra.ExactArgs(1), + PreRunE: func(cmd *cobra.Command, args []string) error { + if cfg == nil { + return fmt.Errorf("viper config is nil") + } + return nil + }, + RunE: func(cmd *cobra.Command, args []string) error { + // Init crawl using the flags provided + crawl, err := crawl.GenerateCrawlConfig(cfg) + if err != nil { + if crawl != nil && crawl.Log != nil { + crawl.Log.WithFields(map[string]interface{}{ + "crawl": crawl, + "err": err.Error(), + }).Error("'get list' exited due to error") + } + return err + } + + // Initialize initial seed list + crawl.SeedList, err = frontier.IsSeedList(args[0]) + if err == nil && len(crawl.SeedList) == 0 { + // An empty seed list with a nil error would panic on err.Error() below and + // make the command return nil; report it as an explicit error instead. + err = fmt.Errorf("seed list %q contains no seeds", args[0]) + } + if err != nil { + crawl.Log.WithFields(map[string]interface{}{ + "input": args[0], + "err": err.Error(), + }).Error("This is not a valid input") + return err + } + + crawl.Log.WithFields(map[string]interface{}{ + "input": args[0], + "seedsCount": len(crawl.SeedList), + }).Info("Seed list loaded") + + // Start crawl + err = crawl.Start() + if err != nil { + crawl.Log.WithFields(map[string]interface{}{ + "crawl": crawl, + "err": err.Error(), + }).Error("Crawl exited due to error") + return err + } + + return nil + }, +} diff --git a/cmd/get_url.go b/cmd/get_url.go new file mode 100644 index 00000000..ad3367c8 --- /dev/null +++ b/cmd/get_url.go @@ -0,0 +1,62 @@ +package cmd + 
+import ( + "fmt" + "net/url" + + "github.com/internetarchive/Zeno/internal/pkg/crawl" + "github.com/internetarchive/Zeno/internal/pkg/frontier" + "github.com/spf13/cobra" +) + +var getURLCmd = &cobra.Command{ + Use: "url [URL...]", + Short: "Archive given URLs", + Args: cobra.MinimumNArgs(1), + PreRunE: func(cmd *cobra.Command, args []string) error { + if cfg == nil { + return fmt.Errorf("viper config is nil") + } + return nil + }, + RunE: func(cmd *cobra.Command, args []string) error { + // Init crawl using the flags provided + crawl, err := crawl.GenerateCrawlConfig(cfg) + if err != nil { + if crawl != nil && crawl.Log != nil { + crawl.Log.WithFields(map[string]interface{}{ + "crawl": crawl, + "err": err.Error(), + }).Error("'get url' exited due to error") + } + return err + } + + // Initialize initial seed list + for _, arg := range args { + input, err := url.Parse(arg) + if err != nil { + crawl.Log.WithFields(map[string]interface{}{ + "input_url": arg, + "err": err.Error(), + }).Error("given URL is not a valid input") + return err + } + + crawl.SeedList = append(crawl.SeedList, *frontier.NewItem(input, nil, "seed", 0, "", false)) + } + + // Start crawl + err = crawl.Start() + if err != nil { + crawl.Log.WithFields(map[string]interface{}{ + "crawl": crawl, + "err": err.Error(), + }).Error("'get url' Crawl() exited due to error") + return err + } + + crawl.Log.Info("Crawl finished") + return err + }, +} diff --git a/cmd/utils.go b/cmd/utils.go deleted file mode 100644 index 7696f616..00000000 --- a/cmd/utils.go +++ /dev/null @@ -1,180 +0,0 @@ -package cmd - -import ( - "fmt" - "log/slog" - "os" - "path" - "path/filepath" - "strings" - "time" - - "github.com/google/uuid" - "github.com/internetarchive/Zeno/config" - "github.com/internetarchive/Zeno/internal/pkg/crawl" - "github.com/internetarchive/Zeno/internal/pkg/frontier" - "github.com/internetarchive/Zeno/internal/pkg/log" - "github.com/internetarchive/Zeno/internal/pkg/utils" - 
"github.com/paulbellamy/ratecounter" -) - -// InitCrawlWithCMD takes a config.Flags struct and return a -// *crawl.Crawl initialized with it -func InitCrawlWithCMD(flags config.Flags) *crawl.Crawl { - var c = new(crawl.Crawl) - - // Craft Elastic Search configuration - var elasticSearchConfig *log.ElasticsearchConfig - - elasticSearchURLs := strings.Split(flags.ElasticSearchURLs, ",") - - if elasticSearchURLs[0] == "" { - elasticSearchConfig = nil - } else { - elasticSearchConfig = &log.ElasticsearchConfig{ - Addresses: elasticSearchURLs, - Username: flags.ElasticSearchUsername, - Password: flags.ElasticSearchPassword, - IndexPrefix: flags.ElasticSearchIndexPrefix, - Level: slog.LevelDebug, - } - } - - // Ensure that the log file output directory is well parsed - logfileOutputDir := filepath.Dir(flags.LogFileOutputDir) - if logfileOutputDir == "." && flags.LogFileOutputDir != "." { - logfileOutputDir = filepath.Dir(flags.LogFileOutputDir + "/") - } - - // Craft custom logger - customLogger, err := log.New(log.Config{ - FileConfig: &log.LogfileConfig{ - Dir: logfileOutputDir, - Prefix: "zeno", - }, - FileLevel: slog.LevelDebug, - StdoutLevel: slog.LevelInfo, - RotateLogFile: true, - RotateElasticSearchIndex: true, - ElasticsearchConfig: elasticSearchConfig, - LiveStats: flags.LiveStats, - }) - if err != nil { - fmt.Println(err) - os.Exit(1) - } - c.Log = customLogger - - // Statistics counters - c.CrawledSeeds = new(ratecounter.Counter) - c.CrawledAssets = new(ratecounter.Counter) - c.ActiveWorkers = new(ratecounter.Counter) - c.URIsPerSecond = ratecounter.NewRateCounter(1 * time.Second) - - c.LiveStats = flags.LiveStats - - // Frontier - c.Frontier = new(frontier.Frontier) - c.Frontier.Log = c.Log - - // If the job name isn't specified, we generate a random name - if flags.Job == "" { - if flags.HQProject != "" { - c.Job = flags.HQProject - } else { - UUID, err := uuid.NewUUID() - if err != nil { - c.Log.Fatal("cmd/utils.go:InitCrawlWithCMD():uuid.NewUUID()", 
"error", err) - } - - c.Job = UUID.String() - } - } else { - c.Job = flags.Job - } - - c.JobPath = path.Join("jobs", flags.Job) - - c.Workers = flags.Workers - c.WorkerPool = make([]*crawl.Worker, 0) - c.WorkerStopTimeout = time.Second * 60 // Placeholder for WorkerStopTimeout - c.MaxConcurrentAssets = flags.MaxConcurrentAssets - c.WorkerStopSignal = make(chan bool) - - c.Seencheck = flags.Seencheck - c.HTTPTimeout = flags.HTTPTimeout - c.MaxConcurrentRequestsPerDomain = flags.MaxConcurrentRequestsPerDomain - c.RateLimitDelay = flags.RateLimitDelay - c.CrawlTimeLimit = flags.CrawlTimeLimit - - // Defaults --max-crawl-time-limit to 10% more than --crawl-time-limit - if flags.MaxCrawlTimeLimit == 0 && flags.CrawlTimeLimit != 0 { - c.MaxCrawlTimeLimit = flags.CrawlTimeLimit + (flags.CrawlTimeLimit / 10) - } else { - c.MaxCrawlTimeLimit = flags.MaxCrawlTimeLimit - } - - c.MaxRetry = flags.MaxRetry - c.MaxRedirect = flags.MaxRedirect - c.MaxHops = uint8(flags.MaxHops) - c.DomainsCrawl = flags.DomainsCrawl - c.DisableAssetsCapture = flags.DisableAssetsCapture - c.DisabledHTMLTags = flags.DisabledHTMLTags.Value() - c.ExcludedHosts = flags.ExcludedHosts.Value() - c.IncludedHosts = flags.IncludedHosts.Value() - c.CaptureAlternatePages = flags.CaptureAlternatePages - c.ExcludedStrings = flags.ExcludedStrings.Value() - - // WARC settings - c.WARCPrefix = flags.WARCPrefix - c.WARCOperator = flags.WARCOperator - - if flags.WARCTempDir != "" { - c.WARCTempDir = flags.WARCTempDir - } else { - c.WARCTempDir = path.Join(c.JobPath, "temp") - } - - c.CDXDedupeServer = flags.CDXDedupeServer - c.DisableLocalDedupe = flags.DisableLocalDedupe - c.CertValidation = flags.CertValidation - c.WARCFullOnDisk = flags.WARCFullOnDisk - c.WARCPoolSize = flags.WARCPoolSize - c.WARCDedupSize = flags.WARCDedupSize - c.WARCCustomCookie = flags.WARCCustomCookie - - c.API = flags.API - c.APIPort = flags.APIPort - if c.API { - c.PrometheusMetrics = new(crawl.PrometheusMetrics) - 
c.PrometheusMetrics.Prefix = flags.PrometheusPrefix - } - if flags.UserAgent != "Zeno" { - c.UserAgent = flags.UserAgent - } else { - version := utils.GetVersion() - c.UserAgent = "Mozilla/5.0 (compatible; archive.org_bot +http://archive.org/details/archive.org_bot) Zeno/" + version.Version[:7] + " warc/" + version.WarcVersion - } - c.Headless = flags.Headless - c.MinSpaceRequired = flags.MinSpaceRequired - - c.CookieFile = flags.CookieFile - c.KeepCookies = flags.KeepCookies - - // Proxy settings - c.Proxy = flags.Proxy - c.BypassProxy = flags.BypassProxy.Value() - - // Crawl HQ settings - c.UseHQ = flags.UseHQ - c.HQProject = flags.HQProject - c.HQAddress = flags.HQAddress - c.HQKey = flags.HQKey - c.HQSecret = flags.HQSecret - c.HQStrategy = flags.HQStrategy - c.HQBatchSize = int(flags.HQBatchSize) - c.HQContinuousPull = flags.HQContinuousPull - c.HQRateLimitingSendBack = flags.HQRateLimitingSendBack - - return c -} diff --git a/cmd/version/deps.go b/cmd/version/deps.go deleted file mode 100644 index b877930d..00000000 --- a/cmd/version/deps.go +++ /dev/null @@ -1,31 +0,0 @@ -package version - -import ( - "fmt" - "runtime/debug" - - "github.com/urfave/cli/v2" -) - -func newShowDepsCmd() *cli.Command { - return &cli.Command{ - Name: "deps", - Usage: "Get dependencies.", - Action: cmdShowDeps, - } -} - -func cmdShowDeps(c *cli.Context) error { - if info, ok := debug.ReadBuildInfo(); ok { - for _, dep := range info.Deps { - fmt.Printf("%s %s (%s)", dep.Path, dep.Version, dep.Sum) - if dep.Replace != nil { - fmt.Printf(" => %s %s (%s)", dep.Replace.Path, dep.Replace.Version, dep.Replace.Sum) - } else { - fmt.Print("\n") - } - } - } - - return nil -} diff --git a/cmd/version/version.go b/cmd/version/version.go deleted file mode 100644 index 7dd57c4a..00000000 --- a/cmd/version/version.go +++ /dev/null @@ -1,28 +0,0 @@ -package version - -import ( - "github.com/internetarchive/Zeno/cmd" - "github.com/internetarchive/Zeno/internal/pkg/utils" - 
"github.com/urfave/cli/v2" -) - -func init() { - cmd.RegisterCommand( - cli.Command{ - Name: "version", - Usage: "Show the version number.", - Action: cmdVersion, - Subcommands: []*cli.Command{ - newShowDepsCmd(), - }, - }) -} - -func cmdVersion(c *cli.Context) error { - version := utils.GetVersion() - - println("Zeno", version.Version) - println("- go/version:", version.GoVersion) - - return nil -} diff --git a/config/config.go b/config/config.go index 6d754758..f1873f8b 100644 --- a/config/config.go +++ b/config/config.go @@ -1,81 +1,190 @@ package config -import "github.com/urfave/cli/v2" - -type Flags struct { - UserAgent string - Job string - Workers int - MaxConcurrentAssets int - MaxHops uint - Headless bool - Seencheck bool - JSON bool - LiveStats bool - Debug bool - MinSpaceRequired int - - DisabledHTMLTags cli.StringSlice - ExcludedHosts cli.StringSlice - IncludedHosts cli.StringSlice - DomainsCrawl bool - CaptureAlternatePages bool - HTTPTimeout int - MaxRedirect int - MaxRetry int - MaxConcurrentRequestsPerDomain int - RateLimitDelay int - CrawlTimeLimit int - MaxCrawlTimeLimit int - RandomLocalIP bool - - Proxy string - BypassProxy cli.StringSlice - - CookieFile string - KeepCookies bool - - API bool - APIPort string - PrometheusPrefix string - - WARCPrefix string - WARCOperator string - WARCPoolSize int - WARCDedupSize int - WARCFullOnDisk bool - WARCTempDir string - WARCCustomCookie string - - UseHQ bool - HQBatchSize int64 - HQAddress string - HQProject string - HQKey string - HQSecret string - HQStrategy string - HQContinuousPull bool - HQRateLimitingSendBack bool - - CDXDedupeServer string - DisableLocalDedupe bool - DisableAssetsCapture bool - CertValidation bool - - ElasticSearchURLs string - ElasticSearchUsername string - ElasticSearchPassword string - ElasticSearchIndexPrefix string - ExcludedStrings cli.StringSlice - LogFileOutputDir string +import ( + "fmt" + "net/url" + "os" + "path/filepath" + "strings" + "sync" + + 
"github.com/spf13/pflag" + "github.com/spf13/viper" +) + +// Config holds all configuration for our program +type Config struct { + LogLevel string `mapstructure:"log-level"` + UserAgent string `mapstructure:"user-agent"` + Job string `mapstructure:"job"` + Cookies string `mapstructure:"cookies"` + APIPort string `mapstructure:"api-port"` + PrometheusPrefix string `mapstructure:"prometheus-prefix"` + Proxy string `mapstructure:"proxy"` + WARCPrefix string `mapstructure:"warc-prefix"` + WARCOperator string `mapstructure:"warc-operator"` + CDXDedupeServer string `mapstructure:"warc-cdx-dedupe-server"` + WARCTempDir string `mapstructure:"warc-temp-dir"` + CDXCookie string `mapstructure:"cdx-cookie"` + HQAddress string `mapstructure:"hq-address"` + HQKey string `mapstructure:"hq-key"` + HQSecret string `mapstructure:"hq-secret"` + HQProject string `mapstructure:"hq-project"` + HQStrategy string `mapstructure:"hq-strategy"` + LogFileOutputDir string `mapstructure:"log-file-output-dir"` + ElasticSearchUsername string `mapstructure:"es-user"` + ElasticSearchPassword string `mapstructure:"es-password"` + ElasticSearchIndexPrefix string `mapstructure:"es-index-prefix"` + DisableHTMLTag []string `mapstructure:"disable-html-tag"` + ExcludeHosts []string `mapstructure:"exclude-host"` + IncludeHosts []string `mapstructure:"include-host"` + ExcludeString []string `mapstructure:"exclude-string"` + DomainsBypassProxy []string `mapstructure:"bypass-proxy"` + ElasticSearchURLs []string `mapstructure:"es-url"` + WorkersCount int `mapstructure:"workers"` + MaxConcurrentAssets int `mapstructure:"max-concurrent-assets"` + MaxHops uint `mapstructure:"max-hops"` + MaxRedirect int `mapstructure:"max-redirect"` + MaxRetry int `mapstructure:"max-retry"` + HTTPTimeout int `mapstructure:"http-timeout"` + MaxConcurrentRequestsPerDomain int `mapstructure:"max-concurrent-per-domain"` + ConcurrentSleepLength int `mapstructure:"concurrent-sleep-length"` + CrawlTimeLimit int 
`mapstructure:"crawl-time-limit"` + CrawlMaxTimeLimit int `mapstructure:"crawl-max-time-limit"` + MinSpaceRequired int `mapstructure:"min-space-required"` + WARCPoolSize int `mapstructure:"warc-pool-size"` + WARCDedupeSize int `mapstructure:"warc-dedupe-size"` + HQBatchSize int64 `mapstructure:"hq-batch-size"` + KeepCookies bool `mapstructure:"keep-cookies"` + Headless bool `mapstructure:"headless"` + LocalSeencheck bool `mapstructure:"local-seencheck"` + JSON bool `mapstructure:"json"` + Debug bool `mapstructure:"debug"` + LiveStats bool `mapstructure:"live-stats"` + API bool `mapstructure:"api"` + Prometheus bool `mapstructure:"prometheus"` + DomainsCrawl bool `mapstructure:"domains-crawl"` + CaptureAlternatePages bool `mapstructure:"capture-alternate-pages"` + RandomLocalIP bool `mapstructure:"random-local-ip"` + WARCOnDisk bool `mapstructure:"warc-on-disk"` + DisableLocalDedupe bool `mapstructure:"disable-local-dedupe"` + CertValidation bool `mapstructure:"cert-validation"` + DisableAssetsCapture bool `mapstructure:"disable-assets-capture"` + HQ bool // Special field to check if HQ is enabled depending on the command called + HQContinuousPull bool `mapstructure:"hq-continuous-pull"` + HQRateLimitSendBack bool `mapstructure:"hq-rate-limiting-send-back"` + NoStdoutLogging bool `mapstructure:"no-stdout-log"` +} + +var ( + config *Config + once sync.Once +) + +// InitConfig initializes the configuration +// Flags -> Env -> Config file -> Consul config +// Latest has precedence over the rest +func InitConfig() error { + var err error + once.Do(func() { + config = &Config{} + + // Check if a config file is provided via flag + if configFile := viper.GetString("config-file"); configFile != "" { + viper.SetConfigFile(configFile) + } else { + home, err := os.UserHomeDir() + if err != nil { + fmt.Println(err) + os.Exit(1) + } + + viper.AddConfigPath(home) + viper.SetConfigType("yaml") + viper.SetConfigName("zeno-config") + } + + viper.SetEnvPrefix("ZENO") + replacer := 
strings.NewReplacer("-", "_", ".", "_") + viper.SetEnvKeyReplacer(replacer) + viper.AutomaticEnv() + + if err = viper.ReadInConfig(); err == nil { + fmt.Println("Using config file:", viper.ConfigFileUsed()) + } + + if viper.GetBool("consul-config") && viper.GetString("consul-address") != "" { + var consulAddress *url.URL + consulAddress, err = url.Parse(viper.GetString("consul-address")) + if err != nil { + return + } + + consulPath, consulFile := filepath.Split(viper.GetString("consul-path")) + viper.AddRemoteProvider("consul", consulAddress.String(), consulPath) + viper.SetConfigType(filepath.Ext(consulFile)) + viper.SetConfigName(strings.TrimSuffix(consulFile, filepath.Ext(consulFile))) + + if err = viper.ReadInConfig(); err == nil { + fmt.Println("Using config file:", viper.ConfigFileUsed()) + } + } + + // This function is used to bring logic to the flags when needed (e.g. live-stats) + handleFlagsEdgeCases() + + // This function is used to handle flags aliases (e.g. hops -> max-hops) + handleFlagsAliases() + + // Unmarshal the config into the Config struct + err = viper.Unmarshal(config) + }) + return err +} + +// BindFlags binds the flags to the viper configuration +// This is needed because viper doesn't support the same flag name across multiple commands +// Details here: https://github.com/spf13/viper/issues/375#issuecomment-794668149 +func BindFlags(flagSet *pflag.FlagSet) { + flagSet.VisitAll(func(flag *pflag.Flag) { + viper.BindPFlag(flag.Name, flag) + }) } -type Application struct { - Flags Flags +// GetConfig returns the config struct +func GetConfig() *Config { + cfg := config + if cfg == nil { + panic("Config not initialized. 
Call InitConfig() before accessing the config.") + } + return cfg } -var App *Application +func handleFlagsEdgeCases() { + if viper.GetBool("live-stats") { + // If live-stats is true, set no-stdout-log to true + viper.Set("no-stdout-log", true) + } + + if viper.GetBool("prometheus") { + // If prometheus is true, set api to true + viper.Set("api", true) + } +} + +func handleFlagsAliases() { + // For each flag we want to alias, we check if the original flag is at default and if the alias is not + // If so, we set the original flag to the value of the alias + + if viper.GetUint("hops") != 0 && viper.GetUint("max-hops") == 0 { + viper.Set("max-hops", viper.GetUint("hops")) + } + + if viper.GetInt("ca") != 8 && viper.GetInt("max-concurrent-assets") == 8 { + viper.Set("max-concurrent-assets", viper.GetInt("ca")) + } -func init() { - App = &Application{} + if viper.GetInt("msr") != 20 && viper.GetInt("min-space-required") == 20 { + viper.Set("min-space-required", viper.GetInt("msr")) + } } diff --git a/go.mod b/go.mod index ba778312..c90785bc 100644 --- a/go.mod +++ b/go.mod @@ -14,19 +14,18 @@ require ( github.com/google/uuid v1.6.0 github.com/gosuri/uilive v0.0.4 github.com/gosuri/uitable v0.0.4 - github.com/internetarchive/elogrus v0.0.0-20230725172814-093db31a64fc - github.com/lestrrat-go/file-rotatelogs v2.4.0+incompatible - github.com/olivere/elastic/v7 v7.0.32 github.com/paulbellamy/ratecounter v0.2.0 github.com/philippgille/gokv/leveldb v0.7.0 github.com/prometheus/client_golang v1.19.1 github.com/remeh/sizedwaitgroup v1.0.0 github.com/sirupsen/logrus v1.9.3 github.com/spf13/afero v1.11.0 + github.com/spf13/cobra v1.8.0 + github.com/spf13/pflag v1.0.5 + github.com/spf13/viper v1.19.0 github.com/stretchr/testify v1.9.0 github.com/telanflow/cookiejar v0.0.0-20190719062046-114449e86aa5 github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80 - github.com/urfave/cli/v2 v2.27.2 github.com/zeebo/xxh3 v1.0.2 go.uber.org/goleak v1.3.0 golang.org/x/net 
v0.26.0 @@ -39,10 +38,10 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/cloudflare/circl v1.3.9 // indirect - github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect - github.com/davecgh/go-spew v1.1.1 // indirect + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/elastic/elastic-transport-go/v8 v8.6.0 // indirect github.com/fatih/color v1.17.0 // indirect + github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/go-logr/logr v1.4.2 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/gobwas/httphead v0.1.0 // indirect @@ -50,42 +49,48 @@ require ( github.com/gobwas/ws v1.4.0 // indirect github.com/golang/snappy v0.0.4 // indirect github.com/gomodule/redigo v1.9.2 // indirect - github.com/jonboulle/clockwork v0.4.0 // indirect - github.com/josharian/intern v1.0.0 // indirect + github.com/hashicorp/hcl v1.0.0 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/klauspost/compress v1.17.9 // indirect github.com/klauspost/cpuid/v2 v2.2.8 // indirect github.com/klauspost/pgzip v1.2.6 // indirect - github.com/kr/text v0.2.0 // indirect - github.com/lestrrat-go/strftime v1.0.6 // indirect - github.com/mailru/easyjson v0.7.7 // indirect + github.com/magiconair/properties v1.8.7 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/mattn/go-runewidth v0.0.15 // indirect + github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/onsi/gomega v1.27.6 // indirect + github.com/pelletier/go-toml/v2 v2.2.2 // indirect github.com/philippgille/gokv/encoding v0.7.0 // indirect github.com/philippgille/gokv/util v0.7.0 
// indirect - github.com/pkg/errors v0.9.1 // indirect - github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/common v0.55.0 // indirect github.com/prometheus/procfs v0.15.1 // indirect - github.com/quic-go/quic-go v0.41.0 // indirect - github.com/refraction-networking/utls v1.6.3 // indirect - github.com/rivo/uniseg v0.2.0 // indirect - github.com/russross/blackfriday/v2 v2.1.0 // indirect + github.com/refraction-networking/utls v1.6.6 // indirect + github.com/rivo/uniseg v0.4.7 // indirect + github.com/sagikazarmark/locafero v0.4.0 // indirect + github.com/sagikazarmark/slog-shim v0.1.0 // indirect github.com/satori/go.uuid v1.2.0 // indirect + github.com/sourcegraph/conc v0.3.0 // indirect + github.com/spf13/cast v1.6.0 // indirect + github.com/subosito/gotenv v1.6.0 // indirect github.com/syndtr/goleveldb v1.0.0 // indirect - github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect go.opentelemetry.io/otel v1.28.0 // indirect go.opentelemetry.io/otel/metric v1.28.0 // indirect go.opentelemetry.io/otel/trace v1.28.0 // indirect + go.uber.org/atomic v1.9.0 // indirect + go.uber.org/multierr v1.9.0 // indirect golang.org/x/crypto v0.24.0 // indirect + golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect golang.org/x/sync v0.7.0 // indirect golang.org/x/sys v0.21.0 // indirect golang.org/x/text v0.16.0 // indirect google.golang.org/protobuf v1.34.2 // indirect + gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 99c401d1..c1c6060c 100644 --- a/go.sum +++ b/go.sum @@ -1,68 +1,50 @@ -cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= git.archive.org/wb/gocrawlhq v1.2.5 h1:k8cPZRa+O7nWrGIJntVjLsbOOhoprICfAP8T7yfsvJU= 
git.archive.org/wb/gocrawlhq v1.2.5/go.mod h1:WiuNIB4Toqe8twVvwRu0fTSNC3KXFqA8/mAeaZ3GICE= -github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/CorentinB/warc v0.8.40 h1:6HIMT4jujlFTudeXtsoaFT+qJZYXeQlKdIED+c36Qpc= github.com/CorentinB/warc v0.8.40/go.mod h1:Q9SHKf7pwcqzIWcxlzCtAWN8sKH+Q1BZxq1mSHJ9ttY= github.com/PuerkitoBio/goquery v1.9.2 h1:4/wZksC3KgkQw7SQgkKotmKljk0M6V8TUvA8Wb4yPeE= github.com/PuerkitoBio/goquery v1.9.2/go.mod h1:GHPCaP0ODyyxqcNoFGYlAprUFH81NuRPd0GX3Zu2Mvk= -github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo= -github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= -github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= -github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss= github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU= -github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so= github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw= -github.com/aws/aws-sdk-go v1.19.6/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= github.com/beeker1121/goque 
v2.1.0+incompatible h1:m5pZ5b8nqzojS2DF2ioZphFYQUqGYsDORq6uefUItPM= github.com/beeker1121/goque v2.1.0+incompatible/go.mod h1:L6dOWBhDOnxUVQsb0wkLve0VCnt2xJW/MI8pdRX4ANw= -github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/clbanning/mxj/v2 v2.7.0 h1:WA/La7UGCanFe5NpHF0Q3DNtnCsVoxbPKuyBNHWRyME= github.com/clbanning/mxj/v2 v2.7.0/go.mod h1:hNiWqW14h+kc+MdF9C6/YoRfjEJoR3ou6tn/Qo+ve2s= -github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cloudflare/circl v1.3.9 h1:QFrlgFYf2Qpi8bSpVPK1HBvWpx16v/1TZivyo7pGuBE= github.com/cloudflare/circl v1.3.9/go.mod h1:PDRU+oXvdD7KCtgKxW95M5Z8BpSCJXQORiZFnBQS5QU= -github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4= -github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dustin/go-humanize v1.0.1 
h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= -github.com/eapache/go-resiliency v1.1.0/go.mod h1:kFI+JgMyC7bLPUVY133qvEBtVayf5mFgVsvEsIPBvNs= -github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21/go.mod h1:+020luEh2TKB4/GOp8oxxtq0Daoen/Cii55CzbTV6DU= -github.com/eapache/queue v1.1.0/go.mod h1:6eCeP0CKFpHLu8blIFXhExK/dRa7WDZfr6jVFPTqq+I= github.com/elastic/elastic-transport-go/v8 v8.6.0 h1:Y2S/FBjx1LlCv5m6pWAF2kDJAHoSjSRSJCApolgfthA= github.com/elastic/elastic-transport-go/v8 v8.6.0/go.mod h1:YLHer5cj0csTzNFXoNQ8qhtGY1GTvSqPnKWKaqQE3Hk= github.com/elastic/go-elasticsearch/v8 v8.14.0 h1:1ywU8WFReLLcxE1WJqii3hTtbPUE2hc38ZK/j4mMFow= github.com/elastic/go-elasticsearch/v8 v8.14.0/go.mod h1:WRvnlGkSuZyp83M2U8El/LGXpCjYLrvlkSgkAH4O5I4= github.com/fatih/color v1.17.0 h1:GlRw1BRJxkpqUCBKzKOw098ed57fEsKeNjpTe3cSjK4= github.com/fatih/color v1.17.0/go.mod h1:YZ7TlrGPkiz6ku9fK3TLD/pl3CpsiFyu8N92HLgmosI= -github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw= -github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= +github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= +github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= -github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= -github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= +github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= +github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= 
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= -github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= -github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= -github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/go-test/deep v1.1.0 h1:WOcxcdHcvdgThNXjw0t76K42FXTU7HpNQWHpA2HHNlg= github.com/go-test/deep v1.1.0/go.mod h1:5C2ZWiW0ErCdrYzpqxLbTX7MG14M9iiw8DgHncVwcsE= github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU= @@ -71,68 +53,41 @@ github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og= github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= github.com/gobwas/ws v1.4.0 h1:CTaoG1tojrh4ucGPcoJFiAQUAsEWekEWvLy7GsVNqGs= github.com/gobwas/ws v1.4.0/go.mod h1:G3gNqMNtPppf5XUz7O4shetPpcZ1VJ7zt18dlUeakrc= -github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= -github.com/gogo/protobuf v1.2.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= -github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= 
github.com/gomodule/redigo v1.9.2 h1:HrutZBLhSIU8abiSfW8pj8mPhOyMYjZT/wcA4/L9L9s= github.com/gomodule/redigo v1.9.2/go.mod h1:KsU3hiK/Ay8U42qpaJk+kuNa3C+spxapWpM+ywhcgtw= -github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= -github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE= -github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg= -github.com/gorilla/mux v1.6.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= github.com/gosuri/uilive v0.0.4 h1:hUEBpQDj8D8jXgtCdBu7sWsy5sbW/5GhuO8KBwJ2jyY= github.com/gosuri/uilive v0.0.4/go.mod h1:V/epo5LjjlDE5RJUcqx8dbw+zc93y5Ya3yg8tfZ74VI= github.com/gosuri/uitable v0.0.4 h1:IG2xLKRvErL3uhY6e1BylFzG+aJiwQviDDTfOKeKTpY= github.com/gosuri/uitable v0.0.4/go.mod h1:tKR86bXuXPZazfOTG1FIzvjIdXzd0mo4Vtn16vt0PJo= -github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= +github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= -github.com/internetarchive/elogrus v0.0.0-20230725172814-093db31a64fc 
h1:/f7B0tD2oVSHxIrDy9ciEQaVy/I/sMdLwFMVkjh5LLU= -github.com/internetarchive/elogrus v0.0.0-20230725172814-093db31a64fc/go.mod h1:oI8WojhCewR2pTR8bAx5/9pJO7ToWuLEzUuVIX3IVYk= -github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= -github.com/jonboulle/clockwork v0.4.0 h1:p4Cf1aMWXnXAUh8lVfewRBx1zaTSYKrKMF2g3ST4RZ4= -github.com/jonboulle/clockwork v0.4.0/go.mod h1:xgRqUGwRcjKCO1vbZUEtSLrqKoPSsUpK7fnezOII0kc= -github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= -github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= -github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= -github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= github.com/klauspost/cpuid/v2 v2.2.8 h1:+StwCXwm9PdpiEkPyzBXIy+M9KUb4ODm0Zarf1kS5BM= github.com/klauspost/cpuid/v2 v2.2.8/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU= github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= -github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.3.1 
h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc h1:RKf14vYWi2ttpEmkA4aQ3j4u9dStX2t4M8UM6qqNsG8= -github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc/go.mod h1:kopuH9ugFRkIXf3YoqHKyrJ9YfUFsckUU9S7B+XP+is= -github.com/lestrrat-go/file-rotatelogs v2.4.0+incompatible h1:Y6sqxHMyB1D2YSzWkLibYKgg+SwmyFU9dF2hn6MdTj4= -github.com/lestrrat-go/file-rotatelogs v2.4.0+incompatible/go.mod h1:ZQnN8lSECaebrkQytbHj4xNgtg8CR7RYXnPok8e0EHA= -github.com/lestrrat-go/strftime v1.0.6 h1:CFGsDEt1pOpFNU+TJB0nhz9jl+K0hZSLE205AhTIGQQ= -github.com/lestrrat-go/strftime v1.0.6/go.mod h1:f7jQKgV5nnJpYgdEasS+/y7EsTb8ykN2z68n3TtcTaw= -github.com/mailru/easyjson v0.0.0-20190312143242-1de009706dbe/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= -github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= -github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= -github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY= +github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= @@ -140,7 +95,8 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= 
github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= -github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= +github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= +github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -148,22 +104,16 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= -github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= -github.com/olivere/elastic/v7 v7.0.4/go.mod h1:l4YWa59iTCcOJQXI5ZtxVjcd3p5U8GCxVgvzHZqGn3o= -github.com/olivere/elastic/v7 v7.0.32 h1:R7CXvbu8Eq+WlsLgxmKVKPox0oOwAE/2T9Si5BnvK6E= -github.com/olivere/elastic/v7 v7.0.32/go.mod h1:c7PVmLe3Fxq77PIfY/bZmxY/TAamBhCzZ8xDOE09a9k= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.7.0 h1:WSHQ+IS43OoUrWtD1/bbclrwK8TTH5hzp+umCiuxHgs= github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q= 
-github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k= github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE= github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg= -github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= -github.com/openzipkin/zipkin-go v0.1.6/go.mod h1:QgAqvLzwWbR/WpD4A3cGpPtJrZXNIiJc5AZX7/PBEpw= github.com/paulbellamy/ratecounter v0.2.0 h1:2L/RhJq+HA8gBQImDXtLPrDXK5qAj6ozWVK/zFXVJGs= github.com/paulbellamy/ratecounter v0.2.0/go.mod h1:Hfx1hDpSGoqxkVVpBi/IlYD7kChlfo5C6hzIHwPqfFE= +github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM= +github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs= github.com/philippgille/gokv v0.7.0 h1:rQSIQspete82h78Br7k7rKUZ8JYy/hWlwzm/W5qobPI= github.com/philippgille/gokv v0.7.0/go.mod h1:OwiTP/3bhEBhSuOmFmq1+rszglfSgjJVxd1HOgOa2N4= github.com/philippgille/gokv/encoding v0.7.0 h1:2oxepKzzTsi00iLZBCZ7Rmqrallh9zws3iqSrLGfkgo= @@ -174,73 +124,71 @@ github.com/philippgille/gokv/test v0.7.0 h1:0wBKnKaFZlSeHxLXcmUJqK//IQGUMeu+o8B8 github.com/philippgille/gokv/test v0.7.0/go.mod h1:TP/VzO/qAoi6njsfKnRpXKno0hRuzD5wsLnHhtUcVkY= github.com/philippgille/gokv/util v0.7.0 h1:5avUK/a3aSj/aWjhHv4/FkqgMon2B7k2BqFgLcR+DYg= github.com/philippgille/gokv/util v0.7.0/go.mod h1:i9KLHbPxGiHLMhkix/CcDQhpPbCkJy5BkW+RKgwDHMo= -github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= -github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 
-github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= -github.com/prometheus/client_golang v0.9.3-0.20190127221311-3c4408c8b829/go.mod h1:p2iRAGwDERtqlqzRXnrOVns+ignqQo//hLXqYxZYVNs= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE= github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho= -github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= -github.com/prometheus/client_model v0.0.0-20190115171406-56726106282f/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= -github.com/prometheus/common v0.2.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc= github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8= -github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= -github.com/prometheus/procfs v0.0.0-20190117184657-bf6a532e95b1/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= -github.com/quic-go/quic-go v0.41.0 
h1:aD8MmHfgqTURWNJy48IYFg2OnxwHT3JL7ahGs73lb4k= -github.com/quic-go/quic-go v0.41.0/go.mod h1:qCkNjqczPEvgsOnxZ0eCD14lv+B2LHlFAB++CNOh9hA= -github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= -github.com/refraction-networking/utls v1.6.3 h1:MFOfRN35sSx6K5AZNIoESsBuBxS2LCgRilRIdHb6fDc= -github.com/refraction-networking/utls v1.6.3/go.mod h1:yil9+7qSl+gBwJqztoQseO6Pr3h62pQoY1lXiNR/FPs= +github.com/refraction-networking/utls v1.6.6 h1:igFsYBUJPYM8Rno9xUuDoM5GQrVEqY4llzEXOkL43Ig= +github.com/refraction-networking/utls v1.6.6/go.mod h1:BC3O4vQzye5hqpmDTWUqi4P5DDhzJfkV1tdqtawQIH0= github.com/remeh/sizedwaitgroup v1.0.0 h1:VNGGFwNo/R5+MJBf6yrsr110p0m4/OX4S3DCy7Kyl5E= github.com/remeh/sizedwaitgroup v1.0.0/go.mod h1:3j2R4OIe/SeS6YDhICBy22RWjJC5eNCJ1V+9+NVNYlo= -github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= +github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= -github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/sagikazarmark/locafero v0.4.0 h1:HApY1R9zGo4DBgr7dqsTH/JJxLTTsOt7u6keLGt6kNQ= +github.com/sagikazarmark/locafero v0.4.0/go.mod h1:Pe1W6UlPYUk/+wc/6KFhbORCfqzgYEpgQ3O5fPuL3H4= +github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE= +github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ= github.com/satori/go.uuid v1.2.0 h1:0uYX9dsZ2yD7q2RtLRtPSdGDWzjeM3TbMJP9utgA0ww= 
github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= -github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= -github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= -github.com/smartystreets/go-aws-auth v0.0.0-20180515143844-0c1422d1fdb9/go.mod h1:SnhjPscd9TpLiy1LpzGSKh3bXCfxxXuqd9xmQJy3slM= +github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= +github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0= github.com/spf13/afero v1.11.0 h1:WJQKhtpdm3v2IzqG8VMqrr6Rf3UYpEF239Jy9wNepM8= github.com/spf13/afero v1.11.0/go.mod h1:GH9Y3pIexgf1MTIWtNGyogA5MwRIDXGUr+hbWNoBjkY= +github.com/spf13/cast v1.6.0 h1:GEiTHELF+vaR5dhz3VqZfFSzZjYbgeKDpBxQVS4GYJ0= +github.com/spf13/cast v1.6.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= +github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0= +github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/viper v1.19.0 h1:RWq5SEjt8o25SROyN3z2OrDB9l7RPd3lwTWU8EcEdcI= +github.com/spf13/viper v1.19.0/go.mod h1:GQUN9bilAbhU/jgc1bKs99f/suXKeUMct8Adx5+Ntkg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod 
h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= +github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= github.com/syndtr/goleveldb v1.0.0 h1:fBdIW9lB4Iz0n9khmH8w27SJ3QEJ7+IgjPEwGSZiFdE= github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ= github.com/telanflow/cookiejar v0.0.0-20190719062046-114449e86aa5 h1:gTQl5nPlc9B53vFOKM8aJHwxB2BW2kM49PVR5526GBg= github.com/telanflow/cookiejar v0.0.0-20190719062046-114449e86aa5/go.mod h1:qNgA5MKwTh103SxGTooqZMiKxZTaV9UV3KjN7I7Drig= github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80 h1:nrZ3ySNYwJbSpD6ce9duiP+QkD3JuLCcWkdaehUS/3Y= github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80/go.mod h1:iFyPdL66DjUD96XmzVL3ZntbzcflLnznH0fr99w5VqE= -github.com/urfave/cli/v2 v2.27.2 h1:6e0H+AkS+zDckwPCUrZkKX38mRaau4nL2uipkJpbkcI= -github.com/urfave/cli/v2 v2.27.2/go.mod h1:g0+79LmHHATl7DAcHO99smiR/T7uGLw84w8Y42x+4eM= -github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4= -github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= 
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= -go.opencensus.io v0.20.1/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk= go.opentelemetry.io/otel v1.28.0 h1:/SqNcYk+idO0CxKEUOtKQClMK/MimZihKYMruSMViUo= go.opentelemetry.io/otel v1.28.0/go.mod h1:q68ijF8Fc8CnMHKyzqL6akLO46ePnjkgfIMIjUIX9z4= go.opentelemetry.io/otel/metric v1.28.0 h1:f0HGvSl1KRAU1DLgLGFjrwVyismPlnuU6JD6bOeuA5Q= @@ -249,27 +197,21 @@ go.opentelemetry.io/otel/sdk v1.21.0 h1:FTt8qirL1EysG6sTQRZ5TokkU8d0ugCj8htOgThZ go.opentelemetry.io/otel/sdk v1.21.0/go.mod h1:Nna6Yv7PWTdgJHVRD9hIYywQBRx7pbox6nwBnZIxl/E= go.opentelemetry.io/otel/trace v1.28.0 h1:GhQ9cUuQGmNDd5BTCP2dAvv75RdMxEfTmYejp+lkx9g= go.opentelemetry.io/otel/trace v1.28.0/go.mod h1:jPyXzNPg6da9+38HEwElrQiHlVMTnVfM3/yv2OlIHaI= +go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE= +go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= -golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +go.uber.org/multierr v1.9.0 h1:7fIwc/ZtS0q++VgcfqFDxSBZVv/Xo49/SYnDFupUwlI= +go.uber.org/multierr v1.9.0/go.mod h1:X2jQV1h+kxSjClGpnseKVIxpmcjrj7MNnI0bnlfKTVQ= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.24.0 
h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI= golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM= -golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= -golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= -golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 h1:vr/HnozRka3pE4EsMEg1lgkXJkTFJCVUX+S/ZT6wYzM= +golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842/go.mod h1:XtvwrStGgqGPLc4cjQfWqZHG1YFdYs6swckp8vpsjnc= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190125091013-d26f9f9a57f3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod 
h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= @@ -277,24 +219,14 @@ golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= -golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= -golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod 
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -317,41 +249,25 @@ golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= -golang.org/x/tools v0.0.0-20180828015842-6cd1fcedba52/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= -golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= -golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg= -golang.org/x/tools 
v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/api v0.3.1/go.mod h1:6wY9I6uQWHQ8EM57III9mq/AjF+i8G65rmVagqKMtkk= -google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= -google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= -google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs= -google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= -gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= +gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= +gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= 
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -honnef.co/go/tools v0.0.0-20180728063816-88497007e858/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= mvdan.cc/xurls/v2 v2.5.0 h1:lyBNOm8Wo71UknhUs4QTFUNNMyxy2JEIaKKo0RWOh+8= mvdan.cc/xurls/v2 v2.5.0/go.mod h1:yQgaGQ1rFtJUzkmKiHYSSfuQxqfYmd//X6PxvholpeE= diff --git a/internal/pkg/crawl/api.go b/internal/pkg/crawl/api.go index 00df2dad..6d1e6f47 100644 --- a/internal/pkg/crawl/api.go +++ b/internal/pkg/crawl/api.go @@ -50,7 +50,9 @@ func (crawl *Crawl) startAPI() { json.NewEncoder(w).Encode(response) }) - http.HandleFunc("/metrics", setupPrometheus(crawl).ServeHTTP) + if crawl.Prometheus { + http.HandleFunc("/metrics", setupPrometheus(crawl).ServeHTTP) + } http.HandleFunc("/workers", func(w http.ResponseWriter, r *http.Request) { workersState := crawl.GetWorkerState(-1) diff --git a/internal/pkg/crawl/config.go b/internal/pkg/crawl/config.go new file mode 100644 index 00000000..68d421a5 --- /dev/null +++ b/internal/pkg/crawl/config.go @@ -0,0 +1,279 @@ +package crawl + +import ( + "log/slog" + "net/http" + "path" + "path/filepath" + "sync" + "time" + + "git.archive.org/wb/gocrawlhq" + "github.com/CorentinB/warc" + "github.com/google/uuid" + "github.com/internetarchive/Zeno/config" + "github.com/internetarchive/Zeno/internal/pkg/frontier" + "github.com/internetarchive/Zeno/internal/pkg/log" + 
"github.com/internetarchive/Zeno/internal/pkg/utils" + "github.com/paulbellamy/ratecounter" +) + +// Crawl define the parameters of a crawl process +type Crawl struct { + *sync.Mutex + StartTime time.Time + SeedList []frontier.Item + Paused *utils.TAtomBool + Finished *utils.TAtomBool + LiveStats bool + + // Logger + Log *log.Logger + + // Frontier + Frontier *frontier.Frontier + + // Worker pool + WorkerMutex sync.RWMutex + WorkerPool []*Worker + WorkerStopSignal chan bool + WorkerStopTimeout time.Duration + + // Crawl settings + MaxConcurrentAssets int + Client *warc.CustomHTTPClient + ClientProxied *warc.CustomHTTPClient + DisabledHTMLTags []string + ExcludedHosts []string + IncludedHosts []string + ExcludedStrings []string + UserAgent string + Job string + JobPath string + MaxHops uint8 + MaxRetry int + MaxRedirect int + HTTPTimeout int + MaxConcurrentRequestsPerDomain int + RateLimitDelay int + CrawlTimeLimit int + MaxCrawlTimeLimit int + DisableAssetsCapture bool + CaptureAlternatePages bool + DomainsCrawl bool + Headless bool + Seencheck bool + Workers int + RandomLocalIP bool + MinSpaceRequired int + + // Cookie-related settings + CookieFile string + KeepCookies bool + CookieJar http.CookieJar + + // proxy settings + Proxy string + BypassProxy []string + + // API settings + API bool + APIPort string + Prometheus bool + PrometheusMetrics *PrometheusMetrics + + // Real time statistics + URIsPerSecond *ratecounter.RateCounter + ActiveWorkers *ratecounter.Counter + CrawledSeeds *ratecounter.Counter + CrawledAssets *ratecounter.Counter + + // WARC settings + WARCPrefix string + WARCOperator string + WARCWriter chan *warc.RecordBatch + WARCWriterFinish chan bool + WARCTempDir string + CDXDedupeServer string + WARCFullOnDisk bool + WARCPoolSize int + WARCDedupSize int + DisableLocalDedupe bool + CertValidation bool + WARCCustomCookie string + + // Crawl HQ settings + UseHQ bool + HQAddress string + HQProject string + HQKey string + HQSecret string + HQStrategy 
string + HQBatchSize int + HQContinuousPull bool + HQClient *gocrawlhq.Client + HQFinishedChannel chan *frontier.Item + HQProducerChannel chan *frontier.Item + HQChannelsWg *sync.WaitGroup + HQRateLimitingSendBack bool +} + +func GenerateCrawlConfig(config *config.Config) (*Crawl, error) { + var c = new(Crawl) + + // Ensure that the log file output directory is well parsed + logfileOutputDir := filepath.Dir(config.LogFileOutputDir) + if logfileOutputDir == "." && config.LogFileOutputDir != "." { + logfileOutputDir = filepath.Dir(config.LogFileOutputDir + "/") + } + + // Logger + customLoggerConfig := log.Config{ + FileConfig: &log.LogfileConfig{ + Dir: logfileOutputDir, + Prefix: "zeno", + }, + FileLevel: slog.LevelDebug, + StdoutEnabled: !config.NoStdoutLogging, + StdoutLevel: slog.LevelInfo, + RotateLogFile: true, + RotateElasticSearchIndex: true, + ElasticsearchConfig: &log.ElasticsearchConfig{ + Addresses: config.ElasticSearchURLs, + Username: config.ElasticSearchUsername, + Password: config.ElasticSearchPassword, + IndexPrefix: config.ElasticSearchIndexPrefix, + Level: slog.LevelDebug, + }, + } + if len(config.ElasticSearchURLs) == 0 || (config.ElasticSearchUsername == "" && config.ElasticSearchPassword == "") { + customLoggerConfig.ElasticsearchConfig = nil + } + + customLogger, err := log.New(customLoggerConfig) + if err != nil { + return nil, err + } + c.Log = customLogger + + // Statistics counters + c.CrawledSeeds = new(ratecounter.Counter) + c.CrawledAssets = new(ratecounter.Counter) + c.ActiveWorkers = new(ratecounter.Counter) + c.URIsPerSecond = ratecounter.NewRateCounter(1 * time.Second) + + c.LiveStats = config.LiveStats + + // Frontier + c.Frontier = new(frontier.Frontier) + c.Frontier.Log = c.Log + + // If the job name isn't specified, we generate a random name + if config.Job == "" { + if config.HQProject != "" { + c.Job = config.HQProject + } else { + UUID, err := uuid.NewUUID() + if err != nil { + 
c.Log.Error("cmd/utils.go:InitCrawlWithCMD():uuid.NewUUID()", "error", err) + return nil, err + } + + c.Job = UUID.String() + } + } else { + c.Job = config.Job + } + + c.JobPath = path.Join("jobs", config.Job) + + c.Workers = config.WorkersCount + c.WorkerPool = make([]*Worker, 0) + c.WorkerStopTimeout = time.Second * 60 // Placeholder for WorkerStopTimeout + c.MaxConcurrentAssets = config.MaxConcurrentAssets + c.WorkerStopSignal = make(chan bool) + + c.Seencheck = config.LocalSeencheck + c.HTTPTimeout = config.HTTPTimeout + c.MaxConcurrentRequestsPerDomain = config.MaxConcurrentRequestsPerDomain + c.RateLimitDelay = config.ConcurrentSleepLength + c.CrawlTimeLimit = config.CrawlTimeLimit + + // Defaults --max-crawl-time-limit to 10% more than --crawl-time-limit + if config.CrawlMaxTimeLimit == 0 && config.CrawlTimeLimit != 0 { + c.MaxCrawlTimeLimit = config.CrawlTimeLimit + (config.CrawlTimeLimit / 10) + } else { + c.MaxCrawlTimeLimit = config.CrawlMaxTimeLimit + } + + c.MaxRetry = config.MaxRetry + c.MaxRedirect = config.MaxRedirect + c.MaxHops = uint8(config.MaxHops) + c.DomainsCrawl = config.DomainsCrawl + c.DisableAssetsCapture = config.DisableAssetsCapture + c.DisabledHTMLTags = config.DisableHTMLTag + c.ExcludedHosts = config.ExcludeHosts + c.IncludedHosts = config.IncludeHosts + c.CaptureAlternatePages = config.CaptureAlternatePages + c.ExcludedStrings = config.ExcludeString + + c.MinSpaceRequired = config.MinSpaceRequired + + // WARC settings + c.WARCPrefix = config.WARCPrefix + c.WARCOperator = config.WARCOperator + + if config.WARCTempDir != "" { + c.WARCTempDir = config.WARCTempDir + } else { + c.WARCTempDir = path.Join(c.JobPath, "temp") + } + + c.CDXDedupeServer = config.CDXDedupeServer + c.DisableLocalDedupe = config.DisableLocalDedupe + c.CertValidation = config.CertValidation + c.WARCFullOnDisk = config.WARCOnDisk + c.WARCPoolSize = config.WARCPoolSize + c.WARCDedupSize = config.WARCDedupeSize + c.WARCCustomCookie = config.CDXCookie + + c.API = 
config.API + c.APIPort = config.APIPort + + // If Prometheus is specified, then we make sure + // c.API is true + c.Prometheus = config.Prometheus + if c.Prometheus { + c.API = true + c.PrometheusMetrics = &PrometheusMetrics{} + c.PrometheusMetrics.Prefix = config.PrometheusPrefix + } + + if config.UserAgent != "Zeno" { + c.UserAgent = config.UserAgent + } else { + version := utils.GetVersion() + c.UserAgent = "Mozilla/5.0 (compatible; archive.org_bot +http://archive.org/details/archive.org_bot) Zeno/" + version.Version[:7] + " warc/" + version.WarcVersion + } + c.Headless = config.Headless + + c.CookieFile = config.Cookies + c.KeepCookies = config.KeepCookies + + // Proxy settings + c.Proxy = config.Proxy + c.BypassProxy = config.DomainsBypassProxy + + // Crawl HQ settings + c.UseHQ = config.HQ + c.HQProject = config.HQProject + c.HQAddress = config.HQAddress + c.HQKey = config.HQKey + c.HQSecret = config.HQSecret + c.HQStrategy = config.HQStrategy + c.HQBatchSize = int(config.HQBatchSize) + c.HQContinuousPull = config.HQContinuousPull + c.HQRateLimitingSendBack = config.HQRateLimitSendBack + + return c, nil +} diff --git a/internal/pkg/crawl/crawl.go b/internal/pkg/crawl/crawl.go index c12f200d..8d072fd2 100644 --- a/internal/pkg/crawl/crawl.go +++ b/internal/pkg/crawl/crawl.go @@ -3,16 +3,13 @@ package crawl import ( "fmt" - "net/http" "sync" "time" "git.archive.org/wb/gocrawlhq" "github.com/CorentinB/warc" "github.com/internetarchive/Zeno/internal/pkg/frontier" - "github.com/internetarchive/Zeno/internal/pkg/log" "github.com/internetarchive/Zeno/internal/pkg/utils" - "github.com/paulbellamy/ratecounter" "github.com/prometheus/client_golang/prometheus" "github.com/sirupsen/logrus" "github.com/telanflow/cookiejar" @@ -25,105 +22,6 @@ type PrometheusMetrics struct { DownloadedURI prometheus.Counter } -// Crawl define the parameters of a crawl process -type Crawl struct { - *sync.Mutex - StartTime time.Time - SeedList []frontier.Item - Paused *utils.TAtomBool - 
Finished *utils.TAtomBool - LiveStats bool - - // Logger - Log *log.Logger - - // Frontier - Frontier *frontier.Frontier - - // Worker pool - WorkerMutex sync.RWMutex - WorkerPool []*Worker - WorkerStopSignal chan bool - WorkerStopTimeout time.Duration - - // Crawl settings - MaxConcurrentAssets int - Client *warc.CustomHTTPClient - ClientProxied *warc.CustomHTTPClient - DisabledHTMLTags []string - ExcludedHosts []string - IncludedHosts []string - ExcludedStrings []string - UserAgent string - Job string - JobPath string - MaxHops uint8 - MaxRetry int - MaxRedirect int - HTTPTimeout int - MaxConcurrentRequestsPerDomain int - RateLimitDelay int - CrawlTimeLimit int - MaxCrawlTimeLimit int - DisableAssetsCapture bool - CaptureAlternatePages bool - DomainsCrawl bool - Headless bool - Seencheck bool - Workers int - RandomLocalIP bool - MinSpaceRequired int - - // Cookie-related settings - CookieFile string - KeepCookies bool - CookieJar http.CookieJar - - // proxy settings - Proxy string - BypassProxy []string - - // API settings - API bool - APIPort string - PrometheusMetrics *PrometheusMetrics - - // Real time statistics - URIsPerSecond *ratecounter.RateCounter - ActiveWorkers *ratecounter.Counter - CrawledSeeds *ratecounter.Counter - CrawledAssets *ratecounter.Counter - - // WARC settings - WARCPrefix string - WARCOperator string - WARCWriter chan *warc.RecordBatch - WARCWriterFinish chan bool - WARCTempDir string - CDXDedupeServer string - WARCFullOnDisk bool - WARCPoolSize int - WARCDedupSize int - DisableLocalDedupe bool - CertValidation bool - WARCCustomCookie string - - // Crawl HQ settings - UseHQ bool - HQAddress string - HQProject string - HQKey string - HQSecret string - HQStrategy string - HQBatchSize int - HQContinuousPull bool - HQClient *gocrawlhq.Client - HQFinishedChannel chan *frontier.Item - HQProducerChannel chan *frontier.Item - HQChannelsWg *sync.WaitGroup - HQRateLimitingSendBack bool -} - // Start fire up the crawling process func (c *Crawl) 
Start() (err error) { c.StartTime = time.Now() diff --git a/internal/pkg/log/log.go b/internal/pkg/log/log.go index f1fe2ac7..d53ee707 100644 --- a/internal/pkg/log/log.go +++ b/internal/pkg/log/log.go @@ -40,11 +40,11 @@ type Logger struct { type Config struct { FileConfig *LogfileConfig FileLevel slog.Level + StdoutEnabled bool StdoutLevel slog.Level RotateLogFile bool ElasticsearchConfig *ElasticsearchConfig RotateElasticSearchIndex bool - LiveStats bool } // New creates a new Logger instance with the given configuration. @@ -61,7 +61,7 @@ func New(cfg Config) (*Logger, error) { var handlers []slog.Handler // Create stdout handler - if !cfg.LiveStats { + if cfg.StdoutEnabled { stdoutHandler := slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{ Level: cfg.StdoutLevel, }) diff --git a/main.go b/main.go index e4a8e759..f5a4238a 100644 --- a/main.go +++ b/main.go @@ -1,37 +1,25 @@ +// Zeno is a web crawler designed to operate wide crawls or to simply archive one web page. +// Zeno's key concepts are: portability, performance, simplicity ; with an emphasis on performance. 
+ +// Authors: +// +// Corentin Barreau +// Jake LaFountain +// Thomas Foubert package main import ( + "fmt" "os" _ "net/http/pprof" "github.com/internetarchive/Zeno/cmd" - _ "github.com/internetarchive/Zeno/cmd/all" - "github.com/internetarchive/Zeno/internal/pkg/utils" - "github.com/sirupsen/logrus" - "github.com/urfave/cli/v2" ) func main() { - app := cli.NewApp() - app.Name = "Zeno" - app.Version = utils.GetVersion().Version - app.Authors = append(app.Authors, &cli.Author{Name: "Corentin Barreau", Email: "corentin@archive.org"}) - app.Usage = "" - - app.Flags = cmd.GlobalFlags - app.Commands = cmd.Commands - app.CommandNotFound = cmd.CommandNotFound - app.Before = func(context *cli.Context) error { - return nil - } - - app.After = func(context *cli.Context) error { - return nil - } - - err := app.Run(os.Args) - if err != nil { - logrus.Panic(err) + if err := cmd.Run(); err != nil { + fmt.Println(err) + os.Exit(1) } }