From 81cdd983e45643e9b9b65233d9f9938d2c31130e Mon Sep 17 00:00:00 2001 From: Thomas FOUBERT Date: Mon, 1 Jul 2024 10:04:12 -0400 Subject: [PATCH 01/12] chore: moving current cmd package to cmd/v1 --- cmd/all/all.go | 6 ------ cmd/v1/all/all.go | 6 ++++++ cmd/{ => v1}/cmd.go | 0 cmd/{ => v1}/get/get.go | 2 +- cmd/{ => v1}/get/hq.go | 2 +- cmd/{ => v1}/get/list.go | 2 +- cmd/{ => v1}/get/url.go | 2 +- cmd/{ => v1}/utils.go | 0 cmd/{ => v1}/version/deps.go | 0 cmd/{ => v1}/version/version.go | 2 +- main.go | 4 ++-- 11 files changed, 13 insertions(+), 13 deletions(-) delete mode 100644 cmd/all/all.go create mode 100644 cmd/v1/all/all.go rename cmd/{ => v1}/cmd.go (100%) rename cmd/{ => v1}/get/get.go (93%) rename cmd/{ => v1}/get/hq.go (95%) rename cmd/{ => v1}/get/list.go (96%) rename cmd/{ => v1}/get/url.go (96%) rename cmd/{ => v1}/utils.go (100%) rename cmd/{ => v1}/version/deps.go (100%) rename cmd/{ => v1}/version/version.go (91%) diff --git a/cmd/all/all.go b/cmd/all/all.go deleted file mode 100644 index 67598d24..00000000 --- a/cmd/all/all.go +++ /dev/null @@ -1,6 +0,0 @@ -package all - -import ( - _ "github.com/internetarchive/Zeno/cmd/get" - _ "github.com/internetarchive/Zeno/cmd/version" -) diff --git a/cmd/v1/all/all.go b/cmd/v1/all/all.go new file mode 100644 index 00000000..a5ac5d6c --- /dev/null +++ b/cmd/v1/all/all.go @@ -0,0 +1,6 @@ +package all + +import ( + _ "github.com/internetarchive/Zeno/cmd/v1/get" + _ "github.com/internetarchive/Zeno/cmd/v1/version" +) diff --git a/cmd/cmd.go b/cmd/v1/cmd.go similarity index 100% rename from cmd/cmd.go rename to cmd/v1/cmd.go diff --git a/cmd/get/get.go b/cmd/v1/get/get.go similarity index 93% rename from cmd/get/get.go rename to cmd/v1/get/get.go index 09750d75..985f6dcd 100644 --- a/cmd/get/get.go +++ b/cmd/v1/get/get.go @@ -1,7 +1,7 @@ package get import ( - "github.com/internetarchive/Zeno/cmd" + "github.com/internetarchive/Zeno/cmd/v1" "github.com/internetarchive/Zeno/config" log "github.com/sirupsen/logrus" "github.com/urfave/cli/v2" diff --git a/cmd/get/hq.go b/cmd/v1/get/hq.go similarity index 95% rename from cmd/get/hq.go rename to cmd/v1/get/hq.go index 6c7dc208..ead42952 100644 --- a/cmd/get/hq.go +++ b/cmd/v1/get/hq.go @@ -1,7 +1,7 @@ package get import ( - "github.com/internetarchive/Zeno/cmd" + "github.com/internetarchive/Zeno/cmd/v1" "github.com/internetarchive/Zeno/config" "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus" diff --git a/cmd/get/list.go b/cmd/v1/get/list.go similarity index 96% rename from cmd/get/list.go rename to cmd/v1/get/list.go index bcacf64f..3c843f24 100644 --- a/cmd/get/list.go +++ b/cmd/v1/get/list.go @@ -1,7 +1,7 @@ package get import ( - "github.com/internetarchive/Zeno/cmd" + "github.com/internetarchive/Zeno/cmd/v1" "github.com/internetarchive/Zeno/config" "github.com/internetarchive/Zeno/internal/pkg/frontier" "github.com/sirupsen/logrus" diff --git a/cmd/get/url.go b/cmd/v1/get/url.go similarity index 96% rename from cmd/get/url.go rename to cmd/v1/get/url.go index 39749c59..28ce9f37 100644 --- a/cmd/get/url.go +++ b/cmd/v1/get/url.go @@ -3,7 +3,7 @@ package get import ( "net/url" - "github.com/internetarchive/Zeno/cmd" + "github.com/internetarchive/Zeno/cmd/v1" "github.com/internetarchive/Zeno/config" "github.com/internetarchive/Zeno/internal/pkg/frontier" "github.com/sirupsen/logrus" diff --git a/cmd/utils.go b/cmd/v1/utils.go similarity index 100% rename from cmd/utils.go rename to cmd/v1/utils.go diff --git a/cmd/version/deps.go b/cmd/v1/version/deps.go similarity index 100% rename from cmd/version/deps.go rename to cmd/v1/version/deps.go diff --git a/cmd/version/version.go b/cmd/v1/version/version.go similarity index 91% rename from cmd/version/version.go rename to cmd/v1/version/version.go index 7dd57c4a..5f0137f1 100644 --- a/cmd/version/version.go +++ b/cmd/v1/version/version.go @@ -1,7 +1,7 @@ package version import ( - "github.com/internetarchive/Zeno/cmd" + "github.com/internetarchive/Zeno/cmd/v1" "github.com/internetarchive/Zeno/internal/pkg/utils" "github.com/urfave/cli/v2" ) diff --git a/main.go b/main.go index e4a8e759..1bd5b53c 100644 --- a/main.go +++ b/main.go @@ -5,8 +5,8 @@ import ( _ "net/http/pprof" - "github.com/internetarchive/Zeno/cmd" - _ "github.com/internetarchive/Zeno/cmd/all" + "github.com/internetarchive/Zeno/cmd/v1" + _ "github.com/internetarchive/Zeno/cmd/v1/all" "github.com/internetarchive/Zeno/internal/pkg/utils" "github.com/sirupsen/logrus" "github.com/urfave/cli/v2" From 04eb2ed8bb15a35db4c252e78eeabed1c80216fa Mon Sep 17 00:00:00 2001 From: Thomas FOUBERT Date: Wed, 3 Jul 2024 18:14:41 -0400 Subject: [PATCH 02/12] feat: first flags reflection from cmd/v1 to cmd/v2 --- cmd/v1/cmd.go | 2 +- cmd/v1/get/get.go | 2 +- cmd/v1/get/hq.go | 2 +- cmd/v1/get/list.go | 2 +- cmd/v1/get/url.go | 2 +- cmd/v1/utils.go | 2 +- cmd/v2/cmd.go | 52 ++++++++++++ cmd/v2/get.go | 152 +++++++++++++++++++++++++++++++++++ config/{ => v1}/config.go | 0 config/v2/config.go | 137 +++++++++++++++++++++++++++++++ go.mod | 25 +++++- go.sum | 55 +++++++++++-- internal/pkg/crawl/config.go | 118 +++++++++++++++++++++++++++ internal/pkg/crawl/crawl.go | 102 ----------------------- main.go | 30 ++----- 15 files changed, 538 insertions(+), 145 deletions(-) create mode 100644 cmd/v2/cmd.go create mode 100644 cmd/v2/get.go rename config/{ => v1}/config.go (100%) create mode 100644 config/v2/config.go create mode 100644 internal/pkg/crawl/config.go diff --git a/cmd/v1/cmd.go b/cmd/v1/cmd.go index 5ffb177d..c79cdcd2 100644 --- a/cmd/v1/cmd.go +++ b/cmd/v1/cmd.go @@ -6,7 +6,7 @@ import ( "github.com/sirupsen/logrus" "github.com/urfave/cli/v2" - "github.com/internetarchive/Zeno/config" + "github.com/internetarchive/Zeno/config/v1" ) var GlobalFlags = []cli.Flag{ diff --git a/cmd/v1/get/get.go b/cmd/v1/get/get.go index 985f6dcd..1e738596 100644 --- a/cmd/v1/get/get.go +++ b/cmd/v1/get/get.go @@ -2,7 +2,7 @@ package get import ( "github.com/internetarchive/Zeno/cmd/v1" - "github.com/internetarchive/Zeno/config" + "github.com/internetarchive/Zeno/config/v1" log "github.com/sirupsen/logrus" "github.com/urfave/cli/v2" ) diff --git a/cmd/v1/get/hq.go b/cmd/v1/get/hq.go index ead42952..a15a2c86 100644 --- a/cmd/v1/get/hq.go +++ b/cmd/v1/get/hq.go @@ -2,7 +2,7 @@ package get import ( "github.com/internetarchive/Zeno/cmd/v1" - "github.com/internetarchive/Zeno/config" + "github.com/internetarchive/Zeno/config/v1" "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus" "github.com/urfave/cli/v2" diff --git a/cmd/v1/get/list.go b/cmd/v1/get/list.go index 3c843f24..86f4ac5e 100644 --- a/cmd/v1/get/list.go +++ b/cmd/v1/get/list.go @@ -2,7 +2,7 @@ package get import ( "github.com/internetarchive/Zeno/cmd/v1" - "github.com/internetarchive/Zeno/config" + "github.com/internetarchive/Zeno/config/v1" "github.com/internetarchive/Zeno/internal/pkg/frontier" "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus" diff --git a/cmd/v1/get/url.go b/cmd/v1/get/url.go index 28ce9f37..b2b4eb55 100644 --- a/cmd/v1/get/url.go +++ b/cmd/v1/get/url.go @@ -4,7 +4,7 @@ import ( "net/url" "github.com/internetarchive/Zeno/cmd/v1" - "github.com/internetarchive/Zeno/config" + "github.com/internetarchive/Zeno/config/v1" "github.com/internetarchive/Zeno/internal/pkg/frontier" "github.com/sirupsen/logrus" "github.com/urfave/cli/v2" diff --git a/cmd/v1/utils.go b/cmd/v1/utils.go index 155949b3..abd2cf5a 100644 --- a/cmd/v1/utils.go +++ b/cmd/v1/utils.go @@ -9,7 +9,7 @@ import ( "time" "github.com/google/uuid" - "github.com/internetarchive/Zeno/config" + "github.com/internetarchive/Zeno/config/v1" "github.com/internetarchive/Zeno/internal/pkg/crawl" "github.com/internetarchive/Zeno/internal/pkg/frontier" "github.com/internetarchive/Zeno/internal/pkg/log" diff --git a/cmd/v2/cmd.go b/cmd/v2/cmd.go new file mode 100644 index 00000000..ea43499d --- /dev/null +++ b/cmd/v2/cmd.go @@ -0,0 +1,52 @@ +package cmd + +import ( + "fmt" + "os" + + "github.com/internetarchive/Zeno/config/v2" + "github.com/spf13/cobra" +) + +var cfg *config.Config + +var rootCmd = &cobra.Command{ + Use: "Zeno", + Short: "State-of-the-art web crawler 🔱", + Long: `Zeno is a web crawler designed to operate wide crawls or to simply archive one web page. +Zeno's key concepts are: portability, performance, simplicity ; with an emphasis on performance. + +Authors: + Corentin Barreau + Jake LaFountain + Thomas Foubert +`, + PersistentPreRun: func(cmd *cobra.Command, args []string) { + // Initialize config here, after cobra has parsed command line flags + if err := config.InitConfig(); err != nil { + fmt.Printf("error initializing config: %s", err) + os.Exit(1) + } + + cfg = config.GetConfig() + }, + Run: func(cmd *cobra.Command, args []string) { + cmd.Help() + }, +} + +// Run the root command +func Run() error { + rootCmd.CompletionOptions.DisableDefaultCmd = true + + // Define flags and configuration settings + rootCmd.PersistentFlags().String("log-level", "info", "stdout log level (debug, info, warn, error)") + rootCmd.PersistentFlags().String("config", "", "config file (default is $HOME/zeno-config.yaml)") + + // Bind flags to viper + config.BindFlags(rootCmd.Flags()) + + addGetCMDs(rootCmd) + + return rootCmd.Execute() +} diff --git a/cmd/v2/get.go b/cmd/v2/get.go new file mode 100644 index 00000000..a41abef8 --- /dev/null +++ b/cmd/v2/get.go @@ -0,0 +1,152 @@ +package cmd + +import ( + "fmt" + "net/url" + + "github.com/internetarchive/Zeno/config/v2" + "github.com/internetarchive/Zeno/internal/pkg/crawl" + "github.com/internetarchive/Zeno/internal/pkg/frontier" + "github.com/sirupsen/logrus" + "github.com/spf13/cobra" +) + +func addGetCMDs(rootCmd *cobra.Command) { + getCmd := &cobra.Command{ + Use: "get", + Short: "Archive the web!", + Run: func(cmd *cobra.Command, args []string) { + if len(args) == 0 { + cmd.Help() + } + }, + } + + getCmd.PersistentFlags().String("user-agent", "Zeno", "User agent to use when requesting URLs.") + getCmd.PersistentFlags().String("job", "", "Job name to use, will determine the path for the persistent queue, seencheck database, and WARC files.") + getCmd.PersistentFlags().Int("workers", 1, "Number of concurrent workers to run.") + getCmd.PersistentFlags().Int("max-concurrent-assets", 8, "Max number of concurrent assets to fetch PER worker. E.g. if you have 100 workers and this setting at 8, Zeno could do up to 800 concurrent requests at any time.") + getCmd.PersistentFlags().Uint("max-hops", 0, "Maximum number of hops to execute.") + getCmd.PersistentFlags().String("cookies", "", "File containing cookies that will be used for requests.") + getCmd.PersistentFlags().Bool("keep-cookies", false, "Keep a global cookie jar") + getCmd.PersistentFlags().Bool("headless", false, "Use headless browsers instead of standard GET requests.") + getCmd.PersistentFlags().Bool("local-seencheck", false, "Simple local seencheck to avoid re-crawling of URIs.") + getCmd.PersistentFlags().Bool("json", false, "Output logs in JSON") + getCmd.PersistentFlags().Bool("debug", false, "") + getCmd.PersistentFlags().Bool("live-stats", false, "") + getCmd.PersistentFlags().Bool("api", false, "") + getCmd.PersistentFlags().String("api-port", "9443", "Port to listen on for the API.") + getCmd.PersistentFlags().Bool("prometheus", false, "Export metrics in Prometheus format, using this setting imply --api.") + getCmd.PersistentFlags().String("prometheus-prefix", "String used as a prefix for the exported Prometheus metrics.", "zeno:") + getCmd.PersistentFlags().Int("max-redirect", 20, "Specifies the maximum number of redirections to follow for a resource.") + getCmd.PersistentFlags().Int("max-retry", 20, "Number of retry if error happen when executing HTTP request.") + getCmd.PersistentFlags().Int("http-timeout", 30, "Number of seconds to wait before timing out a request.") + getCmd.PersistentFlags().Bool("domains-crawl", false, "If this is turned on, seeds will be treated as domains to crawl, therefore same-domain outlinks will be added to the queue as hop=0.") + getCmd.PersistentFlags().StringSlice("disable-html-tag", []string{}, "Specify HTML tag to not extract assets from") + getCmd.PersistentFlags().Bool("capture-alternate-pages", false, "If turned on, HTML tags with \"alternate\" values for their \"rel\" attribute will be archived.") + getCmd.PersistentFlags().StringSlice("exclude-host", []string{}, "Exclude a specific host from the crawl, note that it will not exclude the domain if it is encountered as an asset for another web page.") + getCmd.PersistentFlags().StringSlice("include-host", []string{}, "Only crawl specific hosts, note that it will not include the domain if it is encountered as an asset for another web page.") + getCmd.PersistentFlags().Int("max-concurrent-per-domain", 16, "Maximum number of concurrent requests per domain.") + getCmd.PersistentFlags().Int("concurrent-sleep-length", 500, "Number of milliseconds to sleep when max concurrency per domain is reached.") + getCmd.PersistentFlags().Int("crawl-time-limit", 0, "Number of seconds until the crawl will automatically set itself into the finished state.") + getCmd.PersistentFlags().Int("crawl-max-time-limit", 0, "Number of seconds until the crawl will automatically panic itself. Default to crawl-time-limit + (crawl-time-limit / 10)") + getCmd.PersistentFlags().StringSlice("exclude-string", []string{}, "Discard any (discovered) URLs containing this string.") + getCmd.PersistentFlags().Bool("random-local-ip", false, "Use random local IP for requests. (will be ignored if a proxy is set)") + + // Proxy flags + getCmd.PersistentFlags().String("proxy", "", "Proxy to use when requesting pages.") + getCmd.PersistentFlags().StringSlice("bypass-proxy", []string{}, "Domains that should not be proxied.") + + // WARC flags + getCmd.PersistentFlags().String("warc-prefix", "ZENO", "Prefix to use when naming the WARC files.") + getCmd.PersistentFlags().String("warc-operator", "", "Contact informations of the crawl operator to write in the Warc-Info record in each WARC file.") + getCmd.PersistentFlags().String("warc-cdx-dedupe-server", "", "Identify the server to use CDX deduplication. This also activates CDX deduplication on.") + getCmd.PersistentFlags().Bool("warc-on-disk", false, "Do not use RAM to store payloads when recording traffic to WARCs, everything will happen on disk (usually used to reduce memory usage).") + getCmd.PersistentFlags().Int("warc-pool-size", 1, "Number of concurrent WARC files to write.") + getCmd.PersistentFlags().String("warc-temp-dir", "", "Custom directory to use for WARC temporary files.") + getCmd.PersistentFlags().Bool("disable-local-dedupe", false, "Disable local URL agonistic deduplication.") + getCmd.PersistentFlags().Bool("cert-validation", false, "Enables certificate validation on HTTPS requests.") + getCmd.PersistentFlags().Bool("disable-assets-capture", false, "Disable assets capture.") + getCmd.PersistentFlags().Int("warc-dedupe-size", 1024, "Minimum size to deduplicate WARC records with revisit records.") + getCmd.PersistentFlags().String("cdx-cookie", "", "Pass custom cookie during CDX requests. Example: 'cdx_auth_token=test_value'") + + // Crawl HQ flags + getCmd.PersistentFlags().Bool("hq", false, "Use Crawl HQ to pull URLs to process.") + getCmd.PersistentFlags().String("hq-address", "", "Crawl HQ address.") + getCmd.PersistentFlags().String("hq-key", "", "Crawl HQ key.") + getCmd.PersistentFlags().String("hq-secret", "", "Crawl HQ secret.") + getCmd.PersistentFlags().String("hq-project", "", "Crawl HQ project.") + getCmd.PersistentFlags().Int64("hq-batch-size", 0, "Crawl HQ feeding batch size.") + getCmd.PersistentFlags().Bool("hq-continuous-pull", false, "If turned on, the crawler will pull URLs from Crawl HQ continuously.") + getCmd.PersistentFlags().String("hq-strategy", "Crawl HQ feeding strategy.", "lifo") + getCmd.PersistentFlags().Bool("hq-rate-limiting-send-back", false, "If turned on, the crawler will send back URLs that hit a rate limit to crawl HQ.") + + // Logging flags + getCmd.PersistentFlags().String("log-file-output-dir", "Directory to write log files to.", "jobs") + getCmd.PersistentFlags().String("es-url", "", "comma-separated ElasticSearch URL to use for indexing crawl logs.") + getCmd.PersistentFlags().String("es-user", "", "ElasticSearch username to use for indexing crawl logs.") + getCmd.PersistentFlags().String("es-password", "", "ElasticSearch password to use for indexing crawl logs.") + getCmd.PersistentFlags().String("es-index-prefix", "ElasticSearch index prefix to use for indexing crawl logs. Default is : `zeno`, without `-`", "zeno") + + config.BindFlags(getCmd.Flags()) + + getURLCmd(getCmd) + + rootCmd.AddCommand(getCmd) +} + +func getURLCmd(rootCmd *cobra.Command) { + getURLCmd := &cobra.Command{ + Use: "url [URL...]", + Short: "Archive given URLs", + Args: cobra.MinimumNArgs(1), + PreRunE: func(cmd *cobra.Command, args []string) error { + if cfg == nil { + return fmt.Errorf("viper config is nil") + } + return nil + }, + RunE: func(cmd *cobra.Command, args []string) error { + // Init crawl using the flags provided + crawl, err := crawl.GenerateCrawlConfig(cfg) + if err != nil { + if crawl != nil && crawl.Log != nil { + crawl.Log.WithFields(logrus.Fields{ + "crawl": crawl, + "err": err.Error(), + }).Error("'get url' exited due to error") + } + return err + } + + // Initialize initial seed list + for _, arg := range args { + input, err := url.Parse(arg) + if err != nil { + crawl.Log.WithFields(logrus.Fields{ + "input_url": arg, + "err": err.Error(), + }).Error("given URL is not a valid input") + return err + } + + crawl.SeedList = append(crawl.SeedList, *frontier.NewItem(input, nil, "seed", 0, "", false)) + } + + // Start crawl + err = crawl.Start() + if err != nil { + crawl.Log.WithFields(logrus.Fields{ + "crawl": crawl, + "err": err.Error(), + }).Error("crawl exited due to error") + return err + } + + crawl.Log.Info("Crawl finished") + return err + }, + } + + rootCmd.AddCommand(getURLCmd) +} diff --git a/config/config.go b/config/v1/config.go similarity index 100% rename from config/config.go rename to config/v1/config.go diff --git a/config/v2/config.go b/config/v2/config.go new file mode 100644 index 00000000..f12c964d --- /dev/null +++ b/config/v2/config.go @@ -0,0 +1,137 @@ +package config + +import ( + "fmt" + "os" + "strings" + "sync" + + "github.com/spf13/pflag" + "github.com/spf13/viper" +) + +// Config holds all configuration for our program +type Config struct { + // Global Flags + LogLevel string `mapstructure:"log-level"` + + // Get flags (crawling flags) + UserAgent string `mapstructure:"user-agent"` + Job string `mapstructure:"job"` + WorkersCount int `mapstructure:"workers"` + MaxConCurrentAssets int `mapstructure:"max-concurrent-assets"` + MaxHops uint `mapstructure:"max-hops"` + Cookies string `mapstructure:"cookies"` + KeepCookies bool `mapstructure:"keep-cookies"` + Headless bool `mapstructure:"headless"` + LocalSeenCheck bool `mapstructure:"local-seencheck"` + JSON bool `mapstructure:"json"` + Debug bool `mapstructure:"debug"` + LiveStats bool `mapstructure:"live-stats"` + API bool `mapstructure:"api"` + APIPort string `mapstructure:"api-port"` + Prometheus bool `mapstructure:"prometheus"` + PrometheusPrefix string `mapstructure:"prometheus-prefix"` + MaxRedirect int `mapstructure:"max-redirect"` + MaxRetry int `mapstructure:"max-retry"` + HTTPTimeout int `mapstructure:"http-timeout"` + DomainsCrawl bool `mapstructure:"domains-crawl"` + DisableHTMLTag []string `mapstructure:"disable-html-tag"` + CaptureAlternatePages bool `mapstructure:"capture-alternate-pages"` + ExcludeHosts []string `mapstructure:"exclude-host"` + IncludeHosts []string `mapstructure:"include-host"` + MaxConcurrentPerDomain int `mapstructure:"max-concurrent-per-domain"` + ConcurrentSleepLength int `mapstructure:"concurrent-sleep-length"` + CrawlTimeLimit int `mapstructure:"crawl-time-limit"` + CrawlMaxTimeLimit int `mapstructure:"crawl-max-time-limit"` + ExcludeString []string `mapstructure:"exclude-string"` + RandomLocalIP bool `mapstructure:"random-local-ip"` + + // Get flags (Proxy flags) + Proxy string `mapstructure:"proxy"` + DomainsBypassProxy []string `mapstructure:"bypass-proxy"` + + // Get flags (WARC flags) + WARCPrefix string `mapstructure:"warc-prefix"` + WARCOperator string `mapstructure:"warc-operator"` + CDXDedupeServer string `mapstructure:"warc-cdx-dedupe-server"` + WARCOnDisk bool `mapstructure:"warc-on-disk"` + WARCPoolSize int `mapstructure:"warc-pool-size"` + WARCTempDir string `mapstructure:"warc-temp-dir"` + DisableLocalDedupe bool `mapstructure:"disable-local-dedupe"` + CertValidation bool `mapstructure:"cert-validation"` + DisableAssetsCapture bool `mapstructure:"disable-assets-capture"` + WARCDedupeSize int `mapstructure:"warc-dedupe-size"` + CDXCookie string `mapstructure:"cdx-cookie"` + + // Get flags (Crawl HQ flags) + HQ bool `mapstructure:"hq"` + HQAddress string `mapstructure:"hq-address"` + HQKey string `mapstructure:"hq-key"` + HQSecret string `mapstructure:"hq-secret"` + HQProject string `mapstructure:"hq-project"` + HQBatchSize int64 `mapstructure:"hq-batch-size"` + HQContinuousPull bool `mapstructure:"hq-continuous-pull"` + HQStrategy string `mapstructure:"hq-strategy"` + HQRateLimitSendBack bool `mapstructure:"hq-rate-limiting-send-back"` + + // Get flags (Logging flags) + LogFileOutputDir string `mapstructure:"log-file-output-dir"` + ElasticSearchURL string `mapstructure:"es-url"` + ElasticSearchUser string `mapstructure:"es-user"` + ElasticSearchPassword string `mapstructure:"es-password"` + ElasticSearchIndexPrefix string `mapstructure:"es-index-prefix"` +} + +var ( + config Config + once sync.Once +) + +// InitConfig initializes the configuration +func InitConfig() error { + var err error + once.Do(func() { + // Check if a config file is provided via flag + if configFile := viper.GetString("config"); configFile != "" { + viper.SetConfigFile(configFile) + } else { + home, err := os.UserHomeDir() + if err != nil { + fmt.Println(err) + os.Exit(1) + } + + viper.AddConfigPath(home) + viper.SetConfigType("yaml") + viper.SetConfigName("zeno-config") + } + + viper.SetEnvPrefix("ZENO") + replacer := strings.NewReplacer("-", "_", ".", "_") + viper.SetEnvKeyReplacer(replacer) + viper.AutomaticEnv() + + if err = viper.ReadInConfig(); err == nil { + fmt.Println("Using config file:", viper.ConfigFileUsed()) + } + + // Unmarshal the config into the Config struct + err = viper.Unmarshal(&config) + }) + return err +} + +// BindFlags binds the flags to the viper configuration +// This is needed because viper doesn't support same flag name accross multiple commands +// Details here: https://github.com/spf13/viper/issues/375#issuecomment-794668149 +func BindFlags(flagSet *pflag.FlagSet) { + flagSet.VisitAll(func(flag *pflag.Flag) { + viper.BindPFlag(flag.Name, flag) + }) +} + +// GetConfig returns the config struct +func GetConfig() *Config { + return &config +} diff --git a/go.mod b/go.mod index e0503e56..ada7cc42 100644 --- a/go.mod +++ b/go.mod @@ -26,13 +26,15 @@ require ( github.com/remeh/sizedwaitgroup v1.0.0 github.com/sirupsen/logrus v1.9.3 github.com/spf13/afero v1.11.0 + github.com/spf13/cobra v1.8.0 + github.com/spf13/viper v1.19.0 github.com/stretchr/testify v1.9.0 github.com/telanflow/cookiejar v0.0.0-20190719062046-114449e86aa5 github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80 github.com/urfave/cli/v2 v2.27.1 github.com/zeebo/xxh3 v1.0.2 go.uber.org/goleak v1.3.0 - golang.org/x/net v0.22.0 + golang.org/x/net v0.23.0 mvdan.cc/xurls/v2 v2.5.0 ) @@ -46,9 +48,10 @@ require ( github.com/chenzhuoyu/iasm v0.9.1 // indirect github.com/cloudflare/circl v1.3.7 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.3 // indirect - github.com/davecgh/go-spew v1.1.1 // indirect + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/elastic/elastic-transport-go/v8 v8.6.0 // indirect github.com/fatih/color v1.16.0 // indirect + github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/gabriel-vasile/mimetype v1.4.3 // indirect github.com/gin-contrib/sse v0.1.0 // indirect github.com/go-logr/logr v1.4.1 // indirect @@ -62,6 +65,8 @@ require ( github.com/goccy/go-json v0.10.2 // indirect github.com/golang/snappy v0.0.4 // indirect github.com/gomodule/redigo v1.9.2 // indirect + github.com/hashicorp/hcl v1.0.0 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/jonboulle/clockwork v0.4.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect @@ -70,17 +75,19 @@ require ( github.com/klauspost/pgzip v1.2.6 // indirect github.com/leodido/go-urn v1.4.0 // indirect github.com/lestrrat-go/strftime v1.0.6 // indirect + github.com/magiconair/properties v1.8.7 // indirect github.com/mailru/easyjson v0.7.7 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/mattn/go-runewidth v0.0.15 // indirect + github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect - github.com/pelletier/go-toml/v2 v2.1.1 // indirect + github.com/pelletier/go-toml/v2 v2.2.2 // indirect github.com/philippgille/gokv/encoding v0.7.0 // indirect github.com/philippgille/gokv/util v0.7.0 // indirect github.com/pkg/errors v0.9.1 // indirect - github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/prometheus/client_model v0.6.0 // indirect github.com/prometheus/common v0.53.0 // indirect github.com/prometheus/procfs v0.13.0 // indirect @@ -88,7 +95,13 @@ require ( github.com/refraction-networking/utls v1.6.3 // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect + github.com/sagikazarmark/locafero v0.4.0 // indirect + github.com/sagikazarmark/slog-shim v0.1.0 // indirect github.com/satori/go.uuid v1.2.0 // indirect + github.com/sourcegraph/conc v0.3.0 // indirect + github.com/spf13/cast v1.6.0 // indirect + github.com/spf13/pflag v1.0.5 // indirect + github.com/subosito/gotenv v1.6.0 // indirect github.com/syndtr/goleveldb v1.0.0 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/ugorji/go/codec v1.2.12 // indirect @@ -96,11 +109,15 @@ require ( go.opentelemetry.io/otel v1.24.0 // indirect go.opentelemetry.io/otel/metric v1.24.0 // indirect go.opentelemetry.io/otel/trace v1.24.0 // indirect + go.uber.org/atomic v1.9.0 // indirect + go.uber.org/multierr v1.9.0 // indirect golang.org/x/arch v0.7.0 // indirect golang.org/x/crypto v0.21.0 // indirect + golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect golang.org/x/sync v0.6.0 // indirect golang.org/x/sys v0.20.0 // indirect golang.org/x/text v0.14.0 // indirect google.golang.org/protobuf v1.33.0 // indirect + gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 4e20cd38..dbeaafb7 100644 --- a/go.sum +++ b/go.sum @@ -48,8 +48,9 @@ github.com/cpuguy83/go-md2man/v2 v2.0.3 h1:qMCsGGgs+MAzDFyp9LpAe1Lqy/fY/qCovCm0q github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/eapache/go-resiliency v1.1.0/go.mod h1:kFI+JgMyC7bLPUVY133qvEBtVayf5mFgVsvEsIPBvNs= @@ -65,7 +66,11 @@ github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM= github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE= github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw= github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= +github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= +github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= +github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0= github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk= github.com/gin-contrib/pprof v1.4.0 h1:XxiBSf5jWZ5i16lNOPbMTVdgHBdhfGRD5PZ1LWazzvg= @@ -137,8 +142,12 @@ github.com/gosuri/uilive v0.0.4/go.mod h1:V/epo5LjjlDE5RJUcqx8dbw+zc93y5Ya3yg8tf github.com/gosuri/uitable v0.0.4 h1:IG2xLKRvErL3uhY6e1BylFzG+aJiwQviDDTfOKeKTpY= github.com/gosuri/uitable v0.0.4/go.mod h1:tKR86bXuXPZazfOTG1FIzvjIdXzd0mo4Vtn16vt0PJo= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= +github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/internetarchive/elogrus v0.0.0-20230725172814-093db31a64fc h1:/f7B0tD2oVSHxIrDy9ciEQaVy/I/sMdLwFMVkjh5LLU= github.com/internetarchive/elogrus v0.0.0-20230725172814-093db31a64fc/go.mod h1:oI8WojhCewR2pTR8bAx5/9pJO7ToWuLEzUuVIX3IVYk= github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= @@ -178,6 +187,8 @@ github.com/lestrrat-go/file-rotatelogs v2.4.0+incompatible h1:Y6sqxHMyB1D2YSzWkL github.com/lestrrat-go/file-rotatelogs v2.4.0+incompatible/go.mod h1:ZQnN8lSECaebrkQytbHj4xNgtg8CR7RYXnPok8e0EHA= github.com/lestrrat-go/strftime v1.0.6 h1:CFGsDEt1pOpFNU+TJB0nhz9jl+K0hZSLE205AhTIGQQ= github.com/lestrrat-go/strftime v1.0.6/go.mod h1:f7jQKgV5nnJpYgdEasS+/y7EsTb8ykN2z68n3TtcTaw= +github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY= +github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= github.com/mailru/easyjson v0.0.0-20190312143242-1de009706dbe/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= @@ -191,6 +202,8 @@ github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= +github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= +github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -213,8 +226,8 @@ github.com/openzipkin/zipkin-go v0.1.6/go.mod h1:QgAqvLzwWbR/WpD4A3cGpPtJrZXNIiJ github.com/paulbellamy/ratecounter v0.2.0 h1:2L/RhJq+HA8gBQImDXtLPrDXK5qAj6ozWVK/zFXVJGs= github.com/paulbellamy/ratecounter v0.2.0/go.mod h1:Hfx1hDpSGoqxkVVpBi/IlYD7kChlfo5C6hzIHwPqfFE= github.com/pelletier/go-toml/v2 v2.0.1/go.mod h1:r9LEWfGN8R5k0VXJ+0BkIe7MYkRdwZOjgMj2KwnJFUo= -github.com/pelletier/go-toml/v2 v2.1.1 h1:LWAJwfNvjQZCFIDKWYQaM62NcYeYViCmWIwmOStowAI= -github.com/pelletier/go-toml/v2 v2.1.1/go.mod h1:tJU2Z3ZkXwnxa4DPO899bsyIoywizdUvyaeZurnPPDc= +github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM= +github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs= github.com/philippgille/gokv v0.7.0 h1:rQSIQspete82h78Br7k7rKUZ8JYy/hWlwzm/W5qobPI= github.com/philippgille/gokv v0.7.0/go.mod h1:OwiTP/3bhEBhSuOmFmq1+rszglfSgjJVxd1HOgOa2N4= github.com/philippgille/gokv/encoding v0.7.0 h1:2oxepKzzTsi00iLZBCZ7Rmqrallh9zws3iqSrLGfkgo= @@ -231,8 +244,9 @@ github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v0.9.3-0.20190127221311-3c4408c8b829/go.mod h1:p2iRAGwDERtqlqzRXnrOVns+ignqQo//hLXqYxZYVNs= github.com/prometheus/client_golang v1.19.0 h1:ygXvpU1AoN1MhdzckN+PyD9QJOSD4x7kmXYlnfbA6JU= @@ -264,6 +278,10 @@ github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjR github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/sagikazarmark/locafero v0.4.0 h1:HApY1R9zGo4DBgr7dqsTH/JJxLTTsOt7u6keLGt6kNQ= +github.com/sagikazarmark/locafero v0.4.0/go.mod h1:Pe1W6UlPYUk/+wc/6KFhbORCfqzgYEpgQ3O5fPuL3H4= +github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE= +github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ= github.com/satori/go.uuid v1.2.0 h1:0uYX9dsZ2yD7q2RtLRtPSdGDWzjeM3TbMJP9utgA0ww= github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= @@ -271,12 +289,23 @@ github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6Mwd github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/smartystreets/go-aws-auth v0.0.0-20180515143844-0c1422d1fdb9/go.mod h1:SnhjPscd9TpLiy1LpzGSKh3bXCfxxXuqd9xmQJy3slM= +github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= +github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0= github.com/spf13/afero v1.11.0 h1:WJQKhtpdm3v2IzqG8VMqrr6Rf3UYpEF239Jy9wNepM8= github.com/spf13/afero v1.11.0/go.mod h1:GH9Y3pIexgf1MTIWtNGyogA5MwRIDXGUr+hbWNoBjkY= +github.com/spf13/cast v1.6.0 h1:GEiTHELF+vaR5dhz3VqZfFSzZjYbgeKDpBxQVS4GYJ0= +github.com/spf13/cast v1.6.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= +github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0= +github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/viper v1.19.0 h1:RWq5SEjt8o25SROyN3z2OrDB9l7RPd3lwTWU8EcEdcI= +github.com/spf13/viper v1.19.0/go.mod h1:GQUN9bilAbhU/jgc1bKs99f/suXKeUMct8Adx5+Ntkg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= @@ -287,6 +316,8 @@ github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= +github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= github.com/syndtr/goleveldb v1.0.0 h1:fBdIW9lB4Iz0n9khmH8w27SJ3QEJ7+IgjPEwGSZiFdE= github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ= github.com/telanflow/cookiejar v0.0.0-20190719062046-114449e86aa5 h1:gTQl5nPlc9B53vFOKM8aJHwxB2BW2kM49PVR5526GBg= @@ -317,8 +348,12 @@ go.opentelemetry.io/otel/sdk v1.21.0 h1:FTt8qirL1EysG6sTQRZ5TokkU8d0ugCj8htOgThZ go.opentelemetry.io/otel/sdk v1.21.0/go.mod h1:Nna6Yv7PWTdgJHVRD9hIYywQBRx7pbox6nwBnZIxl/E= go.opentelemetry.io/otel/trace v1.24.0 h1:CsKnnL4dUAr/0llH9FKuc698G04IrpWV0MQA/Y1YELI= go.opentelemetry.io/otel/trace v1.24.0/go.mod h1:HPc3Xr/cOApsBI154IU0OI0HJexz+aw5uPdbs3UCjNU= +go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE= +go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.9.0 h1:7fIwc/ZtS0q++VgcfqFDxSBZVv/Xo49/SYnDFupUwlI= +go.uber.org/multierr v1.9.0/go.mod h1:X2jQV1h+kxSjClGpnseKVIxpmcjrj7MNnI0bnlfKTVQ= golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= golang.org/x/arch v0.7.0 h1:pskyeJh/3AmoQ8CPE95vxHLqp1G1GfGNXTmcl9NEKTc= golang.org/x/arch v0.7.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= @@ -329,6 +364,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20230905200255-921286631fa9 h1:GoHiUyI/Tp2nVkLI2mCxVkOjsbSXD66ic0XW0js0R9g= +golang.org/x/exp v0.0.0-20230905200255-921286631fa9/go.mod h1:S2oDrQGGwySpoQPVqRShND87VCbxmc6bL1Yd2oYrm6k= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= @@ -347,8 +384,8 @@ golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= -golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc= -golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= +golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= +golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -400,8 +437,8 @@ golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3 golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= -golang.org/x/tools v0.9.1 h1:8WMNJAz3zrtPmnYC7ISf5dEn3MT0gY7jBJfw27yrrLo= -golang.org/x/tools v0.9.1/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc= +golang.org/x/tools v0.13.0 h1:Iey4qkscZuv0VvIt8E0neZjtPVQFSc870HQ448QgEmQ= +golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/api v0.3.1/go.mod h1:6wY9I6uQWHQ8EM57III9mq/AjF+i8G65rmVagqKMtkk= @@ -423,6 +460,8 @@ gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EV gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= +gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= +gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/internal/pkg/crawl/config.go b/internal/pkg/crawl/config.go new file mode 100644 index 00000000..f68d054b --- /dev/null +++ b/internal/pkg/crawl/config.go @@ -0,0 +1,118 @@ +package crawl + +import ( + "net/http" + "sync" + "time" + + "git.archive.org/wb/gocrawlhq" + "github.com/CorentinB/warc" + "github.com/internetarchive/Zeno/config/v2" + "github.com/internetarchive/Zeno/internal/pkg/frontier" + "github.com/internetarchive/Zeno/internal/pkg/log" + "github.com/internetarchive/Zeno/internal/pkg/utils" + "github.com/paulbellamy/ratecounter" +) + +// Crawl define the parameters of a crawl process +type Crawl struct { + *sync.Mutex + StartTime time.Time + SeedList []frontier.Item + Paused *utils.TAtomBool + Finished *utils.TAtomBool + LiveStats bool + + // Logger + Log *log.Logger + + // Frontier + Frontier *frontier.Frontier + + // Worker pool + WorkerMutex sync.RWMutex + WorkerPool []*Worker + WorkerStopSignal chan bool + WorkerStopTimeout time.Duration + + // Crawl settings + MaxConcurrentAssets int + Client *warc.CustomHTTPClient + ClientProxied *warc.CustomHTTPClient + DisabledHTMLTags []string + ExcludedHosts []string + IncludedHosts []string + ExcludedStrings []string + UserAgent string + Job string + JobPath string + MaxHops uint8 + MaxRetry int + MaxRedirect int + HTTPTimeout int + MaxConcurrentRequestsPerDomain int + RateLimitDelay int + CrawlTimeLimit int + MaxCrawlTimeLimit int + DisableAssetsCapture bool + CaptureAlternatePages bool + DomainsCrawl bool + Headless bool + Seencheck bool + Workers int + RandomLocalIP bool + + // Cookie-related settings + CookieFile string + KeepCookies bool + CookieJar http.CookieJar + + // proxy settings + Proxy string + BypassProxy []string + + // API settings + API bool + APIPort string + Prometheus bool + PrometheusMetrics *PrometheusMetrics + + // Real time statistics + URIsPerSecond *ratecounter.RateCounter + ActiveWorkers *ratecounter.Counter + CrawledSeeds *ratecounter.Counter + CrawledAssets *ratecounter.Counter + + // WARC settings + WARCPrefix string + WARCOperator string + WARCWriter chan *warc.RecordBatch + WARCWriterFinish chan bool + WARCTempDir string + CDXDedupeServer string + WARCFullOnDisk bool + WARCPoolSize int + WARCDedupSize int + DisableLocalDedupe bool + CertValidation bool + WARCCustomCookie string + + // Crawl HQ settings + UseHQ bool + HQAddress string + HQProject string + HQKey string + HQSecret string + HQStrategy string + HQBatchSize int + HQContinuousPull bool + HQClient *gocrawlhq.Client + HQFinishedChannel chan *frontier.Item + HQProducerChannel chan *frontier.Item + HQChannelsWg *sync.WaitGroup + HQRateLimitingSendBack bool +} + +func GenerateCrawlConfig(config *config.Config) (*Crawl, error) { + return nil, nil +} diff --git a/internal/pkg/crawl/crawl.go b/internal/pkg/crawl/crawl.go index 802dfb1d..8d072fd2 100644 --- a/internal/pkg/crawl/crawl.go +++ b/internal/pkg/crawl/crawl.go @@ -3,16 +3,13 @@ package crawl import ( "fmt" - "net/http" "sync" "time" "git.archive.org/wb/gocrawlhq" "github.com/CorentinB/warc" "github.com/internetarchive/Zeno/internal/pkg/frontier" - "github.com/internetarchive/Zeno/internal/pkg/log" "github.com/internetarchive/Zeno/internal/pkg/utils" - "github.com/paulbellamy/ratecounter" "github.com/prometheus/client_golang/prometheus" "github.com/sirupsen/logrus" "github.com/telanflow/cookiejar" @@ -25,105 +22,6 @@ type PrometheusMetrics struct { DownloadedURI prometheus.Counter } -// Crawl define the parameters of a crawl process -type Crawl struct { - *sync.Mutex - StartTime time.Time - SeedList []frontier.Item - Paused *utils.TAtomBool - Finished *utils.TAtomBool - LiveStats bool - - // Logger - Log *log.Logger - - // Frontier - Frontier *frontier.Frontier - - // Worker pool - WorkerMutex sync.RWMutex - WorkerPool []*Worker - WorkerStopSignal chan bool - WorkerStopTimeout time.Duration - - // Crawl settings - MaxConcurrentAssets int - Client *warc.CustomHTTPClient - ClientProxied *warc.CustomHTTPClient - DisabledHTMLTags []string - ExcludedHosts []string - IncludedHosts []string - ExcludedStrings []string - UserAgent string - Job string - JobPath string - MaxHops uint8 - MaxRetry int - MaxRedirect int - HTTPTimeout int - MaxConcurrentRequestsPerDomain int - RateLimitDelay int - CrawlTimeLimit int - MaxCrawlTimeLimit int - DisableAssetsCapture bool - CaptureAlternatePages bool - DomainsCrawl bool - Headless bool - Seencheck bool - Workers int - RandomLocalIP bool - - // Cookie-related settings - CookieFile string - KeepCookies bool - CookieJar http.CookieJar - - // proxy settings - Proxy string - BypassProxy []string - - // API settings - API bool - APIPort string - Prometheus bool - PrometheusMetrics *PrometheusMetrics - - // Real time statistics - URIsPerSecond *ratecounter.RateCounter - ActiveWorkers *ratecounter.Counter - CrawledSeeds *ratecounter.Counter - CrawledAssets *ratecounter.Counter - - // WARC settings - WARCPrefix string - WARCOperator string - WARCWriter chan *warc.RecordBatch - WARCWriterFinish chan bool - WARCTempDir string - CDXDedupeServer string - WARCFullOnDisk bool - WARCPoolSize int - WARCDedupSize int - DisableLocalDedupe bool - CertValidation bool - WARCCustomCookie string - - // Crawl HQ settings - UseHQ bool - HQAddress string - HQProject string - HQKey string - HQSecret string - HQStrategy string - HQBatchSize int - HQContinuousPull bool - HQClient *gocrawlhq.Client - HQFinishedChannel chan *frontier.Item - HQProducerChannel chan *frontier.Item - HQChannelsWg *sync.WaitGroup - HQRateLimitingSendBack bool -} - // Start fire up the crawling process func (c *Crawl) Start() (err error) { c.StartTime = time.Now() diff --git a/main.go b/main.go index 1bd5b53c..e6f0213a 100644 --- a/main.go +++ b/main.go @@ -1,37 +1,17 @@ package main import ( + "fmt" "os" _ "net/http/pprof" - "github.com/internetarchive/Zeno/cmd/v1" - _ "github.com/internetarchive/Zeno/cmd/v1/all" - "github.com/internetarchive/Zeno/internal/pkg/utils" - "github.com/sirupsen/logrus" - "github.com/urfave/cli/v2" + "github.com/internetarchive/Zeno/cmd/v2" ) func main() { - app := cli.NewApp() - app.Name = "Zeno" - app.Version = utils.GetVersion().Version - app.Authors = append(app.Authors, &cli.Author{Name: "Corentin Barreau", Email: "corentin@archive.org"}) - app.Usage = "" - - app.Flags = cmd.GlobalFlags - app.Commands = cmd.Commands - app.CommandNotFound = cmd.CommandNotFound - app.Before = func(context *cli.Context) error { - return nil - } - - app.After = func(context *cli.Context) error { - return nil - } - - err := app.Run(os.Args) - if err != nil { - logrus.Panic(err) + if err := cmd.Execute(); err != nil { + fmt.Println(err) + os.Exit(1) } } From 7c878eb81180f4dba830616aa2c37a9a83d93b02 Mon Sep 17 00:00:00 2001 From: Thomas FOUBERT Date: Fri, 5 Jul 2024 17:54:59 -0400 Subject: [PATCH 03/12] feat: first command implemented and tested --- cmd/v2/cmd.go | 15 ++-- cmd/v2/get.go | 20 ++--- config/v2/config.go | 82 +++++++++--------- go.mod | 21 ++--- go.sum | 154 +-------------------------------- internal/pkg/crawl/config.go | 159 ++++++++++++++++++++++++++++++++++- main.go | 2 +- 7 files changed, 229 insertions(+), 224 deletions(-) diff --git a/cmd/v2/cmd.go b/cmd/v2/cmd.go index ea43499d..6a313b5d 100644 --- a/cmd/v2/cmd.go +++ b/cmd/v2/cmd.go @@ -2,7 +2,6 @@ package cmd import ( "fmt" - "os" "github.com/internetarchive/Zeno/config/v2" "github.com/spf13/cobra" @@ -21,14 +20,15 @@ Authors: Jake LaFountain Thomas Foubert `, - PersistentPreRun: func(cmd *cobra.Command, args []string) { + PersistentPreRunE: func(cmd *cobra.Command, args []string) error { // Initialize config here, after cobra has parsed command line flags + config.BindFlags(cmd.Flags()) if err := config.InitConfig(); err != nil { - fmt.Printf("error initializing config: %s", err) - os.Exit(1) + return fmt.Errorf("error initializing config: %s", err) } cfg = config.GetConfig() + return nil }, Run: func(cmd *cobra.Command, args []string) { cmd.Help() @@ -43,10 +43,9 @@ func Run() error { rootCmd.PersistentFlags().String("log-level", "info", "stdout log level (debug, info, warn, error)") rootCmd.PersistentFlags().String("config", "", "config file (default is $HOME/zeno-config.yaml)") - // Bind flags to viper - config.BindFlags(rootCmd.Flags()) - - addGetCMDs(rootCmd) + // Add get subcommands + getCmd := getCMDs() + rootCmd.AddCommand(getCmd) return rootCmd.Execute() } diff --git a/cmd/v2/get.go b/cmd/v2/get.go index a41abef8..b7616e4b 100644 --- a/cmd/v2/get.go +++ b/cmd/v2/get.go @@ -4,14 +4,13 @@ import ( "fmt" "net/url" - "github.com/internetarchive/Zeno/config/v2" "github.com/internetarchive/Zeno/internal/pkg/crawl" "github.com/internetarchive/Zeno/internal/pkg/frontier" "github.com/sirupsen/logrus" "github.com/spf13/cobra" ) -func addGetCMDs(rootCmd *cobra.Command) { +func getCMDs() *cobra.Command { getCmd := &cobra.Command{ Use: "get", Short: "Archive the web!", @@ -78,24 +77,23 @@ func addGetCMDs(rootCmd *cobra.Command) { getCmd.PersistentFlags().String("hq-project", "", "Crawl HQ project.") getCmd.PersistentFlags().Int64("hq-batch-size", 0, "Crawl HQ feeding batch size.") getCmd.PersistentFlags().Bool("hq-continuous-pull", false, "If turned on, the crawler will pull URLs from Crawl HQ continuously.") - getCmd.PersistentFlags().String("hq-strategy", "Crawl HQ feeding strategy.", "lifo") + getCmd.PersistentFlags().String("hq-strategy", "lifo", "Crawl HQ feeding strategy.") getCmd.PersistentFlags().Bool("hq-rate-limiting-send-back", false, "If turned on, the crawler will send back URLs that hit a rate limit to crawl HQ.") // Logging flags - getCmd.PersistentFlags().String("log-file-output-dir", "Directory to write log files to.", "jobs") + getCmd.PersistentFlags().String("log-file-output-dir", "./jobs/", "Directory to write log files to.") getCmd.PersistentFlags().String("es-url", "", "comma-separated ElasticSearch URL to use for indexing crawl logs.") getCmd.PersistentFlags().String("es-user", "", "ElasticSearch username to use for indexing crawl logs.") getCmd.PersistentFlags().String("es-password", "", "ElasticSearch password to use for indexing crawl logs.") - getCmd.PersistentFlags().String("es-index-prefix", "ElasticSearch index prefix to use for indexing crawl logs. Default is : `zeno`, without `-`", "zeno") + getCmd.PersistentFlags().String("es-index-prefix", "zeno", "ElasticSearch index prefix to use for indexing crawl logs. Default is : `zeno`, without `-`") - config.BindFlags(getCmd.Flags()) + getURLCmd := getURLCmd() + getCmd.AddCommand(getURLCmd) - getURLCmd(getCmd) - - rootCmd.AddCommand(getCmd) + return getCmd } -func getURLCmd(rootCmd *cobra.Command) { +func getURLCmd() *cobra.Command { getURLCmd := &cobra.Command{ Use: "url [URL...]", Short: "Archive given URLs", @@ -148,5 +146,5 @@ func getURLCmd(rootCmd *cobra.Command) { }, } - rootCmd.AddCommand(getURLCmd) + return getURLCmd } diff --git a/config/v2/config.go b/config/v2/config.go index f12c964d..c6590648 100644 --- a/config/v2/config.go +++ b/config/v2/config.go @@ -16,36 +16,36 @@ type Config struct { LogLevel string `mapstructure:"log-level"` // Get flags (crawling flags) - UserAgent string `mapstructure:"user-agent"` - Job string `mapstructure:"job"` - WorkersCount int `mapstructure:"workers"` - MaxConCurrentAssets int `mapstructure:"max-concurrent-assets"` - MaxHops uint `mapstructure:"max-hops"` - Cookies string `mapstructure:"cookies"` - KeepCookies bool `mapstructure:"keep-cookies"` - Headless bool `mapstructure:"headless"` - LocalSeenCheck bool `mapstructure:"local-seencheck"` - JSON bool `mapstructure:"json"` - Debug bool `mapstructure:"debug"` - LiveStats bool `mapstructure:"live-stats"` - API bool `mapstructure:"api"` - APIPort string `mapstructure:"api-port"` - Prometheus bool `mapstructure:"prometheus"` - PrometheusPrefix string `mapstructure:"prometheus-prefix"` - MaxRedirect int `mapstructure:"max-redirect"` - MaxRetry int `mapstructure:"max-retry"` - HTTPTimeout int `mapstructure:"http-timeout"` - DomainsCrawl bool `mapstructure:"domains-crawl"` - DisableHTMLTag []string `mapstructure:"disable-html-tag"` - CaptureAlternatePages bool `mapstructure:"capture-alternate-pages"` - ExcludeHosts []string `mapstructure:"exclude-host"` - IncludeHosts []string `mapstructure:"include-host"` - MaxConcurrentPerDomain int `mapstructure:"max-concurrent-per-domain"` - ConcurrentSleepLength int `mapstructure:"concurrent-sleep-length"` - CrawlTimeLimit int `mapstructure:"crawl-time-limit"` - CrawlMaxTimeLimit int `mapstructure:"crawl-max-time-limit"` - ExcludeString []string `mapstructure:"exclude-string"` - RandomLocalIP bool `mapstructure:"random-local-ip"` + UserAgent string `mapstructure:"user-agent"` + Job string `mapstructure:"job"` + WorkersCount int `mapstructure:"workers"` + MaxConcurrentAssets int `mapstructure:"max-concurrent-assets"` + MaxHops uint `mapstructure:"max-hops"` + Cookies string `mapstructure:"cookies"` + KeepCookies bool `mapstructure:"keep-cookies"` + Headless bool `mapstructure:"headless"` + LocalSeencheck bool `mapstructure:"local-seencheck"` + JSON bool `mapstructure:"json"` + Debug bool `mapstructure:"debug"` + LiveStats bool `mapstructure:"live-stats"` + API bool `mapstructure:"api"` + APIPort string `mapstructure:"api-port"` + Prometheus bool `mapstructure:"prometheus"` + PrometheusPrefix string `mapstructure:"prometheus-prefix"` + MaxRedirect int `mapstructure:"max-redirect"` + MaxRetry int `mapstructure:"max-retry"` + HTTPTimeout int `mapstructure:"http-timeout"` + DomainsCrawl bool `mapstructure:"domains-crawl"` + DisableHTMLTag []string `mapstructure:"disable-html-tag"` + CaptureAlternatePages bool `mapstructure:"capture-alternate-pages"` + ExcludeHosts []string `mapstructure:"exclude-host"` + IncludeHosts []string `mapstructure:"include-host"` + MaxConcurrentRequestsPerDomain int `mapstructure:"max-concurrent-per-domain"` + ConcurrentSleepLength int `mapstructure:"concurrent-sleep-length"` + CrawlTimeLimit int `mapstructure:"crawl-time-limit"` + CrawlMaxTimeLimit int `mapstructure:"crawl-max-time-limit"` + ExcludeString []string `mapstructure:"exclude-string"` + RandomLocalIP bool `mapstructure:"random-local-ip"` // Get flags (Proxy flags) Proxy string `mapstructure:"proxy"` @@ -76,15 +76,15 @@ type Config struct { HQRateLimitSendBack bool `mapstructure:"hq-rate-limiting-send-back"` // Get flags (Logging flags) - LogFileOutputDir string `mapstructure:"log-file-output-dir"` - ElasticSearchURL string `mapstructure:"es-url"` - ElasticSearchUser string `mapstructure:"es-user"` - ElasticSearchPassword string `mapstructure:"es-password"` - ElasticSearchIndexPrefix string `mapstructure:"es-index-prefix"` + LogFileOutputDir string `mapstructure:"log-file-output-dir"` + ElasticSearchURLs []string `mapstructure:"es-url"` + ElasticSearchUsername string `mapstructure:"es-user"` + ElasticSearchPassword string `mapstructure:"es-password"` + ElasticSearchIndexPrefix string `mapstructure:"es-index-prefix"` } var ( - config Config + config *Config once sync.Once ) @@ -92,6 +92,8 @@ var ( func InitConfig() error { var err error once.Do(func() { + config = &Config{} + // Check if a config file is provided via flag if configFile := viper.GetString("config"); configFile != "" { viper.SetConfigFile(configFile) @@ -117,7 +119,7 @@ func InitConfig() error { } // Unmarshal the config into the Config struct - err = viper.Unmarshal(&config) + err = viper.Unmarshal(config) }) return err } @@ -133,5 +135,9 @@ func BindFlags(flagSet *pflag.FlagSet) { // GetConfig returns the config struct func GetConfig() *Config { - return &config + cfg := config + if cfg == nil { + panic("Config not initialized. Call InitConfig() before accessing the config.") + } + return cfg } diff --git a/go.mod b/go.mod index 0e999a46..cf3bc387 100644 --- a/go.mod +++ b/go.mod @@ -26,6 +26,7 @@ require ( github.com/sirupsen/logrus v1.9.3 github.com/spf13/afero v1.11.0 github.com/spf13/cobra v1.8.0 + github.com/spf13/pflag v1.0.5 github.com/spf13/viper v1.19.0 github.com/stretchr/testify v1.9.0 github.com/telanflow/cookiejar v0.0.0-20190719062046-114449e86aa5 @@ -44,15 +45,14 @@ require ( github.com/bytedance/sonic v1.11.9 // indirect github.com/bytedance/sonic/loader v0.1.1 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect - github.com/chenzhuoyu/base64x v0.0.0-20230717121745-296ad89f973d // indirect - github.com/chenzhuoyu/iasm v0.9.1 // indirect github.com/cloudflare/circl v1.3.9 // indirect github.com/cloudwego/base64x v0.1.4 // indirect github.com/cloudwego/iasm v0.2.0 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect - github.com/davecgh/go-spew v1.1.1 // indirect + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/elastic/elastic-transport-go/v8 v8.6.0 // indirect github.com/fatih/color v1.17.0 // indirect + github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/gabriel-vasile/mimetype v1.4.4 // indirect github.com/gin-contrib/sse v0.1.0 // indirect github.com/go-logr/logr v1.4.2 // indirect @@ -60,7 +60,6 @@ require ( github.com/go-playground/locales v0.14.1 // indirect github.com/go-playground/universal-translator v0.18.1 // indirect github.com/go-playground/validator/v10 v10.22.0 // indirect - github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect github.com/gobwas/httphead v0.1.0 // indirect github.com/gobwas/pool v0.2.1 // indirect github.com/gobwas/ws v1.4.0 // indirect @@ -69,14 +68,12 @@ require ( github.com/gomodule/redigo v1.9.2 // indirect github.com/hashicorp/hcl v1.0.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect - github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect github.com/jonboulle/clockwork v0.4.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/klauspost/compress v1.17.9 // indirect github.com/klauspost/cpuid/v2 v2.2.8 // indirect github.com/klauspost/pgzip v1.2.6 // indirect - github.com/knz/go-libedit v1.10.1 // indirect github.com/leodido/go-urn v1.4.0 // indirect github.com/lestrrat-go/strftime v1.0.6 // indirect github.com/magiconair/properties v1.8.7 // indirect @@ -88,16 +85,15 @@ require ( github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect - github.com/onsi/ginkgo/v2 v2.9.5 // indirect + github.com/onsi/gomega v1.27.6 // indirect github.com/pelletier/go-toml/v2 v2.2.2 // indirect github.com/philippgille/gokv/encoding v0.7.0 // indirect github.com/philippgille/gokv/util v0.7.0 // indirect github.com/pkg/errors v0.9.1 // indirect - github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/common v0.55.0 // indirect github.com/prometheus/procfs v0.15.1 // indirect - github.com/quic-go/quic-go v0.45.1 // indirect github.com/refraction-networking/utls v1.6.6 // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect @@ -106,7 +102,6 @@ require ( github.com/satori/go.uuid v1.2.0 // indirect github.com/sourcegraph/conc v0.3.0 // indirect github.com/spf13/cast v1.6.0 // indirect - github.com/spf13/pflag v1.0.5 // indirect github.com/subosito/gotenv v1.6.0 // indirect github.com/syndtr/goleveldb v1.0.0 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect @@ -115,15 +110,15 @@ require ( go.opentelemetry.io/otel v1.28.0 // indirect go.opentelemetry.io/otel/metric v1.28.0 // indirect go.opentelemetry.io/otel/trace v1.28.0 // indirect - go.uber.org/mock v0.4.0 // indirect + go.uber.org/atomic v1.9.0 // indirect + go.uber.org/multierr v1.9.0 // indirect golang.org/x/arch v0.8.0 // indirect golang.org/x/crypto v0.24.0 // indirect golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect - golang.org/x/mod v0.17.0 // indirect golang.org/x/sync v0.7.0 // indirect golang.org/x/sys v0.21.0 // indirect golang.org/x/text v0.16.0 // indirect - golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect google.golang.org/protobuf v1.34.2 // indirect + gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 28b13339..97e9a64c 100644 --- a/go.sum +++ b/go.sum @@ -1,14 +1,10 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -git.archive.org/wb/gocrawlhq v1.2.4 h1:Z/w1UwFfvq1m03IT0ZMvV6m18DiOgYEGxR5JVgJkQ/s= -git.archive.org/wb/gocrawlhq v1.2.4/go.mod h1:WiuNIB4Toqe8twVvwRu0fTSNC3KXFqA8/mAeaZ3GICE= git.archive.org/wb/gocrawlhq v1.2.5 h1:k8cPZRa+O7nWrGIJntVjLsbOOhoprICfAP8T7yfsvJU= git.archive.org/wb/gocrawlhq v1.2.5/go.mod h1:WiuNIB4Toqe8twVvwRu0fTSNC3KXFqA8/mAeaZ3GICE= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/CorentinB/warc v0.8.39 h1:ZYccm4BKRle10aVPL9EU7nCVe0LImGxkK1fsqIE8Dw8= github.com/CorentinB/warc v0.8.39/go.mod h1:Q9SHKf7pwcqzIWcxlzCtAWN8sKH+Q1BZxq1mSHJ9ttY= -github.com/PuerkitoBio/goquery v1.9.1 h1:mTL6XjbJTZdpfL+Gwl5U2h1l9yEkJjhmlTeV9VPW7UI= -github.com/PuerkitoBio/goquery v1.9.1/go.mod h1:cW1n6TmIMDoORQU5IU/P1T3tGFunOeXEpGP2WHRwkbY= github.com/PuerkitoBio/goquery v1.9.2 h1:4/wZksC3KgkQw7SQgkKotmKljk0M6V8TUvA8Wb4yPeE= github.com/PuerkitoBio/goquery v1.9.2/go.mod h1:GHPCaP0ODyyxqcNoFGYlAprUFH81NuRPd0GX3Zu2Mvk= github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo= @@ -30,44 +26,24 @@ github.com/beeker1121/goque v2.1.0+incompatible/go.mod h1:L6dOWBhDOnxUVQsb0wkLve github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= -github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM= -github.com/bytedance/sonic v1.10.0-rc/go.mod h1:ElCzW+ufi8qKqNW0FY314xriJhyJhuoJ3gFZdAHF7NM= -github.com/bytedance/sonic v1.11.2 h1:ywfwo0a/3j9HR8wsYGWsIWl2mvRsI950HyoxiBERw5A= -github.com/bytedance/sonic v1.11.2/go.mod h1:iZcSUejdk5aukTND/Eu/ivjQuEL0Cu9/rf50Hi0u/g4= github.com/bytedance/sonic v1.11.9 h1:LFHENlIY/SLzDWverzdOvgMztTxcfcF+cqNsz9pK5zg= github.com/bytedance/sonic v1.11.9/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4= github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM= github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU= -github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= -github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY= -github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk= -github.com/chenzhuoyu/base64x v0.0.0-20230717121745-296ad89f973d h1:77cEq6EriyTZ0g/qfRdp61a3Uu/AWrgIq2s0ClJV1g0= -github.com/chenzhuoyu/base64x v0.0.0-20230717121745-296ad89f973d/go.mod h1:8EPpVsBuRksnlj1mLy4AWzRNQYxauNi62uWcE3to6eA= -github.com/chenzhuoyu/iasm v0.9.0/go.mod h1:Xjy2NpN3h7aUqeqM+woSuuvxmIe6+DDsiNLIrkAmYog= -github.com/chenzhuoyu/iasm v0.9.1 h1:tUHQJXo3NhBqw6s33wkGn9SP3bvrWLdlVIJ3hQBL7P0= -github.com/chenzhuoyu/iasm v0.9.1/go.mod h1:Xjy2NpN3h7aUqeqM+woSuuvxmIe6+DDsiNLIrkAmYog= -github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= -github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= -github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/clbanning/mxj/v2 v2.7.0 h1:WA/La7UGCanFe5NpHF0Q3DNtnCsVoxbPKuyBNHWRyME= github.com/clbanning/mxj/v2 v2.7.0/go.mod h1:hNiWqW14h+kc+MdF9C6/YoRfjEJoR3ou6tn/Qo+ve2s= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= -github.com/cloudflare/circl v1.3.7 h1:qlCDlTPz2n9fu58M0Nh1J/JzcFpfgkFHHX3O35r5vcU= -github.com/cloudflare/circl v1.3.7/go.mod h1:sRTcRWXGLrKw6yIGJ+l7amYJFfAXbZG0kBSc8r4zxgA= github.com/cloudflare/circl v1.3.9 h1:QFrlgFYf2Qpi8bSpVPK1HBvWpx16v/1TZivyo7pGuBE= github.com/cloudflare/circl v1.3.9/go.mod h1:PDRU+oXvdD7KCtgKxW95M5Z8BpSCJXQORiZFnBQS5QU= github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y= github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w= github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg= github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY= -github.com/cpuguy83/go-md2man/v2 v2.0.3 h1:qMCsGGgs+MAzDFyp9LpAe1Lqy/fY/qCovCm0qnXZOBM= github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4= github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= @@ -81,8 +57,6 @@ github.com/elastic/elastic-transport-go/v8 v8.6.0 h1:Y2S/FBjx1LlCv5m6pWAF2kDJAHo github.com/elastic/elastic-transport-go/v8 v8.6.0/go.mod h1:YLHer5cj0csTzNFXoNQ8qhtGY1GTvSqPnKWKaqQE3Hk= github.com/elastic/go-elasticsearch/v8 v8.14.0 h1:1ywU8WFReLLcxE1WJqii3hTtbPUE2hc38ZK/j4mMFow= github.com/elastic/go-elasticsearch/v8 v8.14.0/go.mod h1:WRvnlGkSuZyp83M2U8El/LGXpCjYLrvlkSgkAH4O5I4= -github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM= -github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE= github.com/fatih/color v1.17.0 h1:GlRw1BRJxkpqUCBKzKOw098ed57fEsKeNjpTe3cSjK4= github.com/fatih/color v1.17.0/go.mod h1:YZ7TlrGPkiz6ku9fK3TLD/pl3CpsiFyu8N92HLgmosI= github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw= @@ -92,60 +66,38 @@ github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7z github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= -github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0= -github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk= github.com/gabriel-vasile/mimetype v1.4.4 h1:QjV6pZ7/XZ7ryI2KuyeEDE8wnh7fHP9YnQy+R0LnH8I= github.com/gabriel-vasile/mimetype v1.4.4/go.mod h1:JwLei5XPtWdGiMFB5Pjle1oEeoSeEuJfJE+TtfvdB/s= -github.com/gin-contrib/pprof v1.4.0 h1:XxiBSf5jWZ5i16lNOPbMTVdgHBdhfGRD5PZ1LWazzvg= -github.com/gin-contrib/pprof v1.4.0/go.mod h1:RrehPJasUVBPK6yTUwOl8/NP6i0vbUgmxtis+Z5KE90= github.com/gin-contrib/pprof v1.5.0 h1:E/Oy7g+kNw94KfdCy3bZxQFtyDnAX2V7axRS7sNYVrU= github.com/gin-contrib/pprof v1.5.0/go.mod h1:GqFL6LerKoCQ/RSWnkYczkTJ+tOAUVN/8sbnEtaqOKs= github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE= github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= -github.com/gin-gonic/gin v1.8.1/go.mod h1:ji8BvRH1azfM+SYow9zQ6SZMvR8qOMZHmsCuWR9tTTk= -github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg= -github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU= github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU= github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y= github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= -github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= -github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= -github.com/go-playground/locales v0.14.0/go.mod h1:sawfccIbzZTqEDETgFXqTho0QybSa7l++s0DH+LDiLs= github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= -github.com/go-playground/universal-translator v0.18.0/go.mod h1:UvRDBj+xPUEGrFYl+lu/H90nyDXpg0fqeB/AQUGNTVA= github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= -github.com/go-playground/validator/v10 v10.10.0/go.mod h1:74x4gJWsvQexRdW8Pn3dXSGrTK4nAUsbPlLADvpJkos= -github.com/go-playground/validator/v10 v10.19.0 h1:ol+5Fu+cSq9JD7SoSqe04GMI92cbn0+wvQ3bZ8b/AU4= -github.com/go-playground/validator/v10 v10.19.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM= github.com/go-playground/validator/v10 v10.22.0 h1:k6HsTZ0sTnROkhS//R0O+55JgM8C4Bx7ia+JlgcnOao= github.com/go-playground/validator/v10 v10.22.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= -github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= -github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/go-test/deep v1.1.0 h1:WOcxcdHcvdgThNXjw0t76K42FXTU7HpNQWHpA2HHNlg= github.com/go-test/deep v1.1.0/go.mod h1:5C2ZWiW0ErCdrYzpqxLbTX7MG14M9iiw8DgHncVwcsE= github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU= github.com/gobwas/httphead v0.1.0/go.mod h1:O/RXo79gxV8G+RqlR/otEwx4Q36zl9rqC5u12GKvMCM= github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og= github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= -github.com/gobwas/ws v1.3.2 h1:zlnbNHxumkRvfPWgfXu8RBwyNR1x8wh9cf5PTOCqs9Q= -github.com/gobwas/ws v1.3.2/go.mod h1:hRKAFb8wOxFROYNsT1bqfWnhX+b5MFeJM9r2ZSwg/KY= github.com/gobwas/ws v1.4.0 h1:CTaoG1tojrh4ucGPcoJFiAQUAsEWekEWvLy7GsVNqGs= github.com/gobwas/ws v1.4.0/go.mod h1:G3gNqMNtPppf5XUz7O4shetPpcZ1VJ7zt18dlUeakrc= -github.com/goccy/go-json v0.9.7/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= -github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= -github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA= github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= @@ -154,7 +106,6 @@ github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfU github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= @@ -162,12 +113,9 @@ github.com/gomodule/redigo v1.9.2 h1:HrutZBLhSIU8abiSfW8pj8mPhOyMYjZT/wcA4/L9L9s github.com/gomodule/redigo v1.9.2/go.mod h1:KsU3hiK/Ay8U42qpaJk+kuNa3C+spxapWpM+ywhcgtw= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE= -github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg= @@ -183,7 +131,6 @@ github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= -github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/internetarchive/elogrus v0.0.0-20230725172814-093db31a64fc h1:/f7B0tD2oVSHxIrDy9ciEQaVy/I/sMdLwFMVkjh5LLU= github.com/internetarchive/elogrus v0.0.0-20230725172814-093db31a64fc/go.mod h1:oI8WojhCewR2pTR8bAx5/9pJO7ToWuLEzUuVIX3IVYk= github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= @@ -195,31 +142,20 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.17.7 h1:ehO88t2UGzQK66LMdE8tibEd1ErmzZjNEqWkjLAKQQg= -github.com/klauspost/compress v1.17.7/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= -github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM= -github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= github.com/klauspost/cpuid/v2 v2.2.8 h1:+StwCXwm9PdpiEkPyzBXIy+M9KUb4ODm0Zarf1kS5BM= github.com/klauspost/cpuid/v2 v2.2.8/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU= github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= -github.com/knz/go-libedit v1.10.1 h1:0pHpWtx9vcvC0xGZqEQlQdfSQs7WRlAjuPvk3fOZDCo= github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= -github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= -github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= -github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= -github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/leodido/go-urn v1.2.1/go.mod h1:zt4jvISO2HfUBqxjfIshjdMTYS56ZS/qv49ictyFfxY= github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ= github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI= github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc h1:RKf14vYWi2ttpEmkA4aQ3j4u9dStX2t4M8UM6qqNsG8= @@ -236,7 +172,6 @@ github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0 github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= -github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= @@ -259,8 +194,6 @@ github.com/olivere/elastic/v7 v7.0.32/go.mod h1:c7PVmLe3Fxq77PIfY/bZmxY/TAamBhCz github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.7.0 h1:WSHQ+IS43OoUrWtD1/bbclrwK8TTH5hzp+umCiuxHgs= github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q= -github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k= github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE= github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg= @@ -268,9 +201,6 @@ github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFSt github.com/openzipkin/zipkin-go v0.1.6/go.mod h1:QgAqvLzwWbR/WpD4A3cGpPtJrZXNIiJc5AZX7/PBEpw= github.com/paulbellamy/ratecounter v0.2.0 h1:2L/RhJq+HA8gBQImDXtLPrDXK5qAj6ozWVK/zFXVJGs= github.com/paulbellamy/ratecounter v0.2.0/go.mod h1:Hfx1hDpSGoqxkVVpBi/IlYD7kChlfo5C6hzIHwPqfFE= -github.com/pelletier/go-toml/v2 v2.0.1/go.mod h1:r9LEWfGN8R5k0VXJ+0BkIe7MYkRdwZOjgMj2KwnJFUo= -github.com/pelletier/go-toml/v2 v2.1.1 h1:LWAJwfNvjQZCFIDKWYQaM62NcYeYViCmWIwmOStowAI= -github.com/pelletier/go-toml/v2 v2.1.1/go.mod h1:tJU2Z3ZkXwnxa4DPO899bsyIoywizdUvyaeZurnPPDc= github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM= github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs= github.com/philippgille/gokv v0.7.0 h1:rQSIQspete82h78Br7k7rKUZ8JYy/hWlwzm/W5qobPI= @@ -284,7 +214,6 @@ github.com/philippgille/gokv/test v0.7.0/go.mod h1:TP/VzO/qAoi6njsfKnRpXKno0hRuz github.com/philippgille/gokv/util v0.7.0 h1:5avUK/a3aSj/aWjhHv4/FkqgMon2B7k2BqFgLcR+DYg= github.com/philippgille/gokv/util v0.7.0/go.mod h1:i9KLHbPxGiHLMhkix/CcDQhpPbCkJy5BkW+RKgwDHMo= github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= -github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= @@ -294,34 +223,20 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v0.9.3-0.20190127221311-3c4408c8b829/go.mod h1:p2iRAGwDERtqlqzRXnrOVns+ignqQo//hLXqYxZYVNs= -github.com/prometheus/client_golang v1.19.0 h1:ygXvpU1AoN1MhdzckN+PyD9QJOSD4x7kmXYlnfbA6JU= -github.com/prometheus/client_golang v1.19.0/go.mod h1:ZRM9uEAypZakd+q/x7+gmsvXdURP+DABIEIjnmDdp+k= github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE= github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190115171406-56726106282f/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= -github.com/prometheus/client_model v0.6.0 h1:k1v3CzpSRUTrKMppY35TLwPvxHqBu0bYgxZzqGIgaos= -github.com/prometheus/client_model v0.6.0/go.mod h1:NTQHnmxFpouOD0DpvP4XujX3CdOAGQPoaGhyTchlyt8= github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= github.com/prometheus/common v0.2.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= -github.com/prometheus/common v0.53.0 h1:U2pL9w9nmJwJDa4qqLQ3ZaePJ6ZTwt7cMD3AG3+aLCE= -github.com/prometheus/common v0.53.0/go.mod h1:BrxBKv3FWBIGXw89Mg1AeBq7FSyRzXWI3l3e7W3RN5U= github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc= github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.0-20190117184657-bf6a532e95b1/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= -github.com/prometheus/procfs v0.13.0 h1:GqzLlQyfsPbaEHaQkO7tbDlriv/4o5Hudv6OXHGKX7o= -github.com/prometheus/procfs v0.13.0/go.mod h1:cd4PFCR54QLnGKPaKGA6l+cfuNXtht43ZKY6tow0Y1g= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= -github.com/quic-go/quic-go v0.41.0 h1:aD8MmHfgqTURWNJy48IYFg2OnxwHT3JL7ahGs73lb4k= -github.com/quic-go/quic-go v0.41.0/go.mod h1:qCkNjqczPEvgsOnxZ0eCD14lv+B2LHlFAB++CNOh9hA= -github.com/quic-go/quic-go v0.45.1 h1:tPfeYCk+uZHjmDRwHHQmvHRYL2t44ROTujLeFVBmjCA= -github.com/quic-go/quic-go v0.45.1/go.mod h1:1dLehS7TIR64+vxGR70GDcatWTOtMX2PUtnKsjbTurI= github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= -github.com/refraction-networking/utls v1.6.3 h1:MFOfRN35sSx6K5AZNIoESsBuBxS2LCgRilRIdHb6fDc= -github.com/refraction-networking/utls v1.6.3/go.mod h1:yil9+7qSl+gBwJqztoQseO6Pr3h62pQoY1lXiNR/FPs= github.com/refraction-networking/utls v1.6.6 h1:igFsYBUJPYM8Rno9xUuDoM5GQrVEqY4llzEXOkL43Ig= github.com/refraction-networking/utls v1.6.6/go.mod h1:BC3O4vQzye5hqpmDTWUqi4P5DDhzJfkV1tdqtawQIH0= github.com/remeh/sizedwaitgroup v1.0.0 h1:VNGGFwNo/R5+MJBf6yrsr110p0m4/OX4S3DCy7Kyl5E= @@ -329,8 +244,6 @@ github.com/remeh/sizedwaitgroup v1.0.0/go.mod h1:3j2R4OIe/SeS6YDhICBy22RWjJC5eNC github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= -github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= -github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= @@ -365,7 +278,6 @@ github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpE github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= @@ -383,16 +295,10 @@ github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80 h1:nrZ3ySNYwJ github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80/go.mod h1:iFyPdL66DjUD96XmzVL3ZntbzcflLnznH0fr99w5VqE= github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= -github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6M= -github.com/ugorji/go/codec v1.2.7/go.mod h1:WGN1fab3R1fzQlVQTkfxVtIBhWDRqOviHU95kRgeqEY= github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE= github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= -github.com/urfave/cli/v2 v2.27.1 h1:8xSQ6szndafKVRmfyeUMxkNUJQMjL1F2zmsZ+qHpfho= -github.com/urfave/cli/v2 v2.27.1/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ= github.com/urfave/cli/v2 v2.27.2 h1:6e0H+AkS+zDckwPCUrZkKX38mRaau4nL2uipkJpbkcI= github.com/urfave/cli/v2 v2.27.2/go.mod h1:g0+79LmHHATl7DAcHO99smiR/T7uGLw84w8Y42x+4eM= -github.com/xrash/smetrics v0.0.0-20231213231151-1d8dd44e695e h1:+SOyEddqYF09QP7vr7CgJ1eti3pY9Fn3LHO1M1r/0sI= -github.com/xrash/smetrics v0.0.0-20231213231151-1d8dd44e695e/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8= github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4= github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= @@ -401,58 +307,36 @@ github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= go.opencensus.io v0.20.1/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk= -go.opentelemetry.io/otel v1.24.0 h1:0LAOdjNmQeSTzGBzduGe/rU4tZhMwL5rWgtp9Ku5Jfo= -go.opentelemetry.io/otel v1.24.0/go.mod h1:W7b9Ozg4nkF5tWI5zsXkaKKDjdVjpD4oAt9Qi/MArHo= go.opentelemetry.io/otel v1.28.0 h1:/SqNcYk+idO0CxKEUOtKQClMK/MimZihKYMruSMViUo= go.opentelemetry.io/otel v1.28.0/go.mod h1:q68ijF8Fc8CnMHKyzqL6akLO46ePnjkgfIMIjUIX9z4= -go.opentelemetry.io/otel/metric v1.24.0 h1:6EhoGWWK28x1fbpA4tYTOWBkPefTDQnb8WSGXlc88kI= -go.opentelemetry.io/otel/metric v1.24.0/go.mod h1:VYhLe1rFfxuTXLgj4CBiyz+9WYBA8pNGJgDcSFRKBco= go.opentelemetry.io/otel/metric v1.28.0 h1:f0HGvSl1KRAU1DLgLGFjrwVyismPlnuU6JD6bOeuA5Q= go.opentelemetry.io/otel/metric v1.28.0/go.mod h1:Fb1eVBFZmLVTMb6PPohq3TO9IIhUisDsbJoL/+uQW4s= go.opentelemetry.io/otel/sdk v1.21.0 h1:FTt8qirL1EysG6sTQRZ5TokkU8d0ugCj8htOgThZXQ8= go.opentelemetry.io/otel/sdk v1.21.0/go.mod h1:Nna6Yv7PWTdgJHVRD9hIYywQBRx7pbox6nwBnZIxl/E= -go.opentelemetry.io/otel/trace v1.24.0 h1:CsKnnL4dUAr/0llH9FKuc698G04IrpWV0MQA/Y1YELI= -go.opentelemetry.io/otel/trace v1.24.0/go.mod h1:HPc3Xr/cOApsBI154IU0OI0HJexz+aw5uPdbs3UCjNU= +go.opentelemetry.io/otel/trace v1.28.0 h1:GhQ9cUuQGmNDd5BTCP2dAvv75RdMxEfTmYejp+lkx9g= +go.opentelemetry.io/otel/trace v1.28.0/go.mod h1:jPyXzNPg6da9+38HEwElrQiHlVMTnVfM3/yv2OlIHaI= go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE= go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.9.0 h1:7fIwc/ZtS0q++VgcfqFDxSBZVv/Xo49/SYnDFupUwlI= go.uber.org/multierr v1.9.0/go.mod h1:X2jQV1h+kxSjClGpnseKVIxpmcjrj7MNnI0bnlfKTVQ= -go.opentelemetry.io/otel/trace v1.28.0 h1:GhQ9cUuQGmNDd5BTCP2dAvv75RdMxEfTmYejp+lkx9g= -go.opentelemetry.io/otel/trace v1.28.0/go.mod h1:jPyXzNPg6da9+38HEwElrQiHlVMTnVfM3/yv2OlIHaI= -go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= -go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= -go.uber.org/mock v0.4.0 h1:VcM4ZOtdbR4f6VXfiOpwpVJDL6lCReaZ6mw31wqh7KU= -go.uber.org/mock v0.4.0/go.mod h1:a6FSlNadKUHUa9IP5Vyt1zh4fC7uAwxMutEAscFbkZc= golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= -golang.org/x/arch v0.7.0 h1:pskyeJh/3AmoQ8CPE95vxHLqp1G1GfGNXTmcl9NEKTc= -golang.org/x/arch v0.7.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc= golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= -golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= golang.org/x/crypto v0.24.0 h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI= golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -<<<<<<< HEAD -golang.org/x/exp v0.0.0-20230905200255-921286631fa9 h1:GoHiUyI/Tp2nVkLI2mCxVkOjsbSXD66ic0XW0js0R9g= -golang.org/x/exp v0.0.0-20230905200255-921286631fa9/go.mod h1:S2oDrQGGwySpoQPVqRShND87VCbxmc6bL1Yd2oYrm6k= -======= golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 h1:vr/HnozRka3pE4EsMEg1lgkXJkTFJCVUX+S/ZT6wYzM= golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842/go.mod h1:XtvwrStGgqGPLc4cjQfWqZHG1YFdYs6swckp8vpsjnc= ->>>>>>> main golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA= -golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -466,15 +350,8 @@ golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= -<<<<<<< HEAD -golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= -golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= -======= -golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc= -golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= ->>>>>>> main golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -484,8 +361,6 @@ golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= -golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -495,11 +370,8 @@ golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -507,8 +379,6 @@ golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= -golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= @@ -517,12 +387,9 @@ golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= golang.org/x/tools v0.0.0-20180828015842-6cd1fcedba52/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -533,17 +400,7 @@ golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3 golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= -<<<<<<< HEAD -golang.org/x/tools v0.13.0 h1:Iey4qkscZuv0VvIt8E0neZjtPVQFSc870HQ448QgEmQ= -golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= -======= -golang.org/x/tools v0.9.1 h1:8WMNJAz3zrtPmnYC7ISf5dEn3MT0gY7jBJfw27yrrLo= -golang.org/x/tools v0.9.1/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc= -golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg= -golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= ->>>>>>> main golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/api v0.3.1/go.mod h1:6wY9I6uQWHQ8EM57III9mq/AjF+i8G65rmVagqKMtkk= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -551,10 +408,6 @@ google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoA google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= -google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= -google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= -google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= -google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= @@ -562,7 +415,6 @@ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8 gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= @@ -571,9 +423,7 @@ gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkep gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.0-20180728063816-88497007e858/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= diff --git a/internal/pkg/crawl/config.go b/internal/pkg/crawl/config.go index f68d054b..31ca73a5 100644 --- a/internal/pkg/crawl/config.go +++ b/internal/pkg/crawl/config.go @@ -1,12 +1,16 @@ package crawl import ( + "log/slog" "net/http" + "path" + "path/filepath" "sync" "time" "git.archive.org/wb/gocrawlhq" "github.com/CorentinB/warc" + "github.com/google/uuid" "github.com/internetarchive/Zeno/config/v2" "github.com/internetarchive/Zeno/internal/pkg/frontier" "github.com/internetarchive/Zeno/internal/pkg/log" @@ -114,5 +118,158 @@ type Crawl struct { } func GenerateCrawlConfig(config *config.Config) (*Crawl, error) { - return nil, nil + var c = new(Crawl) + + // Ensure that the log file output directory is well parsed + logfileOutputDir := filepath.Dir(config.LogFileOutputDir) + if logfileOutputDir == "." && config.LogFileOutputDir != "." { + logfileOutputDir = filepath.Dir(config.LogFileOutputDir + "/") + } + + // Logger + customLoggerConfig := log.Config{ + FileConfig: &log.LogfileConfig{ + Dir: logfileOutputDir, + Prefix: "zeno", + }, + FileLevel: slog.LevelDebug, + StdoutLevel: slog.LevelInfo, + RotateLogFile: true, + RotateElasticSearchIndex: true, + ElasticsearchConfig: &log.ElasticsearchConfig{ + Addresses: config.ElasticSearchURLs, + Username: config.ElasticSearchUsername, + Password: config.ElasticSearchPassword, + IndexPrefix: config.ElasticSearchIndexPrefix, + Level: slog.LevelDebug, + }, + } + if len(config.ElasticSearchURLs) == 0 || (config.ElasticSearchUsername == "" && config.ElasticSearchPassword == "") { + customLoggerConfig.ElasticsearchConfig = nil + } + + customLogger, err := log.New(customLoggerConfig) + if err != nil { + return nil, err + } + c.Log = customLogger + + // Statistics counters + c.CrawledSeeds = new(ratecounter.Counter) + c.CrawledAssets = new(ratecounter.Counter) + c.ActiveWorkers = new(ratecounter.Counter) + c.URIsPerSecond = ratecounter.NewRateCounter(1 * time.Second) + + c.LiveStats = config.LiveStats + + // Frontier + c.Frontier = new(frontier.Frontier) + c.Frontier.Log = c.Log + + // If the job name isn't specified, we generate a random name + if config.Job == "" { + if config.HQProject != "" { + c.Job = config.HQProject + } else { + UUID, err := uuid.NewUUID() + if err != nil { + c.Log.Error("cmd/utils.go:InitCrawlWithCMD():uuid.NewUUID()", "error", err) + return nil, err + } + + c.Job = UUID.String() + } + } else { + c.Job = config.Job + } + + c.JobPath = path.Join("jobs", config.Job) + + c.Workers = config.WorkersCount + c.WorkerPool = make([]*Worker, 0) + c.WorkerStopTimeout = time.Second * 60 // Placeholder for WorkerStopTimeout + c.MaxConcurrentAssets = config.MaxConcurrentAssets + c.WorkerStopSignal = make(chan bool) + + c.Seencheck = config.LocalSeencheck + c.HTTPTimeout = config.HTTPTimeout + c.MaxConcurrentRequestsPerDomain = config.MaxConcurrentRequestsPerDomain + c.RateLimitDelay = config.ConcurrentSleepLength + c.CrawlTimeLimit = config.CrawlTimeLimit + + // Defaults --max-crawl-time-limit to 10% more than --crawl-time-limit + if config.CrawlMaxTimeLimit == 0 && config.CrawlTimeLimit != 0 { + c.MaxCrawlTimeLimit = config.CrawlTimeLimit + (config.CrawlTimeLimit / 10) + } else { + c.MaxCrawlTimeLimit = config.CrawlMaxTimeLimit + } + + c.MaxRetry = config.MaxRetry + c.MaxRedirect = config.MaxRedirect + c.MaxHops = uint8(config.MaxHops) + c.DomainsCrawl = config.DomainsCrawl + c.DisableAssetsCapture = config.DisableAssetsCapture + c.DisabledHTMLTags = config.DisableHTMLTag + c.ExcludedHosts = config.ExcludeHosts + c.IncludedHosts = config.IncludeHosts + c.CaptureAlternatePages = config.CaptureAlternatePages + c.ExcludedStrings = config.ExcludeString + + // WARC settings + c.WARCPrefix = config.WARCPrefix + c.WARCOperator = config.WARCOperator + + if config.WARCTempDir != "" { + c.WARCTempDir = config.WARCTempDir + } else { + c.WARCTempDir = path.Join(c.JobPath, "temp") + } + + c.CDXDedupeServer = config.CDXDedupeServer + c.DisableLocalDedupe = config.DisableLocalDedupe + c.CertValidation = config.CertValidation + c.WARCFullOnDisk = config.WARCOnDisk + c.WARCPoolSize = config.WARCPoolSize + c.WARCDedupSize = config.WARCDedupeSize + c.WARCCustomCookie = config.CDXCookie + + c.API = config.API + c.APIPort = config.APIPort + + // If Prometheus is specified, then we make sure + // c.API is true + c.Prometheus = config.Prometheus + if c.Prometheus { + c.API = true + c.PrometheusMetrics = new(PrometheusMetrics) + c.PrometheusMetrics.Prefix = config.PrometheusPrefix + } + + if config.UserAgent != "Zeno" { + c.UserAgent = config.UserAgent + } else { + version := utils.GetVersion() + c.UserAgent = "Mozilla/5.0 (compatible; archive.org_bot +http://archive.org/details/archive.org_bot) Zeno/" + version.Version[:7] + " warc/" + version.WarcVersion + } + c.Headless = config.Headless + + c.CookieFile = config.Cookies + c.KeepCookies = config.KeepCookies + + // Proxy settings + c.Proxy = config.Proxy + c.BypassProxy = config.DomainsBypassProxy + + // Crawl HQ settings + c.UseHQ = config.HQ + c.HQProject = config.HQProject + c.HQAddress = config.HQAddress + c.HQKey = config.HQKey + c.HQSecret = config.HQSecret + c.HQStrategy = config.HQStrategy + c.HQBatchSize = int(config.HQBatchSize) + c.HQContinuousPull = config.HQContinuousPull + c.HQRateLimitingSendBack = config.HQRateLimitSendBack + + return c, nil } diff --git a/main.go b/main.go index e6f0213a..1d33dea7 100644 --- a/main.go +++ b/main.go @@ -10,7 +10,7 @@ import ( ) func main() { - if err := cmd.Execute(); err != nil { + if err := cmd.Run(); err != nil { fmt.Println(err) os.Exit(1) } From 03b939fb2a6cbba2bf4e7a3720cc9ecafb7fd8ee Mon Sep 17 00:00:00 2001 From: Thomas FOUBERT Date: Sun, 7 Jul 2024 16:05:27 -0400 Subject: [PATCH 04/12] feat: implemented the rest of the commands, working --- cmd/v2/get.go | 205 ++++++++++++++++++++++++++++++++------------ config/v2/config.go | 2 +- 2 files changed, 149 insertions(+), 58 deletions(-) diff --git a/cmd/v2/get.go b/cmd/v2/get.go index b7616e4b..5c29f10c 100644 --- a/cmd/v2/get.go +++ b/cmd/v2/get.go @@ -21,6 +21,16 @@ func getCMDs() *cobra.Command { }, } + getCMDsFlags(getCmd) + + getCmd.AddCommand(getURLCmd) + getCmd.AddCommand(getHQCmd) + getCmd.AddCommand(getListCmd) + + return getCmd +} + +func getCMDsFlags(getCmd *cobra.Command) { getCmd.PersistentFlags().String("user-agent", "Zeno", "User agent to use when requesting URLs.") getCmd.PersistentFlags().String("job", "", "Job name to use, will determine the path for the persistent queue, seencheck database, and WARC files.") getCmd.PersistentFlags().Int("workers", 1, "Number of concurrent workers to run.") @@ -69,82 +79,163 @@ func getCMDs() *cobra.Command { getCmd.PersistentFlags().Int("warc-dedupe-size", 1024, "Minimum size to deduplicate WARC records with revisit records.") getCmd.PersistentFlags().String("cdx-cookie", "", "Pass custom cookie during CDX requests. Example: 'cdx_auth_token=test_value'") - // Crawl HQ flags - getCmd.PersistentFlags().Bool("hq", false, "Use Crawl HQ to pull URLs to process.") - getCmd.PersistentFlags().String("hq-address", "", "Crawl HQ address.") - getCmd.PersistentFlags().String("hq-key", "", "Crawl HQ key.") - getCmd.PersistentFlags().String("hq-secret", "", "Crawl HQ secret.") - getCmd.PersistentFlags().String("hq-project", "", "Crawl HQ project.") - getCmd.PersistentFlags().Int64("hq-batch-size", 0, "Crawl HQ feeding batch size.") - getCmd.PersistentFlags().Bool("hq-continuous-pull", false, "If turned on, the crawler will pull URLs from Crawl HQ continuously.") - getCmd.PersistentFlags().String("hq-strategy", "lifo", "Crawl HQ feeding strategy.") - getCmd.PersistentFlags().Bool("hq-rate-limiting-send-back", false, "If turned on, the crawler will send back URLs that hit a rate limit to crawl HQ.") - // Logging flags getCmd.PersistentFlags().String("log-file-output-dir", "./jobs/", "Directory to write log files to.") getCmd.PersistentFlags().String("es-url", "", "comma-separated ElasticSearch URL to use for indexing crawl logs.") getCmd.PersistentFlags().String("es-user", "", "ElasticSearch username to use for indexing crawl logs.") getCmd.PersistentFlags().String("es-password", "", "ElasticSearch password to use for indexing crawl logs.") getCmd.PersistentFlags().String("es-index-prefix", "zeno", "ElasticSearch index prefix to use for indexing crawl logs. Default is : `zeno`, without `-`") - - getURLCmd := getURLCmd() - getCmd.AddCommand(getURLCmd) - - return getCmd } -func getURLCmd() *cobra.Command { - getURLCmd := &cobra.Command{ - Use: "url [URL...]", - Short: "Archive given URLs", - Args: cobra.MinimumNArgs(1), - PreRunE: func(cmd *cobra.Command, args []string) error { - if cfg == nil { - return fmt.Errorf("viper config is nil") +var getURLCmd = &cobra.Command{ + Use: "url [URL...]", + Short: "Archive given URLs", + Args: cobra.MinimumNArgs(1), + PreRunE: func(cmd *cobra.Command, args []string) error { + if cfg == nil { + return fmt.Errorf("viper config is nil") + } + return nil + }, + RunE: func(cmd *cobra.Command, args []string) error { + // Init crawl using the flags provided + crawl, err := crawl.GenerateCrawlConfig(cfg) + if err != nil { + if crawl != nil && crawl.Log != nil { + crawl.Log.WithFields(logrus.Fields{ + "crawl": crawl, + "err": err.Error(), + }).Error("'get url' exited due to error") } - return nil - }, - RunE: func(cmd *cobra.Command, args []string) error { - // Init crawl using the flags provided - crawl, err := crawl.GenerateCrawlConfig(cfg) + return err + } + + // Initialize initial seed list + for _, arg := range args { + input, err := url.Parse(arg) if err != nil { - if crawl != nil && crawl.Log != nil { - crawl.Log.WithFields(logrus.Fields{ - "crawl": crawl, - "err": err.Error(), - }).Error("'get url' exited due to error") - } + crawl.Log.WithFields(logrus.Fields{ + "input_url": arg, + "err": err.Error(), + }).Error("given URL is not a valid input") return err } - // Initialize initial seed list - for _, arg := range args { - input, err := url.Parse(arg) - if err != nil { - crawl.Log.WithFields(logrus.Fields{ - "input_url": arg, - "err": err.Error(), - }).Error("given URL is not a valid input") - return err - } - - crawl.SeedList = append(crawl.SeedList, *frontier.NewItem(input, nil, "seed", 0, "", false)) - } + crawl.SeedList = append(crawl.SeedList, *frontier.NewItem(input, nil, "seed", 0, "", false)) + } - // Start crawl - err = crawl.Start() - if err != nil { + // Start crawl + err = crawl.Start() + if err != nil { + crawl.Log.WithFields(logrus.Fields{ + "crawl": crawl, + "err": err.Error(), + }).Error("'get url' Crawl() exited due to error") + return err + } + + crawl.Log.Info("Crawl finished") + return err + }, +} + +var getHQCmd = &cobra.Command{ + Use: "hq", + Short: "Start crawling with the crawl HQ connector.", + PreRunE: func(cmd *cobra.Command, args []string) error { + if cfg == nil { + return fmt.Errorf("viper config is nil") + } + cfg.HQ = true + return nil + }, + RunE: func(cmd *cobra.Command, args []string) error { + // Init crawl using the flags provided + crawl, err := crawl.GenerateCrawlConfig(cfg) + if err != nil { + if crawl != nil && crawl.Log != nil { crawl.Log.WithFields(logrus.Fields{ "crawl": crawl, "err": err.Error(), - }).Error("crawl exited due to error") - return err + }).Error("'get hq' exited due to error") } + return err + } + + // start crawl + err = crawl.Start() + if err != nil { + logrus.WithFields(logrus.Fields{ + "crawl": crawl, + "err": err.Error(), + }).Error("'get hq' Crawl() exited due to error") + return err + } + + return nil + }, +} + +func getHQCmdFlags(getHQCmd *cobra.Command) { + // Crawl HQ flags + getHQCmd.PersistentFlags().String("hq-address", "", "Crawl HQ address.") + getHQCmd.PersistentFlags().String("hq-key", "", "Crawl HQ key.") + getHQCmd.PersistentFlags().String("hq-secret", "", "Crawl HQ secret.") + getHQCmd.PersistentFlags().String("hq-project", "", "Crawl HQ project.") + getHQCmd.PersistentFlags().Int64("hq-batch-size", 0, "Crawl HQ feeding batch size.") + getHQCmd.PersistentFlags().Bool("hq-continuous-pull", false, "If turned on, the crawler will pull URLs from Crawl HQ continuously.") + getHQCmd.PersistentFlags().String("hq-strategy", "lifo", "Crawl HQ feeding strategy.") + getHQCmd.PersistentFlags().Bool("hq-rate-limiting-send-back", false, "If turned on, the crawler will send back URLs that hit a rate limit to crawl HQ.") +} - crawl.Log.Info("Crawl finished") +var getListCmd = &cobra.Command{ + Use: "list [FILE]", + Short: "Start crawling with a seed list", + Args: cobra.ExactArgs(1), + PreRunE: func(cmd *cobra.Command, args []string) error { + if cfg == nil { + return fmt.Errorf("viper config is nil") + } + return nil + }, + RunE: func(cmd *cobra.Command, args []string) error { + // Init crawl using the flags provided + crawl, err := crawl.GenerateCrawlConfig(cfg) + if err != nil { + if crawl != nil && crawl.Log != nil { + crawl.Log.WithFields(logrus.Fields{ + "crawl": crawl, + "err": err.Error(), + }).Error("'get hq' exited due to error") + } return err - }, - } + } + + // Initialize initial seed list + crawl.SeedList, err = frontier.IsSeedList(args[0]) + if err != nil || len(crawl.SeedList) <= 0 { + logrus.WithFields(logrus.Fields{ + "input": args[0], + "err": err.Error(), + }).Error("This is not a valid input") + return err + } + + logrus.WithFields(logrus.Fields{ + "input": args[0], + "seedsCount": len(crawl.SeedList), + }).Print("Seed list loaded") + + // Start crawl + err = crawl.Start() + if err != nil { + logrus.WithFields(logrus.Fields{ + "crawl": crawl, + "err": err.Error(), + }).Error("Crawl exited due to error") + return err + } - return getURLCmd + return nil + }, } diff --git a/config/v2/config.go b/config/v2/config.go index c6590648..db3d126e 100644 --- a/config/v2/config.go +++ b/config/v2/config.go @@ -65,7 +65,7 @@ type Config struct { CDXCookie string `mapstructure:"cdx-cookie"` // Get flags (Crawl HQ flags) - HQ bool `mapstructure:"hq"` + HQ bool // Special field to check if HQ is enabled depending on the command called HQAddress string `mapstructure:"hq-address"` HQKey string `mapstructure:"hq-key"` HQSecret string `mapstructure:"hq-secret"` From e4b25af28e18ceb223a4c995343b7a1d203920bc Mon Sep 17 00:00:00 2001 From: Thomas FOUBERT Date: Sun, 7 Jul 2024 17:14:52 -0400 Subject: [PATCH 05/12] chore: removed cli+config v1 packages and set v2 as the actual version --- cmd/{v2 => }/cmd.go | 2 +- cmd/{v2 => }/get.go | 0 cmd/v1/all/all.go | 6 - cmd/v1/cmd.go | 350 ----------------------------------- cmd/v1/get/get.go | 35 ---- cmd/v1/get/hq.go | 42 ----- cmd/v1/get/list.go | 58 ------ cmd/v1/get/url.go | 57 ------ cmd/v1/utils.go | 184 ------------------ cmd/v1/version/deps.go | 31 ---- cmd/v1/version/version.go | 28 --- config/{v2 => }/config.go | 0 config/v1/config.go | 81 -------- go.mod | 4 - go.sum | 7 - internal/pkg/crawl/config.go | 2 +- main.go | 10 +- 17 files changed, 11 insertions(+), 886 deletions(-) rename cmd/{v2 => }/cmd.go (96%) rename cmd/{v2 => }/get.go (100%) delete mode 100644 cmd/v1/all/all.go delete mode 100644 cmd/v1/cmd.go delete mode 100644 cmd/v1/get/get.go delete mode 100644 cmd/v1/get/hq.go delete mode 100644 cmd/v1/get/list.go delete mode 100644 cmd/v1/get/url.go delete mode 100644 cmd/v1/utils.go delete mode 100644 cmd/v1/version/deps.go delete mode 100644 cmd/v1/version/version.go rename config/{v2 => }/config.go (100%) delete mode 100644 config/v1/config.go diff --git a/cmd/v2/cmd.go b/cmd/cmd.go similarity index 96% rename from cmd/v2/cmd.go rename to cmd/cmd.go index 6a313b5d..c23451a3 100644 --- a/cmd/v2/cmd.go +++ b/cmd/cmd.go @@ -3,7 +3,7 @@ package cmd import ( "fmt" - "github.com/internetarchive/Zeno/config/v2" + "github.com/internetarchive/Zeno/config" "github.com/spf13/cobra" ) diff --git a/cmd/v2/get.go b/cmd/get.go similarity index 100% rename from cmd/v2/get.go rename to cmd/get.go diff --git a/cmd/v1/all/all.go b/cmd/v1/all/all.go deleted file mode 100644 index a5ac5d6c..00000000 --- a/cmd/v1/all/all.go +++ /dev/null @@ -1,6 +0,0 @@ -package all - -import ( - _ "github.com/internetarchive/Zeno/cmd/v1/get" - _ "github.com/internetarchive/Zeno/cmd/v1/version" -) diff --git a/cmd/v1/cmd.go b/cmd/v1/cmd.go deleted file mode 100644 index c79cdcd2..00000000 --- a/cmd/v1/cmd.go +++ /dev/null @@ -1,350 +0,0 @@ -package cmd - -import ( - "os" - - "github.com/sirupsen/logrus" - "github.com/urfave/cli/v2" - - "github.com/internetarchive/Zeno/config/v1" -) - -var GlobalFlags = []cli.Flag{ - &cli.StringFlag{ - Name: "user-agent", - Value: "Zeno", - Usage: "User agent to use when requesting URLs.", - Destination: &config.App.Flags.UserAgent, - }, - &cli.StringFlag{ - Name: "job", - Value: "", - Usage: "Job name to use, will determine the path for the persistent queue, seencheck database, and WARC files.", - Destination: &config.App.Flags.Job, - }, - &cli.IntFlag{ - Name: "workers", - Aliases: []string{"w"}, - Value: 1, - Usage: "Number of concurrent workers to run.", - Destination: &config.App.Flags.Workers, - }, - &cli.IntFlag{ - Name: "max-concurrent-assets", - Aliases: []string{"ca"}, - Value: 8, - Usage: "Max number of concurrent assets to fetch PER worker. E.g. if you have 100 workers and this setting at 8, Zeno could do up to 800 concurrent requests at any time.", - Destination: &config.App.Flags.MaxConcurrentAssets, - }, - &cli.UintFlag{ - Name: "max-hops", - Aliases: []string{"hops"}, - Value: 0, - Usage: "Maximum number of hops to execute.", - Destination: &config.App.Flags.MaxHops, - }, - &cli.StringFlag{ - Name: "cookies", - Usage: "File containing cookies that will be used for requests.", - Destination: &config.App.Flags.CookieFile, - }, - &cli.BoolFlag{ - Name: "keep-cookies", - Usage: "Keep a global cookie jar", - Destination: &config.App.Flags.KeepCookies, - }, - &cli.BoolFlag{ - Name: "headless", - Usage: "Use headless browsers instead of standard GET requests.", - Destination: &config.App.Flags.Headless, - }, - &cli.BoolFlag{ - Name: "local-seencheck", - Usage: "Simple local seencheck to avoid re-crawling of URIs.", - Destination: &config.App.Flags.Seencheck, - }, - &cli.BoolFlag{ - Name: "json", - Usage: "Output logs in JSON", - Destination: &config.App.Flags.JSON, - }, - &cli.BoolFlag{ - Name: "debug", - Destination: &config.App.Flags.Debug, - }, - &cli.BoolFlag{ - Name: "live-stats", - Destination: &config.App.Flags.LiveStats, - }, - - &cli.BoolFlag{ - Name: "api", - Destination: &config.App.Flags.API, - }, - &cli.StringFlag{ - Name: "api-port", - Value: "9443", - Usage: "Port to listen on for the API.", - Destination: &config.App.Flags.APIPort, - }, - &cli.BoolFlag{ - Name: "prometheus", - Destination: &config.App.Flags.Prometheus, - Usage: "Export metrics in Prometheus format, using this setting imply --api.", - }, - &cli.StringFlag{ - Name: "prometheus-prefix", - Destination: &config.App.Flags.PrometheusPrefix, - Usage: "String used as a prefix for the exported Prometheus metrics.", - Value: "zeno:", - }, - - &cli.IntFlag{ - Name: "max-redirect", - Value: 20, - Usage: "Specifies the maximum number of redirections to follow for a resource.", - Destination: &config.App.Flags.MaxRedirect, - }, - &cli.IntFlag{ - Name: "max-retry", - Value: 20, - Usage: "Number of retry if error happen when executing HTTP request.", - Destination: &config.App.Flags.MaxRetry, - }, - &cli.IntFlag{ - Name: "http-timeout", - Value: 30, - Usage: "Number of seconds to wait before timing out a request.", - Destination: &config.App.Flags.HTTPTimeout, - }, - &cli.BoolFlag{ - Name: "domains-crawl", - Usage: "If this is turned on, seeds will be treated as domains to crawl, therefore same-domain outlinks will be added to the queue as hop=0.", - Destination: &config.App.Flags.DomainsCrawl, - }, - &cli.StringSliceFlag{ - Name: "disable-html-tag", - Usage: "Specify HTML tag to not extract assets from", - Destination: &config.App.Flags.DisabledHTMLTags, - }, - &cli.BoolFlag{ - Name: "capture-alternate-pages", - Value: false, - Usage: "If turned on, HTML tags with \"alternate\" values for their \"rel\" attribute will be archived.", - Destination: &config.App.Flags.CaptureAlternatePages, - }, - &cli.StringSliceFlag{ - Name: "exclude-host", - Usage: "Exclude a specific host from the crawl, note that it will not exclude the domain if it is encountered as an asset for another web page.", - Destination: &config.App.Flags.ExcludedHosts, - }, - &cli.StringSliceFlag{ - Name: "include-host", - Usage: "Only crawl specific hosts, note that it will not include the domain if it is encountered as an asset for another web page.", - Destination: &config.App.Flags.IncludedHosts, - }, - &cli.IntFlag{ - Name: "max-concurrent-per-domain", - Value: 16, - Usage: "Maximum number of concurrent requests per domain.", - Destination: &config.App.Flags.MaxConcurrentRequestsPerDomain, - }, - &cli.IntFlag{ - Name: "concurrent-sleep-length", - Value: 500, - Usage: "Number of milliseconds to sleep when max concurrency per domain is reached.", - Destination: &config.App.Flags.RateLimitDelay, - }, - - &cli.IntFlag{ - Name: "crawl-time-limit", - Value: 0, - Usage: "Number of seconds until the crawl will automatically set itself into the finished state.", - Destination: &config.App.Flags.CrawlTimeLimit, - }, - - &cli.IntFlag{ - Name: "crawl-max-time-limit", - Value: 0, - Usage: "Number of seconds until the crawl will automatically panic itself. Default to crawl-time-limit + (crawl-time-limit / 10)", - Destination: &config.App.Flags.MaxCrawlTimeLimit, - }, - - // Proxy flags - &cli.StringFlag{ - Name: "proxy", - Value: "", - Usage: "Proxy to use when requesting pages.", - Destination: &config.App.Flags.Proxy, - }, - &cli.StringSliceFlag{ - Name: "bypass-proxy", - Usage: "Domains that should not be proxied.", - Destination: &config.App.Flags.BypassProxy, - }, - - // WARC flags - &cli.StringFlag{ - Name: "warc-prefix", - Value: "ZENO", - Usage: "Prefix to use when naming the WARC files.", - Destination: &config.App.Flags.WARCPrefix, - }, - &cli.StringFlag{ - Name: "warc-operator", - Value: "", - Usage: "Contact informations of the crawl operator to write in the Warc-Info record in each WARC file.", - Destination: &config.App.Flags.WARCOperator, - }, - &cli.StringFlag{ - Name: "warc-cdx-dedupe-server", - Value: "", - Usage: "Identify the server to use CDX deduplication. This also activates CDX deduplication on.", - Destination: &config.App.Flags.CDXDedupeServer, - }, - &cli.BoolFlag{ - Name: "warc-on-disk", - Value: false, - Usage: "Do not use RAM to store payloads when recording traffic to WARCs, everything will happen on disk (usually used to reduce memory usage).", - Destination: &config.App.Flags.WARCFullOnDisk, - }, - &cli.IntFlag{ - Name: "warc-pool-size", - Value: 1, - Usage: "Number of concurrent WARC files to write.", - Destination: &config.App.Flags.WARCPoolSize, - }, - &cli.StringFlag{ - Name: "warc-temp-dir", - Value: "", - Usage: "Custom directory to use for WARC temporary files.", - Destination: &config.App.Flags.WARCTempDir, - }, - &cli.BoolFlag{ - Name: "disable-local-dedupe", - Usage: "Disable local URL agonistic deduplication.", - Value: false, - Destination: &config.App.Flags.DisableLocalDedupe, - }, - &cli.BoolFlag{ - Name: "cert-validation", - Usage: "Enables certificate validation on HTTPS requests.", - Value: false, - Destination: &config.App.Flags.CertValidation, - }, - &cli.BoolFlag{ - Name: "disable-assets-capture", - Usage: "Disable assets capture.", - Value: false, - Destination: &config.App.Flags.DisableAssetsCapture, - }, - &cli.IntFlag{ - Name: "warc-dedupe-size", - Value: 1024, - Usage: "Minimum size to deduplicate WARC records with revisit records.", - Destination: &config.App.Flags.WARCDedupSize, - }, - &cli.StringFlag{ - Name: "cdx-cookie", - Value: "", - Usage: "Pass custom cookie during CDX requests. Example: 'cdx_auth_token=test_value'", - Destination: &config.App.Flags.WARCCustomCookie, - }, - // Crawl HQ flags - &cli.BoolFlag{ - Name: "hq", - Value: false, - Usage: "Use Crawl HQ to pull URLs to process.", - Destination: &config.App.Flags.UseHQ, - }, - &cli.StringFlag{ - Name: "hq-address", - Usage: "Crawl HQ address.", - Destination: &config.App.Flags.HQAddress, - }, - &cli.StringFlag{ - Name: "hq-key", - Usage: "Crawl HQ key.", - Destination: &config.App.Flags.HQKey, - }, - &cli.StringFlag{ - Name: "hq-secret", - Usage: "Crawl HQ secret.", - Destination: &config.App.Flags.HQSecret, - }, - &cli.StringFlag{ - Name: "hq-project", - Usage: "Crawl HQ project.", - Destination: &config.App.Flags.HQProject, - }, - &cli.Int64Flag{ - Name: "hq-batch-size", - Usage: "Crawl HQ feeding batch size.", - Destination: &config.App.Flags.HQBatchSize, - }, - &cli.BoolFlag{ - Name: "hq-continuous-pull", - Usage: "If turned on, the crawler will pull URLs from Crawl HQ continuously.", - Destination: &config.App.Flags.HQContinuousPull, - }, - &cli.StringFlag{ - Name: "hq-strategy", - Usage: "Crawl HQ feeding strategy.", - Value: "lifo", - Destination: &config.App.Flags.HQStrategy, - }, - &cli.BoolFlag{ - Name: "hq-rate-limiting-send-back", - Usage: "If turned on, the crawler will send back URLs that hit a rate limit to crawl HQ.", - Destination: &config.App.Flags.HQRateLimitingSendBack, - }, - // Logging flags - &cli.StringFlag{ - Name: "log-file-output-dir", - Usage: "Directory to write log files to.", - Value: "jobs", - Destination: &config.App.Flags.LogFileOutputDir, - }, - &cli.StringFlag{ - Name: "es-url", - Usage: "comma-separated ElasticSearch URL to use for indexing crawl logs.", - Destination: &config.App.Flags.ElasticSearchURLs, - }, - &cli.StringFlag{ - Name: "es-user", - Usage: "ElasticSearch username to use for indexing crawl logs.", - Destination: &config.App.Flags.ElasticSearchUsername, - }, - &cli.StringFlag{ - Name: "es-password", - Usage: "ElasticSearch password to use for indexing crawl logs.", - Destination: &config.App.Flags.ElasticSearchPassword, - }, - &cli.StringFlag{ - Name: "es-index-prefix", - Usage: "ElasticSearch index prefix to use for indexing crawl logs. Default is : `zeno`, without `-`", - Value: "zeno", - Destination: &config.App.Flags.ElasticSearchIndexPrefix, - }, - &cli.StringSliceFlag{ - Name: "exclude-string", - Usage: "Discard any (discovered) URLs containing this string.", - Destination: &config.App.Flags.ExcludedStrings, - }, - &cli.BoolFlag{ - Name: "random-local-ip", - Usage: "Use random local IP for requests. (will be ignored if a proxy is set)", - Destination: &config.App.Flags.RandomLocalIP, - }, -} - -var Commands []*cli.Command - -func RegisterCommand(command cli.Command) { - Commands = append(Commands, &command) -} - -func CommandNotFound(c *cli.Context, command string) { - logrus.Errorf("%s: '%s' is not a %s command. See '%s --help'.", c.App.Name, command, c.App.Name, c.App.Name) - os.Exit(2) -} diff --git a/cmd/v1/get/get.go b/cmd/v1/get/get.go deleted file mode 100644 index 1e738596..00000000 --- a/cmd/v1/get/get.go +++ /dev/null @@ -1,35 +0,0 @@ -package get - -import ( - "github.com/internetarchive/Zeno/cmd/v1" - "github.com/internetarchive/Zeno/config/v1" - log "github.com/sirupsen/logrus" - "github.com/urfave/cli/v2" -) - -func initLogging() (err error) { - // Log as JSON instead of the default ASCII formatter. - if config.App.Flags.JSON { - log.SetFormatter(&log.JSONFormatter{}) - } - - // Turn on debug mode - if config.App.Flags.Debug { - log.SetLevel(log.DebugLevel) - } - - return nil -} - -func init() { - cmd.RegisterCommand( - cli.Command{ - Name: "get", - Usage: "Archive the web!", - Subcommands: []*cli.Command{ - newGetURLCmd(), - newGetListCmd(), - newGetHQCmd(), - }, - }) -} diff --git a/cmd/v1/get/hq.go b/cmd/v1/get/hq.go deleted file mode 100644 index a15a2c86..00000000 --- a/cmd/v1/get/hq.go +++ /dev/null @@ -1,42 +0,0 @@ -package get - -import ( - "github.com/internetarchive/Zeno/cmd/v1" - "github.com/internetarchive/Zeno/config/v1" - "github.com/sirupsen/logrus" - log "github.com/sirupsen/logrus" - "github.com/urfave/cli/v2" -) - -func newGetHQCmd() *cli.Command { - return &cli.Command{ - Name: "hq", - Usage: "Start crawling with the crawl HQ connector.", - Action: cmdGetHQ, - Flags: []cli.Flag{}, - UsageText: " [ARGUMENTS]", - } -} - -func cmdGetHQ(c *cli.Context) error { - err := initLogging() - if err != nil { - log.Error("Unable to parse arguments") - return err - } - - // init crawl using the flags provided - crawl := cmd.InitCrawlWithCMD(config.App.Flags) - - // start crawl - err = crawl.Start() - if err != nil { - logrus.WithFields(logrus.Fields{ - "crawl": crawl, - "err": err.Error(), - }).Error("Crawl exited due to error") - return err - } - - return nil -} diff --git a/cmd/v1/get/list.go b/cmd/v1/get/list.go deleted file mode 100644 index 86f4ac5e..00000000 --- a/cmd/v1/get/list.go +++ /dev/null @@ -1,58 +0,0 @@ -package get - -import ( - "github.com/internetarchive/Zeno/cmd/v1" - "github.com/internetarchive/Zeno/config/v1" - "github.com/internetarchive/Zeno/internal/pkg/frontier" - "github.com/sirupsen/logrus" - log "github.com/sirupsen/logrus" - "github.com/urfave/cli/v2" -) - -func newGetListCmd() *cli.Command { - return &cli.Command{ - Name: "list", - Usage: "Start crawling with a seed list.", - Action: cmdGetList, - Flags: []cli.Flag{}, - UsageText: " [ARGUMENTS]", - } -} - -func cmdGetList(c *cli.Context) error { - err := initLogging() - if err != nil { - log.Error("Unable to parse arguments") - return err - } - - // Init crawl using the flags provided - crawl := cmd.InitCrawlWithCMD(config.App.Flags) - - // Initialize initial seed list - crawl.SeedList, err = frontier.IsSeedList(c.Args().Get(0)) - if err != nil || len(crawl.SeedList) <= 0 { - logrus.WithFields(logrus.Fields{ - "input": c.Args().Get(0), - "err": err.Error(), - }).Error("This is not a valid input") - return err - } - - logrus.WithFields(logrus.Fields{ - "input": c.Args().Get(0), - "seedsCount": len(crawl.SeedList), - }).Print("Seed list loaded") - - // Start crawl - err = crawl.Start() - if err != nil { - logrus.WithFields(logrus.Fields{ - "crawl": crawl, - "err": err.Error(), - }).Error("Crawl exited due to error") - return err - } - - return nil -} diff --git a/cmd/v1/get/url.go b/cmd/v1/get/url.go deleted file mode 100644 index b2b4eb55..00000000 --- a/cmd/v1/get/url.go +++ /dev/null @@ -1,57 +0,0 @@ -package get - -import ( - "net/url" - - "github.com/internetarchive/Zeno/cmd/v1" - "github.com/internetarchive/Zeno/config/v1" - "github.com/internetarchive/Zeno/internal/pkg/frontier" - "github.com/sirupsen/logrus" - "github.com/urfave/cli/v2" -) - -func newGetURLCmd() *cli.Command { - return &cli.Command{ - Name: "url", - Usage: "Start crawling with a single URL.", - Action: cmdGetURL, - Flags: []cli.Flag{}, - UsageText: " [ARGUMENTS]", - } -} - -func cmdGetURL(c *cli.Context) error { - err := initLogging() - if err != nil { - logrus.Error("Unable to parse arguments") - return err - } - - // Init crawl using the flags provided - crawl := cmd.InitCrawlWithCMD(config.App.Flags) - - // Initialize initial seed list - input, err := url.Parse(c.Args().Get(0)) - if err != nil { - logrus.WithFields(logrus.Fields{ - "input": c.Args().Get(0), - "err": err.Error(), - }).Error("This is not a valid input") - return err - } - - crawl.SeedList = append(crawl.SeedList, *frontier.NewItem(input, nil, "seed", 0, "", false)) - - // Start crawl - err = crawl.Start() - if err != nil { - logrus.WithFields(logrus.Fields{ - "crawl": crawl, - "err": err.Error(), - }).Error("Crawl exited due to error") - return err - } - - logrus.Info("Crawl finished") - return err -} diff --git a/cmd/v1/utils.go b/cmd/v1/utils.go deleted file mode 100644 index 0a6ce311..00000000 --- a/cmd/v1/utils.go +++ /dev/null @@ -1,184 +0,0 @@ -package cmd - -import ( - "fmt" - "log/slog" - "os" - "path" - "path/filepath" - "strings" - "time" - - "github.com/google/uuid" - "github.com/internetarchive/Zeno/config/v1" - "github.com/internetarchive/Zeno/internal/pkg/crawl" - "github.com/internetarchive/Zeno/internal/pkg/frontier" - "github.com/internetarchive/Zeno/internal/pkg/log" - "github.com/internetarchive/Zeno/internal/pkg/utils" - "github.com/paulbellamy/ratecounter" -) - -// InitCrawlWithCMD takes a config.Flags struct and return a -// *crawl.Crawl initialized with it -func InitCrawlWithCMD(flags config.Flags) *crawl.Crawl { - var c = new(crawl.Crawl) - - // Craft Elastic Search configuration - var elasticSearchConfig *log.ElasticsearchConfig - - elasticSearchURLs := strings.Split(flags.ElasticSearchURLs, ",") - - if elasticSearchURLs[0] == "" { - elasticSearchConfig = nil - } else { - elasticSearchConfig = &log.ElasticsearchConfig{ - Addresses: elasticSearchURLs, - Username: flags.ElasticSearchUsername, - Password: flags.ElasticSearchPassword, - IndexPrefix: flags.ElasticSearchIndexPrefix, - Level: slog.LevelDebug, - } - } - - // Ensure that the log file output directory is well parsed - logfileOutputDir := filepath.Dir(flags.LogFileOutputDir) - if logfileOutputDir == "." && flags.LogFileOutputDir != "." { - logfileOutputDir = filepath.Dir(flags.LogFileOutputDir + "/") - } - - // Craft custom logger - customLogger, err := log.New(log.Config{ - FileConfig: &log.LogfileConfig{ - Dir: logfileOutputDir, - Prefix: "zeno", - }, - FileLevel: slog.LevelDebug, - StdoutLevel: slog.LevelInfo, - RotateLogFile: true, - RotateElasticSearchIndex: true, - ElasticsearchConfig: elasticSearchConfig, - }) - if err != nil { - fmt.Println(err) - os.Exit(1) - } - c.Log = customLogger - - // Statistics counters - c.CrawledSeeds = new(ratecounter.Counter) - c.CrawledAssets = new(ratecounter.Counter) - c.ActiveWorkers = new(ratecounter.Counter) - c.URIsPerSecond = ratecounter.NewRateCounter(1 * time.Second) - - c.LiveStats = flags.LiveStats - - // Frontier - c.Frontier = new(frontier.Frontier) - c.Frontier.Log = c.Log - - // If the job name isn't specified, we generate a random name - if flags.Job == "" { - if flags.HQProject != "" { - c.Job = flags.HQProject - } else { - UUID, err := uuid.NewUUID() - if err != nil { - c.Log.Fatal("cmd/utils.go:InitCrawlWithCMD():uuid.NewUUID()", "error", err) - } - - c.Job = UUID.String() - } - } else { - c.Job = flags.Job - } - - c.JobPath = path.Join("jobs", flags.Job) - - c.Workers = flags.Workers - c.WorkerPool = make([]*crawl.Worker, 0) - c.WorkerStopTimeout = time.Second * 60 // Placeholder for WorkerStopTimeout - c.MaxConcurrentAssets = flags.MaxConcurrentAssets - c.WorkerStopSignal = make(chan bool) - - c.Seencheck = flags.Seencheck - c.HTTPTimeout = flags.HTTPTimeout - c.MaxConcurrentRequestsPerDomain = flags.MaxConcurrentRequestsPerDomain - c.RateLimitDelay = flags.RateLimitDelay - c.CrawlTimeLimit = flags.CrawlTimeLimit - - // Defaults --max-crawl-time-limit to 10% more than --crawl-time-limit - if flags.MaxCrawlTimeLimit == 0 && flags.CrawlTimeLimit != 0 { - c.MaxCrawlTimeLimit = flags.CrawlTimeLimit + (flags.CrawlTimeLimit / 10) - } else { - c.MaxCrawlTimeLimit = flags.MaxCrawlTimeLimit - } - - c.MaxRetry = flags.MaxRetry - c.MaxRedirect = flags.MaxRedirect - c.MaxHops = uint8(flags.MaxHops) - c.DomainsCrawl = flags.DomainsCrawl - c.DisableAssetsCapture = flags.DisableAssetsCapture - c.DisabledHTMLTags = flags.DisabledHTMLTags.Value() - c.ExcludedHosts = flags.ExcludedHosts.Value() - c.IncludedHosts = flags.IncludedHosts.Value() - c.CaptureAlternatePages = flags.CaptureAlternatePages - c.ExcludedStrings = flags.ExcludedStrings.Value() - - // WARC settings - c.WARCPrefix = flags.WARCPrefix - c.WARCOperator = flags.WARCOperator - - if flags.WARCTempDir != "" { - c.WARCTempDir = flags.WARCTempDir - } else { - c.WARCTempDir = path.Join(c.JobPath, "temp") - } - - c.CDXDedupeServer = flags.CDXDedupeServer - c.DisableLocalDedupe = flags.DisableLocalDedupe - c.CertValidation = flags.CertValidation - c.WARCFullOnDisk = flags.WARCFullOnDisk - c.WARCPoolSize = flags.WARCPoolSize - c.WARCDedupSize = flags.WARCDedupSize - c.WARCCustomCookie = flags.WARCCustomCookie - - c.API = flags.API - c.APIPort = flags.APIPort - - // If Prometheus is specified, then we make sure - // c.API is true - c.Prometheus = flags.Prometheus - if c.Prometheus { - c.API = true - c.PrometheusMetrics = new(crawl.PrometheusMetrics) - c.PrometheusMetrics.Prefix = flags.PrometheusPrefix - } - - if flags.UserAgent != "Zeno" { - c.UserAgent = flags.UserAgent - } else { - version := utils.GetVersion() - c.UserAgent = "Mozilla/5.0 (compatible; archive.org_bot +http://archive.org/details/archive.org_bot) Zeno/" + version.Version[:7] + " warc/" + version.WarcVersion - } - c.Headless = flags.Headless - - c.CookieFile = flags.CookieFile - c.KeepCookies = flags.KeepCookies - - // Proxy settings - c.Proxy = flags.Proxy - c.BypassProxy = flags.BypassProxy.Value() - - // Crawl HQ settings - c.UseHQ = flags.UseHQ - c.HQProject = flags.HQProject - c.HQAddress = flags.HQAddress - c.HQKey = flags.HQKey - c.HQSecret = flags.HQSecret - c.HQStrategy = flags.HQStrategy - c.HQBatchSize = int(flags.HQBatchSize) - c.HQContinuousPull = flags.HQContinuousPull - c.HQRateLimitingSendBack = flags.HQRateLimitingSendBack - - return c -} diff --git a/cmd/v1/version/deps.go b/cmd/v1/version/deps.go deleted file mode 100644 index b877930d..00000000 --- a/cmd/v1/version/deps.go +++ /dev/null @@ -1,31 +0,0 @@ -package version - -import ( - "fmt" - "runtime/debug" - - "github.com/urfave/cli/v2" -) - -func newShowDepsCmd() *cli.Command { - return &cli.Command{ - Name: "deps", - Usage: "Get dependencies.", - Action: cmdShowDeps, - } -} - -func cmdShowDeps(c *cli.Context) error { - if info, ok := debug.ReadBuildInfo(); ok { - for _, dep := range info.Deps { - fmt.Printf("%s %s (%s)", dep.Path, dep.Version, dep.Sum) - if dep.Replace != nil { - fmt.Printf(" => %s %s (%s)", dep.Replace.Path, dep.Replace.Version, dep.Replace.Sum) - } else { - fmt.Print("\n") - } - } - } - - return nil -} diff --git a/cmd/v1/version/version.go b/cmd/v1/version/version.go deleted file mode 100644 index 5f0137f1..00000000 --- a/cmd/v1/version/version.go +++ /dev/null @@ -1,28 +0,0 @@ -package version - -import ( - "github.com/internetarchive/Zeno/cmd/v1" - "github.com/internetarchive/Zeno/internal/pkg/utils" - "github.com/urfave/cli/v2" -) - -func init() { - cmd.RegisterCommand( - cli.Command{ - Name: "version", - Usage: "Show the version number.", - Action: cmdVersion, - Subcommands: []*cli.Command{ - newShowDepsCmd(), - }, - }) -} - -func cmdVersion(c *cli.Context) error { - version := utils.GetVersion() - - println("Zeno", version.Version) - println("- go/version:", version.GoVersion) - - return nil -} diff --git a/config/v2/config.go b/config/config.go similarity index 100% rename from config/v2/config.go rename to config/config.go diff --git a/config/v1/config.go b/config/v1/config.go deleted file mode 100644 index 112d0e18..00000000 --- a/config/v1/config.go +++ /dev/null @@ -1,81 +0,0 @@ -package config - -import "github.com/urfave/cli/v2" - -type Flags struct { - UserAgent string - Job string - Workers int - MaxConcurrentAssets int - MaxHops uint - Headless bool - Seencheck bool - JSON bool - LiveStats bool - Debug bool - - DisabledHTMLTags cli.StringSlice - ExcludedHosts cli.StringSlice - IncludedHosts cli.StringSlice - DomainsCrawl bool - CaptureAlternatePages bool - HTTPTimeout int - MaxRedirect int - MaxRetry int - MaxConcurrentRequestsPerDomain int - RateLimitDelay int - CrawlTimeLimit int - MaxCrawlTimeLimit int - RandomLocalIP bool - - Proxy string - BypassProxy cli.StringSlice - - CookieFile string - KeepCookies bool - - API bool - APIPort string - Prometheus bool - PrometheusPrefix string - - WARCPrefix string - WARCOperator string - WARCPoolSize int - WARCDedupSize int - WARCFullOnDisk bool - WARCTempDir string - WARCCustomCookie string - - UseHQ bool - HQBatchSize int64 - HQAddress string - HQProject string - HQKey string - HQSecret string - HQStrategy string - HQContinuousPull bool - HQRateLimitingSendBack bool - - CDXDedupeServer string - DisableLocalDedupe bool - DisableAssetsCapture bool - CertValidation bool - - ElasticSearchURLs string - ElasticSearchUsername string - ElasticSearchPassword string - ElasticSearchIndexPrefix string - ExcludedStrings cli.StringSlice - LogFileOutputDir string -} - -type Application struct { - Flags Flags -} - -var App *Application - -func init() { - App = &Application{} -} diff --git a/go.mod b/go.mod index cf3bc387..be61fb47 100644 --- a/go.mod +++ b/go.mod @@ -31,7 +31,6 @@ require ( github.com/stretchr/testify v1.9.0 github.com/telanflow/cookiejar v0.0.0-20190719062046-114449e86aa5 github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80 - github.com/urfave/cli/v2 v2.27.2 github.com/zeebo/xxh3 v1.0.2 go.uber.org/goleak v1.3.0 golang.org/x/net v0.26.0 @@ -48,7 +47,6 @@ require ( github.com/cloudflare/circl v1.3.9 // indirect github.com/cloudwego/base64x v0.1.4 // indirect github.com/cloudwego/iasm v0.2.0 // indirect - github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/elastic/elastic-transport-go/v8 v8.6.0 // indirect github.com/fatih/color v1.17.0 // indirect @@ -96,7 +94,6 @@ require ( github.com/prometheus/procfs v0.15.1 // indirect github.com/refraction-networking/utls v1.6.6 // indirect github.com/rivo/uniseg v0.4.7 // indirect - github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/sagikazarmark/locafero v0.4.0 // indirect github.com/sagikazarmark/slog-shim v0.1.0 // indirect github.com/satori/go.uuid v1.2.0 // indirect @@ -106,7 +103,6 @@ require ( github.com/syndtr/goleveldb v1.0.0 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/ugorji/go/codec v1.2.12 // indirect - github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect go.opentelemetry.io/otel v1.28.0 // indirect go.opentelemetry.io/otel/metric v1.28.0 // indirect go.opentelemetry.io/otel/trace v1.28.0 // indirect diff --git a/go.sum b/go.sum index 97e9a64c..87d41365 100644 --- a/go.sum +++ b/go.sum @@ -42,8 +42,6 @@ github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJ github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg= github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY= github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4= -github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= @@ -246,7 +244,6 @@ github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= -github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sagikazarmark/locafero v0.4.0 h1:HApY1R9zGo4DBgr7dqsTH/JJxLTTsOt7u6keLGt6kNQ= github.com/sagikazarmark/locafero v0.4.0/go.mod h1:Pe1W6UlPYUk/+wc/6KFhbORCfqzgYEpgQ3O5fPuL3H4= @@ -297,10 +294,6 @@ github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE= github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= -github.com/urfave/cli/v2 v2.27.2 h1:6e0H+AkS+zDckwPCUrZkKX38mRaau4nL2uipkJpbkcI= -github.com/urfave/cli/v2 v2.27.2/go.mod h1:g0+79LmHHATl7DAcHO99smiR/T7uGLw84w8Y42x+4eM= -github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4= -github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= diff --git a/internal/pkg/crawl/config.go b/internal/pkg/crawl/config.go index 31ca73a5..de9f6620 100644 --- a/internal/pkg/crawl/config.go +++ b/internal/pkg/crawl/config.go @@ -11,7 +11,7 @@ import ( "git.archive.org/wb/gocrawlhq" "github.com/CorentinB/warc" "github.com/google/uuid" - "github.com/internetarchive/Zeno/config/v2" + "github.com/internetarchive/Zeno/config" "github.com/internetarchive/Zeno/internal/pkg/frontier" "github.com/internetarchive/Zeno/internal/pkg/log" "github.com/internetarchive/Zeno/internal/pkg/utils" diff --git a/main.go b/main.go index 1d33dea7..f5a4238a 100644 --- a/main.go +++ b/main.go @@ -1,3 +1,11 @@ +// Zeno is a web crawler designed to operate wide crawls or to simply archive one web page. +// Zeno's key concepts are: portability, performance, simplicity ; with an emphasis on performance. + +// Authors: +// +// Corentin Barreau +// Jake LaFountain +// Thomas Foubert package main import ( @@ -6,7 +14,7 @@ import ( _ "net/http/pprof" - "github.com/internetarchive/Zeno/cmd/v2" + "github.com/internetarchive/Zeno/cmd" ) func main() { From 2e364ab037f24283c155a8b777689f4610db38aa Mon Sep 17 00:00:00 2001 From: Thomas FOUBERT Date: Sat, 13 Jul 2024 12:36:16 -0400 Subject: [PATCH 06/12] feat: add base to support consul config with viper --- cmd/cmd.go | 7 ++++++- config/config.go | 23 ++++++++++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/cmd/cmd.go b/cmd/cmd.go index c23451a3..197ddfab 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -41,7 +41,12 @@ func Run() error { // Define flags and configuration settings rootCmd.PersistentFlags().String("log-level", "info", "stdout log level (debug, info, warn, error)") - rootCmd.PersistentFlags().String("config", "", "config file (default is $HOME/zeno-config.yaml)") + rootCmd.PersistentFlags().String("config-file", "", "config file (default is $HOME/zeno-config.yaml)") + rootCmd.PersistentFlags().Bool("consul-config", false, "Use this flag to enable consul config support") + rootCmd.PersistentFlags().String("consul-address", "", "The consul address used to retreive config") + rootCmd.PersistentFlags().String("consul-path", "", "The full Consul K/V path where the config is stored") + rootCmd.PersistentFlags().String("consul-user", "", "The Consul user used to retreive config") + rootCmd.PersistentFlags().String("consul-password", "", "The Consul password used to retreive config") // Add get subcommands getCmd := getCMDs() diff --git a/config/config.go b/config/config.go index db3d126e..006d05dd 100644 --- a/config/config.go +++ b/config/config.go @@ -2,7 +2,9 @@ package config import ( "fmt" + "net/url" "os" + "path/filepath" "strings" "sync" @@ -89,13 +91,15 @@ var ( ) // InitConfig initializes the configuration +// Flags -> Env -> Config file -> Consul config +// Latest has precedence over the rest func InitConfig() error { var err error once.Do(func() { config = &Config{} // Check if a config file is provided via flag - if configFile := viper.GetString("config"); configFile != "" { + if configFile := viper.GetString("config-file"); configFile != "" { viper.SetConfigFile(configFile) } else { home, err := os.UserHomeDir() @@ -118,6 +122,23 @@ func InitConfig() error { fmt.Println("Using config file:", viper.ConfigFileUsed()) } + if viper.GetBool("consul-config") && viper.GetString("consul-address") != "" { + var consulAddress *url.URL + consulAddress, err = url.Parse(viper.GetString("consul-address")) + if err != nil { + return + } + + consulPath, consulFile := filepath.Split(viper.GetString("consul-path")) + viper.AddRemoteProvider("consul", consulAddress.String(), consulPath) + viper.SetConfigType(filepath.Ext(consulFile)) + viper.SetConfigName(strings.TrimSuffix(consulFile, filepath.Ext(consulFile))) + + if err = viper.ReadInConfig(); err == nil { + fmt.Println("Using config file:", viper.ConfigFileUsed()) + } + } + // Unmarshal the config into the Config struct err = viper.Unmarshal(config) }) From e4834d367458548f2ad321bb9b7d2db59d92a650 Mon Sep 17 00:00:00 2001 From: Thomas FOUBERT Date: Thu, 18 Jul 2024 11:04:27 -0400 Subject: [PATCH 07/12] update .gitignore to exclude vscode dir --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 8e5c76cd..05298823 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ jobs/ Zeno *.txt *.sh -zeno.log \ No newline at end of file +zeno.log +.vscode/ \ No newline at end of file From 7c7cb45fded3a025420d932ab121f6781cd6998e Mon Sep 17 00:00:00 2001 From: Thomas FOUBERT Date: Thu, 18 Jul 2024 11:35:30 -0400 Subject: [PATCH 08/12] fix: report 7bdc091 changes on this branch --- cmd/get.go | 1 + config/config.go | 1 + go.mod | 35 ------- go.sum | 176 ++--------------------------------- internal/pkg/crawl/config.go | 3 + 5 files changed, 13 insertions(+), 203 deletions(-) diff --git a/cmd/get.go b/cmd/get.go index 5c29f10c..0eefd129 100644 --- a/cmd/get.go +++ b/cmd/get.go @@ -61,6 +61,7 @@ func getCMDsFlags(getCmd *cobra.Command) { getCmd.PersistentFlags().Int("crawl-max-time-limit", 0, "Number of seconds until the crawl will automatically panic itself. Default to crawl-time-limit + (crawl-time-limit / 10)") getCmd.PersistentFlags().StringSlice("exclude-string", []string{}, "Discard any (discovered) URLs containing this string.") getCmd.PersistentFlags().Bool("random-local-ip", false, "Use random local IP for requests. (will be ignored if a proxy is set)") + getCmd.PersistentFlags().Int("min-space-required", 20, "Minimum space required in GB to continue the crawl.") // Proxy flags getCmd.PersistentFlags().String("proxy", "", "Proxy to use when requesting pages.") diff --git a/config/config.go b/config/config.go index 006d05dd..e57ab150 100644 --- a/config/config.go +++ b/config/config.go @@ -48,6 +48,7 @@ type Config struct { CrawlMaxTimeLimit int `mapstructure:"crawl-max-time-limit"` ExcludeString []string `mapstructure:"exclude-string"` RandomLocalIP bool `mapstructure:"random-local-ip"` + MinSpaceRequired int `mapstructure:"min-space-required"` // Get flags (Proxy flags) Proxy string `mapstructure:"proxy"` diff --git a/go.mod b/go.mod index fa8810e5..c90785bc 100644 --- a/go.mod +++ b/go.mod @@ -14,9 +14,6 @@ require ( github.com/google/uuid v1.6.0 github.com/gosuri/uilive v0.0.4 github.com/gosuri/uitable v0.0.4 - github.com/internetarchive/elogrus v0.0.0-20230725172814-093db31a64fc - github.com/lestrrat-go/file-rotatelogs v2.4.0+incompatible - github.com/olivere/elastic/v7 v7.0.32 github.com/paulbellamy/ratecounter v0.2.0 github.com/philippgille/gokv/leveldb v0.7.0 github.com/prometheus/client_golang v1.19.1 @@ -41,23 +38,10 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/cloudflare/circl v1.3.9 // indirect - github.com/cloudwego/base64x v0.1.4 // indirect - github.com/cloudwego/iasm v0.2.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/elastic/elastic-transport-go/v8 v8.6.0 // indirect github.com/fatih/color v1.17.0 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect - github.com/gabriel-vasile/mimetype v1.4.4 // indirect - github.com/gin-contrib/sse v0.1.0 // indirect - github.com/go-logr/logr v1.4.2 // indirect - github.com/go-logr/stdr v1.2.2 // indirect - github.com/go-playground/locales v0.14.1 // indirect - github.com/go-playground/universal-translator v0.18.1 // indirect - github.com/go-playground/validator/v10 v10.22.0 // indirect - github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect - github.com/davecgh/go-spew v1.1.1 // indirect - github.com/elastic/elastic-transport-go/v8 v8.6.0 // indirect - github.com/fatih/color v1.17.0 // indirect github.com/go-logr/logr v1.4.2 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/gobwas/httphead v0.1.0 // indirect @@ -67,17 +51,11 @@ require ( github.com/gomodule/redigo v1.9.2 // indirect github.com/hashicorp/hcl v1.0.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect - github.com/jonboulle/clockwork v0.4.0 // indirect - github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/klauspost/compress v1.17.9 // indirect github.com/klauspost/cpuid/v2 v2.2.8 // indirect github.com/klauspost/pgzip v1.2.6 // indirect - github.com/leodido/go-urn v1.4.0 // indirect - github.com/kr/text v0.2.0 // indirect - github.com/lestrrat-go/strftime v1.0.6 // indirect github.com/magiconair/properties v1.8.7 // indirect - github.com/mailru/easyjson v0.7.7 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/mattn/go-runewidth v0.0.15 // indirect @@ -89,7 +67,6 @@ require ( github.com/pelletier/go-toml/v2 v2.2.2 // indirect github.com/philippgille/gokv/encoding v0.7.0 // indirect github.com/philippgille/gokv/util v0.7.0 // indirect - github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/common v0.55.0 // indirect @@ -98,30 +75,18 @@ require ( github.com/rivo/uniseg v0.4.7 // indirect github.com/sagikazarmark/locafero v0.4.0 // indirect github.com/sagikazarmark/slog-shim v0.1.0 // indirect - github.com/quic-go/quic-go v0.41.0 // indirect - github.com/refraction-networking/utls v1.6.3 // indirect - github.com/rivo/uniseg v0.2.0 // indirect - github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/satori/go.uuid v1.2.0 // indirect github.com/sourcegraph/conc v0.3.0 // indirect github.com/spf13/cast v1.6.0 // indirect github.com/subosito/gotenv v1.6.0 // indirect github.com/syndtr/goleveldb v1.0.0 // indirect - github.com/twitchyliquid64/golang-asm v0.15.1 // indirect - github.com/ugorji/go/codec v1.2.12 // indirect go.opentelemetry.io/otel v1.28.0 // indirect go.opentelemetry.io/otel/metric v1.28.0 // indirect go.opentelemetry.io/otel/trace v1.28.0 // indirect go.uber.org/atomic v1.9.0 // indirect go.uber.org/multierr v1.9.0 // indirect - golang.org/x/arch v0.8.0 // indirect golang.org/x/crypto v0.24.0 // indirect golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect - github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect - go.opentelemetry.io/otel v1.28.0 // indirect - go.opentelemetry.io/otel/metric v1.28.0 // indirect - go.opentelemetry.io/otel/trace v1.28.0 // indirect - golang.org/x/crypto v0.24.0 // indirect golang.org/x/sync v0.7.0 // indirect golang.org/x/sys v0.21.0 // indirect golang.org/x/text v0.16.0 // indirect diff --git a/go.sum b/go.sum index 27a999b8..c1c6060c 100644 --- a/go.sum +++ b/go.sum @@ -1,98 +1,50 @@ -cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= git.archive.org/wb/gocrawlhq v1.2.5 h1:k8cPZRa+O7nWrGIJntVjLsbOOhoprICfAP8T7yfsvJU= git.archive.org/wb/gocrawlhq v1.2.5/go.mod h1:WiuNIB4Toqe8twVvwRu0fTSNC3KXFqA8/mAeaZ3GICE= -github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/CorentinB/warc v0.8.39 h1:ZYccm4BKRle10aVPL9EU7nCVe0LImGxkK1fsqIE8Dw8= -github.com/CorentinB/warc v0.8.39/go.mod h1:Q9SHKf7pwcqzIWcxlzCtAWN8sKH+Q1BZxq1mSHJ9ttY= github.com/CorentinB/warc v0.8.40 h1:6HIMT4jujlFTudeXtsoaFT+qJZYXeQlKdIED+c36Qpc= github.com/CorentinB/warc v0.8.40/go.mod h1:Q9SHKf7pwcqzIWcxlzCtAWN8sKH+Q1BZxq1mSHJ9ttY= github.com/PuerkitoBio/goquery v1.9.2 h1:4/wZksC3KgkQw7SQgkKotmKljk0M6V8TUvA8Wb4yPeE= github.com/PuerkitoBio/goquery v1.9.2/go.mod h1:GHPCaP0ODyyxqcNoFGYlAprUFH81NuRPd0GX3Zu2Mvk= -github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo= -github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= -github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= -github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss= github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU= -github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so= github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw= -github.com/aws/aws-sdk-go v1.19.6/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= github.com/beeker1121/goque v2.1.0+incompatible h1:m5pZ5b8nqzojS2DF2ioZphFYQUqGYsDORq6uefUItPM= github.com/beeker1121/goque v2.1.0+incompatible/go.mod h1:L6dOWBhDOnxUVQsb0wkLve0VCnt2xJW/MI8pdRX4ANw= -github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= -github.com/bytedance/sonic v1.11.9 h1:LFHENlIY/SLzDWverzdOvgMztTxcfcF+cqNsz9pK5zg= -github.com/bytedance/sonic v1.11.9/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4= -github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM= -github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/clbanning/mxj/v2 v2.7.0 h1:WA/La7UGCanFe5NpHF0Q3DNtnCsVoxbPKuyBNHWRyME= github.com/clbanning/mxj/v2 v2.7.0/go.mod h1:hNiWqW14h+kc+MdF9C6/YoRfjEJoR3ou6tn/Qo+ve2s= -github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cloudflare/circl v1.3.9 h1:QFrlgFYf2Qpi8bSpVPK1HBvWpx16v/1TZivyo7pGuBE= github.com/cloudflare/circl v1.3.9/go.mod h1:PDRU+oXvdD7KCtgKxW95M5Z8BpSCJXQORiZFnBQS5QU= -github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y= -github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w= -github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg= -github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY= github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4= -github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= -github.com/eapache/go-resiliency v1.1.0/go.mod h1:kFI+JgMyC7bLPUVY133qvEBtVayf5mFgVsvEsIPBvNs= -github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21/go.mod h1:+020luEh2TKB4/GOp8oxxtq0Daoen/Cii55CzbTV6DU= -github.com/eapache/queue v1.1.0/go.mod h1:6eCeP0CKFpHLu8blIFXhExK/dRa7WDZfr6jVFPTqq+I= github.com/elastic/elastic-transport-go/v8 v8.6.0 h1:Y2S/FBjx1LlCv5m6pWAF2kDJAHoSjSRSJCApolgfthA= github.com/elastic/elastic-transport-go/v8 v8.6.0/go.mod h1:YLHer5cj0csTzNFXoNQ8qhtGY1GTvSqPnKWKaqQE3Hk= github.com/elastic/go-elasticsearch/v8 v8.14.0 h1:1ywU8WFReLLcxE1WJqii3hTtbPUE2hc38ZK/j4mMFow= github.com/elastic/go-elasticsearch/v8 v8.14.0/go.mod h1:WRvnlGkSuZyp83M2U8El/LGXpCjYLrvlkSgkAH4O5I4= github.com/fatih/color v1.17.0 h1:GlRw1BRJxkpqUCBKzKOw098ed57fEsKeNjpTe3cSjK4= github.com/fatih/color v1.17.0/go.mod h1:YZ7TlrGPkiz6ku9fK3TLD/pl3CpsiFyu8N92HLgmosI= -github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw= -github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= -github.com/gabriel-vasile/mimetype v1.4.4 h1:QjV6pZ7/XZ7ryI2KuyeEDE8wnh7fHP9YnQy+R0LnH8I= -github.com/gabriel-vasile/mimetype v1.4.4/go.mod h1:JwLei5XPtWdGiMFB5Pjle1oEeoSeEuJfJE+TtfvdB/s= -github.com/gin-contrib/pprof v1.5.0 h1:E/Oy7g+kNw94KfdCy3bZxQFtyDnAX2V7axRS7sNYVrU= -github.com/gin-contrib/pprof v1.5.0/go.mod h1:GqFL6LerKoCQ/RSWnkYczkTJ+tOAUVN/8sbnEtaqOKs= -github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE= -github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= -github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU= -github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y= -github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= -github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= -github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= -github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= -github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= -github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= -github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= -github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= -github.com/go-playground/validator/v10 v10.22.0 h1:k6HsTZ0sTnROkhS//R0O+55JgM8C4Bx7ia+JlgcnOao= -github.com/go-playground/validator/v10 v10.22.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM= -github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/go-test/deep v1.1.0 h1:WOcxcdHcvdgThNXjw0t76K42FXTU7HpNQWHpA2HHNlg= github.com/go-test/deep v1.1.0/go.mod h1:5C2ZWiW0ErCdrYzpqxLbTX7MG14M9iiw8DgHncVwcsE= github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU= @@ -101,78 +53,41 @@ github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og= github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= github.com/gobwas/ws v1.4.0 h1:CTaoG1tojrh4ucGPcoJFiAQUAsEWekEWvLy7GsVNqGs= github.com/gobwas/ws v1.4.0/go.mod h1:G3gNqMNtPppf5XUz7O4shetPpcZ1VJ7zt18dlUeakrc= -github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA= -github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= -github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= -github.com/gogo/protobuf v1.2.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= -github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/gomodule/redigo v1.9.2 h1:HrutZBLhSIU8abiSfW8pj8mPhOyMYjZT/wcA4/L9L9s= github.com/gomodule/redigo v1.9.2/go.mod h1:KsU3hiK/Ay8U42qpaJk+kuNa3C+spxapWpM+ywhcgtw= -github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= -github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg= -github.com/gorilla/mux v1.6.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= github.com/gosuri/uilive v0.0.4 h1:hUEBpQDj8D8jXgtCdBu7sWsy5sbW/5GhuO8KBwJ2jyY= github.com/gosuri/uilive v0.0.4/go.mod h1:V/epo5LjjlDE5RJUcqx8dbw+zc93y5Ya3yg8tfZ74VI= github.com/gosuri/uitable v0.0.4 h1:IG2xLKRvErL3uhY6e1BylFzG+aJiwQviDDTfOKeKTpY= github.com/gosuri/uitable v0.0.4/go.mod h1:tKR86bXuXPZazfOTG1FIzvjIdXzd0mo4Vtn16vt0PJo= -github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= -github.com/internetarchive/elogrus v0.0.0-20230725172814-093db31a64fc h1:/f7B0tD2oVSHxIrDy9ciEQaVy/I/sMdLwFMVkjh5LLU= -github.com/internetarchive/elogrus v0.0.0-20230725172814-093db31a64fc/go.mod h1:oI8WojhCewR2pTR8bAx5/9pJO7ToWuLEzUuVIX3IVYk= -github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= -github.com/jonboulle/clockwork v0.4.0 h1:p4Cf1aMWXnXAUh8lVfewRBx1zaTSYKrKMF2g3ST4RZ4= -github.com/jonboulle/clockwork v0.4.0/go.mod h1:xgRqUGwRcjKCO1vbZUEtSLrqKoPSsUpK7fnezOII0kc= -github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= -github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= -github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= -github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= -github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.2.8 h1:+StwCXwm9PdpiEkPyzBXIy+M9KUb4ODm0Zarf1kS5BM= github.com/klauspost/cpuid/v2 v2.2.8/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU= github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= -github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M= -github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ= -github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI= -github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc h1:RKf14vYWi2ttpEmkA4aQ3j4u9dStX2t4M8UM6qqNsG8= -github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc/go.mod h1:kopuH9ugFRkIXf3YoqHKyrJ9YfUFsckUU9S7B+XP+is= -github.com/lestrrat-go/file-rotatelogs v2.4.0+incompatible h1:Y6sqxHMyB1D2YSzWkLibYKgg+SwmyFU9dF2hn6MdTj4= -github.com/lestrrat-go/file-rotatelogs v2.4.0+incompatible/go.mod h1:ZQnN8lSECaebrkQytbHj4xNgtg8CR7RYXnPok8e0EHA= -github.com/lestrrat-go/strftime v1.0.6 h1:CFGsDEt1pOpFNU+TJB0nhz9jl+K0hZSLE205AhTIGQQ= -github.com/lestrrat-go/strftime v1.0.6/go.mod h1:f7jQKgV5nnJpYgdEasS+/y7EsTb8ykN2z68n3TtcTaw= github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY= github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= -github.com/mailru/easyjson v0.0.0-20190312143242-1de009706dbe/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= -github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= -github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= -github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= @@ -180,7 +95,6 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= -github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -190,18 +104,12 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= -github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= -github.com/olivere/elastic/v7 v7.0.4/go.mod h1:l4YWa59iTCcOJQXI5ZtxVjcd3p5U8GCxVgvzHZqGn3o= -github.com/olivere/elastic/v7 v7.0.32 h1:R7CXvbu8Eq+WlsLgxmKVKPox0oOwAE/2T9Si5BnvK6E= -github.com/olivere/elastic/v7 v7.0.32/go.mod h1:c7PVmLe3Fxq77PIfY/bZmxY/TAamBhCzZ8xDOE09a9k= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.7.0 h1:WSHQ+IS43OoUrWtD1/bbclrwK8TTH5hzp+umCiuxHgs= github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE= github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg= -github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= -github.com/openzipkin/zipkin-go v0.1.6/go.mod h1:QgAqvLzwWbR/WpD4A3cGpPtJrZXNIiJc5AZX7/PBEpw= github.com/paulbellamy/ratecounter v0.2.0 h1:2L/RhJq+HA8gBQImDXtLPrDXK5qAj6ozWVK/zFXVJGs= github.com/paulbellamy/ratecounter v0.2.0/go.mod h1:Hfx1hDpSGoqxkVVpBi/IlYD7kChlfo5C6hzIHwPqfFE= github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM= @@ -216,40 +124,21 @@ github.com/philippgille/gokv/test v0.7.0 h1:0wBKnKaFZlSeHxLXcmUJqK//IQGUMeu+o8B8 github.com/philippgille/gokv/test v0.7.0/go.mod h1:TP/VzO/qAoi6njsfKnRpXKno0hRuzD5wsLnHhtUcVkY= github.com/philippgille/gokv/util v0.7.0 h1:5avUK/a3aSj/aWjhHv4/FkqgMon2B7k2BqFgLcR+DYg= github.com/philippgille/gokv/util v0.7.0/go.mod h1:i9KLHbPxGiHLMhkix/CcDQhpPbCkJy5BkW+RKgwDHMo= -github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= -github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= -github.com/prometheus/client_golang v0.9.3-0.20190127221311-3c4408c8b829/go.mod h1:p2iRAGwDERtqlqzRXnrOVns+ignqQo//hLXqYxZYVNs= github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE= github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho= -github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= -github.com/prometheus/client_model v0.0.0-20190115171406-56726106282f/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= -github.com/prometheus/common v0.2.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc= github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8= -github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= -github.com/prometheus/procfs v0.0.0-20190117184657-bf6a532e95b1/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= -github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= github.com/refraction-networking/utls v1.6.6 h1:igFsYBUJPYM8Rno9xUuDoM5GQrVEqY4llzEXOkL43Ig= github.com/refraction-networking/utls v1.6.6/go.mod h1:BC3O4vQzye5hqpmDTWUqi4P5DDhzJfkV1tdqtawQIH0= -github.com/quic-go/quic-go v0.41.0 h1:aD8MmHfgqTURWNJy48IYFg2OnxwHT3JL7ahGs73lb4k= -github.com/quic-go/quic-go v0.41.0/go.mod h1:qCkNjqczPEvgsOnxZ0eCD14lv+B2LHlFAB++CNOh9hA= -github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= -github.com/refraction-networking/utls v1.6.3 h1:MFOfRN35sSx6K5AZNIoESsBuBxS2LCgRilRIdHb6fDc= -github.com/refraction-networking/utls v1.6.3/go.mod h1:yil9+7qSl+gBwJqztoQseO6Pr3h62pQoY1lXiNR/FPs= github.com/remeh/sizedwaitgroup v1.0.0 h1:VNGGFwNo/R5+MJBf6yrsr110p0m4/OX4S3DCy7Kyl5E= github.com/remeh/sizedwaitgroup v1.0.0/go.mod h1:3j2R4OIe/SeS6YDhICBy22RWjJC5eNCJ1V+9+NVNYlo= -github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= @@ -262,11 +151,8 @@ github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6g github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ= github.com/satori/go.uuid v1.2.0 h1:0uYX9dsZ2yD7q2RtLRtPSdGDWzjeM3TbMJP9utgA0ww= github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= -github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= -github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= -github.com/smartystreets/go-aws-auth v0.0.0-20180515143844-0c1422d1fdb9/go.mod h1:SnhjPscd9TpLiy1LpzGSKh3bXCfxxXuqd9xmQJy3slM= github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0= github.com/spf13/afero v1.11.0 h1:WJQKhtpdm3v2IzqG8VMqrr6Rf3UYpEF239Jy9wNepM8= @@ -280,10 +166,14 @@ github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An github.com/spf13/viper v1.19.0 h1:RWq5SEjt8o25SROyN3z2OrDB9l7RPd3lwTWU8EcEdcI= github.com/spf13/viper v1.19.0/go.mod h1:GQUN9bilAbhU/jgc1bKs99f/suXKeUMct8Adx5+Ntkg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= @@ -294,20 +184,11 @@ github.com/telanflow/cookiejar v0.0.0-20190719062046-114449e86aa5 h1:gTQl5nPlc9B github.com/telanflow/cookiejar v0.0.0-20190719062046-114449e86aa5/go.mod h1:qNgA5MKwTh103SxGTooqZMiKxZTaV9UV3KjN7I7Drig= github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80 h1:nrZ3ySNYwJbSpD6ce9duiP+QkD3JuLCcWkdaehUS/3Y= github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80/go.mod h1:iFyPdL66DjUD96XmzVL3ZntbzcflLnznH0fr99w5VqE= -github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= -github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= -github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE= -github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= -github.com/urfave/cli/v2 v2.27.2 h1:6e0H+AkS+zDckwPCUrZkKX38mRaau4nL2uipkJpbkcI= -github.com/urfave/cli/v2 v2.27.2/go.mod h1:g0+79LmHHATl7DAcHO99smiR/T7uGLw84w8Y42x+4eM= -github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4= -github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= -go.opencensus.io v0.20.1/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk= go.opentelemetry.io/otel v1.28.0 h1:/SqNcYk+idO0CxKEUOtKQClMK/MimZihKYMruSMViUo= go.opentelemetry.io/otel v1.28.0/go.mod h1:q68ijF8Fc8CnMHKyzqL6akLO46ePnjkgfIMIjUIX9z4= go.opentelemetry.io/otel/metric v1.28.0 h1:f0HGvSl1KRAU1DLgLGFjrwVyismPlnuU6JD6bOeuA5Q= @@ -322,28 +203,15 @@ go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.9.0 h1:7fIwc/ZtS0q++VgcfqFDxSBZVv/Xo49/SYnDFupUwlI= go.uber.org/multierr v1.9.0/go.mod h1:X2jQV1h+kxSjClGpnseKVIxpmcjrj7MNnI0bnlfKTVQ= -golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= -golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc= -golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= -golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.24.0 h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI= golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM= -golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= -golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= -golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 h1:vr/HnozRka3pE4EsMEg1lgkXJkTFJCVUX+S/ZT6wYzM= +golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842/go.mod h1:XtvwrStGgqGPLc4cjQfWqZHG1YFdYs6swckp8vpsjnc= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190125091013-d26f9f9a57f3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= @@ -351,24 +219,14 @@ golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= -golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= -golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -391,29 +249,14 @@ golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= -golang.org/x/tools v0.0.0-20180828015842-6cd1fcedba52/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= -golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= -golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg= -golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/api v0.3.1/go.mod h1:6wY9I6uQWHQ8EM57III9mq/AjF+i8G65rmVagqKMtkk= -google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= -google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= -google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs= -google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= -gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= @@ -423,11 +266,8 @@ gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -honnef.co/go/tools v0.0.0-20180728063816-88497007e858/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= mvdan.cc/xurls/v2 v2.5.0 h1:lyBNOm8Wo71UknhUs4QTFUNNMyxy2JEIaKKo0RWOh+8= mvdan.cc/xurls/v2 v2.5.0/go.mod h1:yQgaGQ1rFtJUzkmKiHYSSfuQxqfYmd//X6PxvholpeE= diff --git a/internal/pkg/crawl/config.go b/internal/pkg/crawl/config.go index de9f6620..c8794dba 100644 --- a/internal/pkg/crawl/config.go +++ b/internal/pkg/crawl/config.go @@ -65,6 +65,7 @@ type Crawl struct { Seencheck bool Workers int RandomLocalIP bool + MinSpaceRequired int // Cookie-related settings CookieFile string @@ -215,6 +216,8 @@ func GenerateCrawlConfig(config *config.Config) (*Crawl, error) { c.CaptureAlternatePages = config.CaptureAlternatePages c.ExcludedStrings = config.ExcludeString + c.MinSpaceRequired = config.MinSpaceRequired + // WARC settings c.WARCPrefix = config.WARCPrefix c.WARCOperator = config.WARCOperator From 8de40c4f2677f6f8a723ec4acc85d1fea79d400b Mon Sep 17 00:00:00 2001 From: Thomas FOUBERT Date: Fri, 19 Jul 2024 12:23:08 -0400 Subject: [PATCH 09/12] report and adapt 7b7d7d7 changes --- cmd/cmd.go | 1 + cmd/get.go | 6 +++--- config/config.go | 16 ++++++++++++++++ internal/pkg/crawl/api.go | 4 +++- internal/pkg/crawl/config.go | 3 ++- internal/pkg/log/log.go | 4 ++-- 6 files changed, 27 insertions(+), 7 deletions(-) diff --git a/cmd/cmd.go b/cmd/cmd.go index 197ddfab..62cd5526 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -42,6 +42,7 @@ func Run() error { // Define flags and configuration settings rootCmd.PersistentFlags().String("log-level", "info", "stdout log level (debug, info, warn, error)") rootCmd.PersistentFlags().String("config-file", "", "config file (default is $HOME/zeno-config.yaml)") + rootCmd.PersistentFlags().Bool("no-stdout-log", false, "disable stdout logging.") rootCmd.PersistentFlags().Bool("consul-config", false, "Use this flag to enable consul config support") rootCmd.PersistentFlags().String("consul-address", "", "The consul address used to retreive config") rootCmd.PersistentFlags().String("consul-path", "", "The full Consul K/V path where the config is stored") diff --git a/cmd/get.go b/cmd/get.go index 0eefd129..06679f7c 100644 --- a/cmd/get.go +++ b/cmd/get.go @@ -42,11 +42,10 @@ func getCMDsFlags(getCmd *cobra.Command) { getCmd.PersistentFlags().Bool("local-seencheck", false, "Simple local seencheck to avoid re-crawling of URIs.") getCmd.PersistentFlags().Bool("json", false, "Output logs in JSON") getCmd.PersistentFlags().Bool("debug", false, "") - getCmd.PersistentFlags().Bool("live-stats", false, "") getCmd.PersistentFlags().Bool("api", false, "") getCmd.PersistentFlags().String("api-port", "9443", "Port to listen on for the API.") - getCmd.PersistentFlags().Bool("prometheus", false, "Export metrics in Prometheus format, using this setting imply --api.") - getCmd.PersistentFlags().String("prometheus-prefix", "String used as a prefix for the exported Prometheus metrics.", "zeno:") + getCmd.PersistentFlags().Bool("prometheus", false, "Export metrics in Prometheus format. (implies --api)") + getCmd.PersistentFlags().String("prometheus-prefix", "zeno:", "String used as a prefix for the exported Prometheus metrics.") getCmd.PersistentFlags().Int("max-redirect", 20, "Specifies the maximum number of redirections to follow for a resource.") getCmd.PersistentFlags().Int("max-retry", 20, "Number of retry if error happen when executing HTTP request.") getCmd.PersistentFlags().Int("http-timeout", 30, "Number of seconds to wait before timing out a request.") @@ -81,6 +80,7 @@ func getCMDsFlags(getCmd *cobra.Command) { getCmd.PersistentFlags().String("cdx-cookie", "", "Pass custom cookie during CDX requests. Example: 'cdx_auth_token=test_value'") // Logging flags + getCmd.PersistentFlags().Bool("live-stats", false, "Enable live stats but disable logging. (implies --no-stdout-log)") getCmd.PersistentFlags().String("log-file-output-dir", "./jobs/", "Directory to write log files to.") getCmd.PersistentFlags().String("es-url", "", "comma-separated ElasticSearch URL to use for indexing crawl logs.") getCmd.PersistentFlags().String("es-user", "", "ElasticSearch username to use for indexing crawl logs.") diff --git a/config/config.go b/config/config.go index e57ab150..62c4b289 100644 --- a/config/config.go +++ b/config/config.go @@ -79,6 +79,7 @@ type Config struct { HQRateLimitSendBack bool `mapstructure:"hq-rate-limiting-send-back"` // Get flags (Logging flags) + NoStdoutLogging bool `mapstructure:"no-stdout-log"` LogFileOutputDir string `mapstructure:"log-file-output-dir"` ElasticSearchURLs []string `mapstructure:"es-url"` ElasticSearchUsername string `mapstructure:"es-user"` @@ -140,6 +141,9 @@ func InitConfig() error { } } + // This function is used to bring logic to the flags when needed (e.g. live-stats) + handleFlagsEdgeCases() + // Unmarshal the config into the Config struct err = viper.Unmarshal(config) }) @@ -163,3 +167,15 @@ func GetConfig() *Config { } return cfg } + +func handleFlagsEdgeCases() { + if viper.GetBool("live-stats") { + // If live-stats is true, set no-stdout-log to true + viper.Set("no-stdout-log", true) + } + + if viper.GetBool("prometheus") { + // If prometheus is true, set no-stdout-log to true + viper.Set("api", true) + } +} diff --git a/internal/pkg/crawl/api.go b/internal/pkg/crawl/api.go index 00df2dad..6d1e6f47 100644 --- a/internal/pkg/crawl/api.go +++ b/internal/pkg/crawl/api.go @@ -50,7 +50,9 @@ func (crawl *Crawl) startAPI() { json.NewEncoder(w).Encode(response) }) - http.HandleFunc("/metrics", setupPrometheus(crawl).ServeHTTP) + if crawl.Prometheus { + http.HandleFunc("/metrics", setupPrometheus(crawl).ServeHTTP) + } http.HandleFunc("/workers", func(w http.ResponseWriter, r *http.Request) { workersState := crawl.GetWorkerState(-1) diff --git a/internal/pkg/crawl/config.go b/internal/pkg/crawl/config.go index c8794dba..68d421a5 100644 --- a/internal/pkg/crawl/config.go +++ b/internal/pkg/crawl/config.go @@ -134,6 +134,7 @@ func GenerateCrawlConfig(config *config.Config) (*Crawl, error) { Prefix: "zeno", }, FileLevel: slog.LevelDebug, + StdoutEnabled: !config.NoStdoutLogging, StdoutLevel: slog.LevelInfo, RotateLogFile: true, RotateElasticSearchIndex: true, @@ -244,7 +245,7 @@ func GenerateCrawlConfig(config *config.Config) (*Crawl, error) { c.Prometheus = config.Prometheus if c.Prometheus { c.API = true - c.PrometheusMetrics = new(PrometheusMetrics) + c.PrometheusMetrics = &PrometheusMetrics{} c.PrometheusMetrics.Prefix = config.PrometheusPrefix } diff --git a/internal/pkg/log/log.go b/internal/pkg/log/log.go index f1fe2ac7..d53ee707 100644 --- a/internal/pkg/log/log.go +++ b/internal/pkg/log/log.go @@ -40,11 +40,11 @@ type Logger struct { type Config struct { FileConfig *LogfileConfig FileLevel slog.Level + StdoutEnabled bool StdoutLevel slog.Level RotateLogFile bool ElasticsearchConfig *ElasticsearchConfig RotateElasticSearchIndex bool - LiveStats bool } // New creates a new Logger instance with the given configuration. @@ -61,7 +61,7 @@ func New(cfg Config) (*Logger, error) { var handlers []slog.Handler // Create stdout handler - if !cfg.LiveStats { + if cfg.StdoutEnabled { stdoutHandler := slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{ Level: cfg.StdoutLevel, }) From d09be8e066ea21a13db60a34d6638130e766bf61 Mon Sep 17 00:00:00 2001 From: Thomas FOUBERT Date: Fri, 19 Jul 2024 14:37:38 -0400 Subject: [PATCH 10/12] chore: aligned Config struct type to save memory --- config/config.go | 105 ++++++++++++++++++++++++----------------------- 1 file changed, 54 insertions(+), 51 deletions(-) diff --git a/config/config.go b/config/config.go index 62c4b289..3b29f707 100644 --- a/config/config.go +++ b/config/config.go @@ -18,73 +18,76 @@ type Config struct { LogLevel string `mapstructure:"log-level"` // Get flags (crawling flags) - UserAgent string `mapstructure:"user-agent"` - Job string `mapstructure:"job"` + UserAgent string `mapstructure:"user-agent"` + Job string `mapstructure:"job"` + Cookies string `mapstructure:"cookies"` + APIPort string `mapstructure:"api-port"` + PrometheusPrefix string `mapstructure:"prometheus-prefix"` + + // Get flags (Proxy flags) + Proxy string `mapstructure:"proxy"` + + // Get flags (WARC flags) + WARCPrefix string `mapstructure:"warc-prefix"` + WARCOperator string `mapstructure:"warc-operator"` + CDXDedupeServer string `mapstructure:"warc-cdx-dedupe-server"` + WARCTempDir string `mapstructure:"warc-temp-dir"` + CDXCookie string `mapstructure:"cdx-cookie"` + + HQAddress string `mapstructure:"hq-address"` + HQKey string `mapstructure:"hq-key"` + HQSecret string `mapstructure:"hq-secret"` + HQProject string `mapstructure:"hq-project"` + HQStrategy string `mapstructure:"hq-strategy"` + LogFileOutputDir string `mapstructure:"log-file-output-dir"` + ElasticSearchUsername string `mapstructure:"es-user"` + ElasticSearchPassword string `mapstructure:"es-password"` + ElasticSearchIndexPrefix string `mapstructure:"es-index-prefix"` + DisableHTMLTag []string `mapstructure:"disable-html-tag"` + ExcludeHosts []string `mapstructure:"exclude-host"` + IncludeHosts []string `mapstructure:"include-host"` + ExcludeString []string `mapstructure:"exclude-string"` + DomainsBypassProxy []string `mapstructure:"bypass-proxy"` + + ElasticSearchURLs []string `mapstructure:"es-url"` WorkersCount int `mapstructure:"workers"` MaxConcurrentAssets int `mapstructure:"max-concurrent-assets"` MaxHops uint `mapstructure:"max-hops"` - Cookies string `mapstructure:"cookies"` - KeepCookies bool `mapstructure:"keep-cookies"` - Headless bool `mapstructure:"headless"` - LocalSeencheck bool `mapstructure:"local-seencheck"` - JSON bool `mapstructure:"json"` - Debug bool `mapstructure:"debug"` - LiveStats bool `mapstructure:"live-stats"` - API bool `mapstructure:"api"` - APIPort string `mapstructure:"api-port"` - Prometheus bool `mapstructure:"prometheus"` - PrometheusPrefix string `mapstructure:"prometheus-prefix"` MaxRedirect int `mapstructure:"max-redirect"` MaxRetry int `mapstructure:"max-retry"` HTTPTimeout int `mapstructure:"http-timeout"` - DomainsCrawl bool `mapstructure:"domains-crawl"` - DisableHTMLTag []string `mapstructure:"disable-html-tag"` - CaptureAlternatePages bool `mapstructure:"capture-alternate-pages"` - ExcludeHosts []string `mapstructure:"exclude-host"` - IncludeHosts []string `mapstructure:"include-host"` MaxConcurrentRequestsPerDomain int `mapstructure:"max-concurrent-per-domain"` ConcurrentSleepLength int `mapstructure:"concurrent-sleep-length"` CrawlTimeLimit int `mapstructure:"crawl-time-limit"` CrawlMaxTimeLimit int `mapstructure:"crawl-max-time-limit"` - ExcludeString []string `mapstructure:"exclude-string"` - RandomLocalIP bool `mapstructure:"random-local-ip"` MinSpaceRequired int `mapstructure:"min-space-required"` - // Get flags (Proxy flags) - Proxy string `mapstructure:"proxy"` - DomainsBypassProxy []string `mapstructure:"bypass-proxy"` - - // Get flags (WARC flags) - WARCPrefix string `mapstructure:"warc-prefix"` - WARCOperator string `mapstructure:"warc-operator"` - CDXDedupeServer string `mapstructure:"warc-cdx-dedupe-server"` - WARCOnDisk bool `mapstructure:"warc-on-disk"` - WARCPoolSize int `mapstructure:"warc-pool-size"` - WARCTempDir string `mapstructure:"warc-temp-dir"` - DisableLocalDedupe bool `mapstructure:"disable-local-dedupe"` - CertValidation bool `mapstructure:"cert-validation"` - DisableAssetsCapture bool `mapstructure:"disable-assets-capture"` - WARCDedupeSize int `mapstructure:"warc-dedupe-size"` - CDXCookie string `mapstructure:"cdx-cookie"` + WARCPoolSize int `mapstructure:"warc-pool-size"` + WARCDedupeSize int `mapstructure:"warc-dedupe-size"` + HQBatchSize int64 `mapstructure:"hq-batch-size"` + KeepCookies bool `mapstructure:"keep-cookies"` + Headless bool `mapstructure:"headless"` + LocalSeencheck bool `mapstructure:"local-seencheck"` + JSON bool `mapstructure:"json"` + Debug bool `mapstructure:"debug"` + LiveStats bool `mapstructure:"live-stats"` + API bool `mapstructure:"api"` + Prometheus bool `mapstructure:"prometheus"` + DomainsCrawl bool `mapstructure:"domains-crawl"` + CaptureAlternatePages bool `mapstructure:"capture-alternate-pages"` + RandomLocalIP bool `mapstructure:"random-local-ip"` + WARCOnDisk bool `mapstructure:"warc-on-disk"` + DisableLocalDedupe bool `mapstructure:"disable-local-dedupe"` + CertValidation bool `mapstructure:"cert-validation"` + DisableAssetsCapture bool `mapstructure:"disable-assets-capture"` // Get flags (Crawl HQ flags) - HQ bool // Special field to check if HQ is enabled depending on the command called - HQAddress string `mapstructure:"hq-address"` - HQKey string `mapstructure:"hq-key"` - HQSecret string `mapstructure:"hq-secret"` - HQProject string `mapstructure:"hq-project"` - HQBatchSize int64 `mapstructure:"hq-batch-size"` - HQContinuousPull bool `mapstructure:"hq-continuous-pull"` - HQStrategy string `mapstructure:"hq-strategy"` - HQRateLimitSendBack bool `mapstructure:"hq-rate-limiting-send-back"` + HQ bool // Special field to check if HQ is enabled depending on the command called + HQContinuousPull bool `mapstructure:"hq-continuous-pull"` + HQRateLimitSendBack bool `mapstructure:"hq-rate-limiting-send-back"` // Get flags (Logging flags) - NoStdoutLogging bool `mapstructure:"no-stdout-log"` - LogFileOutputDir string `mapstructure:"log-file-output-dir"` - ElasticSearchURLs []string `mapstructure:"es-url"` - ElasticSearchUsername string `mapstructure:"es-user"` - ElasticSearchPassword string `mapstructure:"es-password"` - ElasticSearchIndexPrefix string `mapstructure:"es-index-prefix"` + NoStdoutLogging bool `mapstructure:"no-stdout-log"` } var ( From e5b29cf78d61ef790c2368d0b3d4ddc15da3eba8 Mon Sep 17 00:00:00 2001 From: Thomas FOUBERT Date: Fri, 19 Jul 2024 14:38:49 -0400 Subject: [PATCH 11/12] chore: removed comments from config --- config/config.go | 110 +++++++++++++++++++++-------------------------- 1 file changed, 48 insertions(+), 62 deletions(-) diff --git a/config/config.go b/config/config.go index 3b29f707..8729ffbc 100644 --- a/config/config.go +++ b/config/config.go @@ -14,41 +14,32 @@ import ( // Config holds all configuration for our program type Config struct { - // Global Flags - LogLevel string `mapstructure:"log-level"` - - // Get flags (crawling flags) - UserAgent string `mapstructure:"user-agent"` - Job string `mapstructure:"job"` - Cookies string `mapstructure:"cookies"` - APIPort string `mapstructure:"api-port"` - PrometheusPrefix string `mapstructure:"prometheus-prefix"` - - // Get flags (Proxy flags) - Proxy string `mapstructure:"proxy"` - - // Get flags (WARC flags) - WARCPrefix string `mapstructure:"warc-prefix"` - WARCOperator string `mapstructure:"warc-operator"` - CDXDedupeServer string `mapstructure:"warc-cdx-dedupe-server"` - WARCTempDir string `mapstructure:"warc-temp-dir"` - CDXCookie string `mapstructure:"cdx-cookie"` - - HQAddress string `mapstructure:"hq-address"` - HQKey string `mapstructure:"hq-key"` - HQSecret string `mapstructure:"hq-secret"` - HQProject string `mapstructure:"hq-project"` - HQStrategy string `mapstructure:"hq-strategy"` - LogFileOutputDir string `mapstructure:"log-file-output-dir"` - ElasticSearchUsername string `mapstructure:"es-user"` - ElasticSearchPassword string `mapstructure:"es-password"` - ElasticSearchIndexPrefix string `mapstructure:"es-index-prefix"` - DisableHTMLTag []string `mapstructure:"disable-html-tag"` - ExcludeHosts []string `mapstructure:"exclude-host"` - IncludeHosts []string `mapstructure:"include-host"` - ExcludeString []string `mapstructure:"exclude-string"` - DomainsBypassProxy []string `mapstructure:"bypass-proxy"` - + LogLevel string `mapstructure:"log-level"` + UserAgent string `mapstructure:"user-agent"` + Job string `mapstructure:"job"` + Cookies string `mapstructure:"cookies"` + APIPort string `mapstructure:"api-port"` + PrometheusPrefix string `mapstructure:"prometheus-prefix"` + Proxy string `mapstructure:"proxy"` + WARCPrefix string `mapstructure:"warc-prefix"` + WARCOperator string `mapstructure:"warc-operator"` + CDXDedupeServer string `mapstructure:"warc-cdx-dedupe-server"` + WARCTempDir string `mapstructure:"warc-temp-dir"` + CDXCookie string `mapstructure:"cdx-cookie"` + HQAddress string `mapstructure:"hq-address"` + HQKey string `mapstructure:"hq-key"` + HQSecret string `mapstructure:"hq-secret"` + HQProject string `mapstructure:"hq-project"` + HQStrategy string `mapstructure:"hq-strategy"` + LogFileOutputDir string `mapstructure:"log-file-output-dir"` + ElasticSearchUsername string `mapstructure:"es-user"` + ElasticSearchPassword string `mapstructure:"es-password"` + ElasticSearchIndexPrefix string `mapstructure:"es-index-prefix"` + DisableHTMLTag []string `mapstructure:"disable-html-tag"` + ExcludeHosts []string `mapstructure:"exclude-host"` + IncludeHosts []string `mapstructure:"include-host"` + ExcludeString []string `mapstructure:"exclude-string"` + DomainsBypassProxy []string `mapstructure:"bypass-proxy"` ElasticSearchURLs []string `mapstructure:"es-url"` WorkersCount int `mapstructure:"workers"` MaxConcurrentAssets int `mapstructure:"max-concurrent-assets"` @@ -61,33 +52,28 @@ type Config struct { CrawlTimeLimit int `mapstructure:"crawl-time-limit"` CrawlMaxTimeLimit int `mapstructure:"crawl-max-time-limit"` MinSpaceRequired int `mapstructure:"min-space-required"` - - WARCPoolSize int `mapstructure:"warc-pool-size"` - WARCDedupeSize int `mapstructure:"warc-dedupe-size"` - HQBatchSize int64 `mapstructure:"hq-batch-size"` - KeepCookies bool `mapstructure:"keep-cookies"` - Headless bool `mapstructure:"headless"` - LocalSeencheck bool `mapstructure:"local-seencheck"` - JSON bool `mapstructure:"json"` - Debug bool `mapstructure:"debug"` - LiveStats bool `mapstructure:"live-stats"` - API bool `mapstructure:"api"` - Prometheus bool `mapstructure:"prometheus"` - DomainsCrawl bool `mapstructure:"domains-crawl"` - CaptureAlternatePages bool `mapstructure:"capture-alternate-pages"` - RandomLocalIP bool `mapstructure:"random-local-ip"` - WARCOnDisk bool `mapstructure:"warc-on-disk"` - DisableLocalDedupe bool `mapstructure:"disable-local-dedupe"` - CertValidation bool `mapstructure:"cert-validation"` - DisableAssetsCapture bool `mapstructure:"disable-assets-capture"` - - // Get flags (Crawl HQ flags) - HQ bool // Special field to check if HQ is enabled depending on the command called - HQContinuousPull bool `mapstructure:"hq-continuous-pull"` - HQRateLimitSendBack bool `mapstructure:"hq-rate-limiting-send-back"` - - // Get flags (Logging flags) - NoStdoutLogging bool `mapstructure:"no-stdout-log"` + WARCPoolSize int `mapstructure:"warc-pool-size"` + WARCDedupeSize int `mapstructure:"warc-dedupe-size"` + HQBatchSize int64 `mapstructure:"hq-batch-size"` + KeepCookies bool `mapstructure:"keep-cookies"` + Headless bool `mapstructure:"headless"` + LocalSeencheck bool `mapstructure:"local-seencheck"` + JSON bool `mapstructure:"json"` + Debug bool `mapstructure:"debug"` + LiveStats bool `mapstructure:"live-stats"` + API bool `mapstructure:"api"` + Prometheus bool `mapstructure:"prometheus"` + DomainsCrawl bool `mapstructure:"domains-crawl"` + CaptureAlternatePages bool `mapstructure:"capture-alternate-pages"` + RandomLocalIP bool `mapstructure:"random-local-ip"` + WARCOnDisk bool `mapstructure:"warc-on-disk"` + DisableLocalDedupe bool `mapstructure:"disable-local-dedupe"` + CertValidation bool `mapstructure:"cert-validation"` + DisableAssetsCapture bool `mapstructure:"disable-assets-capture"` + HQ bool // Special field to check if HQ is enabled depending on the command called + HQContinuousPull bool `mapstructure:"hq-continuous-pull"` + HQRateLimitSendBack bool `mapstructure:"hq-rate-limiting-send-back"` + NoStdoutLogging bool `mapstructure:"no-stdout-log"` } var ( From 146c4676a4eef07a0c53e1b637ee4e41e964670b Mon Sep 17 00:00:00 2001 From: Thomas FOUBERT Date: Fri, 19 Jul 2024 17:02:35 -0400 Subject: [PATCH 12/12] feat: add aliases from the old CLI and splitted get commands in separate files --- cmd/get.go | 179 +++++------------------------------------------ cmd/get_hq.go | 57 +++++++++++++++ cmd/get_list.go | 61 ++++++++++++++++ cmd/get_url.go | 62 ++++++++++++++++ config/config.go | 20 ++++++ 5 files changed, 219 insertions(+), 160 deletions(-) create mode 100644 cmd/get_hq.go create mode 100644 cmd/get_list.go create mode 100644 cmd/get_url.go diff --git a/cmd/get.go b/cmd/get.go index 06679f7c..fd9469b8 100644 --- a/cmd/get.go +++ b/cmd/get.go @@ -1,12 +1,6 @@ package cmd import ( - "fmt" - "net/url" - - "github.com/internetarchive/Zeno/internal/pkg/crawl" - "github.com/internetarchive/Zeno/internal/pkg/frontier" - "github.com/sirupsen/logrus" "github.com/spf13/cobra" ) @@ -33,7 +27,7 @@ func getCMDs() *cobra.Command { func getCMDsFlags(getCmd *cobra.Command) { getCmd.PersistentFlags().String("user-agent", "Zeno", "User agent to use when requesting URLs.") getCmd.PersistentFlags().String("job", "", "Job name to use, will determine the path for the persistent queue, seencheck database, and WARC files.") - getCmd.PersistentFlags().Int("workers", 1, "Number of concurrent workers to run.") + getCmd.PersistentFlags().IntP("workers", "w", 1, "Number of concurrent workers to run.") getCmd.PersistentFlags().Int("max-concurrent-assets", 8, "Max number of concurrent assets to fetch PER worker. E.g. if you have 100 workers and this setting at 8, Zeno could do up to 800 concurrent requests at any time.") getCmd.PersistentFlags().Uint("max-hops", 0, "Maximum number of hops to execute.") getCmd.PersistentFlags().String("cookies", "", "File containing cookies that will be used for requests.") @@ -42,7 +36,7 @@ func getCMDsFlags(getCmd *cobra.Command) { getCmd.PersistentFlags().Bool("local-seencheck", false, "Simple local seencheck to avoid re-crawling of URIs.") getCmd.PersistentFlags().Bool("json", false, "Output logs in JSON") getCmd.PersistentFlags().Bool("debug", false, "") - getCmd.PersistentFlags().Bool("api", false, "") + getCmd.PersistentFlags().Bool("api", false, "Enable API") getCmd.PersistentFlags().String("api-port", "9443", "Port to listen on for the API.") getCmd.PersistentFlags().Bool("prometheus", false, "Export metrics in Prometheus format. (implies --api)") getCmd.PersistentFlags().String("prometheus-prefix", "zeno:", "String used as a prefix for the exported Prometheus metrics.") @@ -86,157 +80,22 @@ func getCMDsFlags(getCmd *cobra.Command) { getCmd.PersistentFlags().String("es-user", "", "ElasticSearch username to use for indexing crawl logs.") getCmd.PersistentFlags().String("es-password", "", "ElasticSearch password to use for indexing crawl logs.") getCmd.PersistentFlags().String("es-index-prefix", "zeno", "ElasticSearch index prefix to use for indexing crawl logs. Default is : `zeno`, without `-`") -} - -var getURLCmd = &cobra.Command{ - Use: "url [URL...]", - Short: "Archive given URLs", - Args: cobra.MinimumNArgs(1), - PreRunE: func(cmd *cobra.Command, args []string) error { - if cfg == nil { - return fmt.Errorf("viper config is nil") - } - return nil - }, - RunE: func(cmd *cobra.Command, args []string) error { - // Init crawl using the flags provided - crawl, err := crawl.GenerateCrawlConfig(cfg) - if err != nil { - if crawl != nil && crawl.Log != nil { - crawl.Log.WithFields(logrus.Fields{ - "crawl": crawl, - "err": err.Error(), - }).Error("'get url' exited due to error") - } - return err - } - - // Initialize initial seed list - for _, arg := range args { - input, err := url.Parse(arg) - if err != nil { - crawl.Log.WithFields(logrus.Fields{ - "input_url": arg, - "err": err.Error(), - }).Error("given URL is not a valid input") - return err - } - - crawl.SeedList = append(crawl.SeedList, *frontier.NewItem(input, nil, "seed", 0, "", false)) - } - - // Start crawl - err = crawl.Start() - if err != nil { - crawl.Log.WithFields(logrus.Fields{ - "crawl": crawl, - "err": err.Error(), - }).Error("'get url' Crawl() exited due to error") - return err - } - - crawl.Log.Info("Crawl finished") - return err - }, -} - -var getHQCmd = &cobra.Command{ - Use: "hq", - Short: "Start crawling with the crawl HQ connector.", - PreRunE: func(cmd *cobra.Command, args []string) error { - if cfg == nil { - return fmt.Errorf("viper config is nil") - } - cfg.HQ = true - return nil - }, - RunE: func(cmd *cobra.Command, args []string) error { - // Init crawl using the flags provided - crawl, err := crawl.GenerateCrawlConfig(cfg) - if err != nil { - if crawl != nil && crawl.Log != nil { - crawl.Log.WithFields(logrus.Fields{ - "crawl": crawl, - "err": err.Error(), - }).Error("'get hq' exited due to error") - } - return err - } - - // start crawl - err = crawl.Start() - if err != nil { - logrus.WithFields(logrus.Fields{ - "crawl": crawl, - "err": err.Error(), - }).Error("'get hq' Crawl() exited due to error") - return err - } - - return nil - }, -} - -func getHQCmdFlags(getHQCmd *cobra.Command) { - // Crawl HQ flags - getHQCmd.PersistentFlags().String("hq-address", "", "Crawl HQ address.") - getHQCmd.PersistentFlags().String("hq-key", "", "Crawl HQ key.") - getHQCmd.PersistentFlags().String("hq-secret", "", "Crawl HQ secret.") - getHQCmd.PersistentFlags().String("hq-project", "", "Crawl HQ project.") - getHQCmd.PersistentFlags().Int64("hq-batch-size", 0, "Crawl HQ feeding batch size.") - getHQCmd.PersistentFlags().Bool("hq-continuous-pull", false, "If turned on, the crawler will pull URLs from Crawl HQ continuously.") - getHQCmd.PersistentFlags().String("hq-strategy", "lifo", "Crawl HQ feeding strategy.") - getHQCmd.PersistentFlags().Bool("hq-rate-limiting-send-back", false, "If turned on, the crawler will send back URLs that hit a rate limit to crawl HQ.") -} - -var getListCmd = &cobra.Command{ - Use: "list [FILE]", - Short: "Start crawling with a seed list", - Args: cobra.ExactArgs(1), - PreRunE: func(cmd *cobra.Command, args []string) error { - if cfg == nil { - return fmt.Errorf("viper config is nil") - } - return nil - }, - RunE: func(cmd *cobra.Command, args []string) error { - // Init crawl using the flags provided - crawl, err := crawl.GenerateCrawlConfig(cfg) - if err != nil { - if crawl != nil && crawl.Log != nil { - crawl.Log.WithFields(logrus.Fields{ - "crawl": crawl, - "err": err.Error(), - }).Error("'get hq' exited due to error") - } - return err - } - - // Initialize initial seed list - crawl.SeedList, err = frontier.IsSeedList(args[0]) - if err != nil || len(crawl.SeedList) <= 0 { - logrus.WithFields(logrus.Fields{ - "input": args[0], - "err": err.Error(), - }).Error("This is not a valid input") - return err - } - - logrus.WithFields(logrus.Fields{ - "input": args[0], - "seedsCount": len(crawl.SeedList), - }).Print("Seed list loaded") - - // Start crawl - err = crawl.Start() - if err != nil { - logrus.WithFields(logrus.Fields{ - "crawl": crawl, - "err": err.Error(), - }).Error("Crawl exited due to error") - return err - } - return nil - }, + // Alias support + // As cobra doesn't support aliases natively (couldn't find a way to do it), we have to do it manually + // This is a workaround to allow users to use `--hops` instead of `--max-hops` for example + // Aliases shouldn't be used as proper flags nor declared in the config struct + // Aliases should be marked as deprecated to inform the user base + // Aliases values should be copied to the proper flag in the config/config.go:handleFlagsAliases() function + getCmd.PersistentFlags().Uint("hops", 0, "Maximum number of hops to execute.") + getCmd.PersistentFlags().MarkDeprecated("hops", "use --max-hops instead") + getCmd.PersistentFlags().MarkHidden("hops") + + getCmd.PersistentFlags().Uint("ca", 8, "Max number of concurrent assets to fetch PER worker. E.g. if you have 100 workers and this setting at 8, Zeno could do up to 800 concurrent requests at any time.") + getCmd.PersistentFlags().MarkDeprecated("ca", "use --max-concurrent-assets") + getCmd.PersistentFlags().MarkHidden("ca") + + getCmd.PersistentFlags().Int("msr", 20, "Minimum space required in GB to continue the crawl.") + getCmd.PersistentFlags().MarkDeprecated("msr", "use --min-space-required instead") + getCmd.PersistentFlags().MarkHidden("msr") } diff --git a/cmd/get_hq.go b/cmd/get_hq.go new file mode 100644 index 00000000..764a2a5c --- /dev/null +++ b/cmd/get_hq.go @@ -0,0 +1,57 @@ +package cmd + +import ( + "fmt" + + "github.com/internetarchive/Zeno/internal/pkg/crawl" + "github.com/spf13/cobra" +) + +var getHQCmd = &cobra.Command{ + Use: "hq", + Short: "Start crawling with the crawl HQ connector.", + PreRunE: func(cmd *cobra.Command, args []string) error { + if cfg == nil { + return fmt.Errorf("viper config is nil") + } + cfg.HQ = true + return nil + }, + RunE: func(cmd *cobra.Command, args []string) error { + // Init crawl using the flags provided + crawl, err := crawl.GenerateCrawlConfig(cfg) + if err != nil { + if crawl != nil && crawl.Log != nil { + crawl.Log.WithFields(map[string]interface{}{ + "crawl": crawl, + "err": err.Error(), + }).Error("'get hq' exited due to error") + } + return err + } + + // start crawl + err = crawl.Start() + if err != nil { + crawl.Log.WithFields(map[string]interface{}{ + "crawl": crawl, + "err": err.Error(), + }).Error("'get hq' Crawl() exited due to error") + return err + } + + return nil + }, +} + +func getHQCmdFlags(getHQCmd *cobra.Command) { + // Crawl HQ flags + getHQCmd.PersistentFlags().String("hq-address", "", "Crawl HQ address.") + getHQCmd.PersistentFlags().String("hq-key", "", "Crawl HQ key.") + getHQCmd.PersistentFlags().String("hq-secret", "", "Crawl HQ secret.") + getHQCmd.PersistentFlags().String("hq-project", "", "Crawl HQ project.") + getHQCmd.PersistentFlags().Int64("hq-batch-size", 0, "Crawl HQ feeding batch size.") + getHQCmd.PersistentFlags().Bool("hq-continuous-pull", false, "If turned on, the crawler will pull URLs from Crawl HQ continuously.") + getHQCmd.PersistentFlags().String("hq-strategy", "lifo", "Crawl HQ feeding strategy.") + getHQCmd.PersistentFlags().Bool("hq-rate-limiting-send-back", false, "If turned on, the crawler will send back URLs that hit a rate limit to crawl HQ.") +} diff --git a/cmd/get_list.go b/cmd/get_list.go new file mode 100644 index 00000000..cc3ffb3f --- /dev/null +++ b/cmd/get_list.go @@ -0,0 +1,61 @@ +package cmd + +import ( + "fmt" + + "github.com/internetarchive/Zeno/internal/pkg/crawl" + "github.com/internetarchive/Zeno/internal/pkg/frontier" + "github.com/spf13/cobra" +) + +var getListCmd = &cobra.Command{ + Use: "list [FILE]", + Short: "Start crawling with a seed list", + Args: cobra.ExactArgs(1), + PreRunE: func(cmd *cobra.Command, args []string) error { + if cfg == nil { + return fmt.Errorf("viper config is nil") + } + return nil + }, + RunE: func(cmd *cobra.Command, args []string) error { + // Init crawl using the flags provided + crawl, err := crawl.GenerateCrawlConfig(cfg) + if err != nil { + if crawl != nil && crawl.Log != nil { + crawl.Log.WithFields(map[string]interface{}{ + "crawl": crawl, + "err": err.Error(), + }).Error("'get hq' exited due to error") + } + return err + } + + // Initialize initial seed list + crawl.SeedList, err = frontier.IsSeedList(args[0]) + if err != nil || len(crawl.SeedList) <= 0 { + crawl.Log.WithFields(map[string]interface{}{ + "input": args[0], + "err": err.Error(), + }).Error("This is not a valid input") + return err + } + + crawl.Log.WithFields(map[string]interface{}{ + "input": args[0], + "seedsCount": len(crawl.SeedList), + }).Info("Seed list loaded") + + // Start crawl + err = crawl.Start() + if err != nil { + crawl.Log.WithFields(map[string]interface{}{ + "crawl": crawl, + "err": err.Error(), + }).Error("Crawl exited due to error") + return err + } + + return nil + }, +} diff --git a/cmd/get_url.go b/cmd/get_url.go new file mode 100644 index 00000000..ad3367c8 --- /dev/null +++ b/cmd/get_url.go @@ -0,0 +1,62 @@ +package cmd + +import ( + "fmt" + "net/url" + + "github.com/internetarchive/Zeno/internal/pkg/crawl" + "github.com/internetarchive/Zeno/internal/pkg/frontier" + "github.com/spf13/cobra" +) + +var getURLCmd = &cobra.Command{ + Use: "url [URL...]", + Short: "Archive given URLs", + Args: cobra.MinimumNArgs(1), + PreRunE: func(cmd *cobra.Command, args []string) error { + if cfg == nil { + return fmt.Errorf("viper config is nil") + } + return nil + }, + RunE: func(cmd *cobra.Command, args []string) error { + // Init crawl using the flags provided + crawl, err := crawl.GenerateCrawlConfig(cfg) + if err != nil { + if crawl != nil && crawl.Log != nil { + crawl.Log.WithFields(map[string]interface{}{ + "crawl": crawl, + "err": err.Error(), + }).Error("'get url' exited due to error") + } + return err + } + + // Initialize initial seed list + for _, arg := range args { + input, err := url.Parse(arg) + if err != nil { + crawl.Log.WithFields(map[string]interface{}{ + "input_url": arg, + "err": err.Error(), + }).Error("given URL is not a valid input") + return err + } + + crawl.SeedList = append(crawl.SeedList, *frontier.NewItem(input, nil, "seed", 0, "", false)) + } + + // Start crawl + err = crawl.Start() + if err != nil { + crawl.Log.WithFields(map[string]interface{}{ + "crawl": crawl, + "err": err.Error(), + }).Error("'get url' Crawl() exited due to error") + return err + } + + crawl.Log.Info("Crawl finished") + return err + }, +} diff --git a/config/config.go b/config/config.go index 8729ffbc..f1873f8b 100644 --- a/config/config.go +++ b/config/config.go @@ -133,6 +133,9 @@ func InitConfig() error { // This function is used to bring logic to the flags when needed (e.g. live-stats) handleFlagsEdgeCases() + // This function is used to handle flags aliases (e.g. hops -> max-hops) + handleFlagsAliases() + // Unmarshal the config into the Config struct err = viper.Unmarshal(config) }) @@ -168,3 +171,20 @@ func handleFlagsEdgeCases() { viper.Set("api", true) } } + +func handleFlagsAliases() { + // For each flag we want to alias, we check if the original flag is at default and if the alias is not + // If so, we set the original flag to the value of the alias + + if viper.GetUint("hops") != 0 && viper.GetUint("max-hops") == 0 { + viper.Set("max-hops", viper.GetUint("hops")) + } + + if viper.GetInt("ca") != 8 && viper.GetInt("max-concurrent-assets") == 8 { + viper.Set("max-concurrent-assets", viper.GetInt("ca")) + } + + if viper.GetInt("msr") != 20 && viper.GetInt("min-space-required") == 20 { + viper.Set("min-space-required", viper.GetInt("msr")) + } +}