From f3eafd058150f4fdc7df182014f8a94f9f6005d8 Mon Sep 17 00:00:00 2001 From: Salim Afiune Maya Date: Mon, 16 Dec 2024 05:47:27 -0800 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20verify=20github=20connecti?= =?UTF-8?q?on=20only=20once=20(#4980)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a user scans a single GH repository, this was not a problem, but for users that scan an entire organization with thousands of repositories, we were creating a connection for each asset and validating it thousands of times. This changes the behavior to verify the connection only once which will save us thousands of GH API requests. --------- Signed-off-by: Salim Afiune Maya --- providers/github/connection/connection.go | 141 ++++++++++++++-------- providers/github/provider/provider.go | 21 +++- 2 files changed, 108 insertions(+), 54 deletions(-) diff --git a/providers/github/connection/connection.go b/providers/github/connection/connection.go index c974ef0b0..37525d8e9 100644 --- a/providers/github/connection/connection.go +++ b/providers/github/connection/connection.go @@ -15,6 +15,7 @@ import ( "github.com/cockroachdb/errors" "github.com/google/go-github/v67/github" "github.com/hashicorp/go-retryablehttp" + "github.com/mitchellh/hashstructure/v2" "github.com/rs/zerolog" "github.com/rs/zerolog/log" "go.mondoo.com/cnquery/v11/logger/zerologadapter" @@ -25,6 +26,7 @@ import ( ) const ( + OPTION_TOKEN = "token" OPTION_REPOS = "repos" OPTION_REPOS_EXCLUDE = "repos-exclude" OPTION_APP_ID = "app-id" @@ -38,25 +40,75 @@ type GithubConnection struct { asset *inventory.Asset client *github.Client ctx context.Context + + // Used to avoid verifying a client with the same options more than once + Hash uint64 +} + +type githubConnectionOptions struct { + // Maps to OPTION_ENTERPRISE_URL + EnterpriseURL string + + // Maps to OPTION_APP_ID + AppID string + // Maps to OPTION_APP_INSTALLATION_ID + AppInstallationID string + // Maps to OPTION_APP_PRIVATE_KEY + AppPrivateKeyFile string + AppPrivateKey []byte + // Maps to OPTION_TOKEN or the environment variable GITHUB_TOKEN + Token string +} + +func connectionOptionsFromConfigOptions(conf *inventory.Config) (opts githubConnectionOptions) { + if conf == nil { + return + } + + opts.AppID = conf.Options[OPTION_APP_ID] + opts.AppInstallationID = conf.Options[OPTION_APP_INSTALLATION_ID] + opts.AppPrivateKeyFile = conf.Options[OPTION_APP_PRIVATE_KEY] + opts.EnterpriseURL = conf.Options[OPTION_ENTERPRISE_URL] + opts.Token = conf.Options[OPTION_TOKEN] + + if opts.Token == "" { + opts.Token = os.Getenv("GITHUB_TOKEN") + } + + for _, cred := range conf.Credentials { + switch cred.Type { + + case vault.CredentialType_private_key: + if opts.AppPrivateKeyFile != "" { + opts.AppPrivateKey = cred.Secret + } + + case vault.CredentialType_password: + if opts.Token != "" { + opts.Token = string(cred.Secret) + } + } + } + + return } func NewGithubConnection(id uint32, asset *inventory.Asset) (*GithubConnection, error) { - conf := asset.Connections[0] + connectionOpts := connectionOptionsFromConfigOptions(asset.Connections[0]) var client *github.Client var err error - appIdStr := conf.Options[OPTION_APP_ID] - if appIdStr != "" { - client, err = newGithubAppClient(conf) + if connectionOpts.AppID != "" { + client, err = newGithubAppClient(connectionOpts) } else { - client, err = newGithubTokenClient(conf) + client, err = newGithubTokenClient(connectionOpts) } if err != nil { return nil, err } - if enterpriseUrl := conf.Options[OPTION_ENTERPRISE_URL]; enterpriseUrl != "" { - parsedUrl, err := url.Parse(enterpriseUrl) + if connectionOpts.EnterpriseURL != "" { + parsedUrl, err := url.Parse(connectionOpts.EnterpriseURL) if err != nil { return nil, err } @@ -73,20 +125,15 @@ func NewGithubConnection(id uint32, asset *inventory.Asset) (*GithubConnection, // (default behaviour is to send fake 403 response bypassing the retry logic) ctx := context.WithValue(context.Background(), github.SleepUntilPrimaryRateLimitResetWhenRateLimited, true) - // perform a quick call to verify the token's validity. - // @afiune do we need to validate the token for every connection? can this be a "once" operation? - _, resp, err := client.Meta.Zen(ctx) - if err != nil { - if resp != nil && resp.StatusCode == 401 { - return nil, errors.New("invalid GitHub token provided. check the value passed with the --token flag or the GITHUB_TOKEN environment variable") - } - return nil, err - } + // store the hash of the config options used to generate this client + hash, err := hashstructure.Hash(connectionOpts, hashstructure.FormatV2, nil) + return &GithubConnection{ Connection: plugin.NewConnection(id, asset), asset: asset, client: client, ctx: ctx, + Hash: hash, }, nil } @@ -106,38 +153,42 @@ func (c *GithubConnection) Context() context.Context { return c.ctx } -func newGithubAppClient(conf *inventory.Config) (*github.Client, error) { - appIdStr := conf.Options[OPTION_APP_ID] - if appIdStr == "" { +func (c *GithubConnection) Verify() error { + // perform a quick call to verify the token's validity. + _, resp, err := c.client.Meta.Zen(c.ctx) + if err != nil { + if resp != nil && resp.StatusCode == 401 { + return errors.New( + "invalid GitHub token provided. check the value passed with the --token flag or the GITHUB_TOKEN environment variable", + ) + } + return err + } + return nil +} + +func newGithubAppClient(opts githubConnectionOptions) (*github.Client, error) { + if opts.AppID == "" { return nil, errors.New("app-id is required for GitHub App authentication") } - appId, err := strconv.ParseInt(appIdStr, 10, 32) + appId, err := strconv.ParseInt(opts.AppID, 10, 32) if err != nil { return nil, err } - appInstallationIdStr := conf.Options[OPTION_APP_INSTALLATION_ID] - if appInstallationIdStr == "" { + if opts.AppInstallationID == "" { return nil, errors.New("app-installation-id is required for GitHub App authentication") } - appInstallationId, err := strconv.ParseInt(appInstallationIdStr, 10, 32) + appInstallationId, err := strconv.ParseInt(opts.AppInstallationID, 10, 32) if err != nil { return nil, err } var itr *ghinstallation.Transport - if pk := conf.Options[OPTION_APP_PRIVATE_KEY]; pk != "" { - itr, err = ghinstallation.NewKeyFromFile(http.DefaultTransport, appId, appInstallationId, pk) + if opts.AppPrivateKeyFile != "" { + itr, err = ghinstallation.NewKeyFromFile(http.DefaultTransport, appId, appInstallationId, opts.AppPrivateKeyFile) } else { - for _, cred := range conf.Credentials { - switch cred.Type { - case vault.CredentialType_private_key: - itr, err = ghinstallation.New(http.DefaultTransport, appId, appInstallationId, cred.Secret) - if err != nil { - return nil, err - } - } - } + itr, err = ghinstallation.New(http.DefaultTransport, appId, appInstallationId, opts.AppPrivateKey) } if err != nil { return nil, err @@ -150,31 +201,15 @@ func newGithubAppClient(conf *inventory.Config) (*github.Client, error) { return github.NewClient(newGithubRetryableClient(&http.Client{Transport: itr})), nil } -func newGithubTokenClient(conf *inventory.Config) (*github.Client, error) { - token := "" - for i := range conf.Credentials { - cred := conf.Credentials[i] - switch cred.Type { - case vault.CredentialType_password: - token = string(cred.Secret) - } - } - - if token == "" { - token = conf.Options["token"] - if token == "" { - token = os.Getenv("GITHUB_TOKEN") - } - } - +func newGithubTokenClient(opts githubConnectionOptions) (*github.Client, error) { // if we still have no token, error out - if token == "" { + if opts.Token == "" { return nil, errors.New("a valid GitHub token is required, pass --token '' or set GITHUB_TOKEN environment variable") } ctx := context.Background() ts := oauth2.StaticTokenSource( - &oauth2.Token{AccessToken: token}, + &oauth2.Token{AccessToken: opts.Token}, ) tc := oauth2.NewClient(ctx, ts) return github.NewClient(newGithubRetryableClient(tc)), nil diff --git a/providers/github/provider/provider.go b/providers/github/provider/provider.go index 25cdfdeaa..9637bf726 100644 --- a/providers/github/provider/provider.go +++ b/providers/github/provider/provider.go @@ -6,15 +6,18 @@ package provider import ( "context" "errors" + "fmt" "os" "strings" "time" + "github.com/rs/zerolog/log" "go.mondoo.com/cnquery/v11/llx" "go.mondoo.com/cnquery/v11/logger" "go.mondoo.com/cnquery/v11/providers-sdk/v1/inventory" "go.mondoo.com/cnquery/v11/providers-sdk/v1/plugin" "go.mondoo.com/cnquery/v11/providers-sdk/v1/upstream" + "go.mondoo.com/cnquery/v11/providers-sdk/v1/util/memoize" "go.mondoo.com/cnquery/v11/providers-sdk/v1/vault" "go.mondoo.com/cnquery/v11/providers/github/connection" "go.mondoo.com/cnquery/v11/providers/github/resources" @@ -22,13 +25,20 @@ import ( const ConnectionType = "github" +var ( + cacheExpirationTime = 24 * time.Hour + cacheCleanupTime = 48 * time.Hour +) + type Service struct { *plugin.Service + *memoize.Memoizer } func Init() *Service { return &Service{ - Service: plugin.NewService(), + plugin.NewService(), + memoize.NewMemoizer(cacheExpirationTime, cacheCleanupTime), } } @@ -163,6 +173,15 @@ func (s *Service) connect(req *plugin.ConnectReq, callback plugin.ProviderCallba return nil, err } + // verify the connection only once + _, err, _ = s.Memoize(fmt.Sprintf("conn_%d", conn.Hash), func() (interface{}, error) { + log.Trace().Msg("verifying github connection client") + err := conn.Verify() + return nil, err + }) + if err != nil { + return nil, err + } var upstream *upstream.UpstreamClient if req.Upstream != nil && !req.Upstream.Incognito { upstream, err = req.Upstream.InitClient(context.Background())