Skip to content

Commit

Permalink
⚡️ verify github connection only once (#4980)
Browse files Browse the repository at this point in the history
When a user scans a single GH repository, this was not a problem, but
for users that scan an entire organization with thousands of
repositories, we were creating a connection for each asset and
validating it thousands of times.

This changes the behavior to verify the connection only once which will
save us thousands of GH API requests.

---------

Signed-off-by: Salim Afiune Maya <[email protected]>
  • Loading branch information
afiune authored Dec 16, 2024
1 parent fee2739 commit f3eafd0
Show file tree
Hide file tree
Showing 2 changed files with 108 additions and 54 deletions.
141 changes: 88 additions & 53 deletions providers/github/connection/connection.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/cockroachdb/errors"
"github.com/google/go-github/v67/github"
"github.com/hashicorp/go-retryablehttp"
"github.com/mitchellh/hashstructure/v2"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
"go.mondoo.com/cnquery/v11/logger/zerologadapter"
Expand All @@ -25,6 +26,7 @@ import (
)

const (
OPTION_TOKEN = "token"
OPTION_REPOS = "repos"
OPTION_REPOS_EXCLUDE = "repos-exclude"
OPTION_APP_ID = "app-id"
Expand All @@ -38,25 +40,75 @@ type GithubConnection struct {
asset *inventory.Asset
client *github.Client
ctx context.Context

// Used to avoid verifying a client with the same options more than once
Hash uint64
}

type githubConnectionOptions struct {
// Maps to OPTION_ENTERPRISE_URL
EnterpriseURL string

// Maps to OPTION_APP_ID
AppID string
// Maps to OPTION_APP_INSTALLATION_ID
AppInstallationID string
// Maps to OPTION_APP_PRIVATE_KEY
AppPrivateKeyFile string
AppPrivateKey []byte
// Maps to OPTION_TOKEN or the environment variable GITHUB_TOKEN
Token string
}

func connectionOptionsFromConfigOptions(conf *inventory.Config) (opts githubConnectionOptions) {
if conf == nil {
return
}

opts.AppID = conf.Options[OPTION_APP_ID]
opts.AppInstallationID = conf.Options[OPTION_APP_INSTALLATION_ID]
opts.AppPrivateKeyFile = conf.Options[OPTION_APP_PRIVATE_KEY]
opts.EnterpriseURL = conf.Options[OPTION_ENTERPRISE_URL]
opts.Token = conf.Options[OPTION_TOKEN]

if opts.Token == "" {
opts.Token = os.Getenv("GITHUB_TOKEN")
}

for _, cred := range conf.Credentials {
switch cred.Type {

case vault.CredentialType_private_key:
if opts.AppPrivateKeyFile != "" {
opts.AppPrivateKey = cred.Secret
}

case vault.CredentialType_password:
if opts.Token != "" {
opts.Token = string(cred.Secret)
}
}
}

return
}

func NewGithubConnection(id uint32, asset *inventory.Asset) (*GithubConnection, error) {
conf := asset.Connections[0]
connectionOpts := connectionOptionsFromConfigOptions(asset.Connections[0])

var client *github.Client
var err error
appIdStr := conf.Options[OPTION_APP_ID]
if appIdStr != "" {
client, err = newGithubAppClient(conf)
if connectionOpts.AppID != "" {
client, err = newGithubAppClient(connectionOpts)
} else {
client, err = newGithubTokenClient(conf)
client, err = newGithubTokenClient(connectionOpts)
}
if err != nil {
return nil, err
}

if enterpriseUrl := conf.Options[OPTION_ENTERPRISE_URL]; enterpriseUrl != "" {
parsedUrl, err := url.Parse(enterpriseUrl)
if connectionOpts.EnterpriseURL != "" {
parsedUrl, err := url.Parse(connectionOpts.EnterpriseURL)
if err != nil {
return nil, err
}
Expand All @@ -73,20 +125,15 @@ func NewGithubConnection(id uint32, asset *inventory.Asset) (*GithubConnection,
// (default behaviour is to send fake 403 response bypassing the retry logic)
ctx := context.WithValue(context.Background(), github.SleepUntilPrimaryRateLimitResetWhenRateLimited, true)

// perform a quick call to verify the token's validity.
// @afiune do we need to validate the token for every connection? can this be a "once" operation?
_, resp, err := client.Meta.Zen(ctx)
if err != nil {
if resp != nil && resp.StatusCode == 401 {
return nil, errors.New("invalid GitHub token provided. check the value passed with the --token flag or the GITHUB_TOKEN environment variable")
}
return nil, err
}
// store the hash of the config options used to generate this client
hash, err := hashstructure.Hash(connectionOpts, hashstructure.FormatV2, nil)

return &GithubConnection{
Connection: plugin.NewConnection(id, asset),
asset: asset,
client: client,
ctx: ctx,
Hash: hash,
}, nil
}

Expand All @@ -106,38 +153,42 @@ func (c *GithubConnection) Context() context.Context {
return c.ctx
}

func newGithubAppClient(conf *inventory.Config) (*github.Client, error) {
appIdStr := conf.Options[OPTION_APP_ID]
if appIdStr == "" {
func (c *GithubConnection) Verify() error {
// perform a quick call to verify the token's validity.
_, resp, err := c.client.Meta.Zen(c.ctx)
if err != nil {
if resp != nil && resp.StatusCode == 401 {
return errors.New(
"invalid GitHub token provided. check the value passed with the --token flag or the GITHUB_TOKEN environment variable",
)
}
return err
}
return nil
}

func newGithubAppClient(opts githubConnectionOptions) (*github.Client, error) {
if opts.AppID == "" {
return nil, errors.New("app-id is required for GitHub App authentication")
}
appId, err := strconv.ParseInt(appIdStr, 10, 32)
appId, err := strconv.ParseInt(opts.AppID, 10, 32)
if err != nil {
return nil, err
}

appInstallationIdStr := conf.Options[OPTION_APP_INSTALLATION_ID]
if appInstallationIdStr == "" {
if opts.AppInstallationID == "" {
return nil, errors.New("app-installation-id is required for GitHub App authentication")
}
appInstallationId, err := strconv.ParseInt(appInstallationIdStr, 10, 32)
appInstallationId, err := strconv.ParseInt(opts.AppInstallationID, 10, 32)
if err != nil {
return nil, err
}

var itr *ghinstallation.Transport
if pk := conf.Options[OPTION_APP_PRIVATE_KEY]; pk != "" {
itr, err = ghinstallation.NewKeyFromFile(http.DefaultTransport, appId, appInstallationId, pk)
if opts.AppPrivateKeyFile != "" {
itr, err = ghinstallation.NewKeyFromFile(http.DefaultTransport, appId, appInstallationId, opts.AppPrivateKeyFile)
} else {
for _, cred := range conf.Credentials {
switch cred.Type {
case vault.CredentialType_private_key:
itr, err = ghinstallation.New(http.DefaultTransport, appId, appInstallationId, cred.Secret)
if err != nil {
return nil, err
}
}
}
itr, err = ghinstallation.New(http.DefaultTransport, appId, appInstallationId, opts.AppPrivateKey)
}
if err != nil {
return nil, err
Expand All @@ -150,31 +201,15 @@ func newGithubAppClient(conf *inventory.Config) (*github.Client, error) {
return github.NewClient(newGithubRetryableClient(&http.Client{Transport: itr})), nil
}

func newGithubTokenClient(conf *inventory.Config) (*github.Client, error) {
token := ""
for i := range conf.Credentials {
cred := conf.Credentials[i]
switch cred.Type {
case vault.CredentialType_password:
token = string(cred.Secret)
}
}

if token == "" {
token = conf.Options["token"]
if token == "" {
token = os.Getenv("GITHUB_TOKEN")
}
}

func newGithubTokenClient(opts githubConnectionOptions) (*github.Client, error) {
// if we still have no token, error out
if token == "" {
if opts.Token == "" {
return nil, errors.New("a valid GitHub token is required, pass --token '<yourtoken>' or set GITHUB_TOKEN environment variable")
}

ctx := context.Background()
ts := oauth2.StaticTokenSource(
&oauth2.Token{AccessToken: token},
&oauth2.Token{AccessToken: opts.Token},
)
tc := oauth2.NewClient(ctx, ts)
return github.NewClient(newGithubRetryableClient(tc)), nil
Expand Down
21 changes: 20 additions & 1 deletion providers/github/provider/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,29 +6,39 @@ package provider
import (
"context"
"errors"
"fmt"
"os"
"strings"
"time"

"github.com/rs/zerolog/log"
"go.mondoo.com/cnquery/v11/llx"
"go.mondoo.com/cnquery/v11/logger"
"go.mondoo.com/cnquery/v11/providers-sdk/v1/inventory"
"go.mondoo.com/cnquery/v11/providers-sdk/v1/plugin"
"go.mondoo.com/cnquery/v11/providers-sdk/v1/upstream"
"go.mondoo.com/cnquery/v11/providers-sdk/v1/util/memoize"
"go.mondoo.com/cnquery/v11/providers-sdk/v1/vault"
"go.mondoo.com/cnquery/v11/providers/github/connection"
"go.mondoo.com/cnquery/v11/providers/github/resources"
)

const ConnectionType = "github"

var (
cacheExpirationTime = 24 * time.Hour
cacheCleanupTime = 48 * time.Hour
)

type Service struct {
*plugin.Service
*memoize.Memoizer
}

func Init() *Service {
return &Service{
Service: plugin.NewService(),
plugin.NewService(),
memoize.NewMemoizer(cacheExpirationTime, cacheCleanupTime),
}
}

Expand Down Expand Up @@ -163,6 +173,15 @@ func (s *Service) connect(req *plugin.ConnectReq, callback plugin.ProviderCallba
return nil, err
}

// verify the connection only once
_, err, _ = s.Memoize(fmt.Sprintf("conn_%d", conn.Hash), func() (interface{}, error) {
log.Trace().Msg("verifying github connection client")
err := conn.Verify()
return nil, err
})
if err != nil {
return nil, err
}
var upstream *upstream.UpstreamClient
if req.Upstream != nil && !req.Upstream.Incognito {
upstream, err = req.Upstream.InitClient(context.Background())
Expand Down

0 comments on commit f3eafd0

Please sign in to comment.