From 0b262a08bc822543270e5906d62c7c76f06c29ef Mon Sep 17 00:00:00 2001 From: Malte Poll <1780588+malt3@users.noreply.github.com> Date: Tue, 20 Jun 2023 12:02:31 +0200 Subject: [PATCH] cloud: fix discovery of GCP nodes across multiple zones (#1943) --- internal/cloud/gcp/gcp.go | 118 +++++++++++++ internal/cloud/gcp/gcp_test.go | 288 +++++++++++++++++++++++++++++++- internal/cloud/gcp/interface.go | 4 + internal/cloud/gcp/wrappers.go | 16 ++ 4 files changed, 418 insertions(+), 8 deletions(-) diff --git a/internal/cloud/gcp/gcp.go b/internal/cloud/gcp/gcp.go index 9e4729b87d..f4be80f9d5 100644 --- a/internal/cloud/gcp/gcp.go +++ b/internal/cloud/gcp/gcp.go @@ -23,6 +23,8 @@ import ( "path" "regexp" "strings" + "sync" + "time" compute "cloud.google.com/go/compute/apiv1" "cloud.google.com/go/compute/apiv1/computepb" @@ -38,6 +40,8 @@ import ( const ( // tagUsage is a label key used to indicate the use of the resource. tagUsage = "constellation-use" + // maxCacheAgeInvariantResource is the maximum age of cached metadata for invariant resources. + maxCacheAgeInvariantResource = 24 * time.Hour // 1 day ) var ( @@ -52,8 +56,18 @@ type Cloud struct { imds imdsAPI instanceAPI instanceAPI subnetAPI subnetAPI + zoneAPI zoneAPI closers []func() error + + // cached metadata + cacheMux sync.Mutex + regionCache string + regionCacheTime time.Time + zoneCache map[string]struct { + zones []string + zoneCacheTime time.Time + } } // New creates and initializes Cloud. @@ -85,12 +99,19 @@ func New(ctx context.Context) (cloud *Cloud, err error) { } closers = append(closers, subnetAPI.Close) + zoneAPI, err := compute.NewZonesRESTClient(ctx) + if err != nil { + return nil, err + } + closers = append(closers, zoneAPI.Close) + return &Cloud{ imds: imds.NewClient(nil), instanceAPI: &instanceClient{insAPI}, globalForwardingRulesAPI: &globalForwardingRulesClient{globalForwardingRulesAPI}, regionalForwardingRulesAPI: ®ionalForwardingRulesClient{regionalForwardingRulesAPI}, subnetAPI: subnetAPI, + zoneAPI: &zoneClient{zoneAPI}, closers: closers, }, nil } @@ -192,6 +213,7 @@ func (c *Cloud) getRegionalForwardingRule(ctx context.Context, project, uid, reg } // List retrieves all instances belonging to the current constellation. +// On GCP, this is done by listing all instances in a region by requesting all instances in each zone. func (c *Cloud) List(ctx context.Context) ([]metadata.InstanceMetadata, error) { project, zone, instanceName, err := c.retrieveInstanceInfo() if err != nil { @@ -202,8 +224,32 @@ func (c *Cloud) List(ctx context.Context) ([]metadata.InstanceMetadata, error) { return nil, err } + region, err := c.region() + if err != nil { + return nil, fmt.Errorf("getting region: %w", err) + } + + zones, err := c.zones(ctx, project, region) + if err != nil { + return nil, fmt.Errorf("getting zones: %w", err) + } + + var instances []metadata.InstanceMetadata + for _, zone := range zones { + zoneInstances, err := c.listInZone(ctx, project, zone, uid) + if err != nil { + return nil, fmt.Errorf("listing instances in zone %s: %w", zone, err) + } + instances = append(instances, zoneInstances...) + } + return instances, nil +} + +// listInZone retrieves all instances belonging to the current constellation in a given zone. +func (c *Cloud) listInZone(ctx context.Context, project, zone, uid string) ([]metadata.InstanceMetadata, error) { var instances []metadata.InstanceMetadata var resp *computepb.Instance + var err error iter := c.instanceAPI.List(ctx, &computepb.ListInstancesRequest{ Filter: proto.String(fmt.Sprintf("labels.%s:%s", cloud.TagUID, uid)), Project: project, @@ -399,6 +445,70 @@ func (c *Cloud) initSecretHash(ctx context.Context, project, zone, instanceName return "", errors.New("retrieving compute instance: received instance with no init secret hash label") } +// region retrieves the region that this instance is located in. +func (c *Cloud) region() (string, error) { + c.cacheMux.Lock() + defer c.cacheMux.Unlock() + // try to retrieve from cache first + if c.regionCache != "" && + time.Since(c.regionCacheTime) < maxCacheAgeInvariantResource { + return c.regionCache, nil + } + zone, err := c.imds.Zone() + if err != nil { + return "", fmt.Errorf("retrieving zone from imds: %w", err) + } + region, err := regionFromZone(zone) + if err != nil { + return "", fmt.Errorf("retrieving region from zone: %w", err) + } + c.regionCache = region + c.regionCacheTime = time.Now() + return region, nil +} + +// zones retrieves all zones that are within a region. +func (c *Cloud) zones(ctx context.Context, project, region string) ([]string, error) { + c.cacheMux.Lock() + defer c.cacheMux.Unlock() + // try to retrieve from cache first + if cachedZones, ok := c.zoneCache[region]; ok && + time.Since(cachedZones.zoneCacheTime) < maxCacheAgeInvariantResource { + return cachedZones.zones, nil + } + req := &computepb.ListZonesRequest{ + Project: project, + Filter: proto.String(fmt.Sprintf("name = \"%s*\"", region)), + } + zonesIter := c.zoneAPI.List(ctx, req) + var zones []string + var resp *computepb.Zone + var err error + for resp, err = zonesIter.Next(); err == nil; resp, err = zonesIter.Next() { + if resp == nil || resp.Name == nil { + continue + } + zones = append(zones, *resp.Name) + } + if err != nil && err != iterator.Done { + return nil, fmt.Errorf("listing zones: %w", err) + } + if c.zoneCache == nil { + c.zoneCache = make(map[string]struct { + zones []string + zoneCacheTime time.Time + }) + } + c.zoneCache[region] = struct { + zones []string + zoneCacheTime time.Time + }{ + zones: zones, + zoneCacheTime: time.Now(), + } + return zones, nil +} + // convertToInstanceMetadata converts a *computepb.Instance to a metadata.InstanceMetadata. func convertToInstanceMetadata(in *computepb.Instance, project string, zone string) (metadata.InstanceMetadata, error) { if in.Name == nil { @@ -435,3 +545,11 @@ func convertToInstanceMetadata(in *computepb.Instance, project string, zone stri AliasIPRanges: ips, }, nil } + +func regionFromZone(zone string) (string, error) { + zoneParts := strings.Split(zone, "-") + if len(zoneParts) != 3 { + return "", fmt.Errorf("invalid zone format: %s", zone) + } + return fmt.Sprintf("%s-%s", zoneParts[0], zoneParts[1]), nil +} diff --git a/internal/cloud/gcp/gcp_test.go b/internal/cloud/gcp/gcp_test.go index f91e0e4a61..b801630909 100644 --- a/internal/cloud/gcp/gcp_test.go +++ b/internal/cloud/gcp/gcp_test.go @@ -506,6 +506,26 @@ func TestList(t *testing.T) { }, }, } + otherZoneInstance := &computepb.Instance{ + Name: proto.String("otherZoneInstance"), + Zone: proto.String("someZone-east1-a"), + Labels: map[string]string{ + cloud.TagUID: "1234", + cloud.TagRole: role.ControlPlane.String(), + }, + NetworkInterfaces: []*computepb.NetworkInterface{ + { + Name: proto.String("nic0"), + NetworkIP: proto.String("192.0.2.1"), + AliasIpRanges: []*computepb.AliasIpRange{ + { + IpCidrRange: proto.String("198.51.100.0/24"), + }, + }, + Subnetwork: proto.String("projects/someProject/regions/someRegion/subnetworks/someSubnetwork"), + }, + }, + } goodSubnet := &computepb.Subnetwork{ SecondaryIpRanges: []*computepb.SubnetworkSecondaryRange{ { @@ -516,8 +536,9 @@ func TestList(t *testing.T) { testCases := map[string]struct { imds stubIMDS - instanceAPI stubInstanceAPI + instanceAPI instanceAPI subnetAPI stubSubnetAPI + zoneAPI stubZoneAPI wantErr bool wantInstances []metadata.InstanceMetadata }{ @@ -527,7 +548,7 @@ func TestList(t *testing.T) { zone: "someZone-west3-b", instanceName: "someInstance", }, - instanceAPI: stubInstanceAPI{ + instanceAPI: &stubInstanceAPI{ instance: goodInstance, iterator: &stubInstanceIterator{ instances: []*computepb.Instance{ @@ -538,6 +559,15 @@ func TestList(t *testing.T) { subnetAPI: stubSubnetAPI{ subnet: goodSubnet, }, + zoneAPI: stubZoneAPI{ + iterator: &stubZoneIterator{ + zones: []*computepb.Zone{ + { + Name: proto.String("someZone-west3-b"), + }, + }, + }, + }, wantInstances: []metadata.InstanceMetadata{ { Name: "someInstance", @@ -549,13 +579,64 @@ func TestList(t *testing.T) { }, }, }, + "multi-zone": { + imds: stubIMDS{ + projectID: "someProject", + zone: "someZone-west3-b", + instanceName: "someInstance", + }, + instanceAPI: &fakeInstanceAPI{ + instance: goodInstance, + iterators: map[string]*stubInstanceIterator{ + "someZone-east1-a": { + instances: []*computepb.Instance{ + otherZoneInstance, + }, + }, + "someZone-west3-b": { + instances: []*computepb.Instance{ + goodInstance, + }, + }, + }, + }, + subnetAPI: stubSubnetAPI{ + subnet: goodSubnet, + }, + zoneAPI: stubZoneAPI{ + iterator: &stubZoneIterator{ + zones: []*computepb.Zone{ + {Name: proto.String("someZone-east1-a")}, + {Name: proto.String("someZone-west3-b")}, + }, + }, + }, + wantInstances: []metadata.InstanceMetadata{ + { + Name: "otherZoneInstance", + Role: role.ControlPlane, + ProviderID: "gce://someProject/someZone-east1-a/otherZoneInstance", + VPCIP: "192.0.2.1", + AliasIPRanges: []string{"198.51.100.0/24"}, + SecondaryIPRange: "198.51.100.0/24", + }, + { + Name: "someInstance", + Role: role.ControlPlane, + ProviderID: "gce://someProject/someZone-west3-b/someInstance", + VPCIP: "192.0.2.0", + AliasIPRanges: []string{"198.51.100.0/24"}, + SecondaryIPRange: "198.51.100.0/24", + }, + }, + }, "list multiple instances": { imds: stubIMDS{ projectID: "someProject", zone: "someZone-west3-b", instanceName: "someInstance", }, - instanceAPI: stubInstanceAPI{ + instanceAPI: &stubInstanceAPI{ instance: goodInstance, iterator: &stubInstanceIterator{ instances: []*computepb.Instance{ @@ -586,6 +667,15 @@ func TestList(t *testing.T) { subnetAPI: stubSubnetAPI{ subnet: goodSubnet, }, + zoneAPI: stubZoneAPI{ + iterator: &stubZoneIterator{ + zones: []*computepb.Zone{ + { + Name: proto.String("someZone-west3-b"), + }, + }, + }, + }, wantInstances: []metadata.InstanceMetadata{ { Name: "someInstance", @@ -609,7 +699,7 @@ func TestList(t *testing.T) { imds: stubIMDS{ projectIDErr: someErr, }, - instanceAPI: stubInstanceAPI{ + instanceAPI: &stubInstanceAPI{ instance: goodInstance, iterator: &stubInstanceIterator{ instances: []*computepb.Instance{ @@ -628,7 +718,7 @@ func TestList(t *testing.T) { zone: "someZone-west3-b", instanceName: "someInstance", }, - instanceAPI: stubInstanceAPI{ + instanceAPI: &stubInstanceAPI{ instance: goodInstance, iterator: &stubInstanceIterator{ err: someErr, @@ -637,6 +727,15 @@ func TestList(t *testing.T) { subnetAPI: stubSubnetAPI{ subnet: goodSubnet, }, + zoneAPI: stubZoneAPI{ + iterator: &stubZoneIterator{ + zones: []*computepb.Zone{ + { + Name: proto.String("someZone-west3-b"), + }, + }, + }, + }, wantErr: true, }, "get instance error": { @@ -645,7 +744,7 @@ func TestList(t *testing.T) { zone: "someZone-west3-b", instanceName: "someInstance", }, - instanceAPI: stubInstanceAPI{ + instanceAPI: &stubInstanceAPI{ instanceErr: someErr, iterator: &stubInstanceIterator{ instances: []*computepb.Instance{ @@ -656,6 +755,15 @@ func TestList(t *testing.T) { subnetAPI: stubSubnetAPI{ subnet: goodSubnet, }, + zoneAPI: stubZoneAPI{ + iterator: &stubZoneIterator{ + zones: []*computepb.Zone{ + { + Name: proto.String("someZone-west3-b"), + }, + }, + }, + }, wantErr: true, }, "get subnet error": { @@ -664,7 +772,7 @@ func TestList(t *testing.T) { zone: "someZone-west3-b", instanceName: "someInstance", }, - instanceAPI: stubInstanceAPI{ + instanceAPI: &stubInstanceAPI{ instance: goodInstance, iterator: &stubInstanceIterator{ instances: []*computepb.Instance{ @@ -675,6 +783,15 @@ func TestList(t *testing.T) { subnetAPI: stubSubnetAPI{ subnetErr: someErr, }, + zoneAPI: stubZoneAPI{ + iterator: &stubZoneIterator{ + zones: []*computepb.Zone{ + { + Name: proto.String("someZone-west3-b"), + }, + }, + }, + }, wantErr: true, }, } @@ -686,8 +803,9 @@ func TestList(t *testing.T) { cloud := &Cloud{ imds: &tc.imds, - instanceAPI: &tc.instanceAPI, + instanceAPI: tc.instanceAPI, subnetAPI: &tc.subnetAPI, + zoneAPI: &tc.zoneAPI, } instances, err := cloud.List(context.Background()) @@ -701,6 +819,112 @@ func TestList(t *testing.T) { } } +func TestRegion(t *testing.T) { + someErr := errors.New("failed") + + testCases := map[string]struct { + imds stubIMDS + wantRegion string + wantErr bool + }{ + "success": { + imds: stubIMDS{ + projectID: "someProject", + zone: "someregion-west3-b", + instanceName: "someInstance", + }, + wantRegion: "someregion-west3", + }, + "get zone error": { + imds: stubIMDS{ + zoneErr: someErr, + }, + wantErr: true, + }, + "invalid zone format": { + imds: stubIMDS{ + projectID: "someProject", + zone: "invalid", + instanceName: "someInstance", + }, + wantErr: true, + }, + } + + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + assert := assert.New(t) + + cloud := &Cloud{ + imds: &tc.imds, + } + + assert.Empty(cloud.regionCache) + + gotRegion, err := cloud.region() + if tc.wantErr { + assert.Error(err) + return + } + assert.NoError(err) + assert.Equal(tc.wantRegion, gotRegion) + assert.Equal(tc.wantRegion, cloud.regionCache) + }) + } +} + +func TestZones(t *testing.T) { + someErr := errors.New("failed") + + testCases := map[string]struct { + zoneAPI stubZoneAPI + wantZones []string + wantErr bool + }{ + "success": { + zoneAPI: stubZoneAPI{ + &stubZoneIterator{ + zones: []*computepb.Zone{ + {Name: proto.String("someregion-west3-b")}, + {}, // missing name (should be ignored) + nil, // nil (should be ignored) + }, + }, + }, + wantZones: []string{"someregion-west3-b"}, + }, + "get zones error": { + zoneAPI: stubZoneAPI{ + &stubZoneIterator{ + err: someErr, + }, + }, + wantErr: true, + }, + } + + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + assert := assert.New(t) + + cloud := &Cloud{ + zoneAPI: &tc.zoneAPI, + } + + assert.Empty(cloud.zoneCache) + + gotZones, err := cloud.zones(context.Background(), "someProject", "someregion-west3") + if tc.wantErr { + assert.Error(err) + return + } + assert.NoError(err) + assert.Equal(tc.wantZones, gotZones) + assert.Equal(tc.wantZones, cloud.zoneCache["someregion-west3"].zones) + }) + } +} + func TestRetrieveInstanceInfo(t *testing.T) { someErr := errors.New("failed") @@ -1035,6 +1259,27 @@ func (s *stubInstanceAPI) List( func (s *stubInstanceAPI) Close() error { return nil } +type fakeInstanceAPI struct { + instance *computepb.Instance + instanceErr error + // iterators is a map of zone to instance iterator. + iterators map[string]*stubInstanceIterator +} + +func (s *fakeInstanceAPI) Get( + _ context.Context, _ *computepb.GetInstanceRequest, _ ...gax.CallOption, +) (*computepb.Instance, error) { + return s.instance, s.instanceErr +} + +func (s *fakeInstanceAPI) List( + _ context.Context, req *computepb.ListInstancesRequest, _ ...gax.CallOption, +) instanceIterator { + return s.iterators[req.GetZone()] +} + +func (s *fakeInstanceAPI) Close() error { return nil } + type stubInstanceIterator struct { ctr int instances []*computepb.Instance @@ -1063,3 +1308,30 @@ func (s *stubSubnetAPI) Get( return s.subnet, s.subnetErr } func (s *stubSubnetAPI) Close() error { return nil } + +type stubZoneAPI struct { + iterator *stubZoneIterator +} + +func (s *stubZoneAPI) List(_ context.Context, + _ *computepb.ListZonesRequest, _ ...gax.CallOption, +) zoneIterator { + return s.iterator +} + +type stubZoneIterator struct { + ctr int + zones []*computepb.Zone + err error +} + +func (s *stubZoneIterator) Next() (*computepb.Zone, error) { + if s.err != nil { + return nil, s.err + } + if s.ctr >= len(s.zones) { + return nil, iterator.Done + } + s.ctr++ + return s.zones[s.ctr-1], nil +} diff --git a/internal/cloud/gcp/interface.go b/internal/cloud/gcp/interface.go index 4f0c8da761..e78c9861b5 100644 --- a/internal/cloud/gcp/interface.go +++ b/internal/cloud/gcp/interface.go @@ -40,3 +40,7 @@ type subnetAPI interface { Get(ctx context.Context, req *computepb.GetSubnetworkRequest, opts ...gax.CallOption) (*computepb.Subnetwork, error) Close() error } + +type zoneAPI interface { + List(ctx context.Context, req *computepb.ListZonesRequest, opts ...gax.CallOption) zoneIterator +} diff --git a/internal/cloud/gcp/wrappers.go b/internal/cloud/gcp/wrappers.go index 348041fee0..72b46399a2 100644 --- a/internal/cloud/gcp/wrappers.go +++ b/internal/cloud/gcp/wrappers.go @@ -22,6 +22,10 @@ type instanceIterator interface { Next() (*computepb.Instance, error) } +type zoneIterator interface { + Next() (*computepb.Zone, error) +} + type globalForwardingRulesClient struct { *compute.GlobalForwardingRulesClient } @@ -63,3 +67,15 @@ func (c *instanceClient) List(ctx context.Context, req *computepb.ListInstancesR ) instanceIterator { return c.InstancesClient.List(ctx, req) } + +type zoneClient struct { + *compute.ZonesClient +} + +func (c *zoneClient) Close() error { + return c.ZonesClient.Close() +} + +func (c *zoneClient) List(ctx context.Context, req *computepb.ListZonesRequest, opts ...gax.CallOption) zoneIterator { + return c.ZonesClient.List(ctx, req, opts...) +}