Skip to content

Commit

Permalink
[ACM-8879]: Add initial implementation for T-shirt sizing ACM o11y re…
Browse files Browse the repository at this point in the history
…sources (#1295)

* Refactor the existing resource configuration into separate file and make
code clearer

Signed-off-by: Saswata Mukherjee <[email protected]>

* Add initial implementation for T-Shirt Sizing ACM o11y

Signed-off-by: Saswata Mukherjee <[email protected]>

* Make sure replicas are also t-shirt sized

Signed-off-by: Saswata Mukherjee <[email protected]>

* make lint

Signed-off-by: Saswata Mukherjee <[email protected]>

* Fix test

Signed-off-by: Saswata Mukherjee <[email protected]>

* Implement suggestions

Signed-off-by: Saswata Mukherjee <[email protected]>

* Generate bundle

Signed-off-by: Saswata Mukherjee <[email protected]>

* Unify and rename to InstanceSize; Add default & minimal options

Signed-off-by: Saswata Mukherjee <[email protected]>

* Add in Thanos t-shirt size values

Signed-off-by: Saswata Mukherjee <[email protected]>

* Add in API/RBAC Query Proxy and AM t-shirt size value

Signed-off-by: Saswata Mukherjee <[email protected]>

* Add cache values

Signed-off-by: Saswata Mukherjee <[email protected]>

* Remove 8xlarge size

Signed-off-by: Saswata Mukherjee <[email protected]>

* Add spec and spec update tests

Signed-off-by: Saswata Mukherjee <[email protected]>

* Keep memcached exporter at 5m

Signed-off-by: Saswata Mukherjee <[email protected]>

---------

Signed-off-by: Saswata Mukherjee <[email protected]>
  • Loading branch information
saswatamcode authored Jun 17, 2024
1 parent 2213ea9 commit 78d5118
Show file tree
Hide file tree
Showing 16 changed files with 1,501 additions and 855 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ type MultiClusterObservabilitySpec struct {
// Advanced configurations for observability
// +optional
AdvancedConfig *AdvancedConfig `json:"advanced,omitempty"`
// Size read and write paths of your Observability instance
// +optional
InstanceSize TShirtSize `json:"instanceSize,omitempty"`
// Enable or disable the downsample.
// +optional
// +kubebuilder:default:=true
Expand All @@ -41,6 +44,10 @@ type MultiClusterObservabilitySpec struct {
ObservabilityAddonSpec *observabilityshared.ObservabilityAddonSpec `json:"observabilityAddonSpec"`
}

// TShirtSize is the T-shirt size class for a particular o11y resource.
// The kubebuilder marker below restricts it to the listed enum values.
// +kubebuilder:validation:Enum:={"default","minimal","small","medium","large","xlarge","2xlarge","4xlarge"}
type TShirtSize string

type AdvancedConfig struct {
// CustomObservabilityHubURL overrides the endpoint used by the metrics-collector to send
// metrics to the hub server.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5904,6 +5904,18 @@ spec:
imagePullSecret:
description: Pull secret of the MultiClusterObservability images
type: string
instanceSize:
description: Size read and write paths of your Observability instance
enum:
- default
- minimal
- small
- medium
- large
- xlarge
- 2xlarge
- 4xlarge
type: string
nodeSelector:
additionalProperties:
type: string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9497,6 +9497,18 @@ spec:
imagePullSecret:
description: Pull secret of the MultiClusterObservability images
type: string
instanceSize:
description: Size read and write paths of your Observability instance
enum:
- default
- minimal
- small
- medium
- large
- xlarge
- 2xlarge
- 4xlarge
type: string
nodeSelector:
additionalProperties:
type: string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -521,9 +521,9 @@ func newAPISpec(c client.Client, mco *mcov1beta2.MultiClusterObservability) (obs
apiSpec.RBAC = newAPIRBAC()
apiSpec.Tenants = newAPITenants()
apiSpec.TLS = newAPITLS()
apiSpec.Replicas = mcoconfig.GetReplicas(mcoconfig.ObservatoriumAPI, mco.Spec.AdvancedConfig)
apiSpec.Replicas = mcoconfig.GetReplicas(mcoconfig.ObservatoriumAPI, mco.Spec.InstanceSize, mco.Spec.AdvancedConfig)
if !mcoconfig.WithoutResourcesRequests(mco.GetAnnotations()) {
apiSpec.Resources = mcoconfig.GetResources(mcoconfig.ObservatoriumAPI, mco.Spec.AdvancedConfig)
apiSpec.Resources = mcoconfig.GetResources(mcoconfig.ObservatoriumAPI, mco.Spec.InstanceSize, mco.Spec.AdvancedConfig)
}
// set the default observatorium components' image
apiSpec.Image = mcoconfig.DefaultImgRepository + "/" + mcoconfig.ObservatoriumAPIImgName +
Expand Down Expand Up @@ -619,16 +619,17 @@ func newReceiversSpec(
receSpec.Retention = mcoconfig.RetentionInLocal
}

receSpec.Replicas = mcoconfig.GetReplicas(mcoconfig.ThanosReceive, mco.Spec.AdvancedConfig)
receSpec.Replicas = mcoconfig.GetReplicas(mcoconfig.ThanosReceive, mco.Spec.InstanceSize, mco.Spec.AdvancedConfig)
if *receSpec.Replicas < 3 {
receSpec.ReplicationFactor = receSpec.Replicas
} else {
receSpec.ReplicationFactor = &mcoconfig.Replicas3
var replicas3 int32 = 3
receSpec.ReplicationFactor = &replicas3
}

receSpec.ServiceMonitor = true
if !mcoconfig.WithoutResourcesRequests(mco.GetAnnotations()) {
receSpec.Resources = mcoconfig.GetResources(mcoconfig.ThanosReceive, mco.Spec.AdvancedConfig)
receSpec.Resources = mcoconfig.GetResources(mcoconfig.ThanosReceive, mco.Spec.InstanceSize, mco.Spec.AdvancedConfig)
}
receSpec.VolumeClaimTemplate = newVolumeClaimTemplate(
mco.Spec.StorageConfig.ReceiveStorageSize,
Expand Down Expand Up @@ -668,15 +669,18 @@ func newRuleSpec(mco *mcov1beta2.MultiClusterObservability, scSelected string) o
} else {
ruleSpec.EvalInterval = fmt.Sprintf("%ds", mco.Spec.ObservabilityAddonSpec.Interval)
}
ruleSpec.Replicas = mcoconfig.GetReplicas(mcoconfig.ThanosRule, mco.Spec.AdvancedConfig)
ruleSpec.Replicas = mcoconfig.GetReplicas(mcoconfig.ThanosRule, mco.Spec.InstanceSize, mco.Spec.AdvancedConfig)

ruleSpec.ServiceMonitor = true
if !mcoconfig.WithoutResourcesRequests(mco.GetAnnotations()) {
ruleSpec.Resources = mcoconfig.GetResources(mcoconfig.ThanosRule, mco.Spec.AdvancedConfig)
ruleSpec.Resources = mcoconfig.GetResources(mcoconfig.ThanosRule, mco.Spec.InstanceSize, mco.Spec.AdvancedConfig)
if mco.Spec.InstanceSize == "" {
mco.Spec.InstanceSize = mcoconfig.Default
}
ruleSpec.ReloaderResources = v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceName(v1.ResourceCPU): resource.MustParse(mcoconfig.ThanosRuleReloaderCPURequets),
v1.ResourceName(v1.ResourceMemory): resource.MustParse(mcoconfig.ThanosRuleReloaderMemoryRequets),
v1.ResourceName(v1.ResourceCPU): resource.MustParse(mcoconfig.ThanosRuleReloaderCPURequest[mco.Spec.InstanceSize]),
v1.ResourceName(v1.ResourceMemory): resource.MustParse(mcoconfig.ThanosRuleReloaderMemoryRequest[mco.Spec.InstanceSize]),
},
}
}
Expand Down Expand Up @@ -750,14 +754,14 @@ func newRuleSpec(mco *mcov1beta2.MultiClusterObservability, scSelected string) o
func newStoreSpec(mco *mcov1beta2.MultiClusterObservability, scSelected string) obsv1alpha1.StoreSpec {
storeSpec := obsv1alpha1.StoreSpec{}
if !mcoconfig.WithoutResourcesRequests(mco.GetAnnotations()) {
storeSpec.Resources = mcoconfig.GetResources(mcoconfig.ThanosStoreShard, mco.Spec.AdvancedConfig)
storeSpec.Resources = mcoconfig.GetResources(mcoconfig.ThanosStoreShard, mco.Spec.InstanceSize, mco.Spec.AdvancedConfig)
}

storeSpec.VolumeClaimTemplate = newVolumeClaimTemplate(
mco.Spec.StorageConfig.StoreStorageSize,
scSelected)

storeSpec.Shards = mcoconfig.GetReplicas(mcoconfig.ThanosStoreShard, mco.Spec.AdvancedConfig)
storeSpec.Shards = mcoconfig.GetReplicas(mcoconfig.ThanosStoreShard, mco.Spec.InstanceSize, mco.Spec.AdvancedConfig)
storeSpec.ServiceMonitor = true
storeSpec.Cache = newMemCacheSpec(mcoconfig.ThanosStoreMemcached, mco)

Expand Down Expand Up @@ -787,15 +791,15 @@ func newMemCacheSpec(component string, mco *mcov1beta2.MultiClusterObservability
memCacheSpec.Image = mcoconfig.MemcachedImgRepo + "/" +
mcoconfig.MemcachedImgName + ":" + mcoconfig.MemcachedImgTag
memCacheSpec.Version = mcoconfig.MemcachedImgTag
memCacheSpec.Replicas = mcoconfig.GetReplicas(component, mco.Spec.AdvancedConfig)
memCacheSpec.Replicas = mcoconfig.GetReplicas(component, mco.Spec.InstanceSize, mco.Spec.AdvancedConfig)

memCacheSpec.ServiceMonitor = true
memCacheSpec.ExporterImage = mcoconfig.MemcachedExporterImgRepo + "/" +
mcoconfig.MemcachedExporterImgName + ":" + mcoconfig.MemcachedExporterImgTag
memCacheSpec.ExporterVersion = mcoconfig.MemcachedExporterImgTag
if !mcoconfig.WithoutResourcesRequests(mco.GetAnnotations()) {
memCacheSpec.Resources = mcoconfig.GetResources(component, mco.Spec.AdvancedConfig)
memCacheSpec.ExporterResources = mcoconfig.GetResources(mcoconfig.MemcachedExporter, mco.Spec.AdvancedConfig)
memCacheSpec.Resources = mcoconfig.GetResources(component, mco.Spec.InstanceSize, mco.Spec.AdvancedConfig)
memCacheSpec.ExporterResources = mcoconfig.GetResources(mcoconfig.MemcachedExporter, mco.Spec.InstanceSize, mco.Spec.AdvancedConfig)
}

found, image := mcoconfig.ReplaceImage(mco.Annotations, memCacheSpec.Image, mcoconfig.MemcachedImgName)
Expand Down Expand Up @@ -851,10 +855,10 @@ func newThanosSpec(mco *mcov1beta2.MultiClusterObservability, scSelected string)

func newQueryFrontendSpec(mco *mcov1beta2.MultiClusterObservability) obsv1alpha1.QueryFrontendSpec {
queryFrontendSpec := obsv1alpha1.QueryFrontendSpec{}
queryFrontendSpec.Replicas = mcoconfig.GetReplicas(mcoconfig.ThanosQueryFrontend, mco.Spec.AdvancedConfig)
queryFrontendSpec.Replicas = mcoconfig.GetReplicas(mcoconfig.ThanosQueryFrontend, mco.Spec.InstanceSize, mco.Spec.AdvancedConfig)
queryFrontendSpec.ServiceMonitor = true
if !mcoconfig.WithoutResourcesRequests(mco.GetAnnotations()) {
queryFrontendSpec.Resources = mcoconfig.GetResources(mcoconfig.ThanosQueryFrontend, mco.Spec.AdvancedConfig)
queryFrontendSpec.Resources = mcoconfig.GetResources(mcoconfig.ThanosQueryFrontend, mco.Spec.InstanceSize, mco.Spec.AdvancedConfig)
}
queryFrontendSpec.Cache = newMemCacheSpec(mcoconfig.ThanosQueryFrontendMemcached, mco)

Expand All @@ -868,15 +872,15 @@ func newQueryFrontendSpec(mco *mcov1beta2.MultiClusterObservability) obsv1alpha1

func newQuerySpec(mco *mcov1beta2.MultiClusterObservability) obsv1alpha1.QuerySpec {
querySpec := obsv1alpha1.QuerySpec{}
querySpec.Replicas = mcoconfig.GetReplicas(mcoconfig.ThanosQuery, mco.Spec.AdvancedConfig)
querySpec.Replicas = mcoconfig.GetReplicas(mcoconfig.ThanosQuery, mco.Spec.InstanceSize, mco.Spec.AdvancedConfig)
querySpec.ServiceMonitor = true
// only set lookback-delta when the scrape interval * 2 is larger than 5 minute,
// otherwise default value(5m) will be used.
if mco.Spec.ObservabilityAddonSpec.Interval*2 > 300 {
querySpec.LookbackDelta = fmt.Sprintf("%ds", mco.Spec.ObservabilityAddonSpec.Interval*2)
}
if !mcoconfig.WithoutResourcesRequests(mco.GetAnnotations()) {
querySpec.Resources = mcoconfig.GetResources(mcoconfig.ThanosQuery, mco.Spec.AdvancedConfig)
querySpec.Resources = mcoconfig.GetResources(mcoconfig.ThanosQuery, mco.Spec.InstanceSize, mco.Spec.AdvancedConfig)
}
if mco.Spec.AdvancedConfig != nil && mco.Spec.AdvancedConfig.Query != nil &&
mco.Spec.AdvancedConfig.Query.ServiceAccountAnnotations != nil {
Expand All @@ -902,13 +906,16 @@ func newReceiverControllerSpec(mco *mcov1beta2.MultiClusterObservability) obsv1a
receiveControllerSpec.ServiceMonitor = true
receiveControllerSpec.Version = mcoconfig.ThanosReceiveControllerImgTag
if !mcoconfig.WithoutResourcesRequests(mco.GetAnnotations()) {
if mco.Spec.InstanceSize == "" {
mco.Spec.InstanceSize = mcoconfig.Default
}
receiveControllerSpec.Resources = v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceName(v1.ResourceCPU): resource.MustParse(
mcoconfig.ObservatoriumReceiveControllerCPURequets,
mcoconfig.ObservatoriumReceiveControllerCPURequest[mco.Spec.InstanceSize],
),
v1.ResourceName(v1.ResourceMemory): resource.MustParse(
mcoconfig.ObservatoriumReceiveControllerMemoryRequets,
mcoconfig.ObservatoriumReceiveControllerMemoryRequest[mco.Spec.InstanceSize],
),
},
}
Expand All @@ -926,9 +933,10 @@ func newCompactSpec(mco *mcov1beta2.MultiClusterObservability, scSelected string
compactSpec := obsv1alpha1.CompactSpec{}
// Compactor, generally, does not need to be highly available.
// Compactions are needed from time to time, only when new blocks appear.
compactSpec.Replicas = &mcoconfig.Replicas1
var replicas1 int32 = 1
compactSpec.Replicas = &replicas1
if !mcoconfig.WithoutResourcesRequests(mco.GetAnnotations()) {
compactSpec.Resources = mcoconfig.GetResources(mcoconfig.ThanosCompact, mco.Spec.AdvancedConfig)
compactSpec.Resources = mcoconfig.GetResources(mcoconfig.ThanosCompact, mco.Spec.InstanceSize, mco.Spec.AdvancedConfig)
}
compactSpec.ServiceMonitor = true
compactSpec.EnableDownsampling = mco.Spec.EnableDownsampling
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ func TestNewDefaultObservatoriumSpec(t *testing.T) {
"write_key": []byte(`url: http://remotewrite/endpoint`),
},
}
s := scheme.Scheme
mcov1beta2.SchemeBuilder.AddToScheme(s)
observatoriumv1alpha1.AddToScheme(s)

objs := []runtime.Object{mco, writeStorageS}
// Create a fake client to mock API calls.
Expand Down Expand Up @@ -139,6 +142,89 @@ func TestNewDefaultObservatoriumSpec(t *testing.T) {
}
}

// TestNewDefaultObservatoriumSpecWithTShirtSize builds the default Observatorium
// spec for an MCO whose InstanceSize is mcoconfig.FourXLarge and verifies the CPU
// and memory requests plus the replica/shard counts produced for each Thanos
// component.
func TestNewDefaultObservatoriumSpecWithTShirtSize(t *testing.T) {
	mco := &mcov1beta2.MultiClusterObservability{
		TypeMeta: metav1.TypeMeta{Kind: "MultiClusterObservability"},
		ObjectMeta: metav1.ObjectMeta{
			Name: "test",
			Annotations: map[string]string{
				mcoconfig.AnnotationKeyImageRepository: "quay.io:443/acm-d",
				mcoconfig.AnnotationKeyImageTagSuffix:  "tag",
			},
		},
		Spec: mcov1beta2.MultiClusterObservabilitySpec{
			InstanceSize: mcoconfig.FourXLarge,
			StorageConfig: &mcov1beta2.StorageConfig{
				MetricObjectStorage: &mcoshared.PreConfiguredStorage{
					Key:           "key",
					Name:          "name",
					TLSSecretName: "secret",
				},
				WriteStorage: []*mcoshared.PreConfiguredStorage{
					{
						Key:  "write_key",
						Name: "write_name",
					},
				},
				StorageClass:            storageClassName,
				AlertmanagerStorageSize: "1Gi",
				CompactStorageSize:      "1Gi",
				RuleStorageSize:         "1Gi",
				ReceiveStorageSize:      "1Gi",
				StoreStorageSize:        "1Gi",
			},
			ObservabilityAddonSpec: &mcoshared.ObservabilityAddonSpec{
				EnableMetrics: true,
				Interval:      300,
			},
		},
	}

	// Secret referenced by the WriteStorage entry above.
	writeStorageS := &corev1.Secret{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "write_name",
			Namespace: mcoconfig.GetDefaultNamespace(),
		},
		Type: "Opaque",
		Data: map[string][]byte{
			"write_key": []byte(`url: http://remotewrite/endpoint`),
		},
	}

	// Register operator types with the runtime scheme; a registration failure
	// would make every later step meaningless, so stop immediately.
	s := scheme.Scheme
	if err := mcov1beta2.SchemeBuilder.AddToScheme(s); err != nil {
		t.Fatalf("failed to add MCO types to scheme: %v", err)
	}
	if err := observatoriumv1alpha1.AddToScheme(s); err != nil {
		t.Fatalf("failed to add observatorium types to scheme: %v", err)
	}

	objs := []runtime.Object{mco, writeStorageS}
	// Create a fake client to mock API calls.
	cl := fake.NewClientBuilder().WithRuntimeObjects(objs...).Build()

	obs, err := newDefaultObservatoriumSpec(cl, mco, storageClassName, "")
	if err != nil {
		// Fatal rather than Error: the assertions below dereference fields of
		// obs and would panic on an unusable result.
		t.Fatalf("failed to create obs spec: %v", err)
	}

	// Resource requests, compared via their canonical string form.
	quantities := []struct {
		name string
		got  string
		want string
	}{
		{"receive cpu", obs.Thanos.Receivers.Resources.Requests.Cpu().String(), "10"},
		{"receive memory", obs.Thanos.Receivers.Resources.Requests.Memory().String(), "128Gi"},
		{"query cpu", obs.Thanos.Query.Resources.Requests.Cpu().String(), "7"},
		{"query memory", obs.Thanos.Query.Resources.Requests.Memory().String(), "18Gi"},
		{"query-frontend cpu", obs.Thanos.QueryFrontend.Resources.Requests.Cpu().String(), "4"},
		{"query-frontend memory", obs.Thanos.QueryFrontend.Resources.Requests.Memory().String(), "12Gi"},
		{"compact cpu", obs.Thanos.Compact.Resources.Requests.Cpu().String(), "6"},
		{"compact memory", obs.Thanos.Compact.Resources.Requests.Memory().String(), "18Gi"},
		{"rule cpu", obs.Thanos.Rule.Resources.Requests.Cpu().String(), "6"},
		{"rule memory", obs.Thanos.Rule.Resources.Requests.Memory().String(), "15Gi"},
		{"store cpu", obs.Thanos.Store.Resources.Requests.Cpu().String(), "6"},
		{"store memory", obs.Thanos.Store.Resources.Requests.Memory().String(), "20Gi"},
	}
	for _, q := range quantities {
		if q.got != q.want {
			t.Errorf("Failed t-shirt size for Obs Spec: %s = %q, want %q", q.name, q.got, q.want)
		}
	}

	// Replica and shard counts; pointers are checked for nil before use so a
	// missing value is reported as a failure instead of a panic.
	counts := []struct {
		name string
		got  *int32
		want int32
	}{
		{"receive replicas", obs.Thanos.Receivers.Replicas, 12},
		{"query replicas", obs.Thanos.Query.Replicas, 10},
		{"query-frontend replicas", obs.Thanos.QueryFrontend.Replicas, 10},
		{"compact replicas", obs.Thanos.Compact.Replicas, 1},
		{"rule replicas", obs.Thanos.Rule.Replicas, 3},
		{"store shards", obs.Thanos.Store.Shards, 6},
	}
	for _, c := range counts {
		if c.got == nil {
			t.Errorf("Failed t-shirt size for Obs Spec: %s is nil, want %d", c.name, c.want)
			continue
		}
		if *c.got != c.want {
			t.Errorf("Failed t-shirt size for Obs Spec: %s = %d, want %d", c.name, *c.got, c.want)
		}
	}
}

func TestUpdateObservatoriumCR(t *testing.T) {
namespace := mcoconfig.GetDefaultNamespace()

Expand Down Expand Up @@ -240,6 +326,82 @@ func TestUpdateObservatoriumCR(t *testing.T) {

}

// TestTShirtSizeUpdateObservatoriumCR generates the Observatorium CR for an MCO
// with InstanceSize mcoconfig.Large, regenerates it after switching the MCO to
// mcoconfig.TwoXLarge, and checks after each step that the Thanos receive
// requests and replica count match the expected values for that size.
func TestTShirtSizeUpdateObservatoriumCR(t *testing.T) {
	namespace := mcoconfig.GetDefaultNamespace()

	// A MultiClusterObservability object with metadata and spec.
	mco := &mcov1beta2.MultiClusterObservability{
		TypeMeta: metav1.TypeMeta{Kind: "MultiClusterObservability"},
		ObjectMeta: metav1.ObjectMeta{
			Name: mcoconfig.GetDefaultCRName(),
		},
		Spec: mcov1beta2.MultiClusterObservabilitySpec{
			InstanceSize: mcoconfig.Large,
			StorageConfig: &mcov1beta2.StorageConfig{
				MetricObjectStorage: &mcoshared.PreConfiguredStorage{
					Key:  "test",
					Name: "test",
				},
				StorageClass:            storageClassName,
				AlertmanagerStorageSize: "1Gi",
				CompactStorageSize:      "1Gi",
				RuleStorageSize:         "1Gi",
				ReceiveStorageSize:      "1Gi",
				StoreStorageSize:        "1Gi",
			},
			ObservabilityAddonSpec: &mcoshared.ObservabilityAddonSpec{
				EnableMetrics: true,
				Interval:      300,
			},
		},
	}

	// Register operator types with the runtime scheme; stop on failure since
	// nothing below can work without them.
	s := scheme.Scheme
	if err := mcov1beta2.SchemeBuilder.AddToScheme(s); err != nil {
		t.Fatalf("Failed to add MCO types to scheme due to %v", err)
	}
	if err := observatoriumv1alpha1.AddToScheme(s); err != nil {
		t.Fatalf("Failed to add observatorium types to scheme due to %v", err)
	}

	// Create a fake client to mock API calls.
	// The MCO CR is the only object pre-loaded into it.
	cl := fake.NewClientBuilder().WithRuntimeObjects(mco).Build()
	mcoconfig.SetOperandNames(cl)

	if _, err := GenerateObservatoriumCR(cl, s, mco); err != nil {
		// Fatal: the lookup below depends on the CR having been created.
		t.Fatalf("Failed to create observatorium due to %v", err)
	}

	// Fetch the Observatorium CR that was just generated.
	createdObservatoriumCR := &observatoriumv1alpha1.Observatorium{}
	if err := cl.Get(context.TODO(), types.NamespacedName{
		Name:      mcoconfig.GetDefaultCRName(),
		Namespace: namespace,
	}, createdObservatoriumCR); err != nil {
		t.Fatalf("Failed to get created observatorium due to %v", err)
	}

	createdReceive := createdObservatoriumCR.Spec.Thanos.Receivers
	// The nil check short-circuits before the dereference, so a missing
	// replica count fails the test instead of panicking.
	if createdReceive.Replicas == nil ||
		createdReceive.Resources.Requests.Cpu().String() != "5" ||
		createdReceive.Resources.Requests.Memory().String() != "24Gi" ||
		*createdReceive.Replicas != 6 {
		t.Errorf("t-shirt size values for receive not correct")
	}

	// Switch the instance size and regenerate; the existing CR should be updated.
	mco.Spec.InstanceSize = mcoconfig.TwoXLarge
	if _, err := GenerateObservatoriumCR(cl, s, mco); err != nil {
		t.Fatalf("Failed to update observatorium due to %v", err)
	}

	updatedObservatorium := &observatoriumv1alpha1.Observatorium{}
	if err := cl.Get(context.TODO(), types.NamespacedName{
		Name:      mcoconfig.GetDefaultCRName(),
		Namespace: namespace,
	}, updatedObservatorium); err != nil {
		t.Fatalf("Failed to get updated observatorium due to %v", err)
	}

	updatedReceive := updatedObservatorium.Spec.Thanos.Receivers
	if updatedReceive.Replicas == nil ||
		updatedReceive.Resources.Requests.Cpu().String() != "6" ||
		updatedReceive.Resources.Requests.Memory().String() != "52Gi" ||
		*updatedReceive.Replicas != 12 {
		t.Errorf("updated t-shirt size values for receive not correct")
	}
}

func TestNoUpdateObservatoriumCR(t *testing.T) {
var (
namespace = mcoconfig.GetDefaultNamespace()
Expand Down
Loading

0 comments on commit 78d5118

Please sign in to comment.