From b0396fde61c4a6e6620a19edd3fffc22f00e6526 Mon Sep 17 00:00:00 2001 From: Jose Luis Segura Lucas Date: Fri, 29 Nov 2024 03:30:15 +0100 Subject: [PATCH] OCPBUGS-45047: LokiStack gatherer (#1022) * Adding LokiStack gatherer * Pay attention to error * Gathering at most 20. Anonymizing the results to remove tenant's selectors * Fix behaviour * Solving some misunderstandings * Make lintian happy again * Updating docs * Added unit tests * Fix linter * Tests table-strategy * Fix linters * Answering review --- docs/gathered-data.md | 30 ++++ .../openshift-logging/lokistack-sample.json | 125 ++++++++++++++ manifests/03-clusterrole.yaml | 7 + .../clusterconfig/clusterconfig_gatherer.go | 2 + pkg/gatherers/clusterconfig/const.go | 3 + .../clusterconfig/gather_lokistack.go | 153 ++++++++++++++++++ .../clusterconfig/gather_lokistacks_test.go | 94 +++++++++++ 7 files changed, 414 insertions(+) create mode 100644 docs/insights-archive-sample/namespaces/openshift-logging/lokistack-sample.json create mode 100644 pkg/gatherers/clusterconfig/gather_lokistack.go create mode 100644 pkg/gatherers/clusterconfig/gather_lokistacks_test.go diff --git a/docs/gathered-data.md b/docs/gathered-data.md index 922a4311b..4ab8354b0 100644 --- a/docs/gathered-data.md +++ b/docs/gathered-data.md @@ -1062,6 +1062,36 @@ None None +## LokiStack + +Collects `lokistacks.loki.grafana.com` resources. + +The gatherer will collect up to 20 resources from `openshift-logging` namespace +and it will report errors if it finds a `LokiStack` resource in a different namespace +or if there are more than 20 `LokiStacks` in the `openshift-logging` namespace. 
+ +### API Reference +None + +### Sample data +- [docs/insights-archive-sample/namespaces/openshift-logging/lokistack-sample.json](./insights-archive-sample/namespaces/openshift-logging/lokistack-sample.json) + +### Location in archive +- `namespace/{namespace}/loki.grafana.com/lokistacks/{name}.json` + +### Config ID +`clusterconfig/lokistack` + +### Released version +- 4.18.0 + +### Backported versions +None + +### Changes +None + + ## Machine Collects `Machine` information. diff --git a/docs/insights-archive-sample/namespaces/openshift-logging/lokistack-sample.json b/docs/insights-archive-sample/namespaces/openshift-logging/lokistack-sample.json new file mode 100644 index 000000000..5c5ddadc8 --- /dev/null +++ b/docs/insights-archive-sample/namespaces/openshift-logging/lokistack-sample.json @@ -0,0 +1,125 @@ +{ + "apiVersion": "loki.grafana.com/v1", + "kind": "LokiStack", + "metadata": { + "annotations": { + "kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"loki.grafana.com/v1\",\"kind\":\"LokiStack\",\"metadata\":{\"annotations\":{\"loki.grafana.com/rulesDiscoveredAt\":\"2024-11-14T00:05:44Z\"},\"creationTimestamp\":\"2024-11-13T09:13:10Z\",\"generation\":3,\"name\":\"lokistack-sample\",\"namespace\":\"openshift-logging\",\"resourceVersion\":\"637293\",\"uid\":\"f105635e-dc45-491c-b778-540990e04b3f\"},\"spec\":{\"hashRing\":{\"type\":\"memberlist\"},\"limits\":{\"global\":{\"queries\":{\"queryTimeout\":\"3m\"}},\"tenants\":{\"application\":{\"retention\":{\"days\":1,\"streams\":[{\"days\":4,\"priority\":1,\"selector\":\"{kubernetes_namespace_name=~\\\"test.+\\\"}\"},{\"days\":3,\"priority\":1,\"selector\":\"{log_type=\\\"infrastructure\\\"}\"}]}}}},\"managementState\":\"Managed\",\"size\":\"1x.small\",\"storage\":{\"schemas\":[{\"effectiveDate\":\"2020-10-11\",\"version\":\"v11\"}],\"secret\":{\"name\":\"builder-dockercfg-jzlkq\",\"type\":\"azure\"}},\"storageClassName\":\"gp2-csi\",\"tenants\":{\"mode\":\"openshift-logging\"}},\"status\":{\"components\":{\"compactor\":{\"
Failed\":[],\"Pending\":[],\"Ready\":[],\"Running\":[]},\"distributor\":{\"Failed\":[],\"Pending\":[],\"Ready\":[],\"Running\":[]},\"gateway\":{\"Failed\":[],\"Pending\":[],\"Ready\":[],\"Running\":[]},\"indexGateway\":{\"Failed\":[],\"Pending\":[],\"Ready\":[],\"Running\":[]},\"ingester\":{\"Failed\":[],\"Pending\":[],\"Ready\":[],\"Running\":[]},\"querier\":{\"Failed\":[],\"Pending\":[],\"Ready\":[],\"Running\":[]},\"queryFrontend\":{\"Failed\":[],\"Pending\":[],\"Ready\":[],\"Running\":[]},\"ruler\":{\"Failed\":[],\"Pending\":[],\"Ready\":[],\"Running\":[]}},\"conditions\":[{\"lastTransitionTime\":\"2024-11-14T08:52:30Z\",\"message\":\"Invalid object storage secret contents: missing secret field: environment\",\"reason\":\"InvalidObjectStorageSecret\",\"status\":\"True\",\"type\":\"Degraded\"}],\"storage\":{}}}\n", + "loki.grafana.com/rulesDiscoveredAt": "2024-11-14T00:05:44Z" + }, + "creationTimestamp": "2024-11-18T12:10:40Z", + "generation": 1, + "name": "lokistack-sample", + "namespace": "openshift-logging", + "resourceVersion": "54569", + "uid": "5072dcdd-e906-4b5f-8473-3d4aa2e579ba" + }, + "spec": { + "hashRing": { + "type": "memberlist" + }, + "limits": { + "global": { + "queries": { + "queryTimeout": "3m" + } + }, + "tenants": { + "application": { + "retention": { + "days": 1, + "streams": [ + { + "days": 4, + "priority": 1 + }, + { + "days": 3, + "priority": 1 + } + ] + } + } + } + }, + "managementState": "Managed", + "size": "1x.small", + "storage": { + "schemas": [ + { + "effectiveDate": "2020-10-11", + "version": "v11" + } + ], + "secret": { + "name": "builder-dockercfg-jzlkq", + "type": "azure" + } + }, + "storageClassName": "gp2-csi", + "tenants": { + "mode": "openshift-logging" + } + }, + "status": { + "components": { + "compactor": { + "Failed": [], + "Pending": [], + "Ready": [], + "Running": [] + }, + "distributor": { + "Failed": [], + "Pending": [], + "Ready": [], + "Running": [] + }, + "gateway": { + "Failed": [], + "Pending": [], + "Ready": 
[], + "Running": [] + }, + "indexGateway": { + "Failed": [], + "Pending": [], + "Ready": [], + "Running": [] + }, + "ingester": { + "Failed": [], + "Pending": [], + "Ready": [], + "Running": [] + }, + "querier": { + "Failed": [], + "Pending": [], + "Ready": [], + "Running": [] + }, + "queryFrontend": { + "Failed": [], + "Pending": [], + "Ready": [], + "Running": [] + }, + "ruler": { + "Failed": [], + "Pending": [], + "Ready": [], + "Running": [] + } + }, + "conditions": [ + { + "lastTransitionTime": "2024-11-18T12:10:48Z", + "message": "Missing object storage secret", + "reason": "MissingObjectStorageSecret", + "status": "True", + "type": "Degraded" + } + ], + "storage": {} + } +} diff --git a/manifests/03-clusterrole.yaml b/manifests/03-clusterrole.yaml index 10ad801d9..4d922900b 100644 --- a/manifests/03-clusterrole.yaml +++ b/manifests/03-clusterrole.yaml @@ -353,6 +353,13 @@ rules: verbs: - get - list + - apiGroups: + - loki.grafana.com + resources: + - lokistacks + verbs: + - get + - list --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding diff --git a/pkg/gatherers/clusterconfig/clusterconfig_gatherer.go b/pkg/gatherers/clusterconfig/clusterconfig_gatherer.go index 5ba841504..d7449d31e 100644 --- a/pkg/gatherers/clusterconfig/clusterconfig_gatherer.go +++ b/pkg/gatherers/clusterconfig/clusterconfig_gatherer.go @@ -26,6 +26,7 @@ type Gatherer struct { type gathererFuncPtr = func(*Gatherer, context.Context) ([]record.Record, []error) var gatheringFunctions = map[string]gathererFuncPtr{ + // Entries are kept in alphabetical order by key. "active_alerts": (*Gatherer).GatherActiveAlerts, "aggregated_monitoring_cr_names": (*Gatherer).GatherAggregatedMonitoringCRNames, "authentication": (*Gatherer).GatherClusterAuthentication, @@ -50,6 +51,7 @@ var gatheringFunctions = map[string]gathererFuncPtr{ "install_plans": (*Gatherer).GatherInstallPlans, "jaegers": (*Gatherer).GatherJaegerCR, "kube_controller_manager_logs": (*Gatherer).GatherKubeControllerManagerLogs, + "lokistack": 
(*Gatherer).GatherLokiStack, "machine_autoscalers": (*Gatherer).GatherMachineAutoscalers, "machine_config_pools": (*Gatherer).GatherMachineConfigPool, "machine_configs": (*Gatherer).GatherMachineConfigs, diff --git a/pkg/gatherers/clusterconfig/const.go b/pkg/gatherers/clusterconfig/const.go index e7b39cec3..77be0946d 100644 --- a/pkg/gatherers/clusterconfig/const.go +++ b/pkg/gatherers/clusterconfig/const.go @@ -54,6 +54,9 @@ var ( openshiftLoggingResource = schema.GroupVersionResource{ Group: "logging.openshift.io", Version: "v1", Resource: "clusterloggings", } + lokiStackResource = schema.GroupVersionResource{ + Group: "loki.grafana.com", Version: "v1", Resource: "lokistacks", + } storageClusterResource = schema.GroupVersionResource{ Group: "ocs.openshift.io", Version: "v1", Resource: "storageclusters", } diff --git a/pkg/gatherers/clusterconfig/gather_lokistack.go b/pkg/gatherers/clusterconfig/gather_lokistack.go new file mode 100644 index 000000000..e83bafeb0 --- /dev/null +++ b/pkg/gatherers/clusterconfig/gather_lokistack.go @@ -0,0 +1,153 @@ +package clusterconfig + +// nolint: dupl + +import ( + "context" + "fmt" + "strings" + + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/klog/v2" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/client-go/dynamic" + + "github.com/openshift/insights-operator/pkg/record" +) + +const lokiStackResourceLimit = 20 + +// GatherLokiStack Collects `lokistacks.loki.grafana.com` resources. +// +// The gatherer will collect up to 20 resources from `openshift-logging` namespace +// and it will report errors if it finds a `LokiStack` resource in a different namespace +// or if there are more than 20 `LokiStacks` in the `openshift-logging` namespace. 
+//
+// ### API Reference
+// None
+//
+// ### Sample data
+// - docs/insights-archive-sample/namespaces/openshift-logging/lokistack-sample.json
+//
+// ### Location in archive
+// - `namespace/openshift-logging/loki.grafana.com/lokistacks/{name}.json`
+//
+// ### Config ID
+// `clusterconfig/lokistack`
+//
+// ### Released version
+// - 4.18.0
+//
+// ### Backported versions
+// None
+//
+// ### Changes
+// None
+func (g *Gatherer) GatherLokiStack(ctx context.Context) ([]record.Record, []error) {
+	gatherDynamicClient, err := dynamic.NewForConfig(g.gatherKubeConfig)
+	if err != nil {
+		return nil, []error{err}
+	}
+
+	return gatherLokiStack(ctx, gatherDynamicClient)
+}
+
+// gatherLokiStack lists LokiStacks cluster-wide and records at most lokiStackResourceLimit of
+// those in `openshift-` prefixed namespaces; each kind of problem is reported as a single error.
+func gatherLokiStack(ctx context.Context, dynamicClient dynamic.Interface) ([]record.Record, []error) {
+	klog.V(2).Info("Start LokiStack gathering")
+	loggingResourceList, err := dynamicClient.Resource(lokiStackResource).List(ctx, metav1.ListOptions{})
+	if errors.IsNotFound(err) {
+		return nil, nil
+	}
+	if err != nil {
+		klog.V(2).Infof("Unable to list %s resource due to: %s", lokiStackResource, err)
+		return nil, []error{err}
+	}
+
+	var records []record.Record
+	var errs []error
+	var otherNamespaceError, tooManyResourcesError bool
+
+	for index := range loggingResourceList.Items {
+		item := loggingResourceList.Items[index]
+
+		namespace := item.GetNamespace()
+		if !strings.HasPrefix(namespace, "openshift-") {
+			klog.Infof("LokiStack resource found in an unexpected namespace %s", namespace)
+			if !otherNamespaceError {
+				otherNamespaceError = true
+				errs = append(errs, fmt.Errorf("found resource in an unexpected namespace"))
+			}
+			continue
+		}
+
+		if len(records) >= lokiStackResourceLimit {
+			// Remember the overflow so it is reported once, not once per surplus item.
+			if !tooManyResourcesError {
+				tooManyResourcesError = true
+				errs = append(errs, fmt.Errorf(
+					"found %d resources, limit (%d) reached",
+					len(loggingResourceList.Items), lokiStackResourceLimit),
+				)
+			}
+			continue
+		}
+
+		// A failed anonymization yields a nil record: report it and skip the item.
+		anonymizedRecord, err := fillLokiStackRecord(item)
+		if err != nil {
+			errs = append(errs, err)
+			continue
+		}
+		records = append(records, *anonymizedRecord)
+	}
+
+	return records, errs
+}
+
+// fillLokiStackRecord anonymizes the given LokiStack and wraps it in a record named
+// `namespace/{namespace}/loki.grafana.com/lokistacks/{name}`; the record is nil on error.
+func fillLokiStackRecord(item unstructured.Unstructured) (*record.Record, error) {
+	if err := removeLimitsTenant(item.Object); err != nil {
+		return nil, err
+	}
+
+	return &record.Record{
+		Name: fmt.Sprintf("namespace/%s/%s/%s/%s",
+			item.GetNamespace(), lokiStackResource.Group, lokiStackResource.Resource, item.GetName()),
+		Item: record.ResourceMarshaller{Resource: &item},
+	}, nil
+}
+
+// removeLimitsTenant strips the `selector` field from every retention stream of the
+// application, infrastructure and audit tenants, as selectors may hold sensitive data.
+func removeLimitsTenant(obj map[string]interface{}) error {
+	for _, tenant := range []string{"application", "infrastructure", "audit"} {
+		klog.V(2).Infof("Anonymizing %s tenant", tenant)
+		streamSlice, ok, err := unstructured.NestedSlice(obj, "spec", "limits", "tenants", tenant, "retention", "streams")
+		if err != nil {
+			klog.V(2).Infof("Bad structure for the gathered file: %v", err)
+			return err
+		}
+		if !ok {
+			continue // this tenant defines no retention streams
+		}
+
+		for _, stream := range streamSlice {
+			if streamMap, ok := stream.(map[string]interface{}); ok {
+				unstructured.RemoveNestedField(streamMap, "selector")
+			}
+		}
+
+		err = unstructured.SetNestedSlice(obj, streamSlice, "spec", "limits", "tenants", tenant, "retention", "streams")
+		if err != nil {
+			klog.V(2).Infof("Failed to set the anonymized slice for tenant %s", tenant)
+			return err
+		}
+	}
+
+	return nil
+}
diff --git a/pkg/gatherers/clusterconfig/gather_lokistacks_test.go b/pkg/gatherers/clusterconfig/gather_lokistacks_test.go
new file mode 100644
index 000000000..403c0cfb7
--- /dev/null
+++ b/pkg/gatherers/clusterconfig/gather_lokistacks_test.go
@@ -0,0 +1,94 @@
+package clusterconfig
+
+import (
+	"context"
+	"fmt"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+
"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/apimachinery/pkg/runtime/serializer/yaml"
+	dynamicfake "k8s.io/client-go/dynamic/fake"
+)
+
+func TestGatherLokiStacks(t *testing.T) {
+	var lokiStackYAMLTmpl = `
+apiVersion: loki.grafana.com/v1
+kind: LokiStack
+metadata:
+  name: test-lokistack-%d
+  namespace: %s
+`
+
+	tests := []struct {
+		name                    string
+		namespace               string
+		resourcesNumber         int
+		expectedErrors          []error
+		expectedNumberOfRecords int
+	}{
+		{
+			name:                    "one resource scenario",
+			namespace:               "openshift-logging",
+			resourcesNumber:         1,
+			expectedErrors:          nil,
+			expectedNumberOfRecords: 1,
+		},
+		{
+			name:                    "several resources in right namespace",
+			namespace:               "openshift-logging",
+			resourcesNumber:         lokiStackResourceLimit,
+			expectedErrors:          nil,
+			expectedNumberOfRecords: lokiStackResourceLimit,
+		},
+		{
+			name:            "too many resources in right namespace",
+			namespace:       "openshift-logging",
+			resourcesNumber: lokiStackResourceLimit + 1,
+			expectedErrors: []error{
+				fmt.Errorf("found %d resources, limit (%d) reached", lokiStackResourceLimit+1, lokiStackResourceLimit),
+			},
+			expectedNumberOfRecords: lokiStackResourceLimit,
+		},
+		{
+			name:            "bad namespace",
+			namespace:       "other-namespace",
+			resourcesNumber: 1,
+			expectedErrors: []error{
+				fmt.Errorf("found resource in an unexpected namespace"),
+			},
+			expectedNumberOfRecords: 0,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			listKinds := map[schema.GroupVersionResource]string{lokiStackResource: "LokiStacksList"}
+			client := dynamicfake.NewSimpleDynamicClientWithCustomListKinds(runtime.NewScheme(), listKinds)
+			decUnstructured := yaml.NewDecodingSerializer(unstructured.UnstructuredJSONScheme)
+
+			for idx := 0; idx < tt.resourcesNumber; idx++ {
+				// Use a fresh object per resource so nothing is shared between created items.
+				testLokiStack := &unstructured.Unstructured{}
+				lokiStackYAML := fmt.Sprintf(lokiStackYAMLTmpl, idx, tt.namespace)
+				if _, _, err := decUnstructured.Decode([]byte(lokiStackYAML), nil, testLokiStack); err != nil {
+					t.Fatal("unable to decode lokistack ", err)
+				}
+				_, err := client.Resource(lokiStackResource).
+					Namespace(tt.namespace).
+					Create(context.Background(), testLokiStack, metav1.CreateOptions{})
+				if err != nil {
+					t.Fatal("unable to create fake lokistack ", err)
+				}
+			}
+
+			records, errs := gatherLokiStack(context.Background(), client)
+
+			assert.Equal(t, tt.expectedNumberOfRecords, len(records))
+			assert.Equal(t, tt.expectedErrors, errs)
+		})
+	}
+}