Skip to content

Commit

Permalink
[release-4.17] OCPBUGS-45202: LokiStack gatherer (#1048)
Browse files Browse the repository at this point in the history
* OCPBUGS-45047: LokiStack gatherer (#1022)

* Adding LokiStack gatherer

* Pay attention to error

* Gathering at most 20. Anonymizing the results to remove tenant's selectors

* Fix behaviour

* Solving some misunderstandings

* Make lintian happy again

* Updating docs

* Added unit tests

* Fix linter

* Tests table-strategy

* Fix linters

* Answering review

* OCPBUGS-45047: Avoid to send the same error repeated N times (#1043)

* Avoid to send the same error repeated N times

* Fix the LokiStack gatherer documentation
  • Loading branch information
joselsegura authored Dec 12, 2024
1 parent 8453aa5 commit dda4a8f
Show file tree
Hide file tree
Showing 7 changed files with 415 additions and 0 deletions.
31 changes: 31 additions & 0 deletions docs/gathered-data.md
Original file line number Diff line number Diff line change
Expand Up @@ -1016,6 +1016,36 @@ None
None


## LokiStack

Collects `lokistacks.loki.grafana.com` resources.

The gatherer will collect up to 20 resources from `openshift-*` namespaces
and it will report errors if it finds a `LokiStack` resource in a different namespace
or if there are more than 20 `LokiStacks` in the `openshift-*` namespaces.

### API Reference
None

### Sample data
- [docs/insights-archive-sample/namespaces/openshift-logging/lokistack-sample.json](./insights-archive-sample/namespaces/openshift-logging/lokistack-sample.json)

### Location in archive
- `namespace/{namespace}/loki.grafana.com/lokistacks/{name}.json`

### Config ID
`clusterconfig/lokistack`

### Released version
- 4.19.0

### Backported versions
None

### Changes
None


## Machine

Collects `Machine` information.
Expand Down Expand Up @@ -2193,5 +2223,6 @@ None

### Changes
- Image repository is now collected if it comes from outside the Red Hat domain
- [Tech Preview] runtime info for workloads are collected (since 4.18.0)


Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
{
"apiVersion": "loki.grafana.com/v1",
"kind": "LokiStack",
"metadata": {
"annotations": {
"kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"loki.grafana.com/v1\",\"kind\":\"LokiStack\",\"metadata\":{\"annotations\":{\"loki.grafana.com/rulesDiscoveredAt\":\"2024-11-14T00:05:44Z\"},\"creationTimestamp\":\"2024-11-13T09:13:10Z\",\"generation\":3,\"name\":\"lokistack-sample\",\"namespace\":\"openshift-logging\",\"resourceVersion\":\"637293\",\"uid\":\"f105635e-dc45-491c-b778-540990e04b3f\"},\"spec\":{\"hashRing\":{\"type\":\"memberlist\"},\"limits\":{\"global\":{\"queries\":{\"queryTimeout\":\"3m\"}},\"tenants\":{\"application\":{\"retention\":{\"days\":1,\"streams\":[{\"days\":4,\"priority\":1,\"selector\":\"{kubernetes_namespace_name=~\\\"test.+\\\"}\"},{\"days\":3,\"priority\":1,\"selector\":\"{log_type=\\\"infrastructure\\\"}\"}]}}}},\"managementState\":\"Managed\",\"size\":\"1x.small\",\"storage\":{\"schemas\":[{\"effectiveDate\":\"2020-10-11\",\"version\":\"v11\"}],\"secret\":{\"name\":\"builder-dockercfg-jzlkq\",\"type\":\"azure\"}},\"storageClassName\":\"gp2-csi\",\"tenants\":{\"mode\":\"openshift-logging\"}},\"status\":{\"components\":{\"compactor\":{\"Failed\":[],\"Pending\":[],\"Ready\":[],\"Running\":[]},\"distributor\":{\"Failed\":[],\"Pending\":[],\"Ready\":[],\"Running\":[]},\"gateway\":{\"Failed\":[],\"Pending\":[],\"Ready\":[],\"Running\":[]},\"indexGateway\":{\"Failed\":[],\"Pending\":[],\"Ready\":[],\"Running\":[]},\"ingester\":{\"Failed\":[],\"Pending\":[],\"Ready\":[],\"Running\":[]},\"querier\":{\"Failed\":[],\"Pending\":[],\"Ready\":[],\"Running\":[]},\"queryFrontend\":{\"Failed\":[],\"Pending\":[],\"Ready\":[],\"Running\":[]},\"ruler\":{\"Failed\":[],\"Pending\":[],\"Ready\":[],\"Running\":[]}},\"conditions\":[{\"lastTransitionTime\":\"2024-11-14T08:52:30Z\",\"message\":\"Invalid object storage secret contents: missing secret field: environment\",\"reason\":\"InvalidObjectStorageSecret\",\"status\":\"True\",\"type\":\"Degraded\"}],\"storage\":{}}}\n",
"loki.grafana.com/rulesDiscoveredAt": "2024-11-14T00:05:44Z"
},
"creationTimestamp": "2024-11-18T12:10:40Z",
"generation": 1,
"name": "lokistack-sample",
"namespace": "openshift-logging",
"resourceVersion": "54569",
"uid": "5072dcdd-e906-4b5f-8473-3d4aa2e579ba"
},
"spec": {
"hashRing": {
"type": "memberlist"
},
"limits": {
"global": {
"queries": {
"queryTimeout": "3m"
}
},
"tenants": {
"application": {
"retention": {
"days": 1,
"streams": [
{
"days": 4,
"priority": 1
},
{
"days": 3,
"priority": 1
}
]
}
}
}
},
"managementState": "Managed",
"size": "1x.small",
"storage": {
"schemas": [
{
"effectiveDate": "2020-10-11",
"version": "v11"
}
],
"secret": {
"name": "builder-dockercfg-jzlkq",
"type": "azure"
}
},
"storageClassName": "gp2-csi",
"tenants": {
"mode": "openshift-logging"
}
},
"status": {
"components": {
"compactor": {
"Failed": [],
"Pending": [],
"Ready": [],
"Running": []
},
"distributor": {
"Failed": [],
"Pending": [],
"Ready": [],
"Running": []
},
"gateway": {
"Failed": [],
"Pending": [],
"Ready": [],
"Running": []
},
"indexGateway": {
"Failed": [],
"Pending": [],
"Ready": [],
"Running": []
},
"ingester": {
"Failed": [],
"Pending": [],
"Ready": [],
"Running": []
},
"querier": {
"Failed": [],
"Pending": [],
"Ready": [],
"Running": []
},
"queryFrontend": {
"Failed": [],
"Pending": [],
"Ready": [],
"Running": []
},
"ruler": {
"Failed": [],
"Pending": [],
"Ready": [],
"Running": []
}
},
"conditions": [
{
"lastTransitionTime": "2024-11-18T12:10:48Z",
"message": "Missing object storage secret",
"reason": "MissingObjectStorageSecret",
"status": "True",
"type": "Degraded"
}
],
"storage": {}
}
}
7 changes: 7 additions & 0 deletions manifests/03-clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,13 @@ rules:
verbs:
- get
- list
- apiGroups:
- loki.grafana.com
resources:
- lokistacks
verbs:
- get
- list
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
Expand Down
1 change: 1 addition & 0 deletions pkg/gatherers/clusterconfig/clusterconfig_gatherer.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ var gatheringFunctions = map[string]gathererFuncPtr{
"install_plans": (*Gatherer).GatherInstallPlans,
"jaegers": (*Gatherer).GatherJaegerCR,
"kube_controller_manager_logs": (*Gatherer).GatherKubeControllerManagerLogs,
"lokistack": (*Gatherer).GatherLokiStack,
"machine_autoscalers": (*Gatherer).GatherMachineAutoscalers,
"machine_config_pools": (*Gatherer).GatherMachineConfigPool,
"machine_configs": (*Gatherer).GatherMachineConfigs,
Expand Down
3 changes: 3 additions & 0 deletions pkg/gatherers/clusterconfig/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ var (
openshiftLoggingResource = schema.GroupVersionResource{
Group: "logging.openshift.io", Version: "v1", Resource: "clusterloggings",
}
lokiStackResource = schema.GroupVersionResource{
Group: "loki.grafana.com", Version: "v1", Resource: "lokistacks",
}
storageClusterResource = schema.GroupVersionResource{
Group: "ocs.openshift.io", Version: "v1", Resource: "storageclusters",
}
Expand Down
154 changes: 154 additions & 0 deletions pkg/gatherers/clusterconfig/gather_lokistack.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
package clusterconfig

// nolint: dupl

import (
"context"
"fmt"
"strings"

"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/klog/v2"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/client-go/dynamic"

"github.com/openshift/insights-operator/pkg/record"
)

// lokiStackResourceLimit caps how many LokiStack records gatherLokiStack
// returns in a single gathering run.
const lokiStackResourceLimit = 20

// GatherLokiStack Collects `lokistacks.loki.grafana.com` resources.
//
// The gatherer will collect up to 20 resources from `openshift-*` namespaces
// and it will report errors if it finds a `LokiStack` resource in a different namespace
// or if there are more than 20 `LokiStacks` in the `openshift-*` namespaces.
//
// ### API Reference
// None
//
// ### Sample data
// - docs/insights-archive-sample/namespaces/openshift-logging/lokistack-sample.json
//
// ### Location in archive
// - `namespace/{namespace}/loki.grafana.com/lokistacks/{name}.json`
//
// ### Config ID
// `clusterconfig/lokistack`
//
// ### Released version
// - 4.19.0
//
// ### Backported versions
// None
//
// ### Changes
// None
func (g *Gatherer) GatherLokiStack(ctx context.Context) ([]record.Record, []error) {
	// Build a dynamic client from the gatherer's kube config; a failure here
	// is returned as the only error and nothing is gathered.
	client, err := dynamic.NewForConfig(g.gatherKubeConfig)
	if err != nil {
		return nil, []error{err}
	}

	return gatherLokiStack(ctx, client)
}

// gatherLokiStack lists all LokiStack resources through dynamicClient and
// turns those found in `openshift-*` namespaces into archive records.
//
// It returns (nil, nil) when the list call reports NotFound (presumably the
// LokiStack CRD is not installed - confirm) and (nil, []error{err}) for any
// other list failure. Otherwise it returns at most lokiStackResourceLimit
// records plus the collected errors:
//   - a single error if at least one resource lives outside `openshift-*`
//     namespaces (reported once, no matter how many such resources exist);
//   - a single error if more resources than the limit were found;
//   - one error per resource that could not be anonymized; such a resource
//     is skipped and does not count towards the limit.
func gatherLokiStack(ctx context.Context, dynamicClient dynamic.Interface) ([]record.Record, []error) {
	klog.V(2).Info("Start LokiStack gathering")
	loggingResourceList, err := dynamicClient.Resource(lokiStackResource).List(ctx, metav1.ListOptions{})

	if errors.IsNotFound(err) {
		return nil, nil
	}
	if err != nil {
		klog.V(2).Infof("Unable to list %s resource due to: %s", lokiStackResource, err)
		return nil, []error{err}
	}

	var (
		records []record.Record
		errs    []error
		// These flags make sure each kind of problem is reported only once
		// instead of once per offending resource.
		otherNamespaceError   bool
		tooManyResourcesError bool
	)

	for index := range loggingResourceList.Items {
		item := loggingResourceList.Items[index]

		namespace := item.GetNamespace()
		if !strings.HasPrefix(namespace, "openshift-") {
			klog.Infof("LokiStack resource found in an unexpected namespace %s", namespace)
			if !otherNamespaceError {
				otherNamespaceError = true
				errs = append(errs, fmt.Errorf("found resource in an unexpected namespace"))
			}

			continue
		}

		if len(records) >= lokiStackResourceLimit {
			if !tooManyResourcesError {
				tooManyResourcesError = true
				errs = append(errs, fmt.Errorf(
					"found %d resources, limit (%d) reached",
					len(loggingResourceList.Items), lokiStackResourceLimit),
				)
			}
			continue
		}

		// fillLokiStackRecord returns a nil record on failure, so the error
		// must be checked before the pointer is dereferenced.
		anonymizedRecord, fillErr := fillLokiStackRecord(item)
		if fillErr != nil {
			errs = append(errs, fillErr)
			continue
		}
		records = append(records, *anonymizedRecord)
	}

	return records, errs
}

// fillLokiStackRecord anonymizes item with removeLimitsTenant and wraps it in
// a record named "namespace/<namespace>/<group>/<resource>/<name>".
//
// When the anonymization fails, it returns a nil record together with the
// error, so callers must check the error before using the record.
func fillLokiStackRecord(item unstructured.Unstructured) (*record.Record, error) {
	err := removeLimitsTenant(item.Object)
	if err != nil {
		return nil, err
	}

	recordName := fmt.Sprintf(
		"namespace/%s/%s/%s/%s",
		item.GetNamespace(),
		lokiStackResource.Group,
		lokiStackResource.Resource,
		item.GetName(),
	)

	rec := record.Record{
		Name: recordName,
		Item: record.ResourceMarshaller{Resource: &item},
	}
	return &rec, nil
}

// removeLimitsTenant anonymizes a LokiStack object by dropping the `selector`
// field from every entry under
// spec.limits.tenants.<tenant>.retention.streams, for the "application",
// "infrastructure" and "audit" tenants.
//
// Tenants without a streams list are skipped. An error is returned when the
// streams path exists but cannot be read as a slice, or when the cleaned
// slice cannot be written back into obj.
//
// NOTE(review): only the spec path above is touched. The sample archive in
// the docs still shows the selectors inside the
// `kubectl.kubernetes.io/last-applied-configuration` annotation - confirm
// whether that annotation is anonymized elsewhere.
func removeLimitsTenant(obj map[string]interface{}) error {
	tenantNames := [...]string{"application", "infrastructure", "audit"}

	for i := range tenantNames {
		name := tenantNames[i]
		klog.V(2).Infof("Anonymizing %s tenant", name)

		streamsPath := []string{"spec", "limits", "tenants", name, "retention", "streams"}

		streams, found, err := unstructured.NestedSlice(obj, streamsPath...)
		if err != nil {
			klog.V(2).Infof("Bad structure for the gathered file: %v", err)
			return err
		}
		if !found {
			// This tenant has no retention streams configured.
			continue
		}

		for _, entry := range streams {
			// Entries that are not maps are left untouched.
			if fields, isMap := entry.(map[string]interface{}); isMap {
				unstructured.RemoveNestedField(fields, "selector")
			}
		}

		// Write the cleaned list back so obj reflects the removal.
		if err := unstructured.SetNestedSlice(obj, streams, streamsPath...); err != nil {
			klog.V(2).Infof("Failed to set the anonymized slice for tenant %s", name)
			return err
		}
	}

	return nil
}
Loading

0 comments on commit dda4a8f

Please sign in to comment.