diff --git a/api/v1/redshiftsink_types.go b/api/v1/redshiftsink_types.go
index dcf435d77..0d7126acb 100644
--- a/api/v1/redshiftsink_types.go
+++ b/api/v1/redshiftsink_types.go
@@ -18,6 +18,7 @@ package v1
 
 import (
 	corev1 "k8s.io/api/core/v1"
+	resource "k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
@@ -42,31 +43,109 @@ type RedshiftPodTemplateSpec struct {
 	Tolerations *[]corev1.Toleration `json:"tolerations,omitempty"`
 }
 
-// RedshiftBatcherSpec defines the desired state of RedshiftBatcher
-type RedshiftBatcherSpec struct {
-	// Supsend when turned on makes sure no batcher pods
-	// are running for this CRD object. Default: false
-	Suspend bool `json:"suspend,omitempty"`
+// DeploymentUnit is used to specify how many topics will run together in a unit
+// and how much resources it needs.
+type DeploymentUnit struct {
+	// PodTemplate describes the pod specification for the unit.
+	// +optional
+	PodTemplate *RedshiftPodTemplateSpec `json:"podTemplate,omitempty"`
+}
 
-	// Max configurations for the batcher to batch
-	MaxSize int `json:"maxSize"`
-	MaxWaitSeconds int `json:"maxWaitSeconds"`
+// SinkGroupSpec defines the specification for one of the three sinkgroups:
+// 1. MainSinkGroup 2. ReloadSinkGroup 3. ReloadDupeSinkGroup
+type SinkGroupSpec struct {
+	// MaxSizePerBatch is the maximum size of the batch in bytes, Ki, Mi, Gi
+	// Example values: 1000, 1Ki, 100Mi, 1Gi
+	// 1000 is 1000 bytes, 1Ki is 1 kibibyte (1024 bytes),
+	// 100Mi is 100 mebibytes, 1Gi is 1 gibibyte
+	// +optional
+	MaxSizePerBatch *resource.Quantity `json:"maxSizePerBatch,omitempty"`
+	// MaxWaitSeconds is the maximum time to wait before making a batch;
+	// a batch is made even if MaxSizePerBatch is not hit during MaxWaitSeconds.
+	// +optional
+	MaxWaitSeconds *int `json:"maxWaitSeconds,omitempty"`
+	// MaxConcurrency is the maximum number of batch processors to run concurrently.
+	// This spec is useful when the sink group pod operates in asynchronous mode.
+	// Loader pods do not need this as they are synchronous.
+	// +optional
 	MaxConcurrency *int `json:"maxConcurrency,omitempty"`
-
-	// MaxProcessingTime is the sarama configuration MaxProcessingTime
-	// It is the max time in milliseconds required to consume one message.
-	// Defaults to 1000ms
+	// MaxProcessingTime is the max time in ms required to consume one message.
+	// Defaults to 180000ms for the batcher and 600000ms for the loader.
+	// +optional
 	MaxProcessingTime *int32 `json:"maxProcessingTime,omitempty"`
-	// Mask when turned on enables masking of the data
-	// Default: false
+	// MaxReloadingUnits is the maximum number of units (pods) that can be launched
+	// based on the DeploymentUnit specification. Only valid for the Reloading SinkGroup.
+	// At present this value is configurable only for the batcher.
+	// +optional
+	MaxReloadingUnits *int32 `json:"maxReloadingUnits,omitempty"`
+	// DeploymentUnit (pod) is the unit of deployment for the batcher or the loader.
+	// Using this the user can specify the amount of resources
+	// needed to run them as one unit. The operator calculates the total units
+	// based on the total number of topics and this unit spec. This largely
+	// solves the scaling issues described in #167.
+	// +optional
+	DeploymentUnit *DeploymentUnit `json:"deploymentUnit,omitempty"`
+}
+
+// SinkGroup is the group of batcher and loader pods based on the
+// mask version, target table and the topic release status. This is the specification
+// that allows a different SinkGroupSpec for each type of SinkGroup.
+// Explaining the precedence:
+// The configuration required for the full sink and the realtime sink can be different.
+// A SinkGroupSpec for each type of sink group lets us provide different
+// configurations for each of them. The precedence is:
+// a) If All is specified and none of the others are specified, All is used for all SinkGroups.
+// b) If All and Main both are specified then Main gets used for the MainSinkGroup.
+// c) If All and Reload are specified then Reload gets used for the ReloadSinkGroup.
+// d) If All and ReloadDupe are specified then ReloadDupe gets used for the ReloadDupeSinkGroup.
+// e) If none is specified then defaults are used for all of them.
+type SinkGroup struct {
+	// All specifies a common specification for all SinkGroups
+	// +optional
+	All *SinkGroupSpec `json:"all,omitempty"`
+	// Main specifies the MainSinkGroup specification, overwrites All
+	// +optional
+	Main *SinkGroupSpec `json:"main,omitempty"`
+	// Reload specifies the ReloadSinkGroup specification, overwrites All
+	// +optional
+	Reload *SinkGroupSpec `json:"reload,omitempty"`
+	// ReloadDupe specifies the ReloadDupeSinkGroup specification, overwrites All
+	// +optional
+	ReloadDupe *SinkGroupSpec `json:"reloadDupe,omitempty"`
+}
+
+// RedshiftBatcherSpec defines the desired state of RedshiftBatcher
+type RedshiftBatcherSpec struct {
+	// Suspend is used to suspend batcher pods. Defaults to false.
+	Suspend bool `json:"suspend,omitempty"`
+
+	// Mask when turned on enables masking of the data. Defaults to false.
 	// +optional
 	Mask bool `json:"mask"`
+	// MaskFile to use to apply mask configurations
 	// +optional
-	MaskFile string `json:"maskFile"`
+	MaskFile string `json:"maskFile,omitempty"`
+	// +optional
+
+	// SinkGroup contains the specification for main, reload and reloadDupe
+	// sinkgroups. The operator uses 3 groups to perform the Redshiftsink. The topics
+	// which have never been released are part of the Reload SinkGroup, the topics
+	// which get released move to the Main SinkGroup. The ReloadDupe SinkGroup
+	// is used to give realtime updates to the topics which are reloading.
+	// Defaults apply for all sinkGroups if none is specified.
 	// +optional
+	SinkGroup *SinkGroup `json:"sinkGroup,omitempty"`
 
-	// Template describes the pods that will be created.
+	// Deprecated: all of the below spec, in favour of SinkGroup (#167)
+	MaxSize int `json:"maxSize,omitempty"`
+	MaxWaitSeconds int `json:"maxWaitSeconds,omitempty"`
+	MaxConcurrency *int `json:"maxConcurrency,omitempty"`
+	// MaxProcessingTime is the sarama configuration MaxProcessingTime
+	// It is the max time in milliseconds required to consume one message.
+	// Defaults to 1000ms
+	MaxProcessingTime *int32 `json:"maxProcessingTime,omitempty"`
+	// PodTemplate describes the pods that will be created.
 	// if this is not specifed, a default pod template is created
 	// +optional
 	PodTemplate *RedshiftPodTemplateSpec `json:"podTemplate,omitempty"`
@@ -78,25 +157,38 @@ type RedshiftLoaderSpec struct {
 	// are running for this CRD object. Default: false
 	Suspend bool `json:"suspend,omitempty"`
 
-	// Max configurations for the loader to batch the load
-	MaxSize int `json:"maxSize"`
-	MaxWaitSeconds int `json:"maxWaitSeconds"`
-
-	// MaxProcessingTime is the sarama configuration MaxProcessingTime
-	// It is the max time in milliseconds required to consume one message.
-	// Defaults to 600000ms (10mins)
-	MaxProcessingTime *int32 `json:"maxProcessingTime,omitempty"`
+	// SinkGroup contains the specification for main, reload and reloadDupe
+	// sinkgroups. The operator uses 3 groups to perform the Redshiftsink. The topics
+	// which have never been released are part of the Reload SinkGroup, the topics
+	// which get released move to the Main SinkGroup. The ReloadDupe SinkGroup
+	// is used to give realtime updates to the topics which are reloading.
+	// Defaults apply for all sinkGroups if none is specified.
+	// +optional
+	SinkGroup *SinkGroup `json:"sinkGroup,omitempty"`
 
 	// RedshiftSchema to sink the data in
 	RedshiftSchema string `json:"redshiftSchema"`
 
 	// RedshiftMaxOpenConns is the maximum open connections allowed
+	// +optional
 	RedshiftMaxOpenConns *int `json:"redshiftMaxOpenConns,omitempty"`
 
 	// RedshiftMaxIdleConns is the maximum idle connections allowed
+	// +optional
 	RedshiftMaxIdleConns *int `json:"redshiftMaxIdleConns,omitempty"`
 
 	// RedshiftGroup to give the access to when new topics gets released
 	RedshiftGroup *string `json:"redshiftGroup"`
 
-	// Template describes the pods that will be created.
+	// Deprecated: all of the below spec, in favour of SinkGroup (#167)
+	// Max configurations for the loader to batch the load
+	// +optional
+	MaxSize int `json:"maxSize,omitempty"`
+	// +optional
+	MaxWaitSeconds int `json:"maxWaitSeconds,omitempty"`
+	// MaxProcessingTime is the sarama configuration MaxProcessingTime
+	// It is the max time in milliseconds required to consume one message.
+	// Defaults to 600000ms (10mins)
+	// +optional
+	MaxProcessingTime *int32 `json:"maxProcessingTime,omitempty"`
+	// PodTemplate describes the pods that will be created.
 	// if this is not specifed, a default pod template is created
 	// +optional
 	PodTemplate *RedshiftPodTemplateSpec `json:"podTemplate,omitempty"`
@@ -122,7 +214,7 @@ type RedshiftSinkSpec struct {
 	KafkaVersion string `json:"kafkaVersion"`
 	KafkaTopicRegexes string `json:"kafkaTopicRegexes"`
 	// +optional
-	KafkaLoaderTopicPrefix string `json:"kafkaLoaderTopicPrefix"`
+	KafkaLoaderTopicPrefix string `json:"kafkaLoaderTopicPrefix,omitempty"`
 
 	Batcher RedshiftBatcherSpec `json:"batcher"`
 	Loader RedshiftLoaderSpec `json:"loader"`
@@ -132,25 +224,25 @@ type RedshiftSinkSpec struct {
 	// This is relevant only if masking is turned on in mask configuration.
 	// It is used for live mask reloading.
 	// +optional
-	ReleaseCondition *ReleaseCondition `json:"releaseCondition"`
+	ReleaseCondition *ReleaseCondition `json:"releaseCondition,omitempty"`
 
 	// TopicReleaseCondition is considered instead of ReleaseCondition
 	// if it is defined for a topic. This is used for topics which
 	// does not work well with central ReleaseCondition for all topics
 	// +optional
-	TopicReleaseCondition map[string]ReleaseCondition `json:"topicReleaseCondition"`
+	TopicReleaseCondition map[string]ReleaseCondition `json:"topicReleaseCondition,omitempty"`
 }
 
 type ReleaseCondition struct {
 	// MaxBatcherLag is the maximum lag the batcher consumer group
 	// shoud have to be be considered to be operating in realtime and
 	// to be considered for release.
-	MaxBatcherLag *int64 `json:"maxBatcherLag"`
+	MaxBatcherLag *int64 `json:"maxBatcherLag,omitempty"`
 
 	// MaxLoaderLag is the maximum lag the loader consumer group
 	// shoud have to be be considered to be operating in realtime and
 	// to be considered for release.
- MaxLoaderLag *int64 `json:"maxLoaderLag"` + MaxLoaderLag *int64 `json:"maxLoaderLag,omitempty"` } // MaskPhase is a label for the condition of a masking at the current time. @@ -208,7 +300,7 @@ type MaskStatus struct { type Group struct { // LoaderTopicPrefix stores the name of the loader topic prefix - LoaderTopicPrefix string `json:"loaderTopicPrefix"` + LoaderTopicPrefix string `json:"loaderTopicPrefix,omitempty"` // LoaderCurrentOffset stores the last read current offset of the consumer group // This is required to determine if the consumer group has performed any @@ -218,7 +310,7 @@ type Group struct { // throughput consumer groups not getting moved to realtime from reloading. // TODO: This is not dead field once a group moves to released and // should be cleaned after that(status needs to be updated) - LoaderCurrentOffset *int64 `json:"currentOffset"` + LoaderCurrentOffset *int64 `json:"currentOffset,omitempty"` // ID stores the name of the consumer group for the topic // based on this batcher and loader consumer groups are made @@ -232,11 +324,17 @@ type RedshiftSinkStatus struct { // MaskStatus stores the status of masking for topics if masking is enabled // +optional - MaskStatus *MaskStatus `json:"maskStatus"` + MaskStatus *MaskStatus `json:"maskStatus,omitempty"` // TopicGroup stores the group info for the topic // +optional - TopicGroup map[string]Group `json:"topicGroups"` + TopicGroup map[string]Group `json:"topicGroups,omitempty"` + + // BatcherReloadingTopics stores the list of topics which are currently reloading + // for the batcher deployments in the reload sink group. + // There is a limit to maximum topics that can be reloaded. (MaxReloadingUnits) + // +optional + BatcherReloadingTopics []string `json:"batcherReloadingTopics,omitempty"` } // +kubebuilder:resource:path=redshiftsinks,shortName=rsk;rsks diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index a3595f067..1e0a7156c 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -25,6 +25,26 @@ import ( runtime "k8s.io/apimachinery/pkg/runtime" ) +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DeploymentUnit) DeepCopyInto(out *DeploymentUnit) { + *out = *in + if in.PodTemplate != nil { + in, out := &in.PodTemplate, &out.PodTemplate + *out = new(RedshiftPodTemplateSpec) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeploymentUnit. +func (in *DeploymentUnit) DeepCopy() *DeploymentUnit { + if in == nil { + return nil + } + out := new(DeploymentUnit) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Group) DeepCopyInto(out *Group) { *out = *in @@ -87,6 +107,11 @@ func (in *MaskStatus) DeepCopy() *MaskStatus { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *RedshiftBatcherSpec) DeepCopyInto(out *RedshiftBatcherSpec) { *out = *in + if in.SinkGroup != nil { + in, out := &in.SinkGroup, &out.SinkGroup + *out = new(SinkGroup) + (*in).DeepCopyInto(*out) + } if in.MaxConcurrency != nil { in, out := &in.MaxConcurrency, &out.MaxConcurrency *out = new(int) @@ -117,10 +142,10 @@ func (in *RedshiftBatcherSpec) DeepCopy() *RedshiftBatcherSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *RedshiftLoaderSpec) DeepCopyInto(out *RedshiftLoaderSpec) { *out = *in - if in.MaxProcessingTime != nil { - in, out := &in.MaxProcessingTime, &out.MaxProcessingTime - *out = new(int32) - **out = **in + if in.SinkGroup != nil { + in, out := &in.SinkGroup, &out.SinkGroup + *out = new(SinkGroup) + (*in).DeepCopyInto(*out) } if in.RedshiftMaxOpenConns != nil { in, out := &in.RedshiftMaxOpenConns, &out.RedshiftMaxOpenConns @@ -137,6 +162,11 @@ func (in *RedshiftLoaderSpec) DeepCopyInto(out *RedshiftLoaderSpec) { *out = new(string) **out = **in } + if in.MaxProcessingTime != nil { + in, out := &in.MaxProcessingTime, &out.MaxProcessingTime + *out = new(int32) + **out = **in + } if in.PodTemplate != nil { in, out := &in.PodTemplate, &out.PodTemplate *out = new(RedshiftPodTemplateSpec) @@ -303,6 +333,11 @@ func (in *RedshiftSinkStatus) DeepCopyInto(out *RedshiftSinkStatus) { (*out)[key] = *val.DeepCopy() } } + if in.BatcherReloadingTopics != nil { + in, out := &in.BatcherReloadingTopics, &out.BatcherReloadingTopics + *out = make([]string, len(*in)) + copy(*out, *in) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RedshiftSinkStatus. @@ -340,6 +375,86 @@ func (in *ReleaseCondition) DeepCopy() *ReleaseCondition { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SinkGroup) DeepCopyInto(out *SinkGroup) { + *out = *in + if in.All != nil { + in, out := &in.All, &out.All + *out = new(SinkGroupSpec) + (*in).DeepCopyInto(*out) + } + if in.Main != nil { + in, out := &in.Main, &out.Main + *out = new(SinkGroupSpec) + (*in).DeepCopyInto(*out) + } + if in.Reload != nil { + in, out := &in.Reload, &out.Reload + *out = new(SinkGroupSpec) + (*in).DeepCopyInto(*out) + } + if in.ReloadDupe != nil { + in, out := &in.ReloadDupe, &out.ReloadDupe + *out = new(SinkGroupSpec) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SinkGroup. +func (in *SinkGroup) DeepCopy() *SinkGroup { + if in == nil { + return nil + } + out := new(SinkGroup) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *SinkGroupSpec) DeepCopyInto(out *SinkGroupSpec) { + *out = *in + if in.MaxSizePerBatch != nil { + in, out := &in.MaxSizePerBatch, &out.MaxSizePerBatch + x := (*in).DeepCopy() + *out = &x + } + if in.MaxWaitSeconds != nil { + in, out := &in.MaxWaitSeconds, &out.MaxWaitSeconds + *out = new(int) + **out = **in + } + if in.MaxConcurrency != nil { + in, out := &in.MaxConcurrency, &out.MaxConcurrency + *out = new(int) + **out = **in + } + if in.MaxProcessingTime != nil { + in, out := &in.MaxProcessingTime, &out.MaxProcessingTime + *out = new(int32) + **out = **in + } + if in.MaxReloadingUnits != nil { + in, out := &in.MaxReloadingUnits, &out.MaxReloadingUnits + *out = new(int32) + **out = **in + } + if in.DeploymentUnit != nil { + in, out := &in.DeploymentUnit, &out.DeploymentUnit + *out = new(DeploymentUnit) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SinkGroupSpec. +func (in *SinkGroupSpec) DeepCopy() *SinkGroupSpec { + if in == nil { + return nil + } + out := new(SinkGroupSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *TopicMaskStatus) DeepCopyInto(out *TopicMaskStatus) { *out = *in diff --git a/cmd/redshiftsink/main.go b/cmd/redshiftsink/main.go index 6532e2474..a326cbcec 100644 --- a/cmd/redshiftsink/main.go +++ b/cmd/redshiftsink/main.go @@ -53,7 +53,6 @@ func main() { rand.Seed(time.Now().UnixNano()) var enableLeaderElection bool - var releaseWaitSeconds int64 var batcherImage, loaderImage, secretRefName, secretRefNamespace, kafkaVersion, metricsAddr string var redshiftMaxOpenConns, redshiftMaxIdleConns int flag.StringVar(&batcherImage, "default-batcher-image", "746161288457.dkr.ecr.ap-south-1.amazonaws.com/redshiftbatcher:latest", "image to use for the redshiftbatcher") @@ -61,7 +60,6 @@ func main() { flag.StringVar(&secretRefName, "default-secret-ref-name", "redshiftsink-secret", "default secret name for all redshiftsink secret") flag.StringVar(&secretRefNamespace, "default-secret-ref-namespace", "ts-redshiftsink-latest", "default namespace where redshiftsink secret is there") flag.StringVar(&kafkaVersion, "default-kafka-version", "2.6.0", "default kafka version") - flag.Int64Var(&releaseWaitSeconds, "release-wait-seconds", 1800, "time to wait after a release to prevent repeated sinkgroup shuffle, takes effect after reloadingRatio is above limit.") flag.StringVar(&metricsAddr, "metrics-addr", ":8080", "The address the metric endpoint binds to.") flag.BoolVar(&enableLeaderElection, "enable-leader-election", false, "Enable leader election for controller manager. 
Enabling this will ensure there is only one active controller manager.") flag.IntVar(&redshiftMaxOpenConns, "default-redshift-max-open-conns", 10, "the maximum number of open connections allowed to redshift per redshiftsink resource") @@ -107,7 +105,6 @@ func main() { DefaultSecretRefName: secretRefName, DefaultSecretRefNamespace: secretRefNamespace, DefaultKafkaVersion: kafkaVersion, - ReleaseWaitSeconds: releaseWaitSeconds, DefaultRedshiftMaxOpenConns: redshiftMaxOpenConns, DefaultRedshiftMaxIdleConns: redshiftMaxIdleConns, }).SetupWithManager(mgr); err != nil { diff --git a/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml b/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml index 126636d9b..349e5b8e1 100644 --- a/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml +++ b/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml @@ -43,10 +43,11 @@ spec: description: RedshiftBatcherSpec defines the desired state of RedshiftBatcher properties: mask: - description: 'Mask when turned on enables masking of the data Default: - false' + description: Mask when turned on enables masking of the data. Defaults + to false type: boolean maskFile: + description: MaskFile to use to apply mask configurations type: string maxConcurrency: type: integer @@ -57,13 +58,14 @@ spec: format: int32 type: integer maxSize: - description: Max configurations for the batcher to batch + description: 'Deprecated all of the below spec in favour of SinkGroup + #167' type: integer maxWaitSeconds: type: integer podTemplate: - description: Template describes the pods that will be created. if - this is not specifed, a default pod template is created + description: PodTemplate describes the pods that will be created. + if this is not specifed, a default pod template is created properties: image: description: Image for the underlying pod @@ -139,13 +141,559 @@ spec: type: object type: array type: object + sinkGroup: + description: SinkGroup contains the specification for main, reload + and reloadDupe sinkgroups. Operator uses 3 groups to perform Redshiftsink. + The topics which have never been released is part of Reload SinkGroup, + the topics which gets released moves to the Main SinkGroup. ReloadDupe + SinkGroup is used to give realtime upaates to the topics which + are reloading. Defaults are there for all sinkGroups if none is + specifed. + properties: + all: + description: All specifies a common specification for all SinkGroups + properties: + deploymentUnit: + description: 'DeploymentUnit(pod) is the unit of deployment + for the batcher or the loader. Using this user can specify + the amount of resources needed to run them as one unit. + Operator calculates the total units based on the total + number of topics and this unit spec. This majorly solves + the scaling issues described in #167.' + properties: + podTemplate: + description: PodTemplate describes the pod specification + for the unit. + properties: + image: + description: Image for the underlying pod + type: string + resources: + description: Resources is for configuring the compute + resources required + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Limits describes the maximum amount + of compute resources allowed. 
More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Requests describes the minimum + amount of compute resources required. If Requests + is omitted for a container, it defaults to + Limits if that is explicitly specified, otherwise + to an implementation-defined value. More info: + https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + type: object + tolerations: + description: Toleartions the underlying pods should + have + items: + description: The pod this Toleration is attached + to tolerates any taint that matches the triple + using the matching operator + . + properties: + effect: + description: Effect indicates the taint effect + to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, + PreferNoSchedule and NoExecute. + type: string + key: + description: Key is the taint key that the + toleration applies to. Empty means match + all taint keys. If the key is empty, operator + must be Exists; this combination means to + match all values and all keys. + type: string + operator: + description: Operator represents a key's relationship + to the value. Valid operators are Exists + and Equal. Defaults to Equal. Exists is + equivalent to wildcard for value, so that + a pod can tolerate all taints of a particular + category. + type: string + tolerationSeconds: + description: TolerationSeconds represents + the period of time the toleration (which + must be of effect NoExecute, otherwise this + field is ignored) tolerates the taint. By + default, it is not set, which means tolerate + the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict + immediately) by the system. + format: int64 + type: integer + value: + description: Value is the taint value the + toleration matches to. If the operator is + Exists, the value should be empty, otherwise + just a regular string. + type: string + type: object + type: array + type: object + type: object + maxConcurrency: + description: MaxConcurrency is the maximum no, of batch + processors to run concurrently. This spec is useful when + the sink group pod operates in asynchronous mode. Loader + pods does not needed this as they are synchronous. + type: integer + maxProcessingTime: + description: MaxProcessingTime is the max time in ms required + to consume one message. Defaults for the batcher is 180000ms + and loader is 600000ms. + format: int32 + type: integer + maxReloadingUnits: + description: MaxReloadingUnits is the maximum number of + units(pods) that can be launched based on the DeploymentUnit + specification. Only valid for Reloading SinkGroup. 
This + value is at present supported to be configurable only + for batcher + format: int32 + type: integer + maxSizePerBatch: + anyOf: + - type: integer + - type: string + description: 'MaxSizePerBatch is the maximum size of the + batch in bytes, Ki, Mi, Gi Example values: 1000, 1Ki, + 100Mi, 1Gi 1000 is 1000 bytes, 1Ki is 1 Killo byte, 100Mi + is 100 mega bytes, 1Gi is 1 Giga bytes' + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + maxWaitSeconds: + description: MaxWaitSeconds is the maximum time to wait + before making a batch, make a batch if MaxSizePerBatch + is not hit during MaxWaitSeconds. + type: integer + type: object + main: + description: Main specifies the MainSinkGroup specification, + overwrites All + properties: + deploymentUnit: + description: 'DeploymentUnit(pod) is the unit of deployment + for the batcher or the loader. Using this user can specify + the amount of resources needed to run them as one unit. + Operator calculates the total units based on the total + number of topics and this unit spec. This majorly solves + the scaling issues described in #167.' + properties: + podTemplate: + description: PodTemplate describes the pod specification + for the unit. + properties: + image: + description: Image for the underlying pod + type: string + resources: + description: Resources is for configuring the compute + resources required + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Limits describes the maximum amount + of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Requests describes the minimum + amount of compute resources required. If Requests + is omitted for a container, it defaults to + Limits if that is explicitly specified, otherwise + to an implementation-defined value. More info: + https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + type: object + tolerations: + description: Toleartions the underlying pods should + have + items: + description: The pod this Toleration is attached + to tolerates any taint that matches the triple + using the matching operator + . + properties: + effect: + description: Effect indicates the taint effect + to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, + PreferNoSchedule and NoExecute. + type: string + key: + description: Key is the taint key that the + toleration applies to. Empty means match + all taint keys. If the key is empty, operator + must be Exists; this combination means to + match all values and all keys. + type: string + operator: + description: Operator represents a key's relationship + to the value. Valid operators are Exists + and Equal. Defaults to Equal. Exists is + equivalent to wildcard for value, so that + a pod can tolerate all taints of a particular + category. 
+ type: string + tolerationSeconds: + description: TolerationSeconds represents + the period of time the toleration (which + must be of effect NoExecute, otherwise this + field is ignored) tolerates the taint. By + default, it is not set, which means tolerate + the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict + immediately) by the system. + format: int64 + type: integer + value: + description: Value is the taint value the + toleration matches to. If the operator is + Exists, the value should be empty, otherwise + just a regular string. + type: string + type: object + type: array + type: object + type: object + maxConcurrency: + description: MaxConcurrency is the maximum no, of batch + processors to run concurrently. This spec is useful when + the sink group pod operates in asynchronous mode. Loader + pods does not needed this as they are synchronous. + type: integer + maxProcessingTime: + description: MaxProcessingTime is the max time in ms required + to consume one message. Defaults for the batcher is 180000ms + and loader is 600000ms. + format: int32 + type: integer + maxReloadingUnits: + description: MaxReloadingUnits is the maximum number of + units(pods) that can be launched based on the DeploymentUnit + specification. Only valid for Reloading SinkGroup. This + value is at present supported to be configurable only + for batcher + format: int32 + type: integer + maxSizePerBatch: + anyOf: + - type: integer + - type: string + description: 'MaxSizePerBatch is the maximum size of the + batch in bytes, Ki, Mi, Gi Example values: 1000, 1Ki, + 100Mi, 1Gi 1000 is 1000 bytes, 1Ki is 1 Killo byte, 100Mi + is 100 mega bytes, 1Gi is 1 Giga bytes' + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + maxWaitSeconds: + description: MaxWaitSeconds is the maximum time to wait + before making a batch, make a batch if MaxSizePerBatch + is not hit during MaxWaitSeconds. + type: integer + type: object + reload: + description: Reload specifies the ReloadSinkGroup specification, + overwrites All + properties: + deploymentUnit: + description: 'DeploymentUnit(pod) is the unit of deployment + for the batcher or the loader. Using this user can specify + the amount of resources needed to run them as one unit. + Operator calculates the total units based on the total + number of topics and this unit spec. This majorly solves + the scaling issues described in #167.' + properties: + podTemplate: + description: PodTemplate describes the pod specification + for the unit. + properties: + image: + description: Image for the underlying pod + type: string + resources: + description: Resources is for configuring the compute + resources required + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Limits describes the maximum amount + of compute resources allowed. 
More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Requests describes the minimum + amount of compute resources required. If Requests + is omitted for a container, it defaults to + Limits if that is explicitly specified, otherwise + to an implementation-defined value. More info: + https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + type: object + tolerations: + description: Toleartions the underlying pods should + have + items: + description: The pod this Toleration is attached + to tolerates any taint that matches the triple + using the matching operator + . + properties: + effect: + description: Effect indicates the taint effect + to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, + PreferNoSchedule and NoExecute. + type: string + key: + description: Key is the taint key that the + toleration applies to. Empty means match + all taint keys. If the key is empty, operator + must be Exists; this combination means to + match all values and all keys. + type: string + operator: + description: Operator represents a key's relationship + to the value. Valid operators are Exists + and Equal. Defaults to Equal. Exists is + equivalent to wildcard for value, so that + a pod can tolerate all taints of a particular + category. + type: string + tolerationSeconds: + description: TolerationSeconds represents + the period of time the toleration (which + must be of effect NoExecute, otherwise this + field is ignored) tolerates the taint. By + default, it is not set, which means tolerate + the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict + immediately) by the system. + format: int64 + type: integer + value: + description: Value is the taint value the + toleration matches to. If the operator is + Exists, the value should be empty, otherwise + just a regular string. + type: string + type: object + type: array + type: object + type: object + maxConcurrency: + description: MaxConcurrency is the maximum no, of batch + processors to run concurrently. This spec is useful when + the sink group pod operates in asynchronous mode. Loader + pods does not needed this as they are synchronous. + type: integer + maxProcessingTime: + description: MaxProcessingTime is the max time in ms required + to consume one message. Defaults for the batcher is 180000ms + and loader is 600000ms. + format: int32 + type: integer + maxReloadingUnits: + description: MaxReloadingUnits is the maximum number of + units(pods) that can be launched based on the DeploymentUnit + specification. Only valid for Reloading SinkGroup. 
This + value is at present supported to be configurable only + for batcher + format: int32 + type: integer + maxSizePerBatch: + anyOf: + - type: integer + - type: string + description: 'MaxSizePerBatch is the maximum size of the + batch in bytes, Ki, Mi, Gi Example values: 1000, 1Ki, + 100Mi, 1Gi 1000 is 1000 bytes, 1Ki is 1 Killo byte, 100Mi + is 100 mega bytes, 1Gi is 1 Giga bytes' + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + maxWaitSeconds: + description: MaxWaitSeconds is the maximum time to wait + before making a batch, make a batch if MaxSizePerBatch + is not hit during MaxWaitSeconds. + type: integer + type: object + reloadDupe: + description: ReloadDupe specifies the ReloadDupeSinkGroup specification, + overwrites All + properties: + deploymentUnit: + description: 'DeploymentUnit(pod) is the unit of deployment + for the batcher or the loader. Using this user can specify + the amount of resources needed to run them as one unit. + Operator calculates the total units based on the total + number of topics and this unit spec. This majorly solves + the scaling issues described in #167.' + properties: + podTemplate: + description: PodTemplate describes the pod specification + for the unit. + properties: + image: + description: Image for the underlying pod + type: string + resources: + description: Resources is for configuring the compute + resources required + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Limits describes the maximum amount + of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Requests describes the minimum + amount of compute resources required. If Requests + is omitted for a container, it defaults to + Limits if that is explicitly specified, otherwise + to an implementation-defined value. More info: + https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + type: object + tolerations: + description: Toleartions the underlying pods should + have + items: + description: The pod this Toleration is attached + to tolerates any taint that matches the triple + using the matching operator + . + properties: + effect: + description: Effect indicates the taint effect + to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, + PreferNoSchedule and NoExecute. + type: string + key: + description: Key is the taint key that the + toleration applies to. Empty means match + all taint keys. If the key is empty, operator + must be Exists; this combination means to + match all values and all keys. + type: string + operator: + description: Operator represents a key's relationship + to the value. Valid operators are Exists + and Equal. Defaults to Equal. Exists is + equivalent to wildcard for value, so that + a pod can tolerate all taints of a particular + category. 
+ type: string + tolerationSeconds: + description: TolerationSeconds represents + the period of time the toleration (which + must be of effect NoExecute, otherwise this + field is ignored) tolerates the taint. By + default, it is not set, which means tolerate + the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict + immediately) by the system. + format: int64 + type: integer + value: + description: Value is the taint value the + toleration matches to. If the operator is + Exists, the value should be empty, otherwise + just a regular string. + type: string + type: object + type: array + type: object + type: object + maxConcurrency: + description: MaxConcurrency is the maximum no, of batch + processors to run concurrently. This spec is useful when + the sink group pod operates in asynchronous mode. Loader + pods does not needed this as they are synchronous. + type: integer + maxProcessingTime: + description: MaxProcessingTime is the max time in ms required + to consume one message. Defaults for the batcher is 180000ms + and loader is 600000ms. + format: int32 + type: integer + maxReloadingUnits: + description: MaxReloadingUnits is the maximum number of + units(pods) that can be launched based on the DeploymentUnit + specification. Only valid for Reloading SinkGroup. This + value is at present supported to be configurable only + for batcher + format: int32 + type: integer + maxSizePerBatch: + anyOf: + - type: integer + - type: string + description: 'MaxSizePerBatch is the maximum size of the + batch in bytes, Ki, Mi, Gi Example values: 1000, 1Ki, + 100Mi, 1Gi 1000 is 1000 bytes, 1Ki is 1 Killo byte, 100Mi + is 100 mega bytes, 1Gi is 1 Giga bytes' + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + maxWaitSeconds: + description: MaxWaitSeconds is the maximum time to wait + before making a batch, make a batch if MaxSizePerBatch + is not hit during MaxWaitSeconds. + type: integer + type: object + type: object suspend: - description: 'Supsend when turned on makes sure no batcher pods - are running for this CRD object. Default: false' + description: Supsend is used to suspend batcher pods. Defaults to + false. type: boolean - required: - - maxSize - - maxWaitSeconds type: object kafkaBrokers: description: Kafka configurations like consumer group and topics to @@ -167,13 +715,14 @@ spec: format: int32 type: integer maxSize: - description: Max configurations for the loader to batch the load + description: 'Deprecated all of the below spec in favour of SinkGroup + #167 Max configurations for the loader to batch the load' type: integer maxWaitSeconds: type: integer podTemplate: - description: Template describes the pods that will be created. if - this is not specifed, a default pod template is created + description: PodTemplate describes the pods that will be created. + if this is not specifed, a default pod template is created properties: image: description: Image for the underlying pod @@ -264,13 +813,560 @@ spec: redshiftSchema: description: RedshiftSchema to sink the data in type: string + sinkGroup: + description: SinkGroup contains the specification for main, reload + and reloadDupe sinkgroups. Operator uses 3 groups to perform Redshiftsink. + The topics which have never been released is part of Reload SinkGroup, + the topics which gets released moves to the Main SinkGroup. 
ReloadDupe + SinkGroup is used to give realtime upaates to the topics which + are reloading. Defaults are there for all sinkGroups if none is + specifed. + properties: + all: + description: All specifies a common specification for all SinkGroups + properties: + deploymentUnit: + description: 'DeploymentUnit(pod) is the unit of deployment + for the batcher or the loader. Using this user can specify + the amount of resources needed to run them as one unit. + Operator calculates the total units based on the total + number of topics and this unit spec. This majorly solves + the scaling issues described in #167.' + properties: + podTemplate: + description: PodTemplate describes the pod specification + for the unit. + properties: + image: + description: Image for the underlying pod + type: string + resources: + description: Resources is for configuring the compute + resources required + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Limits describes the maximum amount + of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Requests describes the minimum + amount of compute resources required. If Requests + is omitted for a container, it defaults to + Limits if that is explicitly specified, otherwise + to an implementation-defined value. More info: + https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + type: object + tolerations: + description: Toleartions the underlying pods should + have + items: + description: The pod this Toleration is attached + to tolerates any taint that matches the triple + using the matching operator + . + properties: + effect: + description: Effect indicates the taint effect + to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, + PreferNoSchedule and NoExecute. + type: string + key: + description: Key is the taint key that the + toleration applies to. Empty means match + all taint keys. If the key is empty, operator + must be Exists; this combination means to + match all values and all keys. + type: string + operator: + description: Operator represents a key's relationship + to the value. Valid operators are Exists + and Equal. Defaults to Equal. Exists is + equivalent to wildcard for value, so that + a pod can tolerate all taints of a particular + category. + type: string + tolerationSeconds: + description: TolerationSeconds represents + the period of time the toleration (which + must be of effect NoExecute, otherwise this + field is ignored) tolerates the taint. By + default, it is not set, which means tolerate + the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict + immediately) by the system. + format: int64 + type: integer + value: + description: Value is the taint value the + toleration matches to. If the operator is + Exists, the value should be empty, otherwise + just a regular string. 
+ type: string + type: object + type: array + type: object + type: object + maxConcurrency: + description: MaxConcurrency is the maximum no, of batch + processors to run concurrently. This spec is useful when + the sink group pod operates in asynchronous mode. Loader + pods does not needed this as they are synchronous. + type: integer + maxProcessingTime: + description: MaxProcessingTime is the max time in ms required + to consume one message. Defaults for the batcher is 180000ms + and loader is 600000ms. + format: int32 + type: integer + maxReloadingUnits: + description: MaxReloadingUnits is the maximum number of + units(pods) that can be launched based on the DeploymentUnit + specification. Only valid for Reloading SinkGroup. This + value is at present supported to be configurable only + for batcher + format: int32 + type: integer + maxSizePerBatch: + anyOf: + - type: integer + - type: string + description: 'MaxSizePerBatch is the maximum size of the + batch in bytes, Ki, Mi, Gi Example values: 1000, 1Ki, + 100Mi, 1Gi 1000 is 1000 bytes, 1Ki is 1 Killo byte, 100Mi + is 100 mega bytes, 1Gi is 1 Giga bytes' + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + maxWaitSeconds: + description: MaxWaitSeconds is the maximum time to wait + before making a batch, make a batch if MaxSizePerBatch + is not hit during MaxWaitSeconds. + type: integer + type: object + main: + description: Main specifies the MainSinkGroup specification, + overwrites All + properties: + deploymentUnit: + description: 'DeploymentUnit(pod) is the unit of deployment + for the batcher or the loader. Using this user can specify + the amount of resources needed to run them as one unit. + Operator calculates the total units based on the total + number of topics and this unit spec. This majorly solves + the scaling issues described in #167.' + properties: + podTemplate: + description: PodTemplate describes the pod specification + for the unit. + properties: + image: + description: Image for the underlying pod + type: string + resources: + description: Resources is for configuring the compute + resources required + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Limits describes the maximum amount + of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Requests describes the minimum + amount of compute resources required. If Requests + is omitted for a container, it defaults to + Limits if that is explicitly specified, otherwise + to an implementation-defined value. More info: + https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + type: object + tolerations: + description: Toleartions the underlying pods should + have + items: + description: The pod this Toleration is attached + to tolerates any taint that matches the triple + using the matching operator + . 
+ properties: + effect: + description: Effect indicates the taint effect + to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, + PreferNoSchedule and NoExecute. + type: string + key: + description: Key is the taint key that the + toleration applies to. Empty means match + all taint keys. If the key is empty, operator + must be Exists; this combination means to + match all values and all keys. + type: string + operator: + description: Operator represents a key's relationship + to the value. Valid operators are Exists + and Equal. Defaults to Equal. Exists is + equivalent to wildcard for value, so that + a pod can tolerate all taints of a particular + category. + type: string + tolerationSeconds: + description: TolerationSeconds represents + the period of time the toleration (which + must be of effect NoExecute, otherwise this + field is ignored) tolerates the taint. By + default, it is not set, which means tolerate + the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict + immediately) by the system. + format: int64 + type: integer + value: + description: Value is the taint value the + toleration matches to. If the operator is + Exists, the value should be empty, otherwise + just a regular string. + type: string + type: object + type: array + type: object + type: object + maxConcurrency: + description: MaxConcurrency is the maximum no, of batch + processors to run concurrently. This spec is useful when + the sink group pod operates in asynchronous mode. Loader + pods does not needed this as they are synchronous. + type: integer + maxProcessingTime: + description: MaxProcessingTime is the max time in ms required + to consume one message. Defaults for the batcher is 180000ms + and loader is 600000ms. + format: int32 + type: integer + maxReloadingUnits: + description: MaxReloadingUnits is the maximum number of + units(pods) that can be launched based on the DeploymentUnit + specification. Only valid for Reloading SinkGroup. This + value is at present supported to be configurable only + for batcher + format: int32 + type: integer + maxSizePerBatch: + anyOf: + - type: integer + - type: string + description: 'MaxSizePerBatch is the maximum size of the + batch in bytes, Ki, Mi, Gi Example values: 1000, 1Ki, + 100Mi, 1Gi 1000 is 1000 bytes, 1Ki is 1 Killo byte, 100Mi + is 100 mega bytes, 1Gi is 1 Giga bytes' + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + maxWaitSeconds: + description: MaxWaitSeconds is the maximum time to wait + before making a batch, make a batch if MaxSizePerBatch + is not hit during MaxWaitSeconds. + type: integer + type: object + reload: + description: Reload specifies the ReloadSinkGroup specification, + overwrites All + properties: + deploymentUnit: + description: 'DeploymentUnit(pod) is the unit of deployment + for the batcher or the loader. Using this user can specify + the amount of resources needed to run them as one unit. + Operator calculates the total units based on the total + number of topics and this unit spec. This majorly solves + the scaling issues described in #167.' + properties: + podTemplate: + description: PodTemplate describes the pod specification + for the unit. 
+ properties: + image: + description: Image for the underlying pod + type: string + resources: + description: Resources is for configuring the compute + resources required + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Limits describes the maximum amount + of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Requests describes the minimum + amount of compute resources required. If Requests + is omitted for a container, it defaults to + Limits if that is explicitly specified, otherwise + to an implementation-defined value. More info: + https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + type: object + tolerations: + description: Toleartions the underlying pods should + have + items: + description: The pod this Toleration is attached + to tolerates any taint that matches the triple + using the matching operator + . + properties: + effect: + description: Effect indicates the taint effect + to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, + PreferNoSchedule and NoExecute. + type: string + key: + description: Key is the taint key that the + toleration applies to. Empty means match + all taint keys. If the key is empty, operator + must be Exists; this combination means to + match all values and all keys. + type: string + operator: + description: Operator represents a key's relationship + to the value. Valid operators are Exists + and Equal. Defaults to Equal. Exists is + equivalent to wildcard for value, so that + a pod can tolerate all taints of a particular + category. + type: string + tolerationSeconds: + description: TolerationSeconds represents + the period of time the toleration (which + must be of effect NoExecute, otherwise this + field is ignored) tolerates the taint. By + default, it is not set, which means tolerate + the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict + immediately) by the system. + format: int64 + type: integer + value: + description: Value is the taint value the + toleration matches to. If the operator is + Exists, the value should be empty, otherwise + just a regular string. + type: string + type: object + type: array + type: object + type: object + maxConcurrency: + description: MaxConcurrency is the maximum no, of batch + processors to run concurrently. This spec is useful when + the sink group pod operates in asynchronous mode. Loader + pods does not needed this as they are synchronous. + type: integer + maxProcessingTime: + description: MaxProcessingTime is the max time in ms required + to consume one message. Defaults for the batcher is 180000ms + and loader is 600000ms. + format: int32 + type: integer + maxReloadingUnits: + description: MaxReloadingUnits is the maximum number of + units(pods) that can be launched based on the DeploymentUnit + specification. Only valid for Reloading SinkGroup. 
This + value is at present supported to be configurable only + for batcher + format: int32 + type: integer + maxSizePerBatch: + anyOf: + - type: integer + - type: string + description: 'MaxSizePerBatch is the maximum size of the + batch in bytes, Ki, Mi, Gi Example values: 1000, 1Ki, + 100Mi, 1Gi 1000 is 1000 bytes, 1Ki is 1 Killo byte, 100Mi + is 100 mega bytes, 1Gi is 1 Giga bytes' + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + maxWaitSeconds: + description: MaxWaitSeconds is the maximum time to wait + before making a batch, make a batch if MaxSizePerBatch + is not hit during MaxWaitSeconds. + type: integer + type: object + reloadDupe: + description: ReloadDupe specifies the ReloadDupeSinkGroup specification, + overwrites All + properties: + deploymentUnit: + description: 'DeploymentUnit(pod) is the unit of deployment + for the batcher or the loader. Using this user can specify + the amount of resources needed to run them as one unit. + Operator calculates the total units based on the total + number of topics and this unit spec. This majorly solves + the scaling issues described in #167.' + properties: + podTemplate: + description: PodTemplate describes the pod specification + for the unit. + properties: + image: + description: Image for the underlying pod + type: string + resources: + description: Resources is for configuring the compute + resources required + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Limits describes the maximum amount + of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Requests describes the minimum + amount of compute resources required. If Requests + is omitted for a container, it defaults to + Limits if that is explicitly specified, otherwise + to an implementation-defined value. More info: + https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + type: object + tolerations: + description: Toleartions the underlying pods should + have + items: + description: The pod this Toleration is attached + to tolerates any taint that matches the triple + using the matching operator + . + properties: + effect: + description: Effect indicates the taint effect + to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, + PreferNoSchedule and NoExecute. + type: string + key: + description: Key is the taint key that the + toleration applies to. Empty means match + all taint keys. If the key is empty, operator + must be Exists; this combination means to + match all values and all keys. + type: string + operator: + description: Operator represents a key's relationship + to the value. Valid operators are Exists + and Equal. Defaults to Equal. Exists is + equivalent to wildcard for value, so that + a pod can tolerate all taints of a particular + category. 
+ type: string + tolerationSeconds: + description: TolerationSeconds represents + the period of time the toleration (which + must be of effect NoExecute, otherwise this + field is ignored) tolerates the taint. By + default, it is not set, which means tolerate + the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict + immediately) by the system. + format: int64 + type: integer + value: + description: Value is the taint value the + toleration matches to. If the operator is + Exists, the value should be empty, otherwise + just a regular string. + type: string + type: object + type: array + type: object + type: object + maxConcurrency: + description: MaxConcurrency is the maximum no, of batch + processors to run concurrently. This spec is useful when + the sink group pod operates in asynchronous mode. Loader + pods does not needed this as they are synchronous. + type: integer + maxProcessingTime: + description: MaxProcessingTime is the max time in ms required + to consume one message. Defaults for the batcher is 180000ms + and loader is 600000ms. + format: int32 + type: integer + maxReloadingUnits: + description: MaxReloadingUnits is the maximum number of + units(pods) that can be launched based on the DeploymentUnit + specification. Only valid for Reloading SinkGroup. This + value is at present supported to be configurable only + for batcher + format: int32 + type: integer + maxSizePerBatch: + anyOf: + - type: integer + - type: string + description: 'MaxSizePerBatch is the maximum size of the + batch in bytes, Ki, Mi, Gi Example values: 1000, 1Ki, + 100Mi, 1Gi 1000 is 1000 bytes, 1Ki is 1 Killo byte, 100Mi + is 100 mega bytes, 1Gi is 1 Giga bytes' + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + maxWaitSeconds: + description: MaxWaitSeconds is the maximum time to wait + before making a batch, make a batch if MaxSizePerBatch + is not hit during MaxWaitSeconds. + type: integer + type: object + type: object suspend: description: 'Supsend when turned on makes sure no batcher pods are running for this CRD object. Default: false' type: boolean required: - - maxSize - - maxWaitSeconds - redshiftGroup - redshiftSchema type: object @@ -292,9 +1388,6 @@ spec: and to be considered for release. format: int64 type: integer - required: - - maxBatcherLag - - maxLoaderLag type: object secretRefName: description: 'Secrets to be used Default: the secret name and namespace @@ -319,9 +1412,6 @@ spec: and to be considered for release. format: int64 type: integer - required: - - maxBatcherLag - - maxLoaderLag type: object description: TopicReleaseCondition is considered instead of ReleaseCondition if it is defined for a topic. This is used for topics which does not @@ -336,6 +1426,14 @@ spec: status: description: RedshiftSinkStatus defines the observed state of RedshiftSink properties: + batcherReloadingTopics: + description: BatcherReloadingTopics stores the list of topics which + are currently reloading for the batcher deployments in the reload + sink group. There is a limit to maximum topics that can be reloaded. 
+ (MaxReloadingUnits) + items: + type: string + type: array maskStatus: description: MaskStatus stores the status of masking for topics if masking is enabled @@ -413,9 +1511,7 @@ spec: prefix type: string required: - - currentOffset - id - - loaderTopicPrefix type: object description: TopicGroup stores the group info for the topic type: object diff --git a/config/operator/redshiftsink_operator.yaml b/config/operator/redshiftsink_operator.yaml index f90db1d0d..c184ab672 100644 --- a/config/operator/redshiftsink_operator.yaml +++ b/config/operator/redshiftsink_operator.yaml @@ -44,7 +44,6 @@ spec: - /redshiftsink args: - -v=2 - - --release-wait-seconds=1800 - --default-batcher-image=746161288457.dkr.ecr.ap-south-1.amazonaws.com/redshiftbatcher:latest - --default-loader-image=746161288457.dkr.ecr.ap-south-1.amazonaws.com/redshiftloader:latest - --default-redshift-max-open-conns=10 diff --git a/config/samples/tipoca_v1_redshiftsink.yaml b/config/samples/tipoca_v1_redshiftsink.yaml index 75fc7c6a7..d8f5a0092 100644 --- a/config/samples/tipoca_v1_redshiftsink.yaml +++ b/config/samples/tipoca_v1_redshiftsink.yaml @@ -14,23 +14,31 @@ spec: maxLoaderLag: 10 batcher: suspend: false - maxSize: 10 - maxWaitSeconds: 30 mask: true maskFile: "https://github.com/practo/tipoca-stream/redshiftsink/pkg/transformer/masker/database.yaml" - podTemplate: - resources: - requests: - cpu: 100m - memory: 200Mi + sinkGroup: + all: + maxSizePerBatch: 10Mi + maxWaitSeconds: 30 + deploymentUnit: + maxTopics: 30 + podTemplate: + resources: + requests: + cpu: 100m + memory: 200Mi loader: suspend: false - maxSize: 10 - maxWaitSeconds: 30 redshiftSchema: "inventory" redshiftGroup: "sales" - podTemplate: - resources: - requests: - cpu: 100m - memory: 200Mi + sinkGroup: + all: + maxSizePerBatch: 1Gi + maxWaitSeconds: 30 + deploymentUnit: + maxTopics: 30 + podTemplate: + resources: + requests: + cpu: 100m + memory: 200Mi diff --git a/controllers/batcher_deployment.go b/controllers/batcher_deployment.go index ed5f18e4d..60258fbef 100644 --- a/controllers/batcher_deployment.go +++ b/controllers/batcher_deployment.go @@ -11,10 +11,11 @@ import ( yaml "gopkg.in/yaml.v2" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + resource "k8s.io/apimachinery/pkg/api/resource" ) const ( - BatcherSuffix = "-batcher" + BatcherTag = "batcher" BatcherLabelInstance = "redshiftbatcher" ) @@ -23,6 +24,116 @@ type Batcher struct { namespace string deployment *appsv1.Deployment config *corev1.ConfigMap + topics []string +} + +// applyBatcherSinkGroupDefaults applies the defaults for the batcher +// deployments of the sink group. User does not need to specify big lengthy +// configurations everytime. Defaults are optimized for maximum performance +// and are recommended to use. 
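+// As an illustration (hypothetical values), with a spec such as:
+//
+//   batcher:
+//     sinkGroup:
+//       all:
+//         maxSizePerBatch: 10Mi
+//       reload:
+//         maxSizePerBatch: 50Mi
+//
+// the reload sink group batches up to 50Mi (its other fields fall back to
+// the defaults below), while main and reloadDupe use the 10Mi from "all".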
+func applyBatcherSinkGroupDefaults( + rsk *tipocav1.RedshiftSink, + sgType string, + defaultImage string, +) *tipocav1.SinkGroupSpec { + var maxSizePerBatch *resource.Quantity + var maxWaitSeconds *int + var maxConcurrency *int + var maxProcessingTime *int32 + var image *string + var resources *corev1.ResourceRequirements + var tolerations *[]corev1.Toleration + var maxReloadingUnits *int32 + + // defaults by sinkgroup + switch sgType { + case MainSinkGroup: + maxSizePerBatch = toQuantityPtr(resource.MustParse("0.5Mi")) + maxWaitSeconds = toIntPtr(60) + maxConcurrency = toIntPtr(2) + maxProcessingTime = &redshiftbatcher.DefaultMaxProcessingTime + image = &defaultImage + case ReloadSinkGroup: + maxSizePerBatch = toQuantityPtr(resource.MustParse("0.5Mi")) + maxWaitSeconds = toIntPtr(60) + maxConcurrency = toIntPtr(10) + maxProcessingTime = &redshiftbatcher.DefaultMaxProcessingTime + image = &defaultImage + maxReloadingUnits = toInt32Ptr(10) + case ReloadDupeSinkGroup: + maxSizePerBatch = toQuantityPtr(resource.MustParse("0.5Mi")) + maxWaitSeconds = toIntPtr(60) + maxConcurrency = toIntPtr(10) + maxProcessingTime = &redshiftbatcher.DefaultMaxProcessingTime + image = &defaultImage + } + + var specifiedSpec *tipocav1.SinkGroupSpec + // apply the sinkGroup spec rules + if rsk.Spec.Batcher.SinkGroup.All != nil { + specifiedSpec = rsk.Spec.Batcher.SinkGroup.All + } + switch sgType { + case MainSinkGroup: + if rsk.Spec.Batcher.SinkGroup.Main != nil { + specifiedSpec = rsk.Spec.Batcher.SinkGroup.Main + } + case ReloadSinkGroup: + if rsk.Spec.Batcher.SinkGroup.Reload != nil { + specifiedSpec = rsk.Spec.Batcher.SinkGroup.Reload + } + case ReloadDupeSinkGroup: + if rsk.Spec.Batcher.SinkGroup.ReloadDupe != nil { + specifiedSpec = rsk.Spec.Batcher.SinkGroup.ReloadDupe + } + } + + // overwrite the defaults with the specified values + if specifiedSpec != nil { + if specifiedSpec.MaxSizePerBatch != nil { + maxSizePerBatch = specifiedSpec.MaxSizePerBatch + } + if specifiedSpec.MaxWaitSeconds != nil { + maxWaitSeconds = specifiedSpec.MaxWaitSeconds + } + if specifiedSpec.MaxConcurrency != nil { + maxConcurrency = specifiedSpec.MaxConcurrency + } + if specifiedSpec.MaxProcessingTime != nil { + maxProcessingTime = specifiedSpec.MaxProcessingTime + } + if specifiedSpec.MaxReloadingUnits != nil { + maxReloadingUnits = specifiedSpec.MaxReloadingUnits + } + if specifiedSpec.DeploymentUnit != nil { + if specifiedSpec.DeploymentUnit.PodTemplate != nil { + if specifiedSpec.DeploymentUnit.PodTemplate.Image != nil { + image = specifiedSpec.DeploymentUnit.PodTemplate.Image + } + if specifiedSpec.DeploymentUnit.PodTemplate.Resources != nil { + resources = specifiedSpec.DeploymentUnit.PodTemplate.Resources + } + if specifiedSpec.DeploymentUnit.PodTemplate.Tolerations != nil { + tolerations = specifiedSpec.DeploymentUnit.PodTemplate.Tolerations + } + } + } + } + + return &tipocav1.SinkGroupSpec{ + MaxSizePerBatch: maxSizePerBatch, + MaxWaitSeconds: maxWaitSeconds, + MaxConcurrency: maxConcurrency, + MaxProcessingTime: maxProcessingTime, + MaxReloadingUnits: maxReloadingUnits, + DeploymentUnit: &tipocav1.DeploymentUnit{ + PodTemplate: &tipocav1.RedshiftPodTemplateSpec{ + Image: image, + Resources: resources, + Tolerations: tolerations, + }, + }, + } } func batcherSecret(secret map[string]string) (map[string]string, error) { @@ -49,8 +160,23 @@ func batcherSecret(secret map[string]string) (map[string]string, error) { return s, nil } -func batcherName(rskName, sinkGroup string) string { - return fmt.Sprintf("%s-%s%s", 
rskName, sinkGroup, BatcherSuffix) +func batcherName(rskName, sinkGroup, id string) string { + if id == "" { + return fmt.Sprintf( + "%s-%s-%s", + rskName, + sinkGroup, + BatcherTag, + ) + } else { + return fmt.Sprintf( + "%s-%s-%s-%s", + rskName, + sinkGroup, + BatcherTag, + id, + ) + } } func NewBatcher( @@ -59,6 +185,7 @@ func NewBatcher( maskFileVersion string, secret map[string]string, sinkGroup string, + sinkGroupSpec *tipocav1.SinkGroupSpec, consumerGroups map[string]consumerGroup, defaultImage string, defaultKafkaVersion string, @@ -72,28 +199,54 @@ func NewBatcher( return nil, err } - totalTopics := 0 - // defaults kafkaVersion := rsk.Spec.KafkaVersion if kafkaVersion == "" { kafkaVersion = defaultKafkaVersion } - maxConcurrency := redshiftbatcher.DefaultMaxConcurrency - if rsk.Spec.Batcher.MaxConcurrency != nil { - maxConcurrency = *rsk.Spec.Batcher.MaxConcurrency - } + var maxSize int // Deprecated + var maxBytesPerBatch *int64 + var maxWaitSeconds, maxConcurrency *int var maxProcessingTime int32 = redshiftbatcher.DefaultMaxProcessingTime - if rsk.Spec.Batcher.MaxProcessingTime != nil { - maxProcessingTime = *rsk.Spec.Batcher.MaxProcessingTime + var image string + var resources *corev1.ResourceRequirements + var tolerations *[]corev1.Toleration + if sinkGroupSpec != nil { + m := sinkGroupSpec.MaxSizePerBatch.Value() + maxBytesPerBatch = &m + maxWaitSeconds = sinkGroupSpec.MaxWaitSeconds + maxConcurrency = sinkGroupSpec.MaxConcurrency + maxProcessingTime = *sinkGroupSpec.MaxProcessingTime + image = *sinkGroupSpec.DeploymentUnit.PodTemplate.Image + resources = sinkGroupSpec.DeploymentUnit.PodTemplate.Resources + tolerations = sinkGroupSpec.DeploymentUnit.PodTemplate.Tolerations + } else { // Deprecated + maxSize = rsk.Spec.Batcher.MaxSize + maxWaitSeconds = &rsk.Spec.Batcher.MaxWaitSeconds + maxConcurrency = &redshiftbatcher.DefaultMaxConcurrency + if rsk.Spec.Batcher.MaxConcurrency != nil { + maxConcurrency = rsk.Spec.Batcher.MaxConcurrency + } + if rsk.Spec.Batcher.MaxProcessingTime != nil { + maxProcessingTime = *rsk.Spec.Batcher.MaxProcessingTime + } + if rsk.Spec.Batcher.PodTemplate.Image != nil { + image = *rsk.Spec.Batcher.PodTemplate.Image + } else { + image = defaultImage + } + resources = rsk.Spec.Batcher.PodTemplate.Resources + tolerations = rsk.Spec.Batcher.PodTemplate.Tolerations } - - // other defaults not configurable defaults for the batcher + // defaults which are not configurable for the user var sessionTimeoutSeconds int = 10 var hearbeatIntervalSeconds int = 2 + topics := []string{} + totalTopics := 0 var groupConfigs []kafka.ConsumerGroupConfig for groupID, group := range consumerGroups { + topics = append(topics, group.topics...) 
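+		// collect every topic this batcher deployment sinks; exposed via
+		// Topics() and used by the reload sink group to populate
+		// status.batcherReloadingTopics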
totalTopics += len(group.topics) groupConfigs = append(groupConfigs, kafka.ConsumerGroupConfig{ GroupID: consumerGroupID(rsk.Name, rsk.Namespace, groupID, "-batcher"), @@ -118,13 +271,14 @@ func NewBatcher( conf := config.Config{ Batcher: redshiftbatcher.BatcherConfig{ - Mask: rsk.Spec.Batcher.Mask, - MaskSalt: secret["maskSalt"], - MaskFile: rsk.Spec.Batcher.MaskFile, - MaskFileVersion: maskFileVersion, - MaxSize: rsk.Spec.Batcher.MaxSize, - MaxWaitSeconds: rsk.Spec.Batcher.MaxWaitSeconds, - MaxConcurrency: maxConcurrency, + Mask: rsk.Spec.Batcher.Mask, + MaskSalt: secret["maskSalt"], + MaskFile: rsk.Spec.Batcher.MaskFile, + MaskFileVersion: maskFileVersion, + MaxSize: maxSize, // Deprecated + MaxWaitSeconds: maxWaitSeconds, + MaxConcurrency: maxConcurrency, + MaxBytesPerBatch: maxBytesPerBatch, }, ConsumerGroups: groupConfigs, S3Sink: s3sink.Config{ @@ -148,13 +302,6 @@ func NewBatcher( totalTopics, ) - var image string - if rsk.Spec.Batcher.PodTemplate.Image != nil { - image = *rsk.Spec.Batcher.PodTemplate.Image - } else { - image = defaultImage - } - confString := string(confBytes) hash, err := getHashStructure(conf) if err != nil { @@ -180,17 +327,18 @@ func NewBatcher( namespace: rsk.Namespace, labels: labels, replicas: &replicas, - resources: rsk.Spec.Batcher.PodTemplate.Resources, - tolerations: rsk.Spec.Batcher.PodTemplate.Tolerations, + resources: resources, + tolerations: tolerations, image: image, args: []string{"-v=4", "--config=/config.yaml"}, } return &Batcher{ - name: name, + name: objectName, namespace: rsk.Namespace, deployment: deploymentFromSpec(deploySpec, configSpec), config: configFromSpec(configSpec), + topics: topics, }, nil } @@ -217,3 +365,7 @@ func (b Batcher) UpdateDeployment(current *appsv1.Deployment) bool { func (b Batcher) UpdateConfig(current *corev1.ConfigMap) bool { return !configSpecEqual(current, b.Config()) } + +func (b Batcher) Topics() []string { + return b.topics +} diff --git a/controllers/loader_deployment.go b/controllers/loader_deployment.go index b84c41212..e12fe4417 100644 --- a/controllers/loader_deployment.go +++ b/controllers/loader_deployment.go @@ -12,10 +12,11 @@ import ( yaml "gopkg.in/yaml.v2" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + resource "k8s.io/apimachinery/pkg/api/resource" ) const ( - LoaderSuffix = "-loader" + LoaderTag = "loader" LoaderLabelInstance = "redshiftloader" ) @@ -24,6 +25,109 @@ type Loader struct { namespace string deployment *appsv1.Deployment config *corev1.ConfigMap + topics []string +} + +// applyLoaderSinkGroupDefaults applies the defaults for the loader +// deployments of the sink group. User does not need to specify big lengthy +// configurations everytime. Defaults are optimized for maximum performance +// and are recommended to use. 
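+// Note: per the defaults below, every loader sink group batches up to 1Gi
+// or 60 seconds, whichever is hit first, and the reload sink group runs a
+// single loader unit; a user supplied maxReloadingUnits is ignored for
+// loaders at present (see the commented override further down).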
+func applyLoaderSinkGroupDefaults( + rsk *tipocav1.RedshiftSink, + sgType string, + defaultImage string, +) *tipocav1.SinkGroupSpec { + var maxSizePerBatch *resource.Quantity + var maxWaitSeconds *int + var maxProcessingTime *int32 + var image *string + var resources *corev1.ResourceRequirements + var tolerations *[]corev1.Toleration + var maxReloadingUnits *int32 + + // defaults by sinkgroup + switch sgType { + case MainSinkGroup: + maxSizePerBatch = toQuantityPtr(resource.MustParse("1Gi")) + maxWaitSeconds = toIntPtr(60) + maxProcessingTime = &redshiftloader.DefaultMaxProcessingTime + image = &defaultImage + case ReloadSinkGroup: + maxSizePerBatch = toQuantityPtr(resource.MustParse("1Gi")) + maxWaitSeconds = toIntPtr(60) + maxProcessingTime = &redshiftloader.DefaultMaxProcessingTime + image = &defaultImage + maxReloadingUnits = toInt32Ptr(1) // loader only supports one for this at present (there is no need as of now to run multiple) + case ReloadDupeSinkGroup: + maxSizePerBatch = toQuantityPtr(resource.MustParse("1Gi")) + maxWaitSeconds = toIntPtr(60) + maxProcessingTime = &redshiftloader.DefaultMaxProcessingTime + image = &defaultImage + } + + var specifiedSpec *tipocav1.SinkGroupSpec + // apply the sinkGroup spec rules + if rsk.Spec.Loader.SinkGroup.All != nil { + specifiedSpec = rsk.Spec.Loader.SinkGroup.All + } + switch sgType { + case MainSinkGroup: + if rsk.Spec.Loader.SinkGroup.Main != nil { + specifiedSpec = rsk.Spec.Loader.SinkGroup.Main + } + case ReloadSinkGroup: + if rsk.Spec.Loader.SinkGroup.Reload != nil { + specifiedSpec = rsk.Spec.Loader.SinkGroup.Reload + } + case ReloadDupeSinkGroup: + if rsk.Spec.Loader.SinkGroup.ReloadDupe != nil { + specifiedSpec = rsk.Spec.Loader.SinkGroup.ReloadDupe + } + } + + // overwrite the defaults with the specified values + if specifiedSpec != nil { + if specifiedSpec.MaxSizePerBatch != nil { + maxSizePerBatch = specifiedSpec.MaxSizePerBatch + } + if specifiedSpec.MaxWaitSeconds != nil { + maxWaitSeconds = specifiedSpec.MaxWaitSeconds + } + if specifiedSpec.MaxProcessingTime != nil { + maxProcessingTime = specifiedSpec.MaxProcessingTime + } + // Loader does not support MaxReloadingUnits yet + // if specifiedSpec.MaxReloadingUnits != nil { + // maxReloadingUnits = specifiedSpec.MaxReloadingUnits + // } + if specifiedSpec.DeploymentUnit != nil { + if specifiedSpec.DeploymentUnit.PodTemplate != nil { + if specifiedSpec.DeploymentUnit.PodTemplate.Image != nil { + image = specifiedSpec.DeploymentUnit.PodTemplate.Image + } + if specifiedSpec.DeploymentUnit.PodTemplate.Resources != nil { + resources = specifiedSpec.DeploymentUnit.PodTemplate.Resources + } + if specifiedSpec.DeploymentUnit.PodTemplate.Tolerations != nil { + tolerations = specifiedSpec.DeploymentUnit.PodTemplate.Tolerations + } + } + } + } + + return &tipocav1.SinkGroupSpec{ + MaxSizePerBatch: maxSizePerBatch, + MaxWaitSeconds: maxWaitSeconds, + MaxProcessingTime: maxProcessingTime, + MaxReloadingUnits: maxReloadingUnits, + DeploymentUnit: &tipocav1.DeploymentUnit{ + PodTemplate: &tipocav1.RedshiftPodTemplateSpec{ + Image: image, + Resources: resources, + Tolerations: tolerations, + }, + }, + } } func loaderSecret(secret map[string]string) (map[string]string, error) { @@ -53,8 +157,23 @@ func loaderSecret(secret map[string]string) (map[string]string, error) { return s, nil } -func loaderName(rskName, sinkGroup string) string { - return fmt.Sprintf("%s-%s%s", rskName, sinkGroup, LoaderSuffix) +func loaderName(rskName, sinkGroup, id string) string { + if id == "" { + return 
fmt.Sprintf( + "%s-%s-%s", + rskName, + sinkGroup, + LoaderTag, + ) + } else { + return fmt.Sprintf( + "%s-%s-%s-%s", + rskName, + sinkGroup, + LoaderTag, + id, + ) + } } func redshiftConnections(rsk *tipocav1.RedshiftSink, defaultMaxOpenConns, defaultMaxIdleConns int) (int, int) { @@ -76,6 +195,7 @@ func NewLoader( tableSuffix string, secret map[string]string, sinkGroup string, + sinkGroupSpec *tipocav1.SinkGroupSpec, consumerGroups map[string]consumerGroup, defaultImage string, defaultKafkaVersion string, @@ -91,26 +211,51 @@ func NewLoader( return nil, err } - totalTopics := 0 - // defaults kafkaVersion := rsk.Spec.KafkaVersion if kafkaVersion == "" { kafkaVersion = defaultKafkaVersion } + var maxSize int // Deprecated + var maxBytesPerBatch *int64 + var maxWaitSeconds *int var maxProcessingTime int32 = redshiftloader.DefaultMaxProcessingTime - if rsk.Spec.Batcher.MaxProcessingTime != nil { - maxProcessingTime = *rsk.Spec.Batcher.MaxProcessingTime + var image string + var resources *corev1.ResourceRequirements + var tolerations *[]corev1.Toleration + if sinkGroupSpec != nil { + m := sinkGroupSpec.MaxSizePerBatch.Value() + maxBytesPerBatch = &m + maxWaitSeconds = sinkGroupSpec.MaxWaitSeconds + maxProcessingTime = *sinkGroupSpec.MaxProcessingTime + image = *sinkGroupSpec.DeploymentUnit.PodTemplate.Image + resources = sinkGroupSpec.DeploymentUnit.PodTemplate.Resources + tolerations = sinkGroupSpec.DeploymentUnit.PodTemplate.Tolerations + } else { // Deprecated + maxSize = rsk.Spec.Loader.MaxSize + maxWaitSeconds = &rsk.Spec.Loader.MaxWaitSeconds + if rsk.Spec.Loader.MaxProcessingTime != nil { + maxProcessingTime = *rsk.Spec.Loader.MaxProcessingTime + } + if rsk.Spec.Loader.PodTemplate.Image != nil { + image = *rsk.Spec.Loader.PodTemplate.Image + } else { + image = defaultImage + } + resources = rsk.Spec.Loader.PodTemplate.Resources + tolerations = rsk.Spec.Loader.PodTemplate.Tolerations } - // other defaults for the loader + // defaults which are not configurable for the user var sessionTimeoutSeconds int = 10 var hearbeatIntervalSeconds int = 2 + topics := []string{} + totalTopics := 0 var groupConfigs []kafka.ConsumerGroupConfig for groupID, group := range consumerGroups { + topics = append(topics, group.topics...) 
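+		// collect the topics handled by this loader deployment so callers
+		// can query them via Topics()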
totalTopics += len(group.topics) - groupConfigs = append(groupConfigs, kafka.ConsumerGroupConfig{ GroupID: consumerGroupID(rsk.Name, rsk.Namespace, groupID, "-loader"), TopicRegexes: expandTopicsToRegex( @@ -140,8 +285,9 @@ func NewLoader( conf := config.Config{ Loader: redshiftloader.LoaderConfig{ - MaxSize: rsk.Spec.Loader.MaxSize, - MaxWaitSeconds: rsk.Spec.Loader.MaxWaitSeconds, + MaxSize: maxSize, // Deprecated + MaxWaitSeconds: maxWaitSeconds, + MaxBytesPerBatch: maxBytesPerBatch, }, ConsumerGroups: groupConfigs, S3Sink: s3sink.Config{ @@ -177,13 +323,6 @@ func NewLoader( totalTopics, ) - var image string - if rsk.Spec.Loader.PodTemplate.Image != nil { - image = *rsk.Spec.Loader.PodTemplate.Image - } else { - image = defaultImage - } - confString := string(confBytes) hash, err := getHashStructure(conf) if err != nil { @@ -209,17 +348,18 @@ func NewLoader( namespace: rsk.Namespace, labels: labels, replicas: &replicas, - resources: rsk.Spec.Loader.PodTemplate.Resources, - tolerations: rsk.Spec.Loader.PodTemplate.Tolerations, + resources: resources, + tolerations: tolerations, image: image, args: []string{"-v=2", "--config=/config.yaml"}, } return &Loader{ - name: name, + name: objectName, namespace: rsk.Namespace, deployment: deploymentFromSpec(deploySpec, configSpec), config: configFromSpec(configSpec), + topics: topics, }, nil } @@ -246,3 +386,7 @@ func (l Loader) UpdateDeployment(current *appsv1.Deployment) bool { func (l Loader) UpdateConfig(current *corev1.ConfigMap) bool { return !configSpecEqual(current, l.Config()) } + +func (l Loader) Topics() []string { + return l.topics +} diff --git a/controllers/realtime_calculator.go b/controllers/realtime_calculator.go new file mode 100644 index 000000000..6e74d1e90 --- /dev/null +++ b/controllers/realtime_calculator.go @@ -0,0 +1,342 @@ +package controllers + +import ( + "fmt" + klog "github.com/practo/klog/v2" + tipocav1 "github.com/practo/tipoca-stream/redshiftsink/api/v1" + kafka "github.com/practo/tipoca-stream/redshiftsink/pkg/kafka" + "math/rand" + "sync" + "time" +) + +type topicLast struct { + topic string + last int64 +} + +type realtimeCalculator struct { + rsk *tipocav1.RedshiftSink + watcher kafka.Watcher + topicGroups map[string]tipocav1.Group + cache *sync.Map + + batchersRealtime []string + loadersRealtime []string + + batchersLast []topicLast + loadersLast []topicLast +} + +func newRealtimeCalculator( + rsk *tipocav1.RedshiftSink, + watcher kafka.Watcher, + topicGroups map[string]tipocav1.Group, + cache *sync.Map, +) *realtimeCalculator { + + return &realtimeCalculator{ + rsk: rsk, + watcher: watcher, + topicGroups: topicGroups, + cache: cache, + batchersLast: []topicLast{}, + loadersLast: []topicLast{}, + } +} + +func (r *realtimeCalculator) maxLag(topic string) (int64, int64) { + var maxBatcherLag, maxLoaderLag int64 + if r.rsk.Spec.ReleaseCondition == nil { + maxBatcherLag = DefaultMaxBatcherLag + maxLoaderLag = DefautMaxLoaderLag + } else { + if r.rsk.Spec.ReleaseCondition.MaxBatcherLag != nil { + maxBatcherLag = *r.rsk.Spec.ReleaseCondition.MaxBatcherLag + } + if r.rsk.Spec.ReleaseCondition.MaxLoaderLag != nil { + maxLoaderLag = *r.rsk.Spec.ReleaseCondition.MaxLoaderLag + } + if r.rsk.Spec.TopicReleaseCondition != nil { + d, ok := r.rsk.Spec.TopicReleaseCondition[topic] + if ok { + if d.MaxBatcherLag != nil { + maxBatcherLag = *d.MaxBatcherLag + } + if d.MaxLoaderLag != nil { + maxLoaderLag = *d.MaxLoaderLag + } + } + } + } + + return maxBatcherLag, maxLoaderLag +} + +// fetchRealtimeCache tires to get the 
topicRealtimeInfo from cache +// if found in cache and cache is valid it returns true and the info +// else it returns no info and false +func (r *realtimeCalculator) fetchRealtimeCache( + topic string, +) ( + topicRealtimeInfo, bool, +) { + loadedInfo, ok := r.cache.Load(topic) + if !ok { + return topicRealtimeInfo{}, false + } + + // 120 to 240 seconds, randomness to prevent multiple parallel calls + validSec := rand.Intn(120) + 120 + klog.V(5).Infof( + "rsk/%s, %s, cacheValid=%vs", + r.rsk.Name, + topic, + validSec, + ) + + info := loadedInfo.(topicRealtimeInfo) + if cacheValid(time.Second*time.Duration(validSec), info.lastUpdate) { + klog.V(4).Infof( + "rsk/%s (realtime cache hit) topic: %s", + r.rsk.Name, + topic, + ) + return info, true + } + + return topicRealtimeInfo{}, false +} + +type offsetPosition struct { + last *int64 + current *int64 +} + +type topicRealtimeInfo struct { + lastUpdate *int64 + batcher *offsetPosition + loader *offsetPosition + batcherRealtime bool + loaderRealtime bool +} + +// fetchRealtimeInfo fetches the offset info for the topic +func (r *realtimeCalculator) fetchRealtimeInfo( + topic string, + loaderTopic *string, + group tipocav1.Group, +) ( + topicRealtimeInfo, error, +) { + klog.V(2).Infof("rsk/%s (fetching realtime) topic: %s", r.rsk.Name, topic) + + now := time.Now().UnixNano() + info := topicRealtimeInfo{ + batcher: &offsetPosition{}, + loader: &offsetPosition{}, + batcherRealtime: false, + loaderRealtime: false, + lastUpdate: &now, + } + + // batcher's lag analysis: a) get last + batcherLast, err := r.watcher.LastOffset(topic, 0) + if err != nil { + return info, fmt.Errorf("Error getting last offset for %s", topic) + } + info.batcher.last = &batcherLast + klog.V(4).Infof("rsk/%s %s, lastOffset=%v", r.rsk.Name, topic, batcherLast) + + // batcher's lag analysis: b) get current + batcherCurrent, err := r.watcher.CurrentOffset( + consumerGroupID(r.rsk.Name, r.rsk.Namespace, group.ID, "-batcher"), + topic, + 0, + ) + if err != nil { + return info, err + } + klog.V(4).Infof("rsk/%s %s, currentOffset=%v (queried)", r.rsk.Name, topic, batcherCurrent) + if batcherCurrent == -1 { + info.batcher.current = nil + klog.V(4).Infof("rsk/%s %s, batcher cg 404, not realtime", r.rsk.Name, topic) + return info, nil + } else { + info.batcher.current = &batcherCurrent + } + + if loaderTopic == nil { + return info, nil + } + + // loader's lag analysis: a) get last + loaderLast, err := r.watcher.LastOffset(*loaderTopic, 0) + if err != nil { + return info, fmt.Errorf("Error getting last offset for %s", *loaderTopic) + } + info.loader.last = &loaderLast + klog.V(4).Infof("rsk/%s %s, lastOffset=%v", r.rsk.Name, *loaderTopic, loaderLast) + + // loader's lag analysis: b) get current + loaderCurrent, err := r.watcher.CurrentOffset( + consumerGroupID(r.rsk.Name, r.rsk.Namespace, group.ID, "-loader"), + *loaderTopic, + 0, + ) + if err != nil { + return info, err + } + klog.V(4).Infof("rsk/%s %s, currentOffset=%v (queried)", r.rsk.Name, *loaderTopic, loaderCurrent) + if loaderCurrent == -1 { + // CurrentOffset can be -1 in two cases (this may be required in batcher also) + // 1. When the Consumer Group was never created in that case we return and consider the topic not realtime + // 2. When the Consumer Group had processed before but now is showing -1 currentOffset as it is inactive due to less throughput. + // On such a scenario, we consider it realtime. 
We find this case by saving the currentOffset for the loader topcics in RedshiftSinkStatus.TopicGroup + if group.LoaderCurrentOffset == nil { + klog.V(2).Infof("%s, loader cg 404, not realtime", *loaderTopic) + return info, nil + } + klog.V(4).Infof("rsk/%s %s, currentOffset=%v (old), cg 404, try realtime", r.rsk.Name, *loaderTopic, *group.LoaderCurrentOffset) + // give the topic the opportunity to release based on its last found currentOffset + info.loader.current = group.LoaderCurrentOffset + } else { + group.LoaderCurrentOffset = &loaderCurrent + // updates the new queried loader offset + klog.V(4).Infof("rsk/%s %s, cg found", r.rsk.Name, *loaderTopic) + updateTopicGroup(r.rsk, topic, group) + info.loader.current = &loaderCurrent + } + + return info, nil +} + +// calculate computes the realtime topics and updates its realtime info +func (r *realtimeCalculator) calculate(reloading []string, currentRealtime []string) []string { + if len(reloading) == 0 { + return currentRealtime + } + + realtimeTopics := []string{} + current := toMap(currentRealtime) + + allTopics, err := r.watcher.Topics() + if err != nil { + klog.Errorf( + "Ignoring realtime update. Error fetching all topics, err:%v", + err, + ) + return currentRealtime + } + allTopicsMap := toMap(allTopics) + + for _, topic := range reloading { + group, ok := r.topicGroups[topic] + if !ok { + klog.Errorf("topicGroup 404 in status for: %s", topic) + continue + } + + var loaderTopic *string + ltopic := r.rsk.Spec.KafkaLoaderTopicPrefix + group.ID + "-" + topic + _, ok = allTopicsMap[ltopic] + if !ok { + klog.V(2).Infof("%s topic 404, not realtime.", ltopic) + } else { + loaderTopic = <opic + } + + now := time.Now().UnixNano() + + info, hit := r.fetchRealtimeCache(topic) + if !hit { // fetch again, cache miss + info, err = r.fetchRealtimeInfo(topic, loaderTopic, group) + if err != nil { + klog.Errorf( + "rsk/%s Error fetching realtime info for topic: %s, err: %v", + r.rsk.Name, + topic, + err, + ) + // if there is an error in finding lag + // and the topic was already in realtime consider it realtime + // consumer groups disappear due to inactivity, hence this + _, ok := current[topic] + if ok { + r.cache.Store( + topic, + topicRealtimeInfo{ + batcherRealtime: true, + loaderRealtime: true, + lastUpdate: &now, + }, + ) + realtimeTopics = append(realtimeTopics, topic) + r.batchersRealtime = append(r.batchersRealtime, topic) + r.loadersRealtime = append(r.loadersRealtime, ltopic) + continue + } + } + } + + // compute realtime + maxBatcherLag, maxLoaderLag := r.maxLag(topic) + if info.batcher != nil && info.batcher.last != nil { + if info.batcher.current != nil { + lag := *info.batcher.last - *info.batcher.current + klog.V(2).Infof("rsk/%s: %s lag=%v", r.rsk.Name, topic, lag) + if lag <= maxBatcherLag { + klog.V(2).Infof("rsk/%s: %s batcher realtime", r.rsk.Name, topic) + info.batcherRealtime = true + r.batchersRealtime = append(r.batchersRealtime, topic) + } + } + r.batchersLast = append( + r.batchersLast, + topicLast{ + topic: topic, + last: *info.batcher.last, + }, + ) + } + if info.loader != nil && info.loader.last != nil { + if info.loader.current != nil { + lag := *info.loader.last - *info.loader.current + klog.V(2).Infof("rsk/%s: %s lag=%v", r.rsk.Name, ltopic, lag) + if lag <= maxLoaderLag { + klog.V(2).Infof("rsk/%s: %s loader realtime", r.rsk.Name, ltopic) + info.loaderRealtime = true + r.loadersRealtime = append(r.loadersRealtime, ltopic) + } + } + r.loadersLast = append( + r.loadersLast, + topicLast{ + topic: topic, + last: 
*info.loader.last, + }, + ) + } + + if info.batcherRealtime && info.loaderRealtime { + klog.V(2).Infof("rsk/%s: %s realtime", r.rsk.Name, topic) + realtimeTopics = append(realtimeTopics, topic) + } else { + if info.batcherRealtime == false && info.loaderRealtime == false { + klog.V(2).Infof("%v: waiting to reach realtime", topic) + klog.V(2).Infof("%v: waiting to reach realtime", ltopic) + } else if info.batcherRealtime == false { + klog.V(2).Infof("%v: waiting to reach realtime", topic) + } else if info.loaderRealtime == false { + klog.V(2).Infof("%v: waiting to reach realtime", ltopic) + } + } + + if !hit { + info.lastUpdate = &now + } + r.cache.Store(topic, info) + } + + return realtimeTopics +} diff --git a/controllers/redshiftsink_controller.go b/controllers/redshiftsink_controller.go index 162619838..59e4382e1 100644 --- a/controllers/redshiftsink_controller.go +++ b/controllers/redshiftsink_controller.go @@ -61,14 +61,12 @@ type RedshiftSinkReconciler struct { DefaultSecretRefName string DefaultSecretRefNamespace string DefaultKafkaVersion string - ReleaseWaitSeconds int64 DefaultRedshiftMaxIdleConns int DefaultRedshiftMaxOpenConns int } const ( - MaxConcurrentReloading = 30 - MaxTopicRelease = 50 + MaxTopicRelease = 50 ) // +kubebuilder:rbac:groups=tipoca.k8s.practo.dev,resources=redshiftsinks,verbs=get;list;watch;create;update;patch;delete @@ -341,8 +339,8 @@ func (r *RedshiftSinkReconciler) reconcile( setType(MainSinkGroup). setTopics(kafkaTopics). setMaskVersion(""). - buildBatcher(secret, r.DefaultBatcherImage, r.DefaultKafkaVersion, tlsConfig). - buildLoader(secret, r.DefaultLoaderImage, "", r.DefaultKafkaVersion, tlsConfig, r.DefaultRedshiftMaxOpenConns, r.DefaultRedshiftMaxIdleConns). + buildBatchers(secret, r.DefaultBatcherImage, r.DefaultKafkaVersion, tlsConfig). + buildLoaders(secret, r.DefaultLoaderImage, "", r.DefaultKafkaVersion, tlsConfig, r.DefaultRedshiftMaxOpenConns, r.DefaultRedshiftMaxIdleConns). build() result, event, err := maskLessSinkGroup.reconcile(ctx) return result, event, err @@ -366,7 +364,6 @@ func (r *RedshiftSinkReconciler) reconcile( var currentMaskVersion string if rsk.Status.MaskStatus != nil && rsk.Status.MaskStatus.CurrentMaskVersion != nil { - currentMaskVersion = *rsk.Status.MaskStatus.CurrentMaskVersion } else { klog.V(2).Infof("rsk/%s, Status empty, currentVersion=''", rsk.Name) @@ -415,6 +412,31 @@ func (r *RedshiftSinkReconciler) reconcile( klog.Fatalf("rsk/%s unexpected status, no diff but reloading", rsk.Name) } + // Realtime status is always calculated to keep the CurrentOffset + // info updated in the rsk status. This is required so that low throughput + // release do not get blocked due to missing consumer group currentOffset. 
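+	// For example (illustrative numbers): with the default maxBatcherLag=100
+	// and maxLoaderLag=10, a reloading topic whose batcher lag (last-current)
+	// is 40 and whose loader lag is 3 is counted as realtime and becomes a
+	// release candidate on a later pass.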
+ reloadTopicGroup := topicGroupBySinkGroup(rsk, ReloadSinkGroup, status.reloading, status.desiredVersion, rsk.Spec.KafkaLoaderTopicPrefix) + calc := newRealtimeCalculator(rsk, kafkaWatcher, reloadTopicGroup, r.KafkaRealtimeCache) + currentRealtime := calc.calculate(status.reloading, status.realtime) + if len(status.reloading) > 0 { + klog.V(2).Infof("rsk/%v batchersRealtime: %d / %d (current=%d)", rsk.Name, len(calc.batchersRealtime), len(status.reloading), len(rsk.Status.BatcherReloadingTopics)) + klog.V(2).Infof("rsk/%v loadersRealtime: %d / %d", rsk.Name, len(calc.loadersRealtime), len(status.reloading)) + } + + if !subSetSlice(currentRealtime, status.realtime) { + for _, moreRealtime := range currentRealtime { + status.realtime = appendIfMissing(status.realtime, moreRealtime) + } + klog.V(2).Infof( + "rsk/%s reconcile needed, realtime topics updated: %v", + rsk.Name, + status.realtime, + ) + status.updateBatcherReloadingTopics(rsk.Status.BatcherReloadingTopics, calc.batchersRealtime) + return resultRequeueMilliSeconds(1500), nil, nil + } + klog.V(2).Infof("rsk/%v reconciling all sinkGroups", rsk.Name) + // SinkGroup are of following types: // 1. main: sink group which has desiredMaskVersion // and has topics which have been released @@ -431,60 +453,17 @@ func (r *RedshiftSinkReconciler) reconcile( // tableSuffix: "" var reload, reloadDupe, main *sinkGroup - allowedReloadingTopics := status.reloading - if len(status.reloading) > MaxConcurrentReloading { - allowedReloadingTopics = status.reloading[:MaxConcurrentReloading] - } reload = sgBuilder. setRedshiftSink(rsk).setClient(r.Client).setScheme(r.Scheme). setType(ReloadSinkGroup). - setTopics(allowedReloadingTopics). + setTopics(status.reloading). setMaskVersion(status.desiredVersion). setTopicGroups(). - buildBatcher(secret, r.DefaultBatcherImage, r.DefaultKafkaVersion, tlsConfig). - buildLoader(secret, r.DefaultLoaderImage, ReloadTableSuffix, r.DefaultKafkaVersion, tlsConfig, r.DefaultRedshiftMaxOpenConns, r.DefaultRedshiftMaxIdleConns). + setRealtimeCalculator(calc). + buildBatchers(secret, r.DefaultBatcherImage, r.DefaultKafkaVersion, tlsConfig). + buildLoaders(secret, r.DefaultLoaderImage, ReloadTableSuffix, r.DefaultKafkaVersion, tlsConfig, r.DefaultRedshiftMaxOpenConns, r.DefaultRedshiftMaxIdleConns). build() - - reloadingRatio := status.reloadingRatio() - allowShuffle := true - if reloadingRatio > 0.2 { - rcloaded, ok := r.ReleaseCache.Load(rsk.Namespace + rsk.Name) - if ok { - cache := rcloaded.(releaseCache) - if cacheValid(time.Second*time.Duration(r.ReleaseWaitSeconds), cache.lastCacheRefresh) { - allowShuffle = false - } - // } else { - // klog.V(2).Infof("rsk/%v init release cache", rsk.Name) - // now := time.Now().UnixNano() - // r.ReleaseCache.Store( - // rsk.Namespace+rsk.Name, - // releaseCache{lastCacheRefresh: &now}, - // ) - // return resultRequeueMilliSeconds(100), nil, nil - } - } - klog.V(2).Infof("rsk/%v allowShuffle=%v, reloadingRatio=%v", rsk.Name, allowShuffle, reloadingRatio) - - // Realtime status is always calculated to keep the CurrentOffset - // info updated in the rsk status. This is required so that low throughput - // release do not get blocked due to missing consumer group currentOffset. 
- currentRealtime := reload.realtimeTopics(status.realtime, kafkaWatcher, r.KafkaRealtimeCache) - - // Allow realtime update only during release window, to minimize shuffle - if allowShuffle { - if !subSetSlice(currentRealtime, status.realtime) { - for _, moreRealtime := range currentRealtime { - status.realtime = appendIfMissing(status.realtime, moreRealtime) - } - klog.V(2).Infof( - "Reconcile needed, realtime topics updated: %v", status.realtime) - return resultRequeueMilliSeconds(1500), nil, nil - } - klog.V(2).Infof("rsk/%v reconciling all sinkGroups", rsk.Name) - } else { - klog.V(2).Infof("rsk/%s realtime (waiting): %d %v", rsk.Name, len(currentRealtime), currentRealtime) - } + status.updateBatcherReloadingTopics(reload.batcherDeploymentTopics(), calc.batchersRealtime) reloadDupe = sgBuilder. setRedshiftSink(rsk).setClient(r.Client).setScheme(r.Scheme). @@ -492,8 +471,9 @@ func (r *RedshiftSinkReconciler) reconcile( setTopics(status.reloadingDupe). setMaskVersion(status.currentVersion). setTopicGroups(). - buildBatcher(secret, r.DefaultBatcherImage, r.DefaultKafkaVersion, tlsConfig). - buildLoader(secret, r.DefaultLoaderImage, "", r.DefaultKafkaVersion, tlsConfig, r.DefaultRedshiftMaxOpenConns, r.DefaultRedshiftMaxIdleConns). + setRealtimeCalculator(nil). + buildBatchers(secret, r.DefaultBatcherImage, r.DefaultKafkaVersion, tlsConfig). + buildLoaders(secret, r.DefaultLoaderImage, "", r.DefaultKafkaVersion, tlsConfig, r.DefaultRedshiftMaxOpenConns, r.DefaultRedshiftMaxIdleConns). build() main = sgBuilder. @@ -502,8 +482,9 @@ func (r *RedshiftSinkReconciler) reconcile( setTopics(status.released). setMaskVersion(status.desiredVersion). setTopicGroups(). - buildBatcher(secret, r.DefaultBatcherImage, r.DefaultKafkaVersion, tlsConfig). - buildLoader(secret, r.DefaultLoaderImage, "", r.DefaultKafkaVersion, tlsConfig, r.DefaultRedshiftMaxOpenConns, r.DefaultRedshiftMaxIdleConns). + setRealtimeCalculator(nil). + buildBatchers(secret, r.DefaultBatcherImage, r.DefaultKafkaVersion, tlsConfig). + buildLoaders(secret, r.DefaultLoaderImage, "", r.DefaultKafkaVersion, tlsConfig, r.DefaultRedshiftMaxOpenConns, r.DefaultRedshiftMaxIdleConns). 
build() sinkGroups := []*sinkGroup{reloadDupe, reload, main} @@ -534,7 +515,7 @@ func (r *RedshiftSinkReconciler) reconcile( if len(status.realtime) >= MaxTopicRelease { releaseCandidates = status.realtime[:MaxTopicRelease] } - klog.V(2).Infof("release candidates: %v", releaseCandidates) + klog.V(2).Infof("rsk/%s releaseCandidates: %v", rsk.Name, releaseCandidates) var releaser *releaser if len(releaseCandidates) > 0 { diff --git a/controllers/sinkgroup_controller.go b/controllers/sinkgroup_controller.go index b63baa8f2..1c06afbfd 100644 --- a/controllers/sinkgroup_controller.go +++ b/controllers/sinkgroup_controller.go @@ -3,8 +3,6 @@ package controllers import ( "context" "fmt" - "math/rand" - "sync" "time" klog "github.com/practo/klog/v2" @@ -19,26 +17,31 @@ import ( ) const ( - MainSinkGroup = "main" - ReloadSinkGroup = "reload" - ReloadDupeSinkGroup = "reload-dupe" + AllSinkGroup = "all" + MainSinkGroup = "main" + ReloadSinkGroup = "reload" + ReloadDupeSinkGroup = "reload-dupe" + DefaultMaxBatcherLag = int64(100) DefautMaxLoaderLag = int64(10) - ReloadTableSuffix = "_ts_adx_reload" + + ReloadTableSuffix = "_ts_adx_reload" ) type sinkGroupInterface interface { - Reconcile(ctx context.Context) (ctrl.Result, ReconcilerEvent, error) - RealtimeTopics(currentRealtime []string, watcher kafka.Watcher, cache *sync.Map) []string + batcherDeploymentTopics() []string + loaderDeploymentTopics() []string + reconcile(ctx context.Context) (ctrl.Result, ReconcilerEvent, error) } type Deployment interface { Name() string Namespace() string - Deployment() *appsv1.Deployment Config() *corev1.ConfigMap + Deployment() *appsv1.Deployment UpdateConfig(current *corev1.ConfigMap) bool UpdateDeployment(current *appsv1.Deployment) bool + Topics() []string } type sinkGroup struct { @@ -48,8 +51,10 @@ type sinkGroup struct { sgType string topics []string topicGroups map[string]tipocav1.Group - batcher Deployment - loader Deployment + calc *realtimeCalculator + + batchers []Deployment + loaders []Deployment } type sinkGroupBuilder interface { @@ -60,8 +65,11 @@ type sinkGroupBuilder interface { setTopics(topics []string) sinkGroupBuilder setMaskVersion(version string) sinkGroupBuilder setTopicGroups() sinkGroupBuilder - buildBatcher(secret map[string]string, defaultImage, defaultKafkaVersion string, tlsConfig *kafka.TLSConfig) sinkGroupBuilder - buildLoader(secret map[string]string, defaultImage, tableSuffix string, defaultKafkaVersion string, tlsConfig *kafka.TLSConfig, defaultMaxOpenConns int, defaultMaxIdleConns int) sinkGroupBuilder + setRealtimeCalculator(calc *realtimeCalculator) sinkGroupBuilder + + buildBatchers(secret map[string]string, defaultImage, defaultKafkaVersion string, tlsConfig *kafka.TLSConfig) sinkGroupBuilder + buildLoaders(secret map[string]string, defaultImage, tableSuffix string, defaultKafkaVersion string, tlsConfig *kafka.TLSConfig, defaultMaxOpenConns int, defaultMaxIdleConns int) sinkGroupBuilder + build() *sinkGroup } @@ -77,8 +85,10 @@ type buildSinkGroup struct { topics []string topicGroups map[string]tipocav1.Group maskVersion string - batcher Deployment - loader Deployment + calc *realtimeCalculator + + batchers []Deployment + loaders []Deployment } func (sb *buildSinkGroup) setRedshiftSink(rsk *tipocav1.RedshiftSink) sinkGroupBuilder { @@ -122,35 +132,107 @@ func (sb *buildSinkGroup) setTopicGroups() sinkGroupBuilder { return sb } -func (sb *buildSinkGroup) buildBatcher( +func (sb *buildSinkGroup) setRealtimeCalculator(calc *realtimeCalculator) sinkGroupBuilder { + sb.calc = calc 
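+	// calc is non-nil only for the reload sink group; main and reloadDupe
+	// pass nil, so their topics are never split into reloading units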
+ + return sb +} + +func (sb *buildSinkGroup) buildBatchers( secret map[string]string, defaultImage string, defaultKafkaVersion string, tlsConfig *kafka.TLSConfig, ) sinkGroupBuilder { - consumerGroups, err := computeConsumerGroups(sb.topicGroups, sb.topics) - if err != nil { - klog.Fatalf("Error computing consumer group from status, err: %v", err) - } - batcher, err := NewBatcher( - batcherName(sb.rsk.Name, sb.sgType), - sb.rsk, - sb.maskVersion, - secret, - sb.sgType, - consumerGroups, - defaultImage, - defaultKafkaVersion, - tlsConfig, - ) - if err != nil { - klog.Fatalf("Error making batcher: %v", err) + batchers := []Deployment{} + if sb.rsk.Spec.Batcher.SinkGroup != nil { + var sinkGroupSpec, mainSinkGroupSpec *tipocav1.SinkGroupSpec + sinkGroupSpec = applyBatcherSinkGroupDefaults( + sb.rsk, + sb.sgType, + defaultImage, + ) + units := []deploymentUnit{ + deploymentUnit{ + id: "", + sinkGroupSpec: sinkGroupSpec, + topics: sb.topics, + }, + } + if len(sb.topics) > 0 && sb.calc != nil { // overwrite units if currently reloading and calculation is available + if len(sb.calc.batchersRealtime) > 0 { + mainSinkGroupSpec = applyBatcherSinkGroupDefaults( + sb.rsk, + MainSinkGroup, + defaultImage, + ) + } + allocator := newUnitAllocator( + sb.rsk.Name, + sb.topics, + sb.calc.batchersRealtime, + sb.calc.batchersLast, + *sinkGroupSpec.MaxReloadingUnits, + sb.rsk.Status.BatcherReloadingTopics, + mainSinkGroupSpec, + sinkGroupSpec, + ) + allocator.allocateReloadingUnits() + units = allocator.units + } + for _, unit := range units { + consumerGroups, err := computeConsumerGroups( + sb.topicGroups, unit.topics) + if err != nil { + klog.Fatalf( + "Error computing consumer group from status, err: %v", err) + } + batcher, err := NewBatcher( + batcherName(sb.rsk.Name, sb.sgType, unit.id), + sb.rsk, + sb.maskVersion, + secret, + sb.sgType, + unit.sinkGroupSpec, + consumerGroups, + defaultImage, + defaultKafkaVersion, + tlsConfig, + ) + if err != nil { + klog.Fatalf("Error making batcher: %v", err) + } + batchers = append(batchers, batcher) + } + } else { // Deprecated + consumerGroups, err := computeConsumerGroups(sb.topicGroups, sb.topics) + if err != nil { + klog.Fatalf( + "Error computing consumer group from status, err: %v", err) + } + batcher, err := NewBatcher( + batcherName(sb.rsk.Name, sb.sgType, ""), + sb.rsk, + sb.maskVersion, + secret, + sb.sgType, + nil, + consumerGroups, + defaultImage, + defaultKafkaVersion, + tlsConfig, + ) + if err != nil { + klog.Fatalf("Error making batcher: %v", err) + } + batchers = append(batchers, batcher) } - sb.batcher = batcher + + sb.batchers = batchers return sb } -func (sb *buildSinkGroup) buildLoader( +func (sb *buildSinkGroup) buildLoaders( secret map[string]string, defaultImage string, tableSuffix string, @@ -159,27 +241,72 @@ func (sb *buildSinkGroup) buildLoader( defaultMaxOpenConns int, defaultMaxIdleConns int, ) sinkGroupBuilder { - consumerGroups, err := computeConsumerGroups(sb.topicGroups, sb.topics) - if err != nil { - klog.Fatalf("Error computing consumer group from status, err: %v", err) - } - loader, err := NewLoader( - loaderName(sb.rsk.Name, sb.sgType), - sb.rsk, - tableSuffix, - secret, - sb.sgType, - consumerGroups, - defaultImage, - defaultKafkaVersion, - tlsConfig, - defaultMaxOpenConns, - defaultMaxIdleConns, - ) - if err != nil { - klog.Fatalf("Error making loader: %v", err) + loaders := []Deployment{} + if sb.rsk.Spec.Loader.SinkGroup != nil { + sinkGroupSpec := applyLoaderSinkGroupDefaults( + sb.rsk, + sb.sgType, + defaultImage, + ) + 
units := []deploymentUnit{ + deploymentUnit{ + id: "", + topics: sb.topics, + }, + } + for _, unit := range units { + consumerGroups, err := computeConsumerGroups( + sb.topicGroups, unit.topics) + if err != nil { + klog.Fatalf( + "Error computing consumer group from status, err: %v", err) + } + loader, err := NewLoader( + loaderName(sb.rsk.Name, sb.sgType, unit.id), + sb.rsk, + tableSuffix, + secret, + sb.sgType, + sinkGroupSpec, + consumerGroups, + defaultImage, + defaultKafkaVersion, + tlsConfig, + defaultMaxOpenConns, + defaultMaxIdleConns, + ) + if err != nil { + klog.Fatalf("Error making loader: %v", err) + } + loaders = append(loaders, loader) + } + } else { // Deprecated + consumerGroups, err := computeConsumerGroups(sb.topicGroups, sb.topics) + if err != nil { + klog.Fatalf( + "Error computing consumer group from status, err: %v", err) + } + loader, err := NewLoader( + loaderName(sb.rsk.Name, sb.sgType, ""), + sb.rsk, + tableSuffix, + secret, + sb.sgType, + nil, + consumerGroups, + defaultImage, + defaultKafkaVersion, + tlsConfig, + defaultMaxOpenConns, + defaultMaxIdleConns, + ) + if err != nil { + klog.Fatalf("Error making loader: %v", err) + } + loaders = append(loaders, loader) } - sb.loader = loader + + sb.loaders = loaders return sb } @@ -191,8 +318,9 @@ func (sb *buildSinkGroup) build() *sinkGroup { sgType: sb.sgType, topics: sb.topics, topicGroups: sb.topicGroups, - batcher: sb.batcher, - loader: sb.loader, + + batchers: sb.batchers, + loaders: sb.loaders, } } @@ -350,7 +478,7 @@ func (s *sinkGroup) reconcileConfigMap( return nil, err } - klog.V(2).Infof("Creating configMap: %v", config.Name) + klog.V(2).Infof("rsk/%s Creating configMap: %v", s.rsk.Name, config.Name) event, err := createConfigMap(ctx, s.client, config, s.rsk) if err != nil { return nil, err @@ -361,15 +489,12 @@ func (s *sinkGroup) reconcileConfigMap( func (s *sinkGroup) reconcileDeployment( ctx context.Context, - labelInstance string, d Deployment, ) ( ReconcilerEvent, error, ) { deployment := d.Deployment() - configMap := d.Config() - current, exists, err := getDeployment( ctx, s.client, @@ -389,7 +514,7 @@ func (s *sinkGroup) reconcileDeployment( return nil, err } - klog.V(2).Infof("Updating deployment: %v", deployment.Name) + klog.V(2).Infof("rsk/%s Updating deployment: %v", s.rsk.Name, deployment.Name) event, err := updateDeployment(ctx, s.client, deployment, s.rsk) if err != nil { return nil, err @@ -397,30 +522,54 @@ func (s *sinkGroup) reconcileDeployment( if event != nil { return event, nil } - return nil, nil } - klog.V(3).Infof("[Cleanup] Attempt deploy, current: %v", deployment.Name) - // find and cleanup dead deployments + err = ctrlutil.SetOwnerReference(s.rsk, deployment, s.scheme) + if err != nil { + return nil, err + } + + // create new deployment pointing to new config map + klog.V(2).Infof("rsk/%s Creating deployment: %v", s.rsk.Name, deployment.Name) + event, err := createDeployment(ctx, s.client, deployment, s.rsk) + if err != nil { + return nil, err + } + + return event, nil +} + +func (s *sinkGroup) cleanup( + ctx context.Context, + labelInstance string, + neededDeployments map[string]bool, + neededConfigMaps map[string]bool, +) ( + ReconcilerEvent, + error, +) { + klog.V(3).Infof("Current active deployments, needed: %+v", neededDeployments) + // query all deployment for the sinkgroup deploymentList, err := listDeployments( ctx, s.client, labelInstance, s.sgType, - d.Namespace(), + s.rsk.Namespace, s.rsk.Name, ) if err != nil { return nil, err } for _, deploy := range 
deploymentList.Items { - klog.V(3).Infof("[Cleanup] Attempting deploy: %v", deploy.Name) + klog.V(4).Infof("Cleanup suspect deployment: %v", deploy.Name) labelValue, ok := deploy.Labels[InstanceName] if !ok { continue } - if labelValue != deployment.Name { - klog.V(2).Infof("[Cleanup] Deleting deploy: %s", labelValue) + _, ok = neededDeployments[labelValue] + if !ok { + klog.V(2).Infof("rsk/%s Deleting deployment: %v", s.rsk.Name, labelValue) event, err := deleteDeployment(ctx, s.client, &deploy, s.rsk) if err != nil { return nil, err @@ -431,14 +580,14 @@ func (s *sinkGroup) reconcileDeployment( } } - klog.V(3).Infof("[Cleanup] Attempt cm, current: %v", configMap.Name) - // find and cleanup dead config maps + klog.V(3).Infof("Current active configMaps, needed: %+v", neededConfigMaps) + // query all configmaps for the sinkgroup configMapList, err := listConfigMaps( ctx, s.client, labelInstance, s.sgType, - d.Namespace(), + s.rsk.Namespace, s.rsk.Name, ) if err != nil { @@ -446,13 +595,14 @@ func (s *sinkGroup) reconcileDeployment( } for _, config := range configMapList.Items { - klog.V(3).Infof("[Cleanup] Attempting cm: %v", config.Name) + klog.V(3).Infof("Cleanup configmap suspect cm: %v", config.Name) labelValue, ok := config.Labels[InstanceName] if !ok { continue } - if labelValue != configMap.Name { - klog.V(2).Infof("[Cleanup] Deleting cm: %s", labelValue) + _, ok = neededConfigMaps[labelValue] + if !ok { + klog.V(2).Infof("rsk/%s Deleting configmap: %s", s.rsk.Name, labelValue) event, err := deleteConfigMap(ctx, s.client, &config, s.rsk) if err != nil { return nil, err @@ -463,18 +613,7 @@ func (s *sinkGroup) reconcileDeployment( } } - err = ctrlutil.SetOwnerReference(s.rsk, deployment, s.scheme) - if err != nil { - return nil, err - } - - // create new deployment pointing to new config map - klog.V(2).Infof("Creating deployment: %v", deployment.Name) - event, err := createDeployment(ctx, s.client, deployment, s.rsk) - if err != nil { - return nil, err - } - return event, nil + return nil, nil } func (s *sinkGroup) reconcileBatcher( @@ -485,7 +624,7 @@ func (s *sinkGroup) reconcileBatcher( error, ) { // reconcile batcher configMap - event, err := s.reconcileConfigMap(ctx, s.batcher) + event, err := s.reconcileConfigMap(ctx, d) if err != nil { return nil, fmt.Errorf("Error reconciling batcher configMap, %v", err) } @@ -494,7 +633,7 @@ func (s *sinkGroup) reconcileBatcher( } // reconcile batcher deployment - event, err = s.reconcileDeployment(ctx, BatcherLabelInstance, s.batcher) + event, err = s.reconcileDeployment(ctx, d) if err != nil { return nil, fmt.Errorf("Error reconciling batcher deployment, %v", err) } @@ -505,282 +644,111 @@ func (s *sinkGroup) reconcileBatcher( return nil, nil } -func (s *sinkGroup) reconcileLoader( +func (s *sinkGroup) reconcileBatchers( ctx context.Context, - d Deployment, + deployments []Deployment, ) ( ReconcilerEvent, error, ) { - event, err := s.reconcileConfigMap(ctx, s.loader) - if err != nil { - return nil, fmt.Errorf("Error reconciling loader configMap, %v", err) - } - if event != nil { - return event, nil + // cleanup the ones which should be dead before creating new + var neededDeployments, neededConfigMaps []string + for _, d := range deployments { + neededDeployments = append(neededDeployments, d.Name()) + neededConfigMaps = append(neededConfigMaps, d.Name()) } - - // reconcile loader deployment - event, err = s.reconcileDeployment(ctx, LoaderLabelInstance, s.loader) + event, err := s.cleanup( + ctx, + BatcherLabelInstance, + 
toMap(neededDeployments), + toMap(neededConfigMaps), + ) if err != nil { - return nil, fmt.Errorf("Error reconciling loader deployment, %v", err) + return nil, err } if event != nil { return event, nil } - return nil, nil -} - -func maxLag(rsk *tipocav1.RedshiftSink, topic string) (int64, int64) { - var maxBatcherLag, maxLoaderLag int64 - if rsk.Spec.ReleaseCondition == nil { - maxBatcherLag = DefaultMaxBatcherLag - maxLoaderLag = DefautMaxLoaderLag - } else { - if rsk.Spec.ReleaseCondition.MaxBatcherLag != nil { - maxBatcherLag = *rsk.Spec.ReleaseCondition.MaxBatcherLag - } - if rsk.Spec.ReleaseCondition.MaxLoaderLag != nil { - maxLoaderLag = *rsk.Spec.ReleaseCondition.MaxLoaderLag + // create or update + for _, d := range deployments { + event, err := s.reconcileBatcher(ctx, d) + if err != nil { + return nil, err } - if rsk.Spec.TopicReleaseCondition != nil { - d, ok := rsk.Spec.TopicReleaseCondition[topic] - if ok { - if d.MaxBatcherLag != nil { - maxBatcherLag = *d.MaxBatcherLag - } - if d.MaxLoaderLag != nil { - maxLoaderLag = *d.MaxLoaderLag - } - } + if event != nil { + return event, nil } } - return maxBatcherLag, maxLoaderLag + return nil, nil } -func (s *sinkGroup) lagBelowThreshold( - topic string, - batcherLag, - loaderLag, - maxBatcherLag, - maxLoaderLag int64, -) bool { - // if batcherLag <= maxBatcherLag && loaderLag == -1 { - // // TODO: this might lead to false positives, solve it - // // but without it some very low throughput topics wont go live. - // // may need to plugin prometheus time series data for analysis later - // // to solve it - // klog.Warningf("topic: %s assumed to have reached realtime as batcherLag<=threshold and loaderLag=-1 (consumer group not active)", topic) - // return true - // } - - klog.V(4).Infof("topic: %s lag=%v", topic, batcherLag) - klog.V(4).Infof("topic: %s lag=%v", topic, loaderLag) - - if batcherLag <= maxBatcherLag && - loaderLag <= maxLoaderLag { - - return true +func (s *sinkGroup) reconcileLoader( + ctx context.Context, + d Deployment, +) ( + ReconcilerEvent, + error, +) { + event, err := s.reconcileConfigMap(ctx, d) + if err != nil { + return nil, fmt.Errorf("Error reconciling loader configMap, %v", err) } - - return false -} - -func cacheValid(validity time.Duration, lastCachedTime *int64) bool { - if lastCachedTime == nil { - return false + if event != nil { + return event, nil } - if (*lastCachedTime + validity.Nanoseconds()) > time.Now().UnixNano() { - return true + // reconcile loader deployment + event, err = s.reconcileDeployment(ctx, d) + if err != nil { + return nil, fmt.Errorf("Error reconciling loader deployment, %v", err) + } + if event != nil { + return event, nil } - return false -} - -type kafkaRealtimeCache struct { - lastCacheRefresh *int64 - realtime bool + return nil, nil } -func (s *sinkGroup) topicRealtime( - watcher kafka.Watcher, - topic string, - cache *sync.Map, - allTopics map[string]bool, +func (s *sinkGroup) reconcileLoaders( + ctx context.Context, + deployments []Deployment, ) ( - bool, *int64, error, + ReconcilerEvent, + error, ) { - // use cache to prevent calls to kafka - var realtimeCache kafkaRealtimeCache - cacheLoaded, ok := cache.Load(topic) - if ok { - realtimeCache = cacheLoaded.(kafkaRealtimeCache) - // 600 to 840 seconds - validitySeconds := rand.Intn(240) + 300 - klog.V(5).Infof("rsk/%s validity seconds: %v topic: %s", s.rsk.Name, validitySeconds, topic) - if cacheValid(time.Second*time.Duration(validitySeconds), realtimeCache.lastCacheRefresh) { - klog.V(4).Infof("rsk/%s (realtime cache hit) 
topic: %s", s.rsk.Name, topic) - if realtimeCache.realtime { - return true, realtimeCache.lastCacheRefresh, nil - } - return false, realtimeCache.lastCacheRefresh, nil - } - } - - // new cache refresh time so that topics are only checked after an interval - // reduces the request to Kafka by big factor - now := time.Now().UnixNano() - maxBatcherLag, maxLoaderLag := maxLag(s.rsk, topic) - - klog.V(2).Infof("rsk/%s (fetching realtime stats) topic: %s", s.rsk.Name, topic) - group, ok := s.topicGroups[topic] - if !ok { - return false, &now, fmt.Errorf("consumerGroupID not found for %s", topic) + // cleanup the ones which should be dead before creating new + var neededDeployments, neededConfigMaps []string + for _, d := range deployments { + neededDeployments = append(neededDeployments, d.Name()) + neededConfigMaps = append(neededConfigMaps, d.Name()) } - - // batcher's lag analysis - batcherLastOffset, err := watcher.LastOffset(topic, 0) - if err != nil { - return false, &now, fmt.Errorf("Error getting current offset for %s", topic) - } - klog.V(4).Infof("%s, lastOffset=%v", topic, batcherLastOffset) - - // This won't work for topics which have lastOffset less than lag - // klog.V(2).Infof("%s, lastOffset=%v", topic, batcherLastOffset) - // if batcherLastOffset < maxBatcherLag { - // klog.V(2).Infof("%s, lastOffset < %v, not realtime", topic, maxBatcherLag) - // return false, &now, nil - // } - - batcherCGID := consumerGroupID(s.rsk.Name, s.rsk.Namespace, group.ID, "-batcher") - batcherCurrentOffset, err := watcher.CurrentOffset( - batcherCGID, - topic, - 0, + event, err := s.cleanup( + ctx, + LoaderLabelInstance, + toMap(neededDeployments), + toMap(neededConfigMaps), ) if err != nil { - return false, &now, err - } - klog.V(4).Infof("%s, currentOffset=%v", topic, batcherCurrentOffset) - if batcherCurrentOffset == -1 { - klog.V(2).Infof("%s, batcher cg 404, not realtime", topic) - return false, &now, nil - } - - // loader's lag analysis - loaderTopic := s.rsk.Spec.KafkaLoaderTopicPrefix + group.ID + "-" + topic - _, ok = allTopics[loaderTopic] - if !ok { - klog.V(2).Infof("%s topic 404, not realtime.", loaderTopic) - return false, &now, nil + return nil, err } - loaderLastOffset, err := watcher.LastOffset(loaderTopic, 0) - if err != nil { - return false, &now, fmt.Errorf("Error getting last offset for %s", loaderTopic) - } - klog.V(4).Infof("%s, lastOffset=%v", loaderTopic, loaderLastOffset) - - // This won't work for topics which have lastOffset less than lag - // if loaderLastOffset < maxLoaderLag { - // klog.V(2).Infof("%s, lastOffset < %v, not realtime", loaderTopic, maxLoaderLag) - // return false, &now, nil - // } - loaderCGID := consumerGroupID(s.rsk.Name, s.rsk.Namespace, group.ID, "-loader") - loaderCurrentOffset, err := watcher.CurrentOffset( - loaderCGID, - loaderTopic, - 0, - ) - if err != nil { - return false, &now, err - } - klog.V(4).Infof("%s, currentOffset=%v (queried)", loaderTopic, loaderCurrentOffset) - if loaderCurrentOffset == -1 { - // CurrentOffset can be -1 in two cases - // 1. When the Consumer Group was never created in that case we return and consider the topic not realtime - // 2. When the Consumer Group had processed before but now is showing -1 currentOffset as it is inactive due to less throughput, - // On such a scenario, we consider it realtime. 
- // we find this case by saving the currentOffset for the loader topcics in RedshiftSink Topic Group Status - if group.LoaderCurrentOffset == nil { - klog.V(2).Infof("%s, loader cg 404, not realtime", loaderTopic) - return false, &now, nil - } - klog.V(2).Infof("%s, currentOffset=%v (old), cg 404, try realtime", loaderTopic, *group.LoaderCurrentOffset) - // give the topic the opportunity to release based on its last found currentOffset - loaderCurrentOffset = *group.LoaderCurrentOffset - } else { - group.LoaderCurrentOffset = &loaderCurrentOffset - // updates the new queried lodaer offset - klog.V(4).Infof("%s, cg found", loaderTopic) - updateTopicGroup(s.rsk, topic, group) - } - klog.V(2).Infof("%s, currentOffset=%v, checking realtime", loaderTopic, *group.LoaderCurrentOffset) - - // check realtime - if s.lagBelowThreshold( - topic, - batcherLastOffset-batcherCurrentOffset, // batcher lag - loaderLastOffset-loaderCurrentOffset, // loader lag - maxBatcherLag, - maxLoaderLag, - ) { - klog.V(2).Infof("%s, realtime", topic) - return true, &now, nil - } else { - klog.V(2).Infof("%v: waiting to reach realtime", topic) - return false, &now, nil - } -} - -// realtimeTopics gives back the list of topics whose consumer lags are -// less than or equal to the specified thresholds to be considered realtime -func (s *sinkGroup) realtimeTopics( - currentRealtime []string, - watcher kafka.Watcher, - cache *sync.Map, -) []string { - current := toMap(currentRealtime) - realtimeTopics := []string{} - - allTopics, err := watcher.Topics() - if err != nil { - klog.Errorf( - "Ignoring realtime update. Error fetching all topics, err:%v", - err, - ) - return currentRealtime + if event != nil { + return event, nil } - for _, topic := range s.topics { - realtime, lastRefresh, err := s.topicRealtime( - watcher, topic, cache, toMap(allTopics), - ) + // create or update + for _, d := range deployments { + event, err := s.reconcileLoader(ctx, d) if err != nil { - klog.Errorf( - "rsk/%s Error getting realtime for topic: %s, err: %v", - s.rsk.Name, - topic, - err, - ) - _, ok := current[topic] - // if there is an error in finding lag - // and the topic was already in realtime consider it realtime - if ok { - cache.Store(topic, kafkaRealtimeCache{lastCacheRefresh: lastRefresh, realtime: true}) - realtimeTopics = append(realtimeTopics, topic) - continue - } + return nil, err } - if realtime { - realtimeTopics = append(realtimeTopics, topic) + if event != nil { + return event, nil } - cache.Store(topic, kafkaRealtimeCache{lastCacheRefresh: lastRefresh, realtime: realtime}) } - return realtimeTopics + return nil, nil } func (s *sinkGroup) reconcile( @@ -790,7 +758,7 @@ func (s *sinkGroup) reconcile( ) { result := ctrl.Result{RequeueAfter: time.Second * 30} - event, err := s.reconcileBatcher(ctx, s.batcher) + event, err := s.reconcileBatchers(ctx, s.batchers) if err != nil { return result, nil, err } @@ -798,7 +766,7 @@ func (s *sinkGroup) reconcile( return result, event, nil } - event, err = s.reconcileLoader(ctx, s.loader) + event, err = s.reconcileLoaders(ctx, s.loaders) if err != nil { return result, nil, err } @@ -808,3 +776,21 @@ func (s *sinkGroup) reconcile( return result, nil, nil } + +func (s *sinkGroup) batcherDeploymentTopics() []string { + t := []string{} + for _, d := range s.batchers { + t = append(t, d.Topics()...) + } + + return t +} + +func (s *sinkGroup) loaderDeploymentTopics() []string { + t := []string{} + for _, d := range s.loaders { + t = append(t, d.Topics()...) 
+ } + + return t +} diff --git a/controllers/status.go b/controllers/status.go index dd54c353f..6b6b1a2d9 100644 --- a/controllers/status.go +++ b/controllers/status.go @@ -68,11 +68,13 @@ func (sb *buildStatus) setDesiredVersion(version string) statusBuilder { func (sb *buildStatus) setAllTopics(topics []string) statusBuilder { sb.allTopics = topics + sortStringSlice(sb.allTopics) return sb } func (sb *buildStatus) setDiffTopics(topics []string) statusBuilder { sb.diffTopics = topics + sortStringSlice(sb.diffTopics) return sb } @@ -108,6 +110,7 @@ func (sb *buildStatus) computeReleased() statusBuilder { klog.V(2).Infof("rsk/%s, Status empty, released=0 ", sb.rsk.Name) } sb.released = released + sortStringSlice(sb.released) return sb } @@ -116,6 +119,8 @@ func (sb *buildStatus) setRealtime() statusBuilder { sb.realtime = currentTopicsByMaskStatus( sb.rsk, tipocav1.MaskRealtime, sb.desiredVersion, ) + sortStringSlice(sb.realtime) + return sb } @@ -124,6 +129,7 @@ func (sb *buildStatus) computeReloading() statusBuilder { sb.rsk.Status.MaskStatus.CurrentMaskStatus == nil { klog.V(2).Infof("rsk/%s, Status empty, reloading=diffTopics ", sb.rsk.Name) sb.reloading = sb.diffTopics + sortStringSlice(sb.reloading) return sb } @@ -160,6 +166,7 @@ func (sb *buildStatus) computeReloading() statusBuilder { } sb.reloading = reConstructingReloading + sortStringSlice(sb.reloading) return sb } @@ -180,6 +187,7 @@ func (sb *buildStatus) computeReloadingDupe() statusBuilder { } sb.reloadingDupe = reloadDupeTopics + sortStringSlice(sb.reloadingDupe) return sb } @@ -301,9 +309,6 @@ func (s *status) info() { klog.V(2).Infof("%s reloading: %d %v", rskName, len(s.reloading), s.reloading) klog.V(2).Infof("%s rDupe: %d %v", rskName, len(s.reloadingDupe), s.reloadingDupe) klog.V(2).Infof("%s realtime: %d %v", rskName, len(s.realtime), s.realtime) - if len(s.reloading) > MaxConcurrentReloading { - klog.V(2).Infof("%s reloadingC: %d %v", rskName, MaxConcurrentReloading, s.reloading[:MaxConcurrentReloading]) - } } // manyReloading checks the percentage of reloading topics of the total topics @@ -486,6 +491,32 @@ func (s *status) updateTopicGroup(topic string) { updateTopicGroup(s.rsk, topic, group) } +func (s *status) updateBatcherReloadingTopics(topics []string, batcherRealtime []string) { + reloadingTopics := []string{} + + // remove topics which have become realtime + realtime := toMap(s.realtime) + realtimeBatcher := toMap(batcherRealtime) + for _, t := range topics { + // remove topics which have become realtime (both batcher and loader) + _, ok := realtime[t] + if ok { + continue + + } + // remove topics which have become batcher realtime + _, ok = realtimeBatcher[t] + if ok { + continue + } + + reloadingTopics = append(reloadingTopics, t) + } + + klog.V(2).Infof("rsk/%s currentReloading: %d %v", s.rsk.Name, len(reloadingTopics), reloadingTopics) + s.rsk.Status.BatcherReloadingTopics = reloadingTopics +} + func updateTopicGroup(rsk *tipocav1.RedshiftSink, topic string, group tipocav1.Group) { if rsk.Status.TopicGroup == nil { rsk.Status.TopicGroup = make(map[string]tipocav1.Group) diff --git a/controllers/suite_test.go b/controllers/suite_test.go index 6b7ba543d..bfb8a4574 100644 --- a/controllers/suite_test.go +++ b/controllers/suite_test.go @@ -27,8 +27,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/envtest" "sigs.k8s.io/controller-runtime/pkg/envtest/printer" - logf "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/controller-runtime/pkg/log/zap" tipocav1 
"github.com/practo/tipoca-stream/redshiftsink/api/v1" // +kubebuilder:scaffold:imports @@ -50,7 +48,7 @@ func TestAPIs(t *testing.T) { } var _ = BeforeSuite(func(done Done) { - logf.SetLogger(zap.LoggerTo(GinkgoWriter, true)) + // logf.SetLogger(zap.LoggerTo(GinkgoWriter, true)) By("bootstrapping test environment") testEnv = &envtest.Environment{ diff --git a/controllers/unit_allocator.go b/controllers/unit_allocator.go new file mode 100644 index 000000000..293178aac --- /dev/null +++ b/controllers/unit_allocator.go @@ -0,0 +1,170 @@ +package controllers + +import ( + "github.com/practo/klog/v2" + tipocav1 "github.com/practo/tipoca-stream/redshiftsink/api/v1" + transformer "github.com/practo/tipoca-stream/redshiftsink/pkg/transformer" + "sort" + "strings" +) + +type unitAllocator struct { + rskName string + topics []string + realtime []string + + topicsLast []topicLast + maxReloadingUnits int + currentReloadingTopics []string + mainSinkGroupSpec *tipocav1.SinkGroupSpec + reloadSinkGroupSpec *tipocav1.SinkGroupSpec + + units []deploymentUnit +} + +func newUnitAllocator( + rskName string, + topics, + realtime []string, + topicsLast []topicLast, + maxReloadingUnits int32, + currentReloadingTopics []string, + main *tipocav1.SinkGroupSpec, + reload *tipocav1.SinkGroupSpec, +) *unitAllocator { + return &unitAllocator{ + rskName: rskName, + topics: topics, + realtime: realtime, + topicsLast: topicsLast, + maxReloadingUnits: int(maxReloadingUnits), + currentReloadingTopics: currentReloadingTopics, + units: []deploymentUnit{}, + mainSinkGroupSpec: main, + reloadSinkGroupSpec: reload, + } +} + +type deploymentUnit struct { + id string + sinkGroupSpec *tipocav1.SinkGroupSpec + topics []string +} + +func sortTopicsByLastOffset(topicsLast []topicLast) []string { + sort.SliceStable(topicsLast, func(i, j int) bool { + return topicsLast[i].last < topicsLast[j].last + }) + + topics := []string{} + for _, tl := range topicsLast { + topics = append(topics, tl.topic) + } + + return topics +} + +func k8sCompatibleName(name string) string { + // satisfy k8s name regex + // '[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*' + return strings.ToLower(strings.ReplaceAll(name, "_", "-")) +} + +func (u *unitAllocator) unitID(topic string) string { + _, _, table := transformer.ParseTopic(topic) + + table = k8sCompatibleName(table) + + if len(table) > 20 { + return table[:20] + } + + return table +} + +// for the reloading sinkGroup +func (u *unitAllocator) allocateReloadingUnits() { + realtime := toMap(u.realtime) + klog.V(3).Infof( + "rsk/%s realtime: %v, max: %v", + u.rskName, + u.realtime, + u.maxReloadingUnits, + ) + + klog.V(3).Infof( + "rsk/%s currentUnits: %v %v", + u.rskName, + len(u.currentReloadingTopics), + u.currentReloadingTopics, + ) + + reloadingTopics := make(map[string]bool) + reloadingUnits := []deploymentUnit{} + + // don't shuffle the already reloading topics unless realtime + for _, topic := range u.currentReloadingTopics { + _, ok := realtime[topic] + if ok { + continue + } + reloadingUnits = append(reloadingUnits, deploymentUnit{ + id: u.unitID(topic), + sinkGroupSpec: u.reloadSinkGroupSpec, + topics: []string{topic}, + }) + reloadingTopics[topic] = true + if len(reloadingUnits) >= u.maxReloadingUnits { + break + } + } + klog.V(3).Infof( + "rsk/%s reloadingUnits(based on current): %v %v", + u.rskName, + len(reloadingUnits), + reloadingUnits, + ) + + realtimeUnit := deploymentUnit{ + id: "realtime", + sinkGroupSpec: u.mainSinkGroupSpec, + topics: u.realtime, + } + + if 
len(reloadingUnits) >= u.maxReloadingUnits { + u.units = reloadingUnits + if len(realtimeUnit.topics) > 0 { + u.units = append(u.units, realtimeUnit) + } + klog.V(2).Infof("rsk/%s units: %v", u.rskName, len(u.units)) + return + } + + topicsByLastAsc := sortTopicsByLastOffset(u.topicsLast) + klog.V(3).Infof("rsk/%s sortByLast: %v", u.rskName, topicsByLastAsc) + for _, topic := range topicsByLastAsc { + _, ok := realtime[topic] + if ok { + continue + } + _, ok = reloadingTopics[topic] + if ok { + continue + } + if len(reloadingUnits) >= u.maxReloadingUnits { + break + } + reloadingUnits = append(reloadingUnits, deploymentUnit{ + id: u.unitID(topic), + sinkGroupSpec: u.reloadSinkGroupSpec, + topics: []string{topic}, + }) + reloadingTopics[topic] = true + } + + u.units = reloadingUnits + if len(realtimeUnit.topics) > 0 { + u.units = append(u.units, realtimeUnit) + } + klog.V(2).Infof("rsk/%s units: %v", u.rskName, len(u.units)) +} diff --git a/controllers/unit_allocator_test.go b/controllers/unit_allocator_test.go new file mode 100644 index 000000000..81eecc5ff --- /dev/null +++ b/controllers/unit_allocator_test.go @@ -0,0 +1,408 @@ +package controllers + +import ( + "reflect" + "testing" +) + +func TestAllocateReloadingUnits(t *testing.T) { + // t.Parallel() + + tests := []struct { + name string + topics []string + realtime []string + topicsLast []topicLast + maxReloadingUnits int32 + currentReloadingTopics []string + units []deploymentUnit + }{ + { + name: "FirstCase", + topics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3", "db.inventory.t4"}, + realtime: []string{}, + topicsLast: []topicLast{ + topicLast{ + topic: "db.inventory.t1", + last: 1500, + }, + topicLast{ + topic: "db.inventory.t2", + last: 1500, + }, + topicLast{ + topic: "db.inventory.t3", + last: 1400, + }, + topicLast{ + topic: "db.inventory.t4", + last: 1400, + }, + }, + maxReloadingUnits: 1, + currentReloadingTopics: []string{}, + units: []deploymentUnit{ + deploymentUnit{ + id: "t3", + topics: []string{"db.inventory.t3"}, + }, + }, + }, + { + name: "SecondCaseMax3", + topics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3", "db.inventory.t4"}, + realtime: []string{}, + topicsLast: []topicLast{ + topicLast{ + topic: "db.inventory.t1", + last: 1500, + }, + topicLast{ + topic: "db.inventory.t2", + last: 1500, + }, + topicLast{ + topic: "db.inventory.t3", + last: 1400, + }, + topicLast{ + topic: "db.inventory.t4", + last: 1400, + }, + }, + maxReloadingUnits: 3, + currentReloadingTopics: []string{}, + units: []deploymentUnit{ + deploymentUnit{ + id: "t3", + topics: []string{"db.inventory.t3"}, + }, + deploymentUnit{ + id: "t4", + topics: []string{"db.inventory.t4"}, + }, + deploymentUnit{ + id: "t1", + topics: []string{"db.inventory.t1"}, + }, + }, + }, + { + name: "ThirdCaseCurrentThere", + topics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3", "db.inventory.t4"}, + realtime: []string{}, + topicsLast: []topicLast{ + topicLast{ + topic: "db.inventory.t1", + last: 1500, + }, + topicLast{ + topic: "db.inventory.t2", + last: 1500, + }, + topicLast{ + topic: "db.inventory.t3", + last: 1400, + }, + topicLast{ + topic: "db.inventory.t4", + last: 1400, + }, + }, + maxReloadingUnits: 3, + currentReloadingTopics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3"}, + units: []deploymentUnit{ + deploymentUnit{ + id: "t1", + topics: []string{"db.inventory.t1"}, + }, + deploymentUnit{ + id: "t2", + topics: []string{"db.inventory.t2"}, + }, + deploymentUnit{ + id: "t3", + 
topics: []string{"db.inventory.t3"}, + }, + }, + }, + { + name: "FourthCaseLagChangedShouldNotChangeAnything", + topics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3", "db.inventory.t4"}, + realtime: []string{}, + topicsLast: []topicLast{ + topicLast{ + topic: "db.inventory.t1", + last: 1500, + }, + topicLast{ + topic: "db.inventory.t2", + last: 1500, + }, + topicLast{ + topic: "db.inventory.t3", + last: 2, + }, + topicLast{ + topic: "db.inventory.t4", + last: 1, + }, + }, + maxReloadingUnits: 3, + currentReloadingTopics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3"}, + units: []deploymentUnit{ + deploymentUnit{ + id: "t1", + topics: []string{"db.inventory.t1"}, + }, + deploymentUnit{ + id: "t2", + topics: []string{"db.inventory.t2"}, + }, + deploymentUnit{ + id: "t3", + topics: []string{"db.inventory.t3"}, + }, + }, + }, + { + name: "FifthCaseOneRealtimeOneMovesin", + topics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3", "db.inventory.t4"}, + realtime: []string{"db.inventory.t3"}, + topicsLast: []topicLast{ + topicLast{ + topic: "db.inventory.t1", + last: 1500, + }, + topicLast{ + topic: "db.inventory.t2", + last: 1500, + }, + topicLast{ + topic: "db.inventory.t3", + last: 2, + }, + topicLast{ + topic: "db.inventory.t4", + last: 1, + }, + }, + maxReloadingUnits: 3, + currentReloadingTopics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3"}, + units: []deploymentUnit{ + deploymentUnit{ + id: "t1", + topics: []string{"db.inventory.t1"}, + }, + deploymentUnit{ + id: "t2", + topics: []string{"db.inventory.t2"}, + }, + deploymentUnit{ + id: "t4", + topics: []string{"db.inventory.t4"}, + }, + deploymentUnit{ + id: "realtime", + topics: []string{"db.inventory.t3"}, + }, + }, + }, + { + name: "SixthCaseAllRealtime", + topics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3", "db.inventory.t4"}, + realtime: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3", "db.inventory.t4"}, + topicsLast: []topicLast{ + topicLast{ + topic: "db.inventory.t1", + last: 1, + }, + topicLast{ + topic: "db.inventory.t2", + last: 1, + }, + topicLast{ + topic: "db.inventory.t3", + last: 2, + }, + topicLast{ + topic: "db.inventory.t4", + last: 1, + }, + }, + maxReloadingUnits: 3, + currentReloadingTopics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t4", "db.inventory.t3"}, + units: []deploymentUnit{ + deploymentUnit{ + id: "realtime", + topics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3", "db.inventory.t4"}, + }, + }, + }, + { + name: "K8sNameCompatibility", + topics: []string{"db.inventory.t1_aks"}, + realtime: []string{}, + topicsLast: []topicLast{ + topicLast{ + topic: "db.inventory.t1_aks", + last: 1, + }, + }, + maxReloadingUnits: 3, + currentReloadingTopics: []string{}, + units: []deploymentUnit{ + deploymentUnit{ + id: "t1-aks", + topics: []string{"db.inventory.t1_aks"}, + }, + }, + }, + { + name: "UnitsGoingAboveMax", + topics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3", "db.inventory.t4", "db.inventory.t5", "db.inventory.t6", "db.inventory.t7", "db.inventory.t8", "db.inventory.t9"}, + realtime: []string{"db.inventory.t1"}, + topicsLast: []topicLast{ + topicLast{ + topic: "db.inventory.t1", + last: 1, + }, + topicLast{ + topic: "db.inventory.t2", + last: 10, + }, + topicLast{ + topic: "db.inventory.t3", + last: 100, + }, + topicLast{ + topic: "db.inventory.t4", + last: 1000, + }, + topicLast{ + topic: "db.inventory.t5", + last: 10000, + }, + 
topicLast{ + topic: "db.inventory.t6", + last: 20000, + }, + topicLast{ + topic: "db.inventory.t7", + last: 100000, + }, + }, + maxReloadingUnits: 5, + currentReloadingTopics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3", "db.inventory.t4", "db.inventory.t5"}, + units: []deploymentUnit{ + deploymentUnit{ + id: "t2", + topics: []string{"db.inventory.t2"}, + }, + deploymentUnit{ + id: "t3", + topics: []string{"db.inventory.t3"}, + }, + deploymentUnit{ + id: "t4", + topics: []string{"db.inventory.t4"}, + }, + deploymentUnit{ + id: "t5", + topics: []string{"db.inventory.t5"}, + }, + deploymentUnit{ + id: "t6", + topics: []string{"db.inventory.t6"}, + }, + deploymentUnit{ + id: "realtime", + topics: []string{"db.inventory.t1"}, + }, + }, + }, + { + name: "UnitsGoingAboveMaxCase2", + topics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3", "db.inventory.t4", "db.inventory.t5", "db.inventory.t6", "db.inventory.t7", "db.inventory.t8", "db.inventory.t9"}, + realtime: []string{"db.inventory.t3", "db.inventory.t4"}, + topicsLast: []topicLast{ + topicLast{ + topic: "db.inventory.t1", + last: 1, + }, + topicLast{ + topic: "db.inventory.t2", + last: 10, + }, + topicLast{ + topic: "db.inventory.t3", + last: 100, + }, + topicLast{ + topic: "db.inventory.t4", + last: 1000, + }, + topicLast{ + topic: "db.inventory.t5", + last: 10000, + }, + topicLast{ + topic: "db.inventory.t6", + last: 20000, + }, + topicLast{ + topic: "db.inventory.t7", + last: 100000, + }, + }, + maxReloadingUnits: 5, + currentReloadingTopics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3", "db.inventory.t4", "db.inventory.t5"}, + units: []deploymentUnit{ + deploymentUnit{ + id: "t1", + topics: []string{"db.inventory.t1"}, + }, + deploymentUnit{ + id: "t2", + topics: []string{"db.inventory.t2"}, + }, + deploymentUnit{ + id: "t5", + topics: []string{"db.inventory.t5"}, + }, + deploymentUnit{ + id: "t6", + topics: []string{"db.inventory.t6"}, + }, + deploymentUnit{ + id: "t7", + topics: []string{"db.inventory.t7"}, + }, + deploymentUnit{ + id: "realtime", + topics: []string{"db.inventory.t3", "db.inventory.t4"}, + }, + }, + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + allocator := newUnitAllocator( + "testrsk", + tc.topics, + tc.realtime, + tc.topicsLast, + tc.maxReloadingUnits, + tc.currentReloadingTopics, + nil, // TODO add test cases for them also + nil, + ) + allocator.allocateReloadingUnits() + if !reflect.DeepEqual(allocator.units, tc.units) { + t.Errorf("\nexpected (%v): %+v\ngot (%v): %+v\n", len(tc.units), tc.units, len(allocator.units), allocator.units) + } + }) + } +} diff --git a/controllers/util.go b/controllers/util.go index 6c5a4acf6..d5cf1854c 100644 --- a/controllers/util.go +++ b/controllers/util.go @@ -6,6 +6,7 @@ import ( "reflect" "sort" "strings" + "time" hashstructure "github.com/mitchellh/hashstructure/v2" klog "github.com/practo/klog/v2" @@ -13,6 +14,7 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + resource "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" types "k8s.io/apimachinery/pkg/types" client "sigs.k8s.io/controller-runtime/pkg/client" @@ -139,6 +141,18 @@ func getHashStructure(v interface{}) (string, error) { return hash[:6], nil } +func toIntPtr(i int) *int { + return &i +} + +func toInt32Ptr(i int32) *int32 { + return &i +} + +func toQuantityPtr(r resource.Quantity) *resource.Quantity { + return &r +} + 
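
As a rough illustration (not part of this patch) of how the reconciler is expected to drive the new allocator exercised by the tests above, here is a minimal Go sketch. It assumes it sits in package `controllers` next to the unexported types (`topicLast`, `deploymentUnit`), that `topicLast.last` holds the topic's last offset, and that passing `nil` sink group specs falls back to defaults as in the test cases; the topic names and offsets are made up.

```go
// Illustrative only: invoking allocateReloadingUnits the way the
// controller would, based on the constructor and tests in this diff.
package controllers

import "fmt"

func exampleAllocateReloadingUnits() {
	topics := []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3"}
	realtime := []string{"db.inventory.t3"} // already caught up, goes to the realtime unit
	topicsLast := []topicLast{
		{topic: "db.inventory.t1", last: 1500},
		{topic: "db.inventory.t2", last: 900},
		{topic: "db.inventory.t3", last: 2},
	}

	allocator := newUnitAllocator(
		"example-rsk",
		topics,
		realtime,
		topicsLast,
		2,                           // maxReloadingUnits
		[]string{"db.inventory.t2"}, // currently reloading topics (from rsk status)
		nil,                         // main SinkGroupSpec, nil => operator defaults
		nil,                         // reload SinkGroupSpec, nil => operator defaults
	)
	allocator.allocateReloadingUnits()

	// Expected shape: one unit per reloading topic (up to the max),
	// plus a single "realtime" unit holding all realtime topics.
	for _, u := range allocator.units {
		fmt.Printf("unit=%s topics=%v\n", u.id, u.topics)
	}
}
```
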
func sortStringSlice(t []string) { sort.Sort(sort.StringSlice(t)) } @@ -176,6 +190,18 @@ func getReplicas(suspend bool, totalGroups, totalTopics int) int32 { return 1 } +func cacheValid(validity time.Duration, lastCachedTime *int64) bool { + if lastCachedTime == nil { + return false + } + + if (*lastCachedTime + validity.Nanoseconds()) > time.Now().UnixNano() { + return true + } + + return false +} + func makeLoaderTopics(prefix string, topics []string) []string { var prefixedTopics []string for _, topic := range topics { diff --git a/pkg/kafka/manager.go b/pkg/kafka/manager.go index ee994aba3..29b8866bb 100644 --- a/pkg/kafka/manager.go +++ b/pkg/kafka/manager.go @@ -164,6 +164,7 @@ func (c *Manager) SyncTopics( ticker := time.NewTicker(time.Second * time.Duration(c.tickSeconds)) for { + klog.V(2).Info("fetching topics...") err := c.refreshTopics() if err != nil { klog.Errorf("error refreshing topic, err:%v\n", err) diff --git a/pkg/redshiftbatcher/batch_processor.go b/pkg/redshiftbatcher/batch_processor.go index b1143be14..b5d45f2c2 100644 --- a/pkg/redshiftbatcher/batch_processor.go +++ b/pkg/redshiftbatcher/batch_processor.go @@ -127,6 +127,7 @@ type response struct { endOffset int64 messagesProcessed int maskSchema map[string]serializer.MaskInfo + bytesProcessed int64 } func (b *batchProcessor) ctxCancelled(ctx context.Context) error { @@ -144,6 +145,7 @@ func (b *batchProcessor) ctxCancelled(ctx context.Context) error { func constructS3key( s3ucketDir string, + consumerGroupID string, topic string, partition int32, offset int64, @@ -158,6 +160,7 @@ func constructS3key( if maskFileVersion != "" { return filepath.Join( s3ucketDir, + consumerGroupID, topic, maskFileVersion, s3FileName, @@ -165,6 +168,7 @@ func constructS3key( } else { return filepath.Join( s3ucketDir, + consumerGroupID, topic, s3FileName, ) @@ -211,6 +215,7 @@ func (b *batchProcessor) signalLoad(resp *response) error { resp.batchSchemaID, // schema of upstream topic resp.maskSchema, resp.skipMerge, + resp.bytesProcessed, ) err := b.signaler.Add( @@ -251,7 +256,9 @@ func (b *batchProcessor) processMessage( message *serializer.Message, resp *response, messageID int, -) error { +) (int64, error) { + var bytesProcessed int64 + klog.V(5).Infof( "%s: batchID:%d id:%d: transforming", b.topic, resp.batchID, messageID, @@ -267,7 +274,7 @@ func (b *batchProcessor) processMessage( resp.maskSchema, ) if err != nil { - return fmt.Errorf( + return bytesProcessed, fmt.Errorf( "transforming schema:%d => inputTable failed: %v", resp.batchSchemaID, err, @@ -276,6 +283,7 @@ func (b *batchProcessor) processMessage( resp.batchSchemaTable = r.(redshift.Table) resp.s3Key = constructS3key( b.s3BucketDir, + b.consumerGroupID, message.Topic, message.Partition, message.Offset, @@ -284,7 +292,7 @@ func (b *batchProcessor) processMessage( } if resp.batchSchemaID != message.SchemaId { - return fmt.Errorf("topic:%s, schema id mismatch in the batch, %d != %d", + return bytesProcessed, fmt.Errorf("%s: schema id mismatch in the batch, %d != %d", b.topic, resp.batchSchemaID, message.SchemaId, @@ -293,7 +301,7 @@ func (b *batchProcessor) processMessage( err := b.messageTransformer.Transform(message, resp.batchSchemaTable) if err != nil { - return fmt.Errorf( + return bytesProcessed, fmt.Errorf( "Error transforming message:%+v, err:%v", message, err, ) } @@ -301,17 +309,20 @@ func (b *batchProcessor) processMessage( if b.maskMessages { err := b.msgMasker.Transform(message, resp.batchSchemaTable) if err != nil { - return fmt.Errorf("Error masking 
message:%+v, err:%v", message, err) + return bytesProcessed, fmt.Errorf( + "Error masking message:%+v, err:%v", message, err) } } message.Value = removeEmptyNullValues(message.Value.(map[string]*string)) messageValueBytes, err := json.Marshal(message.Value) if err != nil { - return fmt.Errorf("Error marshalling message.Value, message: %+v", message) + return bytesProcessed, fmt.Errorf( + "Error marshalling message.Value, message: %+v", message) } resp.bodyBuf.Write(messageValueBytes) resp.bodyBuf.Write([]byte{'\n'}) + bytesProcessed += message.Bytes if b.maskMessages && len(resp.maskSchema) == 0 { resp.maskSchema = message.MaskSchema @@ -325,7 +336,7 @@ func (b *batchProcessor) processMessage( ) resp.endOffset = message.Offset - return nil + return bytesProcessed, nil } // processMessages handles the batch procesing and return true if all completes @@ -335,21 +346,23 @@ func (b *batchProcessor) processMessages( ctx context.Context, msgBuf []*serializer.Message, resp *response, -) error { +) (int64, error) { + var totalBytesProcessed int64 for messageID, message := range msgBuf { select { case <-ctx.Done(): - return kafka.ErrSaramaSessionContextDone + return totalBytesProcessed, kafka.ErrSaramaSessionContextDone default: - err := b.processMessage(ctx, message, resp, messageID) + bytesProcessed, err := b.processMessage(ctx, message, resp, messageID) if err != nil { - return err + return totalBytesProcessed, err } + totalBytesProcessed += bytesProcessed } } - return nil + return totalBytesProcessed, nil } func (b *batchProcessor) processBatch( @@ -370,7 +383,7 @@ func (b *batchProcessor) processBatch( klog.V(4).Infof("%s: batchID:%d, size:%d: processing...", b.topic, resp.batchID, len(msgBuf), ) - err = b.processMessages(ctx, msgBuf, resp) + resp.bytesProcessed, err = b.processMessages(ctx, msgBuf, resp) if err != nil { resp.err = err return @@ -468,10 +481,12 @@ func (b *batchProcessor) Process( klog.V(2).Infof("%s: finished (%d batches)", b.topic, len(responses)) // return if there was any error in processing any of the batches - totalMessages := 0 + var totalBytesProcessed int64 = 0 + totalMessagesProcessed := 0 var errors error for _, resp := range responses { - totalMessages += resp.messagesProcessed + totalBytesProcessed += resp.bytesProcessed + totalMessagesProcessed += resp.messagesProcessed if resp.err != nil { if resp.err == kafka.ErrSaramaSessionContextDone { klog.V(2).Infof( @@ -548,10 +563,11 @@ func (b *batchProcessor) Process( last := responses[len(responses)-1] b.markOffset(session, b.topic, 0, last.endOffset, b.autoCommit) - setMsgsProcessedPerSecond( + setMetrics( b.consumerGroupID, b.topic, - float64(totalMessages)/time.Since(now).Seconds(), + float64(totalBytesProcessed)/time.Since(now).Seconds(), + float64(totalMessagesProcessed)/time.Since(now).Seconds(), ) klog.V(2).Infof( diff --git a/pkg/redshiftbatcher/batcher_handler.go b/pkg/redshiftbatcher/batcher_handler.go index c1f797cf7..dcf1a9252 100644 --- a/pkg/redshiftbatcher/batcher_handler.go +++ b/pkg/redshiftbatcher/batcher_handler.go @@ -14,9 +14,10 @@ import ( "time" ) -const ( - DefaultMaxConcurrency = 10 - DefaultMaxProcessingTime = 180000 +var ( + DefaultMaxWaitSeconds int = 30 + DefaultMaxConcurrency int = 10 + DefaultMaxProcessingTime int32 = 180000 ) type BatcherConfig struct { @@ -38,12 +39,20 @@ type BatcherConfig struct { // MaxSize is the maximum size of a batch, on exceeding this batch is pushed // regarless of the wait time. 
+ // Deprecated: in favour of MaxBytesPerBatch MaxSize int `yaml:"maxSize,omitempty"` - // MaxWaitSeconds after which the bash would be pushed regardless of its size. - MaxWaitSeconds int `yaml:"maxWaitSeconds,omitempty"` - // MaxConcurrency is the maximum number of concurrent batch processing to run + + // MaxWaitSeconds after which the batch would be flushed + // Defaults to 30 + MaxWaitSeconds *int `yaml:"maxWaitSeconds,omitempty"` + // MaxConcurrency is the maximum number of concurrent processing to run // Defaults to 10 - MaxConcurrency int `yaml:"maxConcurrency,omitempty"` + MaxConcurrency *int `yaml:"maxConcurrency,omitempty"` + // MaxBytesPerBatch is the maximum bytes per batch. Default is there + // if the user has not specified a default will be applied. + // If this is specified, maxSize specification is not considered. + // Default would be specified after MaxSize is gone + MaxBytesPerBatch *int64 `yaml:"maxBytesPerBatch,omitempty"` } // batcherHandler is the sarama consumer handler @@ -52,12 +61,13 @@ type batcherHandler struct { ready chan bool ctx context.Context - maxSize int - maxWaitSeconds int - maxConcurrency int + maxSize int // Deprecated in favour of maxBytesPerBatch - consumerGroupID string + maxWaitSeconds *int + maxConcurrency *int + maxBytesPerBatch *int64 + consumerGroupID string kafkaConfig kafka.KafkaConfig saramaConfig kafka.SaramaConfig maskConfig masker.MaskConfig @@ -77,8 +87,11 @@ func NewHandler( ) *batcherHandler { // apply defaults - if batcherConfig.MaxConcurrency == 0 { - batcherConfig.MaxConcurrency = DefaultMaxConcurrency + if batcherConfig.MaxWaitSeconds == nil { + batcherConfig.MaxWaitSeconds = &DefaultMaxWaitSeconds + } + if batcherConfig.MaxConcurrency == nil { + batcherConfig.MaxConcurrency = &DefaultMaxConcurrency } return &batcherHandler{ @@ -87,9 +100,11 @@ func NewHandler( consumerGroupID: consumerGroupID, - maxSize: batcherConfig.MaxSize, - maxWaitSeconds: batcherConfig.MaxWaitSeconds, - maxConcurrency: batcherConfig.MaxConcurrency, + maxSize: batcherConfig.MaxSize, // Deprecated + + maxWaitSeconds: batcherConfig.MaxWaitSeconds, + maxConcurrency: batcherConfig.MaxConcurrency, + maxBytesPerBatch: batcherConfig.MaxBytesPerBatch, kafkaConfig: kafkaConfig, saramaConfig: saramaConfig, @@ -134,7 +149,7 @@ func (h *batcherHandler) ConsumeClaim( ) var lastSchemaId *int - processChan := make(chan []*serializer.Message, h.maxConcurrency) + processChan := make(chan []*serializer.Message, *h.maxConcurrency) errChan := make(chan error) processor := newBatchProcessor( h.consumerGroupID, @@ -145,16 +160,22 @@ func (h *batcherHandler) ConsumeClaim( h.saramaConfig, h.maskConfig, h.kafkaLoaderTopicPrefix, - h.maxConcurrency, + *h.maxConcurrency, ) + maxBufSize := h.maxSize + if h.maxBytesPerBatch != nil { + maxBufSize = serializer.DefaultMessageBufferSize + } msgBatch := serializer.NewMessageAsyncBatch( claim.Topic(), claim.Partition(), - h.maxSize, + h.maxSize, // Deprecated + maxBufSize, + h.maxBytesPerBatch, processChan, ) maxWaitTicker := time.NewTicker( - time.Duration(h.maxWaitSeconds) * time.Second, + time.Duration(*h.maxWaitSeconds) * time.Second, ) wg := &sync.WaitGroup{} @@ -227,7 +248,7 @@ func (h *batcherHandler) ConsumeClaim( // Flush the batch due to schema change msgBatch.Flush(session.Context()) } - // Flush the batch by size or insert in batch + // Flush the batch by maxBytes or size on insert in batch msgBatch.Insert(session.Context(), msg) *lastSchemaId = msg.SchemaId case <-maxWaitTicker.C: diff --git a/pkg/redshiftbatcher/metrics.go 
b/pkg/redshiftbatcher/metrics.go index e943a0f19..ab098b52e 100644 --- a/pkg/redshiftbatcher/metrics.go +++ b/pkg/redshiftbatcher/metrics.go @@ -5,24 +5,46 @@ import ( ) var ( - msgsPerSecMetric = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ + bytesProcessedMetric = prometheus.NewCounterVec( + prometheus.CounterOpts{ Namespace: "rsk", Subsystem: "batcher", - Name: "messages_processed_per_second", - Help: "number of messages processed per second", + Name: "bytes_processed", + Help: "total number of bytes processed", + }, + []string{"consumergroup", "topic"}, + ) + msgsProcessedMetric = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "rsk", + Subsystem: "batcher", + Name: "messages_processed", + Help: "total number of messages processed", }, []string{"consumergroup", "topic"}, ) ) func init() { - prometheus.MustRegister(msgsPerSecMetric) + prometheus.MustRegister(bytesProcessedMetric) + prometheus.MustRegister(msgsProcessedMetric) } -func setMsgsProcessedPerSecond(consumergroup string, topic string, msgsPerSec float64) { - msgsPerSecMetric.WithLabelValues( +func setBytesProcessed(consumergroup string, topic string, bytes float64) { + bytesProcessedMetric.WithLabelValues( consumergroup, topic, - ).Set(msgsPerSec) + ).Add(bytes) +} + +func setMsgsProcessed(consumergroup string, topic string, msgs float64) { + msgsProcessedMetric.WithLabelValues( + consumergroup, + topic, + ).Add(msgs) +} + +func setMetrics(consumergroup, topic string, bytes, msgs float64) { + setBytesProcessed(consumergroup, topic, bytes) + setMsgsProcessed(consumergroup, topic, msgs) } diff --git a/pkg/redshiftloader/job.go b/pkg/redshiftloader/job.go index 2493a76b7..b763539f1 100644 --- a/pkg/redshiftloader/job.go +++ b/pkg/redshiftloader/job.go @@ -17,7 +17,8 @@ var JobAvroSchema string = `{ {"name": "s3Path", "type": "string"}, {"name": "schemaId", "type": "int"}, {"name": "maskSchema", "type": "string"}, - {"name": "skipMerge", "type": "string", "default": ""} + {"name": "skipMerge", "type": "string", "default": ""}, + {"name": "batchBytes", "type": "long", "default": 0} ] }` @@ -29,13 +30,15 @@ type Job struct { S3Path string `json:"s3Path"` SchemaId int `json:"schemaId"` // schema id of debezium event MaskSchema map[string]serializer.MaskInfo `json:"maskSchema"` - SkipMerge bool `json:"skipMerge"` // to load using merge strategy or directy COPY + SkipMerge bool `json:"skipMerge"` // to load using merge strategy or directy COPY + BatchBytes int64 `json:"batchBytes"` // batch bytes store sum of all message bytes in this batch } func NewJob( upstreamTopic string, startOffset int64, endOffset int64, csvDialect string, s3Path string, schemaId int, - maskSchema map[string]serializer.MaskInfo, skipMerge bool) Job { + maskSchema map[string]serializer.MaskInfo, skipMerge bool, + batchBytes int64) Job { return Job{ UpstreamTopic: upstreamTopic, @@ -46,6 +49,7 @@ func NewJob( SchemaId: schemaId, MaskSchema: maskSchema, SkipMerge: skipMerge, + BatchBytes: batchBytes, } } @@ -94,6 +98,12 @@ func StringMapToJob(data map[string]interface{}) Job { schema = ToSchemaMap(value) } job.MaskSchema = schema + case "batchBytes": + if value, ok := v.(int64); ok { + job.BatchBytes = value + } else { // backward compatibility + job.BatchBytes = 0 + } } } @@ -190,5 +200,6 @@ func (c Job) ToStringMap() map[string]interface{} { "schemaId": c.SchemaId, "skipMerge": skipMerge, "maskSchema": ToSchemaString(c.MaskSchema), + "batchBytes": c.BatchBytes, } } diff --git a/pkg/redshiftloader/job_test.go b/pkg/redshiftloader/job_test.go 
index 3e048311f..f13568d90 100644 --- a/pkg/redshiftloader/job_test.go +++ b/pkg/redshiftloader/job_test.go @@ -13,8 +13,17 @@ func TestToStringMap(t *testing.T) { "id": serializer.MaskInfo{Masked: true}, } - job := NewJob("upstreamTopic", 2091, 2100, ",", - "s3path", 1, maskSchema, false) + job := NewJob( + "upstreamTopic", + 2091, + 2100, + ",", + "s3path", + 1, + maskSchema, + false, + 10, + ) // fmt.Printf("job_now=%+v\n\n", job) sMap := job.ToStringMap() diff --git a/pkg/redshiftloader/load_processor.go b/pkg/redshiftloader/load_processor.go index f1ef514b2..bbefa6d96 100644 --- a/pkg/redshiftloader/load_processor.go +++ b/pkg/redshiftloader/load_processor.go @@ -305,6 +305,7 @@ func (b *loadProcessor) insertIntoTargetTable(ctx context.Context, tx *sql.Tx) e s3CopyDir := filepath.Join( viper.GetString("s3sink.bucketDir"), + b.consumerGroupID, b.topic, util.NewUUIDString(), "unload_", @@ -509,6 +510,7 @@ func (b *loadProcessor) migrateTable( s3CopyDir := filepath.Join( viper.GetString("s3sink.bucketDir"), + b.consumerGroupID, b.topic, util.NewUUIDString(), "migrating_unload_", @@ -610,8 +612,11 @@ func (b *loadProcessor) migrateSchema(ctx context.Context, schemaId int, inputTa func (b *loadProcessor) processBatch( ctx context.Context, msgBuf []*serializer.Message, -) error { - +) ( + int64, + error, +) { + var bytesProcessed int64 if b.redshiftStats { klog.V(2).Infof("dbstats: %+v\n", b.redshifter.Stats()) } @@ -627,11 +632,13 @@ func (b *loadProcessor) processBatch( for id, message := range msgBuf { select { case <-ctx.Done(): - return fmt.Errorf("session ctx done, err: %v", ctx.Err()) + return bytesProcessed, fmt.Errorf( + "session ctx done, err: %v", ctx.Err()) default: job := StringMapToJob(message.Value.(map[string]interface{})) schemaId = job.SchemaId b.batchEndOffset = message.Offset + bytesProcessed += job.BatchBytes // this assumes all messages in a batch have same schema id if id == 0 { @@ -647,7 +654,7 @@ func (b *loadProcessor) processBatch( job.MaskSchema, ) if err != nil { - return fmt.Errorf( + return bytesProcessed, fmt.Errorf( "Transforming schema:%d => inputTable failed: %v\n", schemaId, err, @@ -660,7 +667,7 @@ func (b *loadProcessor) processBatch( inputTable.Name + b.tableSuffix) err = b.migrateSchema(ctx, schemaId, inputTable) if err != nil { - return err + return bytesProcessed, err } } entries = append( @@ -676,13 +683,14 @@ func (b *loadProcessor) processBatch( // upload s3 manifest file to bulk copy data to staging table s3ManifestKey := filepath.Join( viper.GetString("s3sink.bucketDir"), + b.consumerGroupID, b.topic, util.NewUUIDString(), "manifest.json", ) err = b.s3sink.UploadS3Manifest(s3ManifestKey, entries) if err != nil { - return fmt.Errorf( + return bytesProcessed, fmt.Errorf( "Error uploading manifest: %s to s3, err:%v\n", s3ManifestKey, err, @@ -693,7 +701,7 @@ func (b *loadProcessor) processBatch( klog.V(2).Infof("%s, load staging\n", b.topic) err = b.createStagingTable(ctx, schemaId, inputTable) if err != nil { - return err + return bytesProcessed, err } err = b.loadTable( ctx, @@ -702,20 +710,20 @@ func (b *loadProcessor) processBatch( s3ManifestKey, ) if err != nil { - return err + return bytesProcessed, err } // merge err = b.merge(ctx) if err != nil { - return err + return bytesProcessed, err } if b.redshiftStats { klog.V(3).Infof("endbatch dbstats: %+v\n", b.redshifter.Stats()) } - return nil + return bytesProcessed, nil } // Process implements serializer.MessageBatchSyncProcessor @@ -728,10 +736,10 @@ func (b *loadProcessor) Process(session 
sarama.ConsumerGroupSession, msgBuf []*s if err != nil { return err } - klog.Infof("%s, batchId:%d, size:%d: Processing...\n", + klog.Infof("%s, batchId:%d, size:%d: processing...\n", b.topic, b.batchId, len(msgBuf), ) - err = b.processBatch(ctx, msgBuf) + bytesProcessed, err := b.processBatch(ctx, msgBuf) if err != nil { b.printCurrentState() return err @@ -750,16 +758,17 @@ func (b *loadProcessor) Process(session sarama.ConsumerGroupSession, msgBuf []*s timeTaken = fmt.Sprintf("%.0fs", secondsTaken) } - klog.Infof( - "%s, batchId:%d, size:%d, end:%d:, Processed in %s", - b.topic, b.batchId, len(msgBuf), b.batchEndOffset, timeTaken, - ) - - setMsgsProcessedPerSecond( + setMetrics( b.consumerGroupID, b.topic, + float64(bytesProcessed)/secondsTaken, float64(len(msgBuf))/secondsTaken, ) + klog.Infof( + "%s, batchId:%d, size:%d, end:%d:, processed in %s", + b.topic, b.batchId, len(msgBuf), b.batchEndOffset, timeTaken, + ) + return nil } diff --git a/pkg/redshiftloader/loader_handler.go b/pkg/redshiftloader/loader_handler.go index aa24d16fd..b8206350a 100644 --- a/pkg/redshiftloader/loader_handler.go +++ b/pkg/redshiftloader/loader_handler.go @@ -12,17 +12,27 @@ import ( "time" ) -const ( - DefaultMaxProcessingTime = 600000 +var ( + DefaultMaxWaitSeconds int = 60 + DefaultMaxProcessingTime int32 = 600000 ) type LoaderConfig struct { // Maximum size of a batch, on exceeding this batch is pushed // regarless of the wait time. + // Deprecated: in favour of MaxBytesPerBatch MaxSize int `yaml:"maxSize,omitempty"` + // MaxBytesPerBatch is the maximum bytes per batch. It is not the size + // of kafka message but the size of all the messages that would be + // loaded in the batch. Default is there + // if the user has not specified a default will be applied. + // If this is specified, maxSize specification is not considered. + // Default would be specified after MaxSize is gone + MaxBytesPerBatch *int64 `yaml:"maxBytesPerBatch,omitempty"` + // MaxWaitSeconds after which the bash would be pushed regardless of its size. 
- MaxWaitSeconds int `yaml:"maxWaitSeconds,omitempty"` + MaxWaitSeconds *int `yaml:"maxWaitSeconds,omitempty"` } // loaderHandler is the sarama consumer handler @@ -33,8 +43,10 @@ type loaderHandler struct { consumerGroupID string - maxSize int - maxWaitSeconds int + maxSize int // Deprecated + + maxWaitSeconds *int + maxBytesPerBatch *int64 saramaConfig kafka.SaramaConfig redshifter *redshift.Redshift @@ -49,14 +61,21 @@ func NewHandler( saramaConfig kafka.SaramaConfig, redshifter *redshift.Redshift, ) *loaderHandler { + // apply defaults + if loaderConfig.MaxWaitSeconds == nil { + loaderConfig.MaxWaitSeconds = &DefaultMaxWaitSeconds + } + return &loaderHandler{ ready: ready, ctx: ctx, consumerGroupID: consumerGroupID, - maxSize: loaderConfig.MaxSize, - maxWaitSeconds: loaderConfig.MaxWaitSeconds, + maxSize: loaderConfig.MaxSize, // Deprecated + + maxWaitSeconds: loaderConfig.MaxWaitSeconds, + maxBytesPerBatch: loaderConfig.MaxBytesPerBatch, saramaConfig: saramaConfig, redshifter: redshifter, @@ -66,7 +85,7 @@ func NewHandler( // Setup is run at the beginning of a new session, before ConsumeClaim func (h *loaderHandler) Setup(sarama.ConsumerGroupSession) error { - klog.V(1).Info("Setting up handler") + klog.V(1).Info("setting up handler") // Mark the consumer as ready select { @@ -81,7 +100,7 @@ func (h *loaderHandler) Setup(sarama.ConsumerGroupSession) error { // Cleanup is run at the end of a session, once all ConsumeClaim goroutines have exited func (h *loaderHandler) Cleanup(sarama.ConsumerGroupSession) error { - klog.V(1).Info("Cleaning up handler") + klog.V(1).Info("cleaning up handler") return nil } @@ -91,9 +110,8 @@ func (h *loaderHandler) ConsumeClaim(session sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error { klog.V(1).Infof( - "ConsumeClaim started for topic:%s, partition:%d, initalOffset:%d\n", + "%s: consumeClaim started, initalOffset:%d\n", claim.Topic(), - claim.Partition(), claim.InitialOffset(), ) @@ -106,14 +124,20 @@ func (h *loaderHandler) ConsumeClaim(session sarama.ConsumerGroupSession, h.saramaConfig, h.redshifter, ) + maxBufSize := h.maxSize + if h.maxBytesPerBatch != nil { + maxBufSize = serializer.DefaultMessageBufferSize + } msgBatch := serializer.NewMessageSyncBatch( claim.Topic(), claim.Partition(), - h.maxSize, + h.maxSize, // Deprecated + maxBufSize, + h.maxBytesPerBatch, processor, ) maxWaitTicker := time.NewTicker( - time.Duration(h.maxWaitSeconds) * time.Second, + time.Duration(*h.maxWaitSeconds) * time.Second, ) // NOTE: @@ -126,20 +150,22 @@ func (h *loaderHandler) ConsumeClaim(session sarama.ConsumerGroupSession, select { case <-h.ctx.Done(): klog.V(2).Infof( - "ConsumeClaim returning for topic: %s (main ctx done)", + "%s: consumeClaim returning, main ctx done", claim.Topic(), ) return nil case <-session.Context().Done(): klog.V(2).Infof( - "ConsumeClaim returning for topic: %s (session ctx done)", + "%s: consumeClaim returning. session ctx done, ctxErr: %v", claim.Topic(), + session.Context().Err(), ) return fmt.Errorf("session ctx done, err: %v", session.Context().Err()) case message, ok := <-claimMsgChan: + maxWaitTicker.Stop() if !ok { klog.V(2).Infof( - "ConsumeClaim returning for topic: %s (read msg channel closed)", + "%s: consumeClaim returning. 
read msg channel closed", claim.Topic(), ) return nil @@ -149,14 +175,15 @@ func (h *loaderHandler) ConsumeClaim(session sarama.ConsumerGroupSession, default: case <-h.ctx.Done(): klog.V(2).Infof( - "ConsumeClaim returning for topic: %s (main ctx done)", + "%s: consumeClaim returning, main ctx done", claim.Topic(), ) return nil case <-session.Context().Done(): klog.V(2).Infof( - "ConsumeClaim returning for topic: %s (session ctx done)", + "%s: consumeClaim returning. session ctx done, ctxErr: %v", claim.Topic(), + session.Context().Err(), ) return fmt.Errorf("session ctx done, err: %v", session.Context().Err()) } @@ -178,7 +205,7 @@ func (h *loaderHandler) ConsumeClaim(session sarama.ConsumerGroupSession, lastSchemaId = new(int) } else if *lastSchemaId != upstreamJobSchemaId { klog.V(2).Infof( - "topic:%s: schema changed, %d => %d (batch flush)\n", + "%s: schema changed, %d => %d (batch flush)\n", claim.Topic(), *lastSchemaId, upstreamJobSchemaId, @@ -189,15 +216,16 @@ func (h *loaderHandler) ConsumeClaim(session sarama.ConsumerGroupSession, } } // Process the batch by size or insert in batch - err = msgBatch.Insert(session, msg) + err = msgBatch.Insert(session, msg, job.BatchBytes) if err != nil { return err } *lastSchemaId = upstreamJobSchemaId + maxWaitTicker.Reset(time.Duration(*h.maxWaitSeconds) * time.Second) case <-maxWaitTicker.C: // Process the batch by time klog.V(2).Infof( - "topic:%s: maxWaitSeconds hit", + "%s: maxWaitSeconds hit", claim.Topic(), ) err = msgBatch.Process(session) diff --git a/pkg/redshiftloader/metrics.go b/pkg/redshiftloader/metrics.go index 6ae6691d6..aba310bd6 100644 --- a/pkg/redshiftloader/metrics.go +++ b/pkg/redshiftloader/metrics.go @@ -5,24 +5,46 @@ import ( ) var ( - msgsPerSecMetric = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ + bytesLoadedMetric = prometheus.NewCounterVec( + prometheus.CounterOpts{ Namespace: "rsk", Subsystem: "loader", - Name: "messages_processed_per_second", - Help: "number of messages processed per second", + Name: "bytes_loaded", + Help: "total number of bytes loaded", + }, + []string{"consumergroup", "topic"}, + ) + msgsLoadedMetric = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "rsk", + Subsystem: "loader", + Name: "messages_loaded", + Help: "total number of messages loaded", }, []string{"consumergroup", "topic"}, ) ) func init() { - prometheus.MustRegister(msgsPerSecMetric) + prometheus.MustRegister(bytesLoadedMetric) + prometheus.MustRegister(msgsLoadedMetric) } -func setMsgsProcessedPerSecond(consumergroup string, topic string, msgsPerSec float64) { - msgsPerSecMetric.WithLabelValues( +func setBytesLoaded(consumergroup string, topic string, bytes float64) { + bytesLoadedMetric.WithLabelValues( consumergroup, topic, - ).Set(msgsPerSec) + ).Add(bytes) +} + +func setMsgsLoaded(consumergroup string, topic string, msgs float64) { + msgsLoadedMetric.WithLabelValues( + consumergroup, + topic, + ).Add(msgs) +} + +func setMetrics(consumergroup, topic string, bytes, msgs float64) { + setBytesLoaded(consumergroup, topic, bytes) + setMsgsLoaded(consumergroup, topic, msgs) } diff --git a/pkg/serializer/message.go b/pkg/serializer/message.go index e629d8032..04d431f1b 100644 --- a/pkg/serializer/message.go +++ b/pkg/serializer/message.go @@ -7,6 +7,8 @@ import ( "sync" ) +const DefaultMessageBufferSize = 10 + type MessageBatchSyncProcessor interface { Process(session sarama.ConsumerGroupSession, msgBuf []*Message) error } @@ -27,31 +29,37 @@ type Message struct { Offset int64 Key string Value interface{} + Bytes 
int64 Operation string MaskSchema map[string]MaskInfo } type MessageAsyncBatch struct { - topic string - partition int32 - maxSize int - msgBuf []*Message - processChan chan []*Message + topic string + partition int32 + maxSize int + msgBuf []*Message + msgBufBytes int64 + maxBytesPerBatch *int64 + processChan chan []*Message } func NewMessageAsyncBatch( topic string, partition int32, maxSize int, + maxBufSize int, + maxBytesPerBatch *int64, processChan chan []*Message, ) *MessageAsyncBatch { return &MessageAsyncBatch{ - topic: topic, - partition: partition, - maxSize: maxSize, - msgBuf: make([]*Message, 0, maxSize), - processChan: processChan, + topic: topic, + partition: partition, + maxSize: maxSize, + msgBuf: make([]*Message, 0, maxBufSize), + maxBytesPerBatch: maxBytesPerBatch, + processChan: processChan, } } @@ -66,6 +74,7 @@ func (b *MessageAsyncBatch) Flush(ctx context.Context) { case b.processChan <- b.msgBuf: } b.msgBuf = make([]*Message, 0, b.maxSize) + b.msgBufBytes = 0 klog.V(4).Infof( "%s: flushed:%d, processChan:%v", b.topic, @@ -84,6 +93,20 @@ func (b *MessageAsyncBatch) Flush(ctx context.Context) { // if batchSize >= maxSize func (b *MessageAsyncBatch) Insert(ctx context.Context, msg *Message) { b.msgBuf = append(b.msgBuf, msg) + + if b.maxBytesPerBatch != nil { + b.msgBufBytes += msg.Bytes + if b.msgBufBytes >= *b.maxBytesPerBatch { + klog.V(2).Infof( + "%s: maxBytesPerBatch hit", + msg.Topic, + ) + b.Flush(ctx) + } + return + } + + // Deprecated if len(b.msgBuf) >= b.maxSize { klog.V(2).Infof( "%s: maxSize hit", @@ -94,20 +117,30 @@ func (b *MessageAsyncBatch) Insert(ctx context.Context, msg *Message) { } type MessageSyncBatch struct { - topic string - partition int32 - maxSize int - msgBuf []*Message - processor MessageBatchSyncProcessor + topic string + partition int32 + maxSize int + msgBuf []*Message + msgBufBytes int64 + maxBytesPerBatch *int64 + processor MessageBatchSyncProcessor } -func NewMessageSyncBatch(topic string, partition int32, maxSize int, processor MessageBatchSyncProcessor) *MessageSyncBatch { +func NewMessageSyncBatch( + topic string, + partition int32, + maxSize int, + maxBufSize int, + maxBytesPerBatch *int64, + processor MessageBatchSyncProcessor, +) *MessageSyncBatch { return &MessageSyncBatch{ - topic: topic, - partition: partition, - maxSize: maxSize, - msgBuf: make([]*Message, 0, maxSize), - processor: processor, + topic: topic, + partition: partition, + maxSize: maxSize, + msgBuf: make([]*Message, 0, maxBufSize), + maxBytesPerBatch: maxBytesPerBatch, + processor: processor, } } @@ -115,7 +148,7 @@ func NewMessageSyncBatch(topic string, partition int32, maxSize int, processor M func (b *MessageSyncBatch) Process(session sarama.ConsumerGroupSession) error { if len(b.msgBuf) > 0 { klog.V(2).Infof( - "topic:%s: calling processor...", + "%s: calling processor...", b.topic, ) err := b.processor.Process(session, b.msgBuf) @@ -123,9 +156,10 @@ func (b *MessageSyncBatch) Process(session sarama.ConsumerGroupSession) error { return err } b.msgBuf = make([]*Message, 0, b.maxSize) + b.msgBufBytes = 0 } else { klog.V(2).Infof( - "topic:%s: no msgs", + "%s: no msgs", b.topic, ) } @@ -137,11 +171,26 @@ func (b *MessageSyncBatch) Process(session sarama.ConsumerGroupSession) error { func (b *MessageSyncBatch) Insert( session sarama.ConsumerGroupSession, msg *Message, + batchBytes int64, ) error { b.msgBuf = append(b.msgBuf, msg) + + if b.maxBytesPerBatch != nil && batchBytes != 0 { + b.msgBufBytes += batchBytes + if b.msgBufBytes >= *b.maxBytesPerBatch { + 
klog.V(2).Infof( + "%s: maxBytesPerBatch hit", + msg.Topic, + ) + return b.Process(session) + } + return nil + } + + // Deprecated if len(b.msgBuf) >= b.maxSize { klog.V(2).Infof( - "topic:%s: maxSize hit", + "%s: maxSize hit", msg.Topic, ) return b.Process(session) diff --git a/pkg/serializer/serializer.go b/pkg/serializer/serializer.go index 85b040700..c3ccb64c1 100644 --- a/pkg/serializer/serializer.go +++ b/pkg/serializer/serializer.go @@ -65,6 +65,7 @@ func (c *avroSerializer) Deserialize( Offset: message.Offset, Key: string(message.Key), Value: native, + Bytes: int64(len(message.Value)), MaskSchema: make(map[string]MaskInfo), }, nil } diff --git a/redshiftsink/README.md b/redshiftsink/README.md index cc466d5b2..7e07039a8 100644 --- a/redshiftsink/README.md +++ b/redshiftsink/README.md @@ -50,28 +50,35 @@ spec: maxLoaderLag: 10 batcher: suspend: false - maxSize: 10 - maxWaitSeconds: 30 - maxConcurrency: 10 mask: true maskFile: "github.com/practo/tipoca-stream/redshiftsink/pkg/transformer/masker/database.yaml" - podTemplate: - resources: - requests: - cpu: 100m - memory: 200Mi + sinkGroup: + all: + maxSizePerBatch: 10Mi + maxWaitSeconds: 30 + maxConcurrency: 10 + deploymentUnit: + podTemplate: + resources: + requests: + cpu: 100m + memory: 200Mi loader: suspend: false - maxSize: 10 - maxWaitSeconds: 30 - maxProcessingTime: 60000 redshiftSchema: "inventory" redshiftGroup: "sales" - podTemplate: - resources: - requests: - cpu: 100m - memory: 200Mi + sinkGroup: + all: + maxSizePerBatch: 1Gi + maxWaitSeconds: 30 + maxProcessingTime: 60000 + deploymentUnit: + podTemplate: + resources: + requests: + cpu: 100m + memory: 200Mi + ``` ```bash @@ -82,11 +89,6 @@ This will start syncing all the Kakfa topics matching regex `"^db.inventory*"` f ### Configuration -### Redshiftsink Spec Documentation (TODO): -| Spec | Description | Mandatory | -| :------------ | :----------- |:------------| - - ## RedshiftSink Managed Pods Redshiftsink performs the sink by creating two pods. Creating a RedshiftSink CRD installs the batcher and loader pods. Batcher and loader pods details are below: @@ -113,7 +115,8 @@ Flags: #### Metrics ``` -rsk_batcher_messages_processed_per_second +rsk_batcher_bytes_processed +rsk_batcher_messages_processed ``` ### Configuration @@ -144,7 +147,8 @@ Flags: #### Metrics ``` -rsk_loader_messages_processed_per_second +rsk_loader_bytes_loaded +rsk_loader_messages_loaded ``` ### Configuration
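
As a rough, illustrative sketch (not part of this patch) of how the new loader batching knobs shown in the README example surface in Go, the snippet below constructs a `LoaderConfig` with the pointer fields added in this change. The import path and package name are assumptions based on the module path used elsewhere in this diff; the 1Gi and 30s values simply mirror the README example.

```go
// Illustrative only: wiring the new byte-based batching fields into a
// LoaderConfig. The pkg/redshiftloader import path is an assumption.
package main

import (
	"fmt"

	"github.com/practo/tipoca-stream/redshiftsink/pkg/redshiftloader"
)

func main() {
	maxBytesPerBatch := int64(1 << 30) // 1Gi, mirroring the README example
	maxWaitSeconds := 30               // flush even if the byte threshold is not hit

	cfg := redshiftloader.LoaderConfig{
		// MaxSize is deprecated in favour of MaxBytesPerBatch and is left unset.
		MaxBytesPerBatch: &maxBytesPerBatch,
		MaxWaitSeconds:   &maxWaitSeconds,
	}
	fmt.Printf("loader config: %+v\n", cfg)
}
```
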