From f9e9750c2b92e6d63aa43270488eaa48f079323c Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Sun, 21 Mar 2021 21:31:32 +0530 Subject: [PATCH 01/49] MaxBytesPerBatch is better than maxSize MaxSize gets deprecated in favour of MaxBytesPerBatch. This config makes all fat and lean tables behave the same way in the Redshiftbatcher. They both take the same amount of memory and the scaling becomes easier now. Related https://github.com/practo/tipoca-stream/issues/136 https://github.com/practo/tipoca-stream/issues/167 --- controllers/batcher_deployment.go | 4 +- pkg/redshiftbatcher/batcher_handler.go | 63 +++++++++++++++++--------- pkg/serializer/message.go | 43 ++++++++++++++---- pkg/serializer/serializer.go | 1 + 4 files changed, 78 insertions(+), 33 deletions(-) diff --git a/controllers/batcher_deployment.go b/controllers/batcher_deployment.go index ed5f18e4d..8caec2f2e 100644 --- a/controllers/batcher_deployment.go +++ b/controllers/batcher_deployment.go @@ -123,8 +123,8 @@ func NewBatcher( MaskFile: rsk.Spec.Batcher.MaskFile, MaskFileVersion: maskFileVersion, MaxSize: rsk.Spec.Batcher.MaxSize, - MaxWaitSeconds: rsk.Spec.Batcher.MaxWaitSeconds, - MaxConcurrency: maxConcurrency, + MaxWaitSeconds: &rsk.Spec.Batcher.MaxWaitSeconds, + MaxConcurrency: &maxConcurrency, }, ConsumerGroups: groupConfigs, S3Sink: s3sink.Config{ diff --git a/pkg/redshiftbatcher/batcher_handler.go b/pkg/redshiftbatcher/batcher_handler.go index c1f797cf7..1f5c2393e 100644 --- a/pkg/redshiftbatcher/batcher_handler.go +++ b/pkg/redshiftbatcher/batcher_handler.go @@ -14,9 +14,10 @@ import ( "time" ) -const ( - DefaultMaxConcurrency = 10 - DefaultMaxProcessingTime = 180000 +var ( + DefaultMaxWaitSeconds int = 30 + DefaultMaxConcurrency int = 10 + DefaultMaxProcessingTime int32 = 180000 ) type BatcherConfig struct { @@ -38,12 +39,20 @@ type BatcherConfig struct { // MaxSize is the maximum size of a batch, on exceeding this batch is pushed // regarless of the wait time. + // Deprecated: in favour of MaxBytesPerBatch MaxSize int `yaml:"maxSize,omitempty"` - // MaxWaitSeconds after which the bash would be pushed regardless of its size. - MaxWaitSeconds int `yaml:"maxWaitSeconds,omitempty"` - // MaxConcurrency is the maximum number of concurrent batch processing to run + + // MaxWaitSeconds after which the batch would be flushed + // Defaults to 30 + MaxWaitSeconds *int `yaml:"maxWaitSeconds,omitempty"` + // MaxConcurrency is the maximum number of concurrent processing to run // Defaults to 10 - MaxConcurrency int `yaml:"maxConcurrency,omitempty"` + MaxConcurrency *int `yaml:"maxConcurrency,omitempty"` + // MaxBytesPerBatch is the maximum bytes per batch. Default is there + // if the user has not specified a default will be applied. + // If this is specified maxSize specification is not considered. 
+ // Default woult be specified after MaxSize is gone + MaxBytesPerBatch *int `yaml:"maxBytesPerBatch,omitempty"` } // batcherHandler is the sarama consumer handler @@ -52,12 +61,13 @@ type batcherHandler struct { ready chan bool ctx context.Context - maxSize int - maxWaitSeconds int - maxConcurrency int + maxSize int // Deprecated in favour of maxBytesPerBatch - consumerGroupID string + maxWaitSeconds *int + maxConcurrency *int + maxBytesPerBatch *int + consumerGroupID string kafkaConfig kafka.KafkaConfig saramaConfig kafka.SaramaConfig maskConfig masker.MaskConfig @@ -77,8 +87,11 @@ func NewHandler( ) *batcherHandler { // apply defaults - if batcherConfig.MaxConcurrency == 0 { - batcherConfig.MaxConcurrency = DefaultMaxConcurrency + if batcherConfig.MaxWaitSeconds == nil { + batcherConfig.MaxWaitSeconds = &DefaultMaxWaitSeconds + } + if batcherConfig.MaxConcurrency == nil { + batcherConfig.MaxConcurrency = &DefaultMaxConcurrency } return &batcherHandler{ @@ -87,9 +100,11 @@ func NewHandler( consumerGroupID: consumerGroupID, - maxSize: batcherConfig.MaxSize, - maxWaitSeconds: batcherConfig.MaxWaitSeconds, - maxConcurrency: batcherConfig.MaxConcurrency, + maxSize: batcherConfig.MaxSize, // Deprecated + + maxWaitSeconds: batcherConfig.MaxWaitSeconds, + maxConcurrency: batcherConfig.MaxConcurrency, + maxBytesPerBatch: batcherConfig.MaxBytesPerBatch, kafkaConfig: kafkaConfig, saramaConfig: saramaConfig, @@ -134,7 +149,7 @@ func (h *batcherHandler) ConsumeClaim( ) var lastSchemaId *int - processChan := make(chan []*serializer.Message, h.maxConcurrency) + processChan := make(chan []*serializer.Message, *h.maxConcurrency) errChan := make(chan error) processor := newBatchProcessor( h.consumerGroupID, @@ -145,16 +160,22 @@ func (h *batcherHandler) ConsumeClaim( h.saramaConfig, h.maskConfig, h.kafkaLoaderTopicPrefix, - h.maxConcurrency, + *h.maxConcurrency, ) + maxBufSize := h.maxSize + if h.maxBytesPerBatch != nil { + maxBufSize = serializer.DefaultMessageBufferSize + } msgBatch := serializer.NewMessageAsyncBatch( claim.Topic(), claim.Partition(), - h.maxSize, + h.maxSize, // Deprecated + maxBufSize, + h.maxBytesPerBatch, processChan, ) maxWaitTicker := time.NewTicker( - time.Duration(h.maxWaitSeconds) * time.Second, + time.Duration(*h.maxWaitSeconds) * time.Second, ) wg := &sync.WaitGroup{} @@ -227,7 +248,7 @@ func (h *batcherHandler) ConsumeClaim( // Flush the batch due to schema change msgBatch.Flush(session.Context()) } - // Flush the batch by size or insert in batch + // Flush the batch by maxBytes or size on insert in batch msgBatch.Insert(session.Context(), msg) *lastSchemaId = msg.SchemaId case <-maxWaitTicker.C: diff --git a/pkg/serializer/message.go b/pkg/serializer/message.go index e629d8032..074570957 100644 --- a/pkg/serializer/message.go +++ b/pkg/serializer/message.go @@ -7,6 +7,8 @@ import ( "sync" ) +const DefaultMessageBufferSize = 10 + type MessageBatchSyncProcessor interface { Process(session sarama.ConsumerGroupSession, msgBuf []*Message) error } @@ -27,31 +29,37 @@ type Message struct { Offset int64 Key string Value interface{} + Bytes int Operation string MaskSchema map[string]MaskInfo } type MessageAsyncBatch struct { - topic string - partition int32 - maxSize int - msgBuf []*Message - processChan chan []*Message + topic string + partition int32 + maxSize int + msgBuf []*Message + msgBufBytes int + maxBytesPerBatch *int + processChan chan []*Message } func NewMessageAsyncBatch( topic string, partition int32, maxSize int, + maxBufSize int, + maxBytesPerBatch *int, 
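+	// editor's note (grounded in the Insert logic below): a nil
+	// maxBytesPerBatch falls back to the deprecated count-based flush on
+	// maxSize; when set, Insert flushes once the summed msg.Bytes of the
+	// buffer reaches this threshold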
processChan chan []*Message, ) *MessageAsyncBatch { return &MessageAsyncBatch{ - topic: topic, - partition: partition, - maxSize: maxSize, - msgBuf: make([]*Message, 0, maxSize), - processChan: processChan, + topic: topic, + partition: partition, + maxSize: maxSize, + msgBuf: make([]*Message, 0, maxBufSize), + maxBytesPerBatch: maxBytesPerBatch, + processChan: processChan, } } @@ -66,6 +74,7 @@ func (b *MessageAsyncBatch) Flush(ctx context.Context) { case b.processChan <- b.msgBuf: } b.msgBuf = make([]*Message, 0, b.maxSize) + b.msgBufBytes = 0 klog.V(4).Infof( "%s: flushed:%d, processChan:%v", b.topic, @@ -84,6 +93,20 @@ func (b *MessageAsyncBatch) Flush(ctx context.Context) { // if batchSize >= maxSize func (b *MessageAsyncBatch) Insert(ctx context.Context, msg *Message) { b.msgBuf = append(b.msgBuf, msg) + + if b.maxBytesPerBatch != nil { + b.msgBufBytes += msg.Bytes + if b.msgBufBytes >= *b.maxBytesPerBatch { + klog.V(2).Infof( + "%s: maxBytesPerBatch hit", + msg.Topic, + ) + b.Flush(ctx) + } + return + } + + // Deprecated if len(b.msgBuf) >= b.maxSize { klog.V(2).Infof( "%s: maxSize hit", diff --git a/pkg/serializer/serializer.go b/pkg/serializer/serializer.go index 85b040700..c65345705 100644 --- a/pkg/serializer/serializer.go +++ b/pkg/serializer/serializer.go @@ -65,6 +65,7 @@ func (c *avroSerializer) Deserialize( Offset: message.Offset, Key: string(message.Key), Value: native, + Bytes: len(message.Value), MaskSchema: make(map[string]MaskInfo), }, nil } From 60978eeb23f95e567ad891dde5b2c60a04ce2930 Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Mon, 22 Mar 2021 12:16:46 +0530 Subject: [PATCH 02/49] Deployment Unit and SinkGroup spec --- api/v1/redshiftsink_types.go | 86 ++- api/v1/zz_generated.deepcopy.go | 105 ++++ .../tipoca.k8s.practo.dev_redshiftsinks.yaml | 522 +++++++++++++++++- 3 files changed, 689 insertions(+), 24 deletions(-) diff --git a/api/v1/redshiftsink_types.go b/api/v1/redshiftsink_types.go index dcf435d77..c7e3c05fd 100644 --- a/api/v1/redshiftsink_types.go +++ b/api/v1/redshiftsink_types.go @@ -42,31 +42,87 @@ type RedshiftPodTemplateSpec struct { Tolerations *[]corev1.Toleration `json:"tolerations,omitempty"` } -// RedshiftBatcherSpec defines the desired state of RedshiftBatcher -type RedshiftBatcherSpec struct { - // Supsend when turned on makes sure no batcher pods - // are running for this CRD object. Default: false - Suspend bool `json:"suspend,omitempty"` +// Deployment is used to specify how many topics will run together in a unit +// and how much resources needs to be given to them. +type DeploymentUnit struct { + // MaxTopics specify the maximum number of topics that + // can be part of this unit of deployment. + MaxTopics *int `json:"maxTopics,omitempty"` + + // PodTemplate describes the specification for the unit. + // +optional + PodTemplate *RedshiftPodTemplateSpec `json:"podTemplate,omitempty"` +} - // Max configurations for the batcher to batch - MaxSize int `json:"maxSize"` - MaxWaitSeconds int `json:"maxWaitSeconds"` +type SinkGroupSpec struct { + // MaxBytesPerBatch is the maximum bytes per batch. + MaxBytesPerBatch *int `json:"maxBytesPerBatch,omitempty"` + // MaxWaitSeconds is the maximum time to wait before making a batch, + // make a batch if MaxBytesPerBatch is not hit during MaxWaitSeconds. + MaxWaitSeconds *int `json:"maxWaitSeconds"` + // MaxConcurrency is the maximum no, of batch processors to run concurrently. 
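+	// (in the batcher this sets the buffer size of processChan, i.e. how
+	// many batches may be in flight concurrently)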
MaxConcurrency *int `json:"maxConcurrency,omitempty"`
-
-	// MaxProcessingTime is the sarama configuration MaxProcessingTime
-	// It is the max time in milliseconds required to consume one message.
+	// MaxProcessingTime is the max time in ms required to consume one message.
 	// Defaults to 1000ms
 	MaxProcessingTime *int32 `json:"maxProcessingTime,omitempty"`
 
-	// Mask when turned on enables masking of the data
-	// Default: false
+	// DeploymentUnit specifies the configuration of a unit of deployment.
+	// It lets the user specify how many topics, with what resources, can
+	// run in one unit of Deployment. Based on this the operator decides
+	// how many deployment units would be launched. This is useful in the
+	// first-time sink of redshiftsink resources having a huge number of
+	// topics. Check #167 to understand the need for a unit specification.
+	DeploymentUnit *DeploymentUnit `json:"deploymentUnit,omitempty"`
+}
+
+// SinkGroup is the group of batcher and loader pods based on the
+// mask version, target table and the topic release status. These grouped
+// pods can require different configurations to sink the resources. Pods of
+// the batcher and loader can specify their sink group configuration using
+// SinkGroupSpec. For example:
+// the first-time sink of a table requires different values for
+// MaxBytesPerBatch and different pod resources than the realtime
+// differential sink ones.
+// If All is specified and none of the others are specified, All is used
+// for the Main, Reload and ReloadDupe SinkGroups. If others are specified,
+// they take precedence over All. For example, if you have specified All and
+// Main, then for the MainSinkGroup Main is used and not All.
+type SinkGroup struct {
+	All        *SinkGroupSpec `json:"all,omitempty"`
+	Main       *SinkGroupSpec `json:"main,omitempty"`
+	Reload     *SinkGroupSpec `json:"reload,omitempty"`
+	ReloadDupe *SinkGroupSpec `json:"reloadDupe,omitempty"`
+}
+
+// RedshiftBatcherSpec defines the desired state of RedshiftBatcher
+type RedshiftBatcherSpec struct {
+	// Suspend is used to suspend batcher pods. Defaults to false.
+	Suspend bool `json:"suspend,omitempty"`
+
+	// Mask when turned on enables masking of the data. Defaults to false.
 	// +optional
 	Mask bool `json:"mask"`
+
+	// MaskFile is the file to use to apply mask configurations
 	// +optional
 	MaskFile string `json:"maskFile"`
 	// +optional
-	// Template describes the pods that will be created.
+	// SinkGroup contains the specification for the main, reload and
+	// reloadDupe sinkgroups. The operator uses 3 groups to perform the
+	// Redshiftsink. The topics which have never been released are part of
+	// the Reload SinkGroup; the topics which get released move to the Main
+	// SinkGroup. The ReloadDupe SinkGroup is used to give realtime updates
+	// to the topics which are reloading. Defaults apply to all sinkGroups
+	// if none is specified.
+	SinkGroup *SinkGroup `json:"sinkGroup,omitempty"`
+
+	// Deprecated: all of the spec below is deprecated in favour of SinkGroup #167
+	MaxSize        int  `json:"maxSize"`
+	MaxWaitSeconds int  `json:"maxWaitSeconds"`
+	MaxConcurrency *int `json:"maxConcurrency,omitempty"`
+	// MaxProcessingTime is the sarama configuration MaxProcessingTime
+	// It is the max time in milliseconds required to consume one message.
+	// Defaults to 1000ms
+	MaxProcessingTime *int32 `json:"maxProcessingTime,omitempty"`
+	// PodTemplate describes the pods that will be created.
// if this is not specifed, a default pod template is created // +optional PodTemplate *RedshiftPodTemplateSpec `json:"podTemplate,omitempty"` @@ -96,7 +152,7 @@ type RedshiftLoaderSpec struct { // RedshiftGroup to give the access to when new topics gets released RedshiftGroup *string `json:"redshiftGroup"` - // Template describes the pods that will be created. + // PodTemplate describes the pods that will be created. // if this is not specifed, a default pod template is created // +optional PodTemplate *RedshiftPodTemplateSpec `json:"podTemplate,omitempty"` diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index a3595f067..6fd4a806e 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -25,6 +25,31 @@ import ( runtime "k8s.io/apimachinery/pkg/runtime" ) +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DeploymentUnit) DeepCopyInto(out *DeploymentUnit) { + *out = *in + if in.MaxTopics != nil { + in, out := &in.MaxTopics, &out.MaxTopics + *out = new(int) + **out = **in + } + if in.PodTemplate != nil { + in, out := &in.PodTemplate, &out.PodTemplate + *out = new(RedshiftPodTemplateSpec) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeploymentUnit. +func (in *DeploymentUnit) DeepCopy() *DeploymentUnit { + if in == nil { + return nil + } + out := new(DeploymentUnit) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Group) DeepCopyInto(out *Group) { *out = *in @@ -87,6 +112,11 @@ func (in *MaskStatus) DeepCopy() *MaskStatus { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *RedshiftBatcherSpec) DeepCopyInto(out *RedshiftBatcherSpec) { *out = *in + if in.SinkGroup != nil { + in, out := &in.SinkGroup, &out.SinkGroup + *out = new(SinkGroup) + (*in).DeepCopyInto(*out) + } if in.MaxConcurrency != nil { in, out := &in.MaxConcurrency, &out.MaxConcurrency *out = new(int) @@ -340,6 +370,81 @@ func (in *ReleaseCondition) DeepCopy() *ReleaseCondition { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SinkGroup) DeepCopyInto(out *SinkGroup) { + *out = *in + if in.All != nil { + in, out := &in.All, &out.All + *out = new(SinkGroupSpec) + (*in).DeepCopyInto(*out) + } + if in.Main != nil { + in, out := &in.Main, &out.Main + *out = new(SinkGroupSpec) + (*in).DeepCopyInto(*out) + } + if in.Reload != nil { + in, out := &in.Reload, &out.Reload + *out = new(SinkGroupSpec) + (*in).DeepCopyInto(*out) + } + if in.ReloadDupe != nil { + in, out := &in.ReloadDupe, &out.ReloadDupe + *out = new(SinkGroupSpec) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SinkGroup. +func (in *SinkGroup) DeepCopy() *SinkGroup { + if in == nil { + return nil + } + out := new(SinkGroup) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *SinkGroupSpec) DeepCopyInto(out *SinkGroupSpec) { + *out = *in + if in.MaxBytesPerBatch != nil { + in, out := &in.MaxBytesPerBatch, &out.MaxBytesPerBatch + *out = new(int) + **out = **in + } + if in.MaxWaitSeconds != nil { + in, out := &in.MaxWaitSeconds, &out.MaxWaitSeconds + *out = new(int) + **out = **in + } + if in.MaxConcurrency != nil { + in, out := &in.MaxConcurrency, &out.MaxConcurrency + *out = new(int) + **out = **in + } + if in.MaxProcessingTime != nil { + in, out := &in.MaxProcessingTime, &out.MaxProcessingTime + *out = new(int32) + **out = **in + } + if in.DeploymentUnit != nil { + in, out := &in.DeploymentUnit, &out.DeploymentUnit + *out = new(DeploymentUnit) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SinkGroupSpec. +func (in *SinkGroupSpec) DeepCopy() *SinkGroupSpec { + if in == nil { + return nil + } + out := new(SinkGroupSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *TopicMaskStatus) DeepCopyInto(out *TopicMaskStatus) { *out = *in diff --git a/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml b/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml index 126636d9b..d3f5d81ed 100644 --- a/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml +++ b/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml @@ -43,10 +43,11 @@ spec: description: RedshiftBatcherSpec defines the desired state of RedshiftBatcher properties: mask: - description: 'Mask when turned on enables masking of the data Default: - false' + description: Mask when turned on enables masking of the data. Defaults + to false type: boolean maskFile: + description: MaskFile to use to apply mask configurations type: string maxConcurrency: type: integer @@ -57,13 +58,14 @@ spec: format: int32 type: integer maxSize: - description: Max configurations for the batcher to batch + description: 'Deprecated all of the below spec in favour of SinkGroup + #167' type: integer maxWaitSeconds: type: integer podTemplate: - description: Template describes the pods that will be created. if - this is not specifed, a default pod template is created + description: PodTemplate describes the pods that will be created. + if this is not specifed, a default pod template is created properties: image: description: Image for the underlying pod @@ -139,9 +141,511 @@ spec: type: object type: array type: object + sinkGroup: + description: SinkGroup contains the specification for main, reload + and reloadDupe sinkgroups. Operator uses 3 groups to perform Redshiftsink. + The topics which have never been released is part of Reload SinkGroup, + the topics which gets released moves to the Main SinkGroup. ReloadDupe + SinkGroup is used to give realtime upadates to the topics which + are reloading. Defaults are there for all sinkGroups if none is + specifed. + properties: + all: + properties: + deploymentUnit: + description: 'DeploymentUnit is to specify the configuration + of the unit of deployment This helps the user to specify + how many topics with what resources can run in one unit + of Deployment. Based on this the operator decides how + many deployment units would be launched. This is useful + in the first time sink of redshiftsink resources having + huge number of topics. Check #167 to understand the need + of a unit specification.' 
+ properties: + maxTopics: + description: MaxTopics specify the maximum number of + topics that can be part of this unit of deployment. + type: integer + podTemplate: + description: PodTemplate describes the specification + for the unit. + properties: + image: + description: Image for the underlying pod + type: string + resources: + description: Resources is for configuring the compute + resources required + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Limits describes the maximum amount + of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Requests describes the minimum + amount of compute resources required. If Requests + is omitted for a container, it defaults to + Limits if that is explicitly specified, otherwise + to an implementation-defined value. More info: + https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + type: object + tolerations: + description: Toleartions the underlying pods should + have + items: + description: The pod this Toleration is attached + to tolerates any taint that matches the triple + using the matching operator + . + properties: + effect: + description: Effect indicates the taint effect + to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, + PreferNoSchedule and NoExecute. + type: string + key: + description: Key is the taint key that the + toleration applies to. Empty means match + all taint keys. If the key is empty, operator + must be Exists; this combination means to + match all values and all keys. + type: string + operator: + description: Operator represents a key's relationship + to the value. Valid operators are Exists + and Equal. Defaults to Equal. Exists is + equivalent to wildcard for value, so that + a pod can tolerate all taints of a particular + category. + type: string + tolerationSeconds: + description: TolerationSeconds represents + the period of time the toleration (which + must be of effect NoExecute, otherwise this + field is ignored) tolerates the taint. By + default, it is not set, which means tolerate + the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict + immediately) by the system. + format: int64 + type: integer + value: + description: Value is the taint value the + toleration matches to. If the operator is + Exists, the value should be empty, otherwise + just a regular string. + type: string + type: object + type: array + type: object + type: object + maxBytesPerBatch: + description: MaxBytesPerBatch is the maximum bytes per batch. + type: integer + maxConcurrency: + description: MaxConcurrency is the maximum no, of batch + processors to run concurrently. + type: integer + maxProcessingTime: + description: MaxProcessingTime is the max time in ms required + to consume one message. 
Defaults to 1000ms + format: int32 + type: integer + maxWaitSeconds: + description: MaxWaitSeconds is the maximum time to wait + before making a batch, make a batch if MaxBytesPerBatch + is not hit during MaxWaitSeconds. + type: integer + required: + - maxWaitSeconds + type: object + main: + properties: + deploymentUnit: + description: 'DeploymentUnit is to specify the configuration + of the unit of deployment This helps the user to specify + how many topics with what resources can run in one unit + of Deployment. Based on this the operator decides how + many deployment units would be launched. This is useful + in the first time sink of redshiftsink resources having + huge number of topics. Check #167 to understand the need + of a unit specification.' + properties: + maxTopics: + description: MaxTopics specify the maximum number of + topics that can be part of this unit of deployment. + type: integer + podTemplate: + description: PodTemplate describes the specification + for the unit. + properties: + image: + description: Image for the underlying pod + type: string + resources: + description: Resources is for configuring the compute + resources required + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Limits describes the maximum amount + of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Requests describes the minimum + amount of compute resources required. If Requests + is omitted for a container, it defaults to + Limits if that is explicitly specified, otherwise + to an implementation-defined value. More info: + https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + type: object + tolerations: + description: Toleartions the underlying pods should + have + items: + description: The pod this Toleration is attached + to tolerates any taint that matches the triple + using the matching operator + . + properties: + effect: + description: Effect indicates the taint effect + to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, + PreferNoSchedule and NoExecute. + type: string + key: + description: Key is the taint key that the + toleration applies to. Empty means match + all taint keys. If the key is empty, operator + must be Exists; this combination means to + match all values and all keys. + type: string + operator: + description: Operator represents a key's relationship + to the value. Valid operators are Exists + and Equal. Defaults to Equal. Exists is + equivalent to wildcard for value, so that + a pod can tolerate all taints of a particular + category. + type: string + tolerationSeconds: + description: TolerationSeconds represents + the period of time the toleration (which + must be of effect NoExecute, otherwise this + field is ignored) tolerates the taint. By + default, it is not set, which means tolerate + the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict + immediately) by the system. 
+ format: int64 + type: integer + value: + description: Value is the taint value the + toleration matches to. If the operator is + Exists, the value should be empty, otherwise + just a regular string. + type: string + type: object + type: array + type: object + type: object + maxBytesPerBatch: + description: MaxBytesPerBatch is the maximum bytes per batch. + type: integer + maxConcurrency: + description: MaxConcurrency is the maximum no, of batch + processors to run concurrently. + type: integer + maxProcessingTime: + description: MaxProcessingTime is the max time in ms required + to consume one message. Defaults to 1000ms + format: int32 + type: integer + maxWaitSeconds: + description: MaxWaitSeconds is the maximum time to wait + before making a batch, make a batch if MaxBytesPerBatch + is not hit during MaxWaitSeconds. + type: integer + required: + - maxWaitSeconds + type: object + reload: + properties: + deploymentUnit: + description: 'DeploymentUnit is to specify the configuration + of the unit of deployment This helps the user to specify + how many topics with what resources can run in one unit + of Deployment. Based on this the operator decides how + many deployment units would be launched. This is useful + in the first time sink of redshiftsink resources having + huge number of topics. Check #167 to understand the need + of a unit specification.' + properties: + maxTopics: + description: MaxTopics specify the maximum number of + topics that can be part of this unit of deployment. + type: integer + podTemplate: + description: PodTemplate describes the specification + for the unit. + properties: + image: + description: Image for the underlying pod + type: string + resources: + description: Resources is for configuring the compute + resources required + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Limits describes the maximum amount + of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Requests describes the minimum + amount of compute resources required. If Requests + is omitted for a container, it defaults to + Limits if that is explicitly specified, otherwise + to an implementation-defined value. More info: + https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + type: object + tolerations: + description: Toleartions the underlying pods should + have + items: + description: The pod this Toleration is attached + to tolerates any taint that matches the triple + using the matching operator + . + properties: + effect: + description: Effect indicates the taint effect + to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, + PreferNoSchedule and NoExecute. + type: string + key: + description: Key is the taint key that the + toleration applies to. Empty means match + all taint keys. If the key is empty, operator + must be Exists; this combination means to + match all values and all keys. 
+ type: string + operator: + description: Operator represents a key's relationship + to the value. Valid operators are Exists + and Equal. Defaults to Equal. Exists is + equivalent to wildcard for value, so that + a pod can tolerate all taints of a particular + category. + type: string + tolerationSeconds: + description: TolerationSeconds represents + the period of time the toleration (which + must be of effect NoExecute, otherwise this + field is ignored) tolerates the taint. By + default, it is not set, which means tolerate + the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict + immediately) by the system. + format: int64 + type: integer + value: + description: Value is the taint value the + toleration matches to. If the operator is + Exists, the value should be empty, otherwise + just a regular string. + type: string + type: object + type: array + type: object + type: object + maxBytesPerBatch: + description: MaxBytesPerBatch is the maximum bytes per batch. + type: integer + maxConcurrency: + description: MaxConcurrency is the maximum no, of batch + processors to run concurrently. + type: integer + maxProcessingTime: + description: MaxProcessingTime is the max time in ms required + to consume one message. Defaults to 1000ms + format: int32 + type: integer + maxWaitSeconds: + description: MaxWaitSeconds is the maximum time to wait + before making a batch, make a batch if MaxBytesPerBatch + is not hit during MaxWaitSeconds. + type: integer + required: + - maxWaitSeconds + type: object + reloadDupe: + properties: + deploymentUnit: + description: 'DeploymentUnit is to specify the configuration + of the unit of deployment This helps the user to specify + how many topics with what resources can run in one unit + of Deployment. Based on this the operator decides how + many deployment units would be launched. This is useful + in the first time sink of redshiftsink resources having + huge number of topics. Check #167 to understand the need + of a unit specification.' + properties: + maxTopics: + description: MaxTopics specify the maximum number of + topics that can be part of this unit of deployment. + type: integer + podTemplate: + description: PodTemplate describes the specification + for the unit. + properties: + image: + description: Image for the underlying pod + type: string + resources: + description: Resources is for configuring the compute + resources required + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Limits describes the maximum amount + of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Requests describes the minimum + amount of compute resources required. If Requests + is omitted for a container, it defaults to + Limits if that is explicitly specified, otherwise + to an implementation-defined value. 
More info: + https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + type: object + tolerations: + description: Toleartions the underlying pods should + have + items: + description: The pod this Toleration is attached + to tolerates any taint that matches the triple + using the matching operator + . + properties: + effect: + description: Effect indicates the taint effect + to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, + PreferNoSchedule and NoExecute. + type: string + key: + description: Key is the taint key that the + toleration applies to. Empty means match + all taint keys. If the key is empty, operator + must be Exists; this combination means to + match all values and all keys. + type: string + operator: + description: Operator represents a key's relationship + to the value. Valid operators are Exists + and Equal. Defaults to Equal. Exists is + equivalent to wildcard for value, so that + a pod can tolerate all taints of a particular + category. + type: string + tolerationSeconds: + description: TolerationSeconds represents + the period of time the toleration (which + must be of effect NoExecute, otherwise this + field is ignored) tolerates the taint. By + default, it is not set, which means tolerate + the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict + immediately) by the system. + format: int64 + type: integer + value: + description: Value is the taint value the + toleration matches to. If the operator is + Exists, the value should be empty, otherwise + just a regular string. + type: string + type: object + type: array + type: object + type: object + maxBytesPerBatch: + description: MaxBytesPerBatch is the maximum bytes per batch. + type: integer + maxConcurrency: + description: MaxConcurrency is the maximum no, of batch + processors to run concurrently. + type: integer + maxProcessingTime: + description: MaxProcessingTime is the max time in ms required + to consume one message. Defaults to 1000ms + format: int32 + type: integer + maxWaitSeconds: + description: MaxWaitSeconds is the maximum time to wait + before making a batch, make a batch if MaxBytesPerBatch + is not hit during MaxWaitSeconds. + type: integer + required: + - maxWaitSeconds + type: object + type: object suspend: - description: 'Supsend when turned on makes sure no batcher pods - are running for this CRD object. Default: false' + description: Supsend is used to suspend batcher pods. Defaults to + false. type: boolean required: - maxSize @@ -172,8 +676,8 @@ spec: maxWaitSeconds: type: integer podTemplate: - description: Template describes the pods that will be created. if - this is not specifed, a default pod template is created + description: PodTemplate describes the pods that will be created. 
+ if this is not specifed, a default pod template is created properties: image: description: Image for the underlying pod From 351eb190c31d61998d081afdcd93021b37a4ba04 Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Mon, 22 Mar 2021 22:06:31 +0530 Subject: [PATCH 03/49] Operator changes for SinkGroupSpec, DeploymentUnit, MaxBytesPerBatch --- controllers/batcher_deployment.go | 130 +++++++-- controllers/redshiftsink_controller.go | 16 +- controllers/sinkgroup_controller.go | 321 ++++++++++++++++++----- controllers/sinkgroup_controller_test.go | 115 ++++++++ controllers/suite_test.go | 4 +- pkg/redshiftbatcher/batcher_handler.go | 1 + 6 files changed, 490 insertions(+), 97 deletions(-) create mode 100644 controllers/sinkgroup_controller_test.go diff --git a/controllers/batcher_deployment.go b/controllers/batcher_deployment.go index 8caec2f2e..21313f056 100644 --- a/controllers/batcher_deployment.go +++ b/controllers/batcher_deployment.go @@ -25,6 +25,86 @@ type Batcher struct { config *corev1.ConfigMap } +// applyBatcherSinkGroupDefaults applies the defaults for the batcher +// deplyoments of the sink group so that the user does not need to specify +// big lengthy configurations everytime. Also the defaults are +// optimized for maximum performance and is recommended to use. +func applyBatcherSinkGroupDefaults( + rsk *tipocav1.RedshiftSink, + sgType string, + defaultImage string, +) *tipocav1.SinkGroupSpec { + // defaults + maxBytesPerBatch := &redshiftbatcher.DefaultMaxBytesPerBatch + maxWaitSeconds := &redshiftbatcher.DefaultMaxWaitSeconds + maxConcurrency := &redshiftbatcher.DefaultMaxConcurrency + maxProcessingTime := &redshiftbatcher.DefaultMaxProcessingTime + maxTopics := &DefaultMaxBatcherTopics + image := &defaultImage + var resources *corev1.ResourceRequirements + var tolerations *[]corev1.Toleration + + // apply the sinkGroup spec rules + var specifiedSpec *tipocav1.SinkGroupSpec + if rsk.Spec.Batcher.SinkGroup.All != nil { + specifiedSpec = rsk.Spec.Batcher.SinkGroup.All + } + switch sgType { + case MainSinkGroup: + if rsk.Spec.Batcher.SinkGroup.Main != nil { + specifiedSpec = rsk.Spec.Batcher.SinkGroup.Main + } + case ReloadSinkGroup: + if rsk.Spec.Batcher.SinkGroup.Reload != nil { + specifiedSpec = rsk.Spec.Batcher.SinkGroup.Reload + } + case ReloadDupeSinkGroup: + if rsk.Spec.Batcher.SinkGroup.ReloadDupe != nil { + specifiedSpec = rsk.Spec.Batcher.SinkGroup.ReloadDupe + } + } + + // overwrite with the defaults with the specified values + if specifiedSpec != nil { + if specifiedSpec.MaxBytesPerBatch != nil { + maxBytesPerBatch = specifiedSpec.MaxBytesPerBatch + } + if specifiedSpec.MaxWaitSeconds != nil { + maxWaitSeconds = specifiedSpec.MaxWaitSeconds + } + if specifiedSpec.MaxConcurrency != nil { + maxConcurrency = specifiedSpec.MaxConcurrency + } + if specifiedSpec.MaxProcessingTime != nil { + maxProcessingTime = specifiedSpec.MaxProcessingTime + } + if specifiedSpec.DeploymentUnit.PodTemplate.Image != nil { + image = specifiedSpec.DeploymentUnit.PodTemplate.Image + } + if specifiedSpec.DeploymentUnit.PodTemplate.Resources != nil { + resources = specifiedSpec.DeploymentUnit.PodTemplate.Resources + } + if specifiedSpec.DeploymentUnit.PodTemplate.Tolerations != nil { + tolerations = specifiedSpec.DeploymentUnit.PodTemplate.Tolerations + } + } + + return &tipocav1.SinkGroupSpec{ + MaxBytesPerBatch: maxBytesPerBatch, + MaxWaitSeconds: maxWaitSeconds, + MaxConcurrency: maxConcurrency, + MaxProcessingTime: maxProcessingTime, + DeploymentUnit: &tipocav1.DeploymentUnit{ + 
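+			// editor's note: maxTopics and image always resolve to a
+			// value here (user-specified or package default), while
+			// resources and tolerations remain nil unless specified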
MaxTopics: maxTopics, + PodTemplate: &tipocav1.RedshiftPodTemplateSpec{ + Image: image, + Resources: resources, + Tolerations: tolerations, + }, + }, + } +} + func batcherSecret(secret map[string]string) (map[string]string, error) { s := make(map[string]string) secretKeys := []string{ @@ -49,8 +129,14 @@ func batcherSecret(secret map[string]string) (map[string]string, error) { return s, nil } -func batcherName(rskName, sinkGroup string) string { - return fmt.Sprintf("%s-%s%s", rskName, sinkGroup, BatcherSuffix) +func batcherName(rskName, sinkGroup string, id string) string { + return fmt.Sprintf( + "%s-%s%s%s", + rskName, + sinkGroup, + id, + BatcherSuffix, + ) } func NewBatcher( @@ -59,6 +145,7 @@ func NewBatcher( maskFileVersion string, secret map[string]string, sinkGroup string, + sinkGroupSpec *tipocav1.SinkGroupSpec, consumerGroups map[string]consumerGroup, defaultImage string, defaultKafkaVersion string, @@ -72,26 +159,36 @@ func NewBatcher( return nil, err } - totalTopics := 0 - // defaults kafkaVersion := rsk.Spec.KafkaVersion if kafkaVersion == "" { kafkaVersion = defaultKafkaVersion } - maxConcurrency := redshiftbatcher.DefaultMaxConcurrency - if rsk.Spec.Batcher.MaxConcurrency != nil { - maxConcurrency = *rsk.Spec.Batcher.MaxConcurrency - } var maxProcessingTime int32 = redshiftbatcher.DefaultMaxProcessingTime if rsk.Spec.Batcher.MaxProcessingTime != nil { maxProcessingTime = *rsk.Spec.Batcher.MaxProcessingTime } + var maxSize int // Deprecated + var maxWaitSeconds, maxConcurrency, maxBytesPerBatch *int + if sinkGroupSpec != nil { + maxBytesPerBatch = sinkGroupSpec.MaxBytesPerBatch + maxWaitSeconds = sinkGroupSpec.MaxWaitSeconds + maxConcurrency = sinkGroupSpec.MaxConcurrency + maxProcessingTime = *sinkGroupSpec.MaxProcessingTime + } else { // Deprecated below, remove later when removing TODO: + maxSize = rsk.Spec.Batcher.MaxSize + maxWaitSeconds = &rsk.Spec.Batcher.MaxWaitSeconds + maxConcurrency = &redshiftbatcher.DefaultMaxConcurrency + if rsk.Spec.Batcher.MaxConcurrency != nil { + maxConcurrency = rsk.Spec.Batcher.MaxConcurrency + } - // other defaults not configurable defaults for the batcher + } + // other defaults which are not configurable defaults for the batcher var sessionTimeoutSeconds int = 10 var hearbeatIntervalSeconds int = 2 + totalTopics := 0 var groupConfigs []kafka.ConsumerGroupConfig for groupID, group := range consumerGroups { totalTopics += len(group.topics) @@ -118,13 +215,14 @@ func NewBatcher( conf := config.Config{ Batcher: redshiftbatcher.BatcherConfig{ - Mask: rsk.Spec.Batcher.Mask, - MaskSalt: secret["maskSalt"], - MaskFile: rsk.Spec.Batcher.MaskFile, - MaskFileVersion: maskFileVersion, - MaxSize: rsk.Spec.Batcher.MaxSize, - MaxWaitSeconds: &rsk.Spec.Batcher.MaxWaitSeconds, - MaxConcurrency: &maxConcurrency, + Mask: rsk.Spec.Batcher.Mask, + MaskSalt: secret["maskSalt"], + MaskFile: rsk.Spec.Batcher.MaskFile, + MaskFileVersion: maskFileVersion, + MaxSize: maxSize, // Deprecated + MaxWaitSeconds: maxWaitSeconds, + MaxConcurrency: maxConcurrency, + MaxBytesPerBatch: maxBytesPerBatch, }, ConsumerGroups: groupConfigs, S3Sink: s3sink.Config{ diff --git a/controllers/redshiftsink_controller.go b/controllers/redshiftsink_controller.go index 162619838..a1e2e6171 100644 --- a/controllers/redshiftsink_controller.go +++ b/controllers/redshiftsink_controller.go @@ -341,8 +341,8 @@ func (r *RedshiftSinkReconciler) reconcile( setType(MainSinkGroup). setTopics(kafkaTopics). setMaskVersion(""). 
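+		// buildBatchers (below) creates one batcher deployment per
+		// deploymentUnit; without a SinkGroup spec it falls back to a
+		// single deployment as before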
- buildBatcher(secret, r.DefaultBatcherImage, r.DefaultKafkaVersion, tlsConfig). - buildLoader(secret, r.DefaultLoaderImage, "", r.DefaultKafkaVersion, tlsConfig, r.DefaultRedshiftMaxOpenConns, r.DefaultRedshiftMaxIdleConns). + buildBatchers(secret, r.DefaultBatcherImage, r.DefaultKafkaVersion, tlsConfig). + buildLoaders(secret, r.DefaultLoaderImage, "", r.DefaultKafkaVersion, tlsConfig, r.DefaultRedshiftMaxOpenConns, r.DefaultRedshiftMaxIdleConns). build() result, event, err := maskLessSinkGroup.reconcile(ctx) return result, event, err @@ -441,8 +441,8 @@ func (r *RedshiftSinkReconciler) reconcile( setTopics(allowedReloadingTopics). setMaskVersion(status.desiredVersion). setTopicGroups(). - buildBatcher(secret, r.DefaultBatcherImage, r.DefaultKafkaVersion, tlsConfig). - buildLoader(secret, r.DefaultLoaderImage, ReloadTableSuffix, r.DefaultKafkaVersion, tlsConfig, r.DefaultRedshiftMaxOpenConns, r.DefaultRedshiftMaxIdleConns). + buildBatchers(secret, r.DefaultBatcherImage, r.DefaultKafkaVersion, tlsConfig). + buildLoaders(secret, r.DefaultLoaderImage, ReloadTableSuffix, r.DefaultKafkaVersion, tlsConfig, r.DefaultRedshiftMaxOpenConns, r.DefaultRedshiftMaxIdleConns). build() reloadingRatio := status.reloadingRatio() @@ -492,8 +492,8 @@ func (r *RedshiftSinkReconciler) reconcile( setTopics(status.reloadingDupe). setMaskVersion(status.currentVersion). setTopicGroups(). - buildBatcher(secret, r.DefaultBatcherImage, r.DefaultKafkaVersion, tlsConfig). - buildLoader(secret, r.DefaultLoaderImage, "", r.DefaultKafkaVersion, tlsConfig, r.DefaultRedshiftMaxOpenConns, r.DefaultRedshiftMaxIdleConns). + buildBatchers(secret, r.DefaultBatcherImage, r.DefaultKafkaVersion, tlsConfig). + buildLoaders(secret, r.DefaultLoaderImage, "", r.DefaultKafkaVersion, tlsConfig, r.DefaultRedshiftMaxOpenConns, r.DefaultRedshiftMaxIdleConns). build() main = sgBuilder. @@ -502,8 +502,8 @@ func (r *RedshiftSinkReconciler) reconcile( setTopics(status.released). setMaskVersion(status.desiredVersion). setTopicGroups(). - buildBatcher(secret, r.DefaultBatcherImage, r.DefaultKafkaVersion, tlsConfig). - buildLoader(secret, r.DefaultLoaderImage, "", r.DefaultKafkaVersion, tlsConfig, r.DefaultRedshiftMaxOpenConns, r.DefaultRedshiftMaxIdleConns). + buildBatchers(secret, r.DefaultBatcherImage, r.DefaultKafkaVersion, tlsConfig). + buildLoaders(secret, r.DefaultLoaderImage, "", r.DefaultKafkaVersion, tlsConfig, r.DefaultRedshiftMaxOpenConns, r.DefaultRedshiftMaxIdleConns). 
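+		// build() assembles the sinkGroup from the batchers and loaders
+		// prepared above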
build() sinkGroups := []*sinkGroup{reloadDupe, reload, main} diff --git a/controllers/sinkgroup_controller.go b/controllers/sinkgroup_controller.go index b63baa8f2..12e2532cd 100644 --- a/controllers/sinkgroup_controller.go +++ b/controllers/sinkgroup_controller.go @@ -3,6 +3,7 @@ package controllers import ( "context" "fmt" + "math" "math/rand" "sync" "time" @@ -19,24 +20,29 @@ import ( ) const ( - MainSinkGroup = "main" - ReloadSinkGroup = "reload" - ReloadDupeSinkGroup = "reload-dupe" + AllSinkGroup = "all" + MainSinkGroup = "main" + ReloadSinkGroup = "reload" + ReloadDupeSinkGroup = "reload-dupe" + DefaultMaxBatcherLag = int64(100) DefautMaxLoaderLag = int64(10) - ReloadTableSuffix = "_ts_adx_reload" + + ReloadTableSuffix = "_ts_adx_reload" ) +var DefaultMaxBatcherTopics int = 30 + type sinkGroupInterface interface { - Reconcile(ctx context.Context) (ctrl.Result, ReconcilerEvent, error) - RealtimeTopics(currentRealtime []string, watcher kafka.Watcher, cache *sync.Map) []string + reconcile(ctx context.Context) (ctrl.Result, ReconcilerEvent, error) + realtimeTopics(currentRealtime []string, watcher kafka.Watcher, cache *sync.Map) []string } type Deployment interface { Name() string Namespace() string - Deployment() *appsv1.Deployment Config() *corev1.ConfigMap + Deployment() *appsv1.Deployment UpdateConfig(current *corev1.ConfigMap) bool UpdateDeployment(current *appsv1.Deployment) bool } @@ -48,8 +54,9 @@ type sinkGroup struct { sgType string topics []string topicGroups map[string]tipocav1.Group - batcher Deployment - loader Deployment + + batchers []Deployment + loaders []Deployment } type sinkGroupBuilder interface { @@ -60,8 +67,10 @@ type sinkGroupBuilder interface { setTopics(topics []string) sinkGroupBuilder setMaskVersion(version string) sinkGroupBuilder setTopicGroups() sinkGroupBuilder - buildBatcher(secret map[string]string, defaultImage, defaultKafkaVersion string, tlsConfig *kafka.TLSConfig) sinkGroupBuilder - buildLoader(secret map[string]string, defaultImage, tableSuffix string, defaultKafkaVersion string, tlsConfig *kafka.TLSConfig, defaultMaxOpenConns int, defaultMaxIdleConns int) sinkGroupBuilder + + buildBatchers(secret map[string]string, defaultImage, defaultKafkaVersion string, tlsConfig *kafka.TLSConfig) sinkGroupBuilder + buildLoaders(secret map[string]string, defaultImage, tableSuffix string, defaultKafkaVersion string, tlsConfig *kafka.TLSConfig, defaultMaxOpenConns int, defaultMaxIdleConns int) sinkGroupBuilder + build() *sinkGroup } @@ -77,8 +86,9 @@ type buildSinkGroup struct { topics []string topicGroups map[string]tipocav1.Group maskVersion string - batcher Deployment - loader Deployment + + batchers []Deployment + loaders []Deployment } func (sb *buildSinkGroup) setRedshiftSink(rsk *tipocav1.RedshiftSink) sinkGroupBuilder { @@ -122,35 +132,114 @@ func (sb *buildSinkGroup) setTopicGroups() sinkGroupBuilder { return sb } -func (sb *buildSinkGroup) buildBatcher( +type deploymentUnit struct { + id string + topics []string +} + +// assignDeploymentUnits allocates the total topics into groups of deployments +// based on the specification. 
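+// Each unit gets at most maxTopics topics, so the number of units comes to
+// ceil(len(allTopics)/maxTopics); these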
groups are called as deploymentUnit +func assignDeploymentUnits(allTopics []string, maxTopics int) []deploymentUnit { + if len(allTopics) <= maxTopics { + return []deploymentUnit{ + deploymentUnit{ + id: "0", + topics: allTopics, + }, + } + } + + units := []deploymentUnit{} + totalUnits := int(math.Ceil(float64(len(allTopics)) / float64(maxTopics))) + startIndex := 0 + lastIndex := maxTopics + for id := 0; id < totalUnits; id++ { + topics := allTopics[startIndex:lastIndex] + startIndex = lastIndex + if lastIndex+maxTopics >= len(allTopics) { + lastIndex = len(allTopics) + } else { + lastIndex = lastIndex + maxTopics + } + units = append(units, deploymentUnit{ + id: fmt.Sprintf("%d", id), + topics: topics, + }) + } + + return units +} + +func (sb *buildSinkGroup) buildBatchers( secret map[string]string, defaultImage string, defaultKafkaVersion string, tlsConfig *kafka.TLSConfig, ) sinkGroupBuilder { - consumerGroups, err := computeConsumerGroups(sb.topicGroups, sb.topics) - if err != nil { - klog.Fatalf("Error computing consumer group from status, err: %v", err) - } - batcher, err := NewBatcher( - batcherName(sb.rsk.Name, sb.sgType), - sb.rsk, - sb.maskVersion, - secret, - sb.sgType, - consumerGroups, - defaultImage, - defaultKafkaVersion, - tlsConfig, - ) - if err != nil { - klog.Fatalf("Error making batcher: %v", err) + batchers := []Deployment{} + if sb.rsk.Spec.Batcher.SinkGroup != nil { + sinkGroupSpec := applyBatcherSinkGroupDefaults( + sb.rsk, + sb.sgType, + defaultImage, + ) + units := assignDeploymentUnits( + sb.topics, + *sinkGroupSpec.DeploymentUnit.MaxTopics, + ) + for _, unit := range units { + consumerGroups, err := computeConsumerGroups( + sb.topicGroups, unit.topics) + if err != nil { + klog.Fatalf( + "Error computing consumer group from status, err: %v", err) + } + batcher, err := NewBatcher( + batcherName(sb.rsk.Name, sb.sgType, unit.id), + sb.rsk, + sb.maskVersion, + secret, + sb.sgType, + sinkGroupSpec, + consumerGroups, + defaultImage, + defaultKafkaVersion, + tlsConfig, + ) + if err != nil { + klog.Fatalf("Error making batcher: %v", err) + } + batchers = append(batchers, batcher) + } + } else { // Deprecated + consumerGroups, err := computeConsumerGroups(sb.topicGroups, sb.topics) + if err != nil { + klog.Fatalf( + "Error computing consumer group from status, err: %v", err) + } + batcher, err := NewBatcher( + batcherName(sb.rsk.Name, sb.sgType, ""), + sb.rsk, + sb.maskVersion, + secret, + sb.sgType, + nil, + consumerGroups, + defaultImage, + defaultKafkaVersion, + tlsConfig, + ) + if err != nil { + klog.Fatalf("Error making batcher: %v", err) + } + batchers = append(batchers, batcher) } - sb.batcher = batcher + + sb.batchers = batchers return sb } -func (sb *buildSinkGroup) buildLoader( +func (sb *buildSinkGroup) buildLoaders( secret map[string]string, defaultImage string, tableSuffix string, @@ -179,7 +268,7 @@ func (sb *buildSinkGroup) buildLoader( if err != nil { klog.Fatalf("Error making loader: %v", err) } - sb.loader = loader + sb.loaders = []Deployment{loader} return sb } @@ -191,8 +280,9 @@ func (sb *buildSinkGroup) build() *sinkGroup { sgType: sb.sgType, topics: sb.topics, topicGroups: sb.topicGroups, - batcher: sb.batcher, - loader: sb.loader, + + batchers: sb.batchers, + loaders: sb.loaders, } } @@ -361,15 +451,12 @@ func (s *sinkGroup) reconcileConfigMap( func (s *sinkGroup) reconcileDeployment( ctx context.Context, - labelInstance string, d Deployment, ) ( ReconcilerEvent, error, ) { deployment := d.Deployment() - configMap := d.Config() - current, exists, 
err := getDeployment( ctx, s.client, @@ -397,30 +484,54 @@ func (s *sinkGroup) reconcileDeployment( if event != nil { return event, nil } - return nil, nil } - klog.V(3).Infof("[Cleanup] Attempt deploy, current: %v", deployment.Name) - // find and cleanup dead deployments + err = ctrlutil.SetOwnerReference(s.rsk, deployment, s.scheme) + if err != nil { + return nil, err + } + + // create new deployment pointing to new config map + klog.V(2).Infof("Creating deployment: %v", deployment.Name) + event, err := createDeployment(ctx, s.client, deployment, s.rsk) + if err != nil { + return nil, err + } + + return event, nil +} + +func (s *sinkGroup) cleanup( + ctx context.Context, + labelInstance string, + neededDeployments map[string]bool, + neededConfigMaps map[string]bool, +) ( + ReconcilerEvent, + error, +) { + klog.V(3).Infof("Current active deployments, needed: %+v", neededDeployments) + // query all deployment for the sinkgroup deploymentList, err := listDeployments( ctx, s.client, labelInstance, s.sgType, - d.Namespace(), + s.rsk.Namespace, s.rsk.Name, ) if err != nil { return nil, err } for _, deploy := range deploymentList.Items { - klog.V(3).Infof("[Cleanup] Attempting deploy: %v", deploy.Name) + klog.V(3).Infof("Cleanup suspect deployment: %v", deploy.Name) labelValue, ok := deploy.Labels[InstanceName] if !ok { continue } - if labelValue != deployment.Name { - klog.V(2).Infof("[Cleanup] Deleting deploy: %s", labelValue) + _, ok = neededDeployments[labelValue] + if !ok { + klog.V(3).Infof("Cleanup deployment: %v", labelValue) event, err := deleteDeployment(ctx, s.client, &deploy, s.rsk) if err != nil { return nil, err @@ -431,14 +542,14 @@ func (s *sinkGroup) reconcileDeployment( } } - klog.V(3).Infof("[Cleanup] Attempt cm, current: %v", configMap.Name) - // find and cleanup dead config maps + klog.V(3).Infof("Current active configMaps, needed: %+v", neededConfigMaps) + // query all configmaps for the sinkgroup configMapList, err := listConfigMaps( ctx, s.client, labelInstance, s.sgType, - d.Namespace(), + s.rsk.Namespace, s.rsk.Name, ) if err != nil { @@ -446,13 +557,14 @@ func (s *sinkGroup) reconcileDeployment( } for _, config := range configMapList.Items { - klog.V(3).Infof("[Cleanup] Attempting cm: %v", config.Name) + klog.V(3).Infof("Cleanup configmap suspect cm: %v", config.Name) labelValue, ok := config.Labels[InstanceName] if !ok { continue } - if labelValue != configMap.Name { - klog.V(2).Infof("[Cleanup] Deleting cm: %s", labelValue) + _, ok = neededConfigMaps[labelValue] + if !ok { + klog.V(2).Infof("Cleanup configmap: %s", labelValue) event, err := deleteConfigMap(ctx, s.client, &config, s.rsk) if err != nil { return nil, err @@ -463,18 +575,7 @@ func (s *sinkGroup) reconcileDeployment( } } - err = ctrlutil.SetOwnerReference(s.rsk, deployment, s.scheme) - if err != nil { - return nil, err - } - - // create new deployment pointing to new config map - klog.V(2).Infof("Creating deployment: %v", deployment.Name) - event, err := createDeployment(ctx, s.client, deployment, s.rsk) - if err != nil { - return nil, err - } - return event, nil + return nil, nil } func (s *sinkGroup) reconcileBatcher( @@ -485,7 +586,7 @@ func (s *sinkGroup) reconcileBatcher( error, ) { // reconcile batcher configMap - event, err := s.reconcileConfigMap(ctx, s.batcher) + event, err := s.reconcileConfigMap(ctx, d) if err != nil { return nil, fmt.Errorf("Error reconciling batcher configMap, %v", err) } @@ -494,7 +595,7 @@ func (s *sinkGroup) reconcileBatcher( } // reconcile batcher deployment - event, err 
= s.reconcileDeployment(ctx, BatcherLabelInstance, s.batcher) + event, err = s.reconcileDeployment(ctx, d) if err != nil { return nil, fmt.Errorf("Error reconciling batcher deployment, %v", err) } @@ -505,6 +606,46 @@ func (s *sinkGroup) reconcileBatcher( return nil, nil } +func (s *sinkGroup) reconcileBatchers( + ctx context.Context, + deployments []Deployment, +) ( + ReconcilerEvent, + error, +) { + // cleanup the ones which should be dead before creating new + var neededDeployments, neededConfigMaps []string + for _, d := range deployments { + neededDeployments = append(neededDeployments, d.Name()) + neededConfigMaps = append(neededConfigMaps, d.Name()) + } + event, err := s.cleanup( + ctx, + BatcherLabelInstance, + toMap(neededDeployments), + toMap(neededConfigMaps), + ) + if err != nil { + return nil, err + } + if event != nil { + return event, nil + } + + // create or update + for _, d := range deployments { + event, err := s.reconcileBatcher(ctx, d) + if err != nil { + return nil, err + } + if event != nil { + return event, nil + } + } + + return nil, nil +} + func (s *sinkGroup) reconcileLoader( ctx context.Context, d Deployment, @@ -512,7 +653,7 @@ func (s *sinkGroup) reconcileLoader( ReconcilerEvent, error, ) { - event, err := s.reconcileConfigMap(ctx, s.loader) + event, err := s.reconcileConfigMap(ctx, d) if err != nil { return nil, fmt.Errorf("Error reconciling loader configMap, %v", err) } @@ -521,7 +662,7 @@ func (s *sinkGroup) reconcileLoader( } // reconcile loader deployment - event, err = s.reconcileDeployment(ctx, LoaderLabelInstance, s.loader) + event, err = s.reconcileDeployment(ctx, d) if err != nil { return nil, fmt.Errorf("Error reconciling loader deployment, %v", err) } @@ -532,6 +673,46 @@ func (s *sinkGroup) reconcileLoader( return nil, nil } +func (s *sinkGroup) reconcileLoaders( + ctx context.Context, + deployments []Deployment, +) ( + ReconcilerEvent, + error, +) { + // cleanup the ones which should be dead before creating new + var neededDeployments, neededConfigMaps []string + for _, d := range deployments { + neededDeployments = append(neededDeployments, d.Name()) + neededConfigMaps = append(neededConfigMaps, d.Name()) + } + event, err := s.cleanup( + ctx, + LoaderLabelInstance, + toMap(neededDeployments), + toMap(neededConfigMaps), + ) + if err != nil { + return nil, err + } + if event != nil { + return event, nil + } + + // create or update + for _, d := range deployments { + event, err := s.reconcileLoader(ctx, d) + if err != nil { + return nil, err + } + if event != nil { + return event, nil + } + } + + return nil, nil +} + func maxLag(rsk *tipocav1.RedshiftSink, topic string) (int64, int64) { var maxBatcherLag, maxLoaderLag int64 if rsk.Spec.ReleaseCondition == nil { @@ -790,7 +971,7 @@ func (s *sinkGroup) reconcile( ) { result := ctrl.Result{RequeueAfter: time.Second * 30} - event, err := s.reconcileBatcher(ctx, s.batcher) + event, err := s.reconcileBatchers(ctx, s.batchers) if err != nil { return result, nil, err } @@ -798,7 +979,7 @@ func (s *sinkGroup) reconcile( return result, event, nil } - event, err = s.reconcileLoader(ctx, s.loader) + event, err = s.reconcileLoaders(ctx, s.loaders) if err != nil { return result, nil, err } diff --git a/controllers/sinkgroup_controller_test.go b/controllers/sinkgroup_controller_test.go new file mode 100644 index 000000000..677675046 --- /dev/null +++ b/controllers/sinkgroup_controller_test.go @@ -0,0 +1,115 @@ +package controllers + +import ( + "reflect" + "testing" +) + +func TestDeploymentUnitAssignment(t 
*testing.T) { + // t.Parallel() + + tests := []struct { + name string + allTopics []string + maxTopics int + resultUnits []deploymentUnit + }{ + { + name: "single group", + allTopics: []string{"t1", "t2", "t3", "t4", "t5"}, + maxTopics: 10, + resultUnits: []deploymentUnit{ + deploymentUnit{ + id: "0", + topics: []string{"t1", "t2", "t3", "t4", "t5"}, + }, + }, + }, + { + name: "equal group", + allTopics: []string{"t1", "t2", "t3", "t4", "t5"}, + maxTopics: 1, + resultUnits: []deploymentUnit{ + deploymentUnit{ + id: "0", + topics: []string{"t1"}, + }, + deploymentUnit{ + id: "1", + topics: []string{"t2"}, + }, + deploymentUnit{ + id: "2", + topics: []string{"t3"}, + }, + deploymentUnit{ + id: "3", + topics: []string{"t4"}, + }, + deploymentUnit{ + id: "4", + topics: []string{"t5"}, + }, + }, + }, + { + name: "unequal group", + allTopics: []string{"t1", "t2", "t3", "t4", "t5"}, + maxTopics: 3, + resultUnits: []deploymentUnit{ + deploymentUnit{ + id: "0", + topics: []string{"t1", "t2", "t3"}, + }, + deploymentUnit{ + id: "1", + topics: []string{"t4", "t5"}, + }, + }, + }, + { + name: "equal group one more", + allTopics: []string{"t1", "t2", "t3", "t4", "t5", "t6"}, + maxTopics: 2, + resultUnits: []deploymentUnit{ + deploymentUnit{ + id: "0", + topics: []string{"t1", "t2"}, + }, + deploymentUnit{ + id: "1", + topics: []string{"t3", "t4"}, + }, + deploymentUnit{ + id: "2", + topics: []string{"t5", "t6"}, + }, + }, + }, + { + name: "unequal group", + allTopics: []string{"t1", "t2", "t3", "t4", "t5", "t6"}, + maxTopics: 5, + resultUnits: []deploymentUnit{ + deploymentUnit{ + id: "0", + topics: []string{"t1", "t2", "t3", "t4", "t5"}, + }, + deploymentUnit{ + id: "1", + topics: []string{"t6"}, + }, + }, + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + resultUnits := assignDeploymentUnits(tc.allTopics, tc.maxTopics) + if !reflect.DeepEqual(tc.resultUnits, resultUnits) { + t.Errorf("expected: %v, got: %v\n", tc.resultUnits, resultUnits) + } + }) + } +} diff --git a/controllers/suite_test.go b/controllers/suite_test.go index 6b7ba543d..bfb8a4574 100644 --- a/controllers/suite_test.go +++ b/controllers/suite_test.go @@ -27,8 +27,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/envtest" "sigs.k8s.io/controller-runtime/pkg/envtest/printer" - logf "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/controller-runtime/pkg/log/zap" tipocav1 "github.com/practo/tipoca-stream/redshiftsink/api/v1" // +kubebuilder:scaffold:imports @@ -50,7 +48,7 @@ func TestAPIs(t *testing.T) { } var _ = BeforeSuite(func(done Done) { - logf.SetLogger(zap.LoggerTo(GinkgoWriter, true)) + // logf.SetLogger(zap.LoggerTo(GinkgoWriter, true)) By("bootstrapping test environment") testEnv = &envtest.Environment{ diff --git a/pkg/redshiftbatcher/batcher_handler.go b/pkg/redshiftbatcher/batcher_handler.go index 1f5c2393e..180e7e7b6 100644 --- a/pkg/redshiftbatcher/batcher_handler.go +++ b/pkg/redshiftbatcher/batcher_handler.go @@ -15,6 +15,7 @@ import ( ) var ( + DefaultMaxBytesPerBatch int = 1024 DefaultMaxWaitSeconds int = 30 DefaultMaxConcurrency int = 10 DefaultMaxProcessingTime int32 = 180000 From 45cf6f4f23ddc9b41c3954122d83f32b298be366 Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Tue, 23 Mar 2021 11:47:37 +0530 Subject: [PATCH 04/49] Documentation improvements --- api/v1/redshiftsink_types.go | 63 ++++++++------ .../tipoca.k8s.practo.dev_redshiftsinks.yaml | 85 +++++++++---------- controllers/batcher_deployment.go | 21 +++-- 
controllers/sinkgroup_controller.go | 3 +- controllers/sinkgroup_controller_test.go | 2 +- 5 files changed, 90 insertions(+), 84 deletions(-) diff --git a/api/v1/redshiftsink_types.go b/api/v1/redshiftsink_types.go index c7e3c05fd..fa3d5e4f3 100644 --- a/api/v1/redshiftsink_types.go +++ b/api/v1/redshiftsink_types.go @@ -42,54 +42,67 @@ type RedshiftPodTemplateSpec struct { Tolerations *[]corev1.Toleration `json:"tolerations,omitempty"` } -// Deployment is used to specify how many topics will run together in a unit -// and how much resources needs to be given to them. +// DeploymentUnit is used to specify how many topics will run together in a unit +// and how many resources it needs. type DeploymentUnit struct { // MaxTopics specify the maximum number of topics that // can be part of this unit of deployment. MaxTopics *int `json:"maxTopics,omitempty"` - // PodTemplate describes the specification for the unit. + // PodTemplate describes the pod specification for the unit. // +optional PodTemplate *RedshiftPodTemplateSpec `json:"podTemplate,omitempty"` } +// SinkGroupSpec defines the specification for one of the three sinkgroups: +// 1. MainSinkGroup 2. ReloadSinkGroup 3. ReloadDupeSinkGroup type SinkGroupSpec struct { // MaxBytesPerBatch is the maximum bytes per batch. + // +optional MaxBytesPerBatch *int `json:"maxBytesPerBatch,omitempty"` // MaxWaitSeconds is the maximum time to wait before making a batch, // make a batch if MaxBytesPerBatch is not hit during MaxWaitSeconds. - MaxWaitSeconds *int `json:"maxWaitSeconds"` + // +optional + MaxWaitSeconds *int `json:"maxWaitSeconds,omitempty"` // MaxConcurrency is the maximum no. of batch processors to run concurrently. + // +optional MaxConcurrency *int `json:"maxConcurrency,omitempty"` // MaxProcessingTime is the max time in ms required to consume one message. // Defaults to 1000ms + // +optional MaxProcessingTime *int32 `json:"maxProcessingTime,omitempty"` - - // DeploymentUnit is to specify the configuration of the unit of deployment - // This helps the user to specify how many topics with what resources - // can run in one unit of Deployment. Based on this the operator decides - // how many deployment units would be launched. This is useful in the first - // time sink of redshiftsink resources having huge number of topics. - // Check #167 to understand the need of a unit specification. + // DeploymentUnit is the unit of deployment for the batcher or the loader. + // Using this user can specify the no of topics and the amount of resources + // needed to run them as one unit. Operator calculates the total units + // based on this and the total number of topics it needs to sink. This + // greatly solves the scaling issues described in #167. + // +optional DeploymentUnit *DeploymentUnit `json:"deploymentUnit,omitempty"` } // SinkGroup is the group of batcher and loader pods based on the -// mask version, target table and the topic release status. These grouped -// pods can require different configuration to sink the resources. Pods of batcher -// and loader can specify their sink group configuration using SinkGroupSpec. -// For example: +// mask version, target table and the topic release status. This is the specification +// that allows a different set of SinkGroupSpec for each type of SinkGroup. +// Explaining the precedence: // The first time sink of a table requires different values for MaxBytesPerBatch -// and different pod resources than the realtime differential sink ones.
-// If All is specified and none of the others are specified, all is used -// for Main, Reload and ReloadDupe SinkGroup. If others are specified then -// they take precedence over all. For example if you have specified All and -// Main, then for the MainSinkGroup Main is used and not All. +// and different pod resources. +// a) If All is specified and none of the others are specified, All is used. +// b) If All and Main both are specified then Main gets used for MainSinkGroup +// c) If All and Reload are specified then Reload gets used for ReloadSinkGroup +// d) If All and ReloadDupe are specified then ReloadDupe gets used for ReloadDupeSinkGroup +// e) If none is specified then Defaults are used for all of them. type SinkGroup struct { - All *SinkGroupSpec `json:"all,omitempty"` - Main *SinkGroupSpec `json:"main,omitempty"` - Reload *SinkGroupSpec `json:"reload,omitempty"` + // All specifies a common specification for all SinkGroups + // +optional + All *SinkGroupSpec `json:"all,omitempty"` + // Main specifies the MainSinkGroup specification, overwrites All + // +optional + Main *SinkGroupSpec `json:"main,omitempty"` + // Reload specifies the ReloadSinkGroup specification, overwrites All + // +optional + Reload *SinkGroupSpec `json:"reload,omitempty"` + // ReloadDupe specifies the ReloadDupeSinkGroup specification, overwrites All + // +optional ReloadDupe *SinkGroupSpec `json:"reloadDupe,omitempty"` } @@ -110,8 +123,9 @@ type RedshiftBatcherSpec struct { // sinkgroups. Operator uses 3 groups to perform Redshiftsink. The topics // which have never been released is part of Reload SinkGroup, the topics // which gets released moves to the Main SinkGroup. ReloadDupe SinkGroup - // is used to give realtime upadates to the topics which are reloading. + // is used to give realtime updates to the topics which are reloading. // Defaults are there for all sinkGroups if none is specified. + // +optional SinkGroup *SinkGroup `json:"sinkGroup,omitempty"` // Deprecated all of the below spec in favour of SinkGroup #167 @@ -137,7 +151,6 @@ type RedshiftLoaderSpec struct { // Max configurations for the loader to batch the load MaxSize int `json:"maxSize"` MaxWaitSeconds int `json:"maxWaitSeconds"` - // MaxProcessingTime is the sarama configuration MaxProcessingTime // It is the max time in milliseconds required to consume one message. // Defaults to 600000ms (10mins) MaxProcessingTime *int32 `json:"maxProcessingTime,omitempty"` diff --git a/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml b/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml index d3f5d81ed..a4be57170 100644 --- a/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml +++ b/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml @@ -146,28 +146,28 @@ spec: and reloadDupe sinkgroups. Operator uses 3 groups to perform Redshiftsink. The topics which have never been released is part of Reload SinkGroup, the topics which gets released moves to the Main SinkGroup. ReloadDupe - SinkGroup is used to give realtime upadates to the topics which + SinkGroup is used to give realtime updates to the topics which are reloading. Defaults are there for all sinkGroups if none is specified. properties: all: + description: All specifies a common specification for all SinkGroups properties: deploymentUnit: - description: 'DeploymentUnit is to specify the configuration - of the unit of deployment This helps the user to specify - how many topics with what resources can run in one unit - of Deployment. Based on this the operator decides how - many deployment units would be launched.
This is useful - in the first time sink of redshiftsink resources having - huge number of topics. Check #167 to understand the need - of a unit specification.' + description: 'DeploymentUnit is the unit of deployment for + the batcher or the loader. Using this user can specify + the no of topics and the amount of resources needed to + run them as one unit. Operator calculates the total units + based on this and the total number of topics it needs + to sink. This greatly solves the scaling issues described + in #167.' properties: maxTopics: description: MaxTopics specify the maximum number of topics that can be part of this unit of deployment. type: integer podTemplate: - description: PodTemplate describes the specification + description: PodTemplate describes the pod specification for the unit. properties: image: @@ -270,27 +270,26 @@ spec: before making a batch, make a batch if MaxBytesPerBatch is not hit during MaxWaitSeconds. type: integer - required: - - maxWaitSeconds type: object main: + description: Main specifies the MainSinkGroup specification, + overwrites All properties: deploymentUnit: - description: 'DeploymentUnit is to specify the configuration - of the unit of deployment This helps the user to specify - how many topics with what resources can run in one unit - of Deployment. Based on this the operator decides how - many deployment units would be launched. This is useful - in the first time sink of redshiftsink resources having - huge number of topics. Check #167 to understand the need - of a unit specification.' + description: 'DeploymentUnit is the unit of deployment for + the batcher or the loader. Using this user can specify + the no of topics and the amount of resources needed to + run them as one unit. Operator calculates the total units + based on this and the total number of topics it needs + to sink. This greatly solves the scaling issues described + in #167.' properties: maxTopics: description: MaxTopics specify the maximum number of topics that can be part of this unit of deployment. type: integer podTemplate: - description: PodTemplate describes the specification + description: PodTemplate describes the pod specification for the unit. properties: image: @@ -393,27 +392,26 @@ spec: before making a batch, make a batch if MaxBytesPerBatch is not hit during MaxWaitSeconds. type: integer - required: - - maxWaitSeconds type: object reload: + description: Reload specifies the ReloadSinkGroup specification, + overwrites All properties: deploymentUnit: - description: 'DeploymentUnit is to specify the configuration - of the unit of deployment This helps the user to specify - how many topics with what resources can run in one unit - of Deployment. Based on this the operator decides how - many deployment units would be launched. This is useful - in the first time sink of redshiftsink resources having - huge number of topics. Check #167 to understand the need - of a unit specification.' + description: 'DeploymentUnit is the unit of deployment for + the batcher or the loader. Using this user can specify + the no of topics and the amount of resources needed to + run them as one unit. Operator calculates the total units + based on this and the total number of topics it needs + to sink. This greatly solves the scaling issues described + in #167.' properties: maxTopics: description: MaxTopics specify the maximum number of topics that can be part of this unit of deployment. 
type: integer podTemplate: - description: PodTemplate describes the specification + description: PodTemplate describes the pod specification for the unit. properties: image: @@ -516,27 +514,26 @@ spec: before making a batch, make a batch if MaxBytesPerBatch is not hit during MaxWaitSeconds. type: integer - required: - - maxWaitSeconds type: object reloadDupe: + description: ReloadDupe specifies the ReloadDupeSinkGroup specification, + overwrites All properties: deploymentUnit: - description: 'DeploymentUnit is to specify the configuration - of the unit of deployment This helps the user to specify - how many topics with what resources can run in one unit - of Deployment. Based on this the operator decides how - many deployment units would be launched. This is useful - in the first time sink of redshiftsink resources having - huge number of topics. Check #167 to understand the need - of a unit specification.' + description: 'DeploymentUnit is the unit of deployment for + the batcher or the loader. Using this user can specify + the no of topics and the amount of resources needed to + run them as one unit. Operator calculates the total units + based on this and the total number of topics it needs + to sink. This greatly solves the scaling issues described + in #167.' properties: maxTopics: description: MaxTopics specify the maximum number of topics that can be part of this unit of deployment. type: integer podTemplate: - description: PodTemplate describes the specification + description: PodTemplate describes the pod specification for the unit. properties: image: @@ -639,8 +636,6 @@ spec: before making a batch, make a batch if MaxBytesPerBatch is not hit during MaxWaitSeconds. type: integer - required: - - maxWaitSeconds type: object type: object suspend: diff --git a/controllers/batcher_deployment.go b/controllers/batcher_deployment.go index 21313f056..2203944a8 100644 --- a/controllers/batcher_deployment.go +++ b/controllers/batcher_deployment.go @@ -26,9 +26,9 @@ type Batcher struct { } // applyBatcherSinkGroupDefaults applies the defaults for the batcher -// deplyoments of the sink group so that the user does not need to specify -// big lengthy configurations everytime. Also the defaults are -// optimized for maximum performance and is recommended to use. +// deployments of the sink group. User does not need to specify big lengthy +// configurations everytime. Defaults are optimized for maximum performance +// and is recommended to use. 
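To make the precedence rules from the API comments above concrete, here is a minimal sketch of the spec resolution (the helper name resolveSinkGroupSpec is hypothetical; in the actual code the selection happens inline in applyBatcherSinkGroupDefaults, whose hunk follows):

    // resolveSinkGroupSpec illustrates precedence rules a) to e): the
    // per-group spec wins over All, and a nil result means the operator
    // defaults apply.
    func resolveSinkGroupSpec(sg *tipocav1.SinkGroup, sgType string) *tipocav1.SinkGroupSpec {
        if sg == nil {
            return nil // e) nothing specified, defaults are used
        }
        spec := sg.All // a) All is the fallback for every sink group
        switch sgType {
        case MainSinkGroup:
            if sg.Main != nil {
                spec = sg.Main // b) Main overrides All for MainSinkGroup
            }
        case ReloadSinkGroup:
            if sg.Reload != nil {
                spec = sg.Reload // c) Reload overrides All for ReloadSinkGroup
            }
        case ReloadDupeSinkGroup:
            if sg.ReloadDupe != nil {
                spec = sg.ReloadDupe // d) ReloadDupe overrides All for ReloadDupeSinkGroup
            }
        }
        return spec
    }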
func applyBatcherSinkGroupDefaults( rsk *tipocav1.RedshiftSink, sgType string, @@ -164,27 +164,26 @@ func NewBatcher( if kafkaVersion == "" { kafkaVersion = defaultKafkaVersion } - var maxProcessingTime int32 = redshiftbatcher.DefaultMaxProcessingTime - if rsk.Spec.Batcher.MaxProcessingTime != nil { - maxProcessingTime = *rsk.Spec.Batcher.MaxProcessingTime - } var maxSize int // Deprecated - var maxWaitSeconds, maxConcurrency, maxBytesPerBatch *int + var maxBytesPerBatch, maxWaitSeconds, maxConcurrency *int + var maxProcessingTime int32 = redshiftbatcher.DefaultMaxProcessingTime if sinkGroupSpec != nil { maxBytesPerBatch = sinkGroupSpec.MaxBytesPerBatch maxWaitSeconds = sinkGroupSpec.MaxWaitSeconds maxConcurrency = sinkGroupSpec.MaxConcurrency maxProcessingTime = *sinkGroupSpec.MaxProcessingTime - } else { // Deprecated below, remove later when removing TODO: + } else { // Deprecated maxSize = rsk.Spec.Batcher.MaxSize maxWaitSeconds = &rsk.Spec.Batcher.MaxWaitSeconds maxConcurrency = &redshiftbatcher.DefaultMaxConcurrency if rsk.Spec.Batcher.MaxConcurrency != nil { maxConcurrency = rsk.Spec.Batcher.MaxConcurrency } - + if rsk.Spec.Batcher.MaxProcessingTime != nil { + maxProcessingTime = *rsk.Spec.Batcher.MaxProcessingTime + } } - // other defaults which are not configurable defaults for the batcher + // defaults which are not configurable for the user var sessionTimeoutSeconds int = 10 var hearbeatIntervalSeconds int = 2 diff --git a/controllers/sinkgroup_controller.go b/controllers/sinkgroup_controller.go index 12e2532cd..85f644d2c 100644 --- a/controllers/sinkgroup_controller.go +++ b/controllers/sinkgroup_controller.go @@ -137,8 +137,7 @@ type deploymentUnit struct { topics []string } -// assignDeploymentUnits allocates the total topics into groups of deployments -// based on the specification. groups are called as deploymentUnit +// assignDeploymentUnits allocates the total topics into units of deployments. 
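The hunk above cuts off the body of assignDeploymentUnits. Based on the unit tests added earlier in this patch (for example, five topics with maxTopics=3 become one unit of three topics and one of two), a sketch consistent with that behaviour would be (assuming the fmt import; the real function resumes below):

    // chunk allTopics into units of at most maxTopics each; unit ids are
    // the sequential strings "0", "1", ... (a reconstruction from the
    // tests, not the verbatim implementation)
    func assignDeploymentUnits(allTopics []string, maxTopics int) []deploymentUnit {
        var units []deploymentUnit
        for i := 0; i < len(allTopics); i += maxTopics {
            end := i + maxTopics
            if end > len(allTopics) {
                end = len(allTopics)
            }
            units = append(units, deploymentUnit{
                id:     fmt.Sprintf("%d", i/maxTopics),
                topics: allTopics[i:end],
            })
        }
        return units
    }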
func assignDeploymentUnits(allTopics []string, maxTopics int) []deploymentUnit { if len(allTopics) <= maxTopics { return []deploymentUnit{ diff --git a/controllers/sinkgroup_controller_test.go b/controllers/sinkgroup_controller_test.go index 677675046..c62cc36d0 100644 --- a/controllers/sinkgroup_controller_test.go +++ b/controllers/sinkgroup_controller_test.go @@ -6,7 +6,7 @@ import ( ) func TestDeploymentUnitAssignment(t *testing.T) { - // t.Parallel() + t.Parallel() tests := []struct { name string From 49a89d2f12e3c5408e08265820a6119afe8ea2bb Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Tue, 23 Mar 2021 16:51:53 +0530 Subject: [PATCH 05/49] Loader changes for the new spec, resource Quantity Few more changes for metrics --- api/v1/redshiftsink_types.go | 37 +- api/v1/zz_generated.deepcopy.go | 21 +- .../tipoca.k8s.practo.dev_redshiftsinks.yaml | 612 +++++++++++++++++- config/samples/tipoca_v1_redshiftsink.yaml | 32 +- controllers/batcher_deployment.go | 20 +- controllers/loader_deployment.go | 117 +++- controllers/sinkgroup_controller.go | 83 ++- pkg/redshiftbatcher/batch_processor.go | 48 +- pkg/redshiftbatcher/batcher_handler.go | 16 +- pkg/redshiftbatcher/metrics.go | 17 + pkg/redshiftloader/job.go | 17 +- pkg/redshiftloader/load_processor.go | 34 +- pkg/redshiftloader/loader_handler.go | 69 +- pkg/redshiftloader/metrics.go | 23 +- pkg/serializer/message.go | 62 +- pkg/serializer/serializer.go | 2 +- redshiftsink/README.md | 46 +- 17 files changed, 1061 insertions(+), 195 deletions(-) diff --git a/api/v1/redshiftsink_types.go b/api/v1/redshiftsink_types.go index fa3d5e4f3..3bcc24b62 100644 --- a/api/v1/redshiftsink_types.go +++ b/api/v1/redshiftsink_types.go @@ -18,6 +18,7 @@ package v1 import ( corev1 "k8s.io/api/core/v1" + resource "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -57,14 +58,19 @@ type DeploymentUnit struct { // SinkGroupSpec defines the specification for one of the three sinkgroups: // 1. MainSinkGroup 2. ReloadSinkGroup 3. ReloadDupeSinkGroup type SinkGroupSpec struct { - // MaxBytesPerBatch is the maximum bytes per batch. + // MaxSizePerBatch is the maximum size of the batch in Bytes, Ki, Mi, Gi + // Examples: + // 1000 is 1000 bytes, 1Ki is 1 kibibyte (1024 bytes), + // 100Mi is 100 mebibytes, 1Gi is 1 gibibyte // +optional - MaxBytesPerBatch *int `json:"maxBytesPerBatch,omitempty"` + MaxSizePerBatch *resource.Quantity `json:"maxSizePerBatch,omitempty"` // MaxWaitSeconds is the maximum time to wait before making a batch, - // make a batch if MaxBytesPerBatch is not hit during MaxWaitSeconds. + // make a batch if MaxSizePerBatch is not hit during MaxWaitSeconds. // +optional MaxWaitSeconds *int `json:"maxWaitSeconds,omitempty"` // MaxConcurrency is the maximum no. of batch processors to run concurrently. + // This spec is useful only when the sink group pod operates in + // asynchronous mode; loader pods do not need this. // +optional MaxConcurrency *int `json:"maxConcurrency,omitempty"` // MaxProcessingTime is the max time in ms required to consume one message. @@ -84,7 +90,7 @@ type SinkGroupSpec struct { // mask version, target table and the topic release status. This is the specification // that allows a different set of SinkGroupSpec for each type of SinkGroup. // Explaining the precedence: -// The first time sink of a table requires different values for MaxBytesPerBatch +// The first time sink of a table requires different values for MaxSizePerBatch // and different pod resources.
// a) If All is specified and none of the others are specified, All is used. // b) If All and Main both are specified then Main gets used for MainSinkGroup // c) If All and Reload are specified then Reload gets used for ReloadSinkGroup // d) If All and ReloadDupe are specified then ReloadDupe gets used for ReloadDupeSinkGroup // e) If none is specified then Defaults are used for all of them. type SinkGroup struct { @@ -148,13 +154,14 @@ type RedshiftLoaderSpec struct { // are running for this CRD object. Default: false Suspend bool `json:"suspend,omitempty"` - // Max configurations for the loader to batch the load - MaxSize int `json:"maxSize"` - MaxWaitSeconds int `json:"maxWaitSeconds"` - // MaxProcessingTime is the sarama configuration MaxProcessingTime - // It is the max time in milliseconds required to consume one message. - // Defaults to 600000ms (10mins) - MaxProcessingTime *int32 `json:"maxProcessingTime,omitempty"` + // SinkGroup contains the specification for main, reload and reloadDupe + // sinkgroups. Operator uses 3 groups to perform Redshiftsink. The topics + // which have never been released is part of Reload SinkGroup, the topics + // which gets released moves to the Main SinkGroup. ReloadDupe SinkGroup + // is used to give realtime updates to the topics which are reloading. + // Defaults are there for all sinkGroups if none is specified. + // +optional + SinkGroup *SinkGroup `json:"sinkGroup,omitempty"` // RedshiftSchema to sink the data in RedshiftSchema string `json:"redshiftSchema"` @@ -165,6 +172,14 @@ type RedshiftLoaderSpec struct { // RedshiftGroup to give the access to when new topics gets released RedshiftGroup *string `json:"redshiftGroup"` + // Deprecated all of the below spec in favour of SinkGroup #167 + // Max configurations for the loader to batch the load + MaxSize int `json:"maxSize"` + MaxWaitSeconds int `json:"maxWaitSeconds"` + // MaxProcessingTime is the sarama configuration MaxProcessingTime + // It is the max time in milliseconds required to consume one message. + // Defaults to 600000ms (10mins) + MaxProcessingTime *int32 `json:"maxProcessingTime,omitempty"` // PodTemplate describes the pods that will be created. // if this is not specified, a default pod template is created // +optional diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 6fd4a806e..faa9e95a6 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -147,10 +147,10 @@ func (in *RedshiftBatcherSpec) DeepCopy() *RedshiftBatcherSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *RedshiftLoaderSpec) DeepCopyInto(out *RedshiftLoaderSpec) { *out = *in - if in.MaxProcessingTime != nil { - in, out := &in.MaxProcessingTime, &out.MaxProcessingTime - *out = new(int32) - **out = **in + if in.SinkGroup != nil { + in, out := &in.SinkGroup, &out.SinkGroup + *out = new(SinkGroup) + (*in).DeepCopyInto(*out) } if in.RedshiftMaxOpenConns != nil { in, out := &in.RedshiftMaxOpenConns, &out.RedshiftMaxOpenConns *out = new(int) **out = **in } @@ -167,6 +167,11 @@ func (in *RedshiftLoaderSpec) DeepCopyInto(out *RedshiftLoaderSpec) { *out = new(string) **out = **in } + if in.MaxProcessingTime != nil { + in, out := &in.MaxProcessingTime, &out.MaxProcessingTime + *out = new(int32) + **out = **in + } if in.PodTemplate != nil { in, out := &in.PodTemplate, &out.PodTemplate *out = new(RedshiftPodTemplateSpec) (*in).DeepCopyInto(*out) } @@ -408,10 +413,10 @@ func (in *SinkGroup) DeepCopy() *SinkGroup { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
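A note on the new type: resource.Quantity suffixes are binary, so 1Ki is 1024 bytes and 1Mi is 1024*1024 bytes. The controller turns the quantity into a plain byte count with the real k8s.io/apimachinery API, roughly:

    import "k8s.io/apimachinery/pkg/api/resource"

    q := resource.MustParse("10Mi") // panics on a malformed quantity string
    maxBytesPerBatch := q.Value()   // int64(10485760), handed to the pods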
func (in *SinkGroupSpec) DeepCopyInto(out *SinkGroupSpec) { *out = *in - if in.MaxBytesPerBatch != nil { - in, out := &in.MaxBytesPerBatch, &out.MaxBytesPerBatch - *out = new(int) - **out = **in + if in.MaxSizePerBatch != nil { + in, out := &in.MaxSizePerBatch, &out.MaxSizePerBatch + x := (*in).DeepCopy() + *out = &x } if in.MaxWaitSeconds != nil { in, out := &in.MaxWaitSeconds, &out.MaxWaitSeconds diff --git a/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml b/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml index a4be57170..95d37f739 100644 --- a/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml +++ b/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml @@ -253,21 +253,30 @@ spec: type: array type: object type: object - maxBytesPerBatch: - description: MaxBytesPerBatch is the maximum bytes per batch. - type: integer maxConcurrency: description: MaxConcurrency is the maximum no, of batch - processors to run concurrently. + processors to run concurrently. this spec is useful only + when the sink group pod operates on asynchronous mode. + loader pods does not needed this. type: integer maxProcessingTime: description: MaxProcessingTime is the max time in ms required to consume one message. Defaults to 1000ms format: int32 type: integer + maxSizePerBatch: + anyOf: + - type: integer + - type: string + description: 'MaxSizePerBatch is the maximum size of the + batch in Bytes, Ki, Mi, Gi Examples: 1000 is 1000 bytes, + 1Ki is 1 Killo byte, 100Mi 100 mega bytes, 1Gi is 1 Giga + bytes' + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true maxWaitSeconds: description: MaxWaitSeconds is the maximum time to wait - before making a batch, make a batch if MaxBytesPerBatch + before making a batch, make a batch if MaxSizePerBatch is not hit during MaxWaitSeconds. type: integer type: object @@ -375,21 +384,30 @@ spec: type: array type: object type: object - maxBytesPerBatch: - description: MaxBytesPerBatch is the maximum bytes per batch. - type: integer maxConcurrency: description: MaxConcurrency is the maximum no, of batch - processors to run concurrently. + processors to run concurrently. this spec is useful only + when the sink group pod operates on asynchronous mode. + loader pods does not needed this. type: integer maxProcessingTime: description: MaxProcessingTime is the max time in ms required to consume one message. Defaults to 1000ms format: int32 type: integer + maxSizePerBatch: + anyOf: + - type: integer + - type: string + description: 'MaxSizePerBatch is the maximum size of the + batch in Bytes, Ki, Mi, Gi Examples: 1000 is 1000 bytes, + 1Ki is 1 Killo byte, 100Mi 100 mega bytes, 1Gi is 1 Giga + bytes' + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true maxWaitSeconds: description: MaxWaitSeconds is the maximum time to wait - before making a batch, make a batch if MaxBytesPerBatch + before making a batch, make a batch if MaxSizePerBatch is not hit during MaxWaitSeconds. type: integer type: object @@ -497,21 +515,30 @@ spec: type: array type: object type: object - maxBytesPerBatch: - description: MaxBytesPerBatch is the maximum bytes per batch. - type: integer maxConcurrency: description: MaxConcurrency is the maximum no, of batch - processors to run concurrently. + processors to run concurrently. 
this spec is useful only + when the sink group pod operates on asynchronous mode. + loader pods does not needed this. type: integer maxProcessingTime: description: MaxProcessingTime is the max time in ms required to consume one message. Defaults to 1000ms format: int32 type: integer + maxSizePerBatch: + anyOf: + - type: integer + - type: string + description: 'MaxSizePerBatch is the maximum size of the + batch in Bytes, Ki, Mi, Gi Examples: 1000 is 1000 bytes, + 1Ki is 1 Killo byte, 100Mi 100 mega bytes, 1Gi is 1 Giga + bytes' + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true maxWaitSeconds: description: MaxWaitSeconds is the maximum time to wait - before making a batch, make a batch if MaxBytesPerBatch + before making a batch, make a batch if MaxSizePerBatch is not hit during MaxWaitSeconds. type: integer type: object @@ -619,21 +646,30 @@ spec: type: array type: object type: object - maxBytesPerBatch: - description: MaxBytesPerBatch is the maximum bytes per batch. - type: integer maxConcurrency: description: MaxConcurrency is the maximum no, of batch - processors to run concurrently. + processors to run concurrently. this spec is useful only + when the sink group pod operates on asynchronous mode. + loader pods does not needed this. type: integer maxProcessingTime: description: MaxProcessingTime is the max time in ms required to consume one message. Defaults to 1000ms format: int32 type: integer + maxSizePerBatch: + anyOf: + - type: integer + - type: string + description: 'MaxSizePerBatch is the maximum size of the + batch in Bytes, Ki, Mi, Gi Examples: 1000 is 1000 bytes, + 1Ki is 1 Killo byte, 100Mi 100 mega bytes, 1Gi is 1 Giga + bytes' + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true maxWaitSeconds: description: MaxWaitSeconds is the maximum time to wait - before making a batch, make a batch if MaxBytesPerBatch + before making a batch, make a batch if MaxSizePerBatch is not hit during MaxWaitSeconds. type: integer type: object @@ -666,7 +702,8 @@ spec: format: int32 type: integer maxSize: - description: Max configurations for the loader to batch the load + description: 'Deprecated all of the below spec in favour of SinkGroup + #167 Max configurations for the loader to batch the load' type: integer maxWaitSeconds: type: integer @@ -763,6 +800,539 @@ spec: redshiftSchema: description: RedshiftSchema to sink the data in type: string + sinkGroup: + description: SinkGroup contains the specification for main, reload + and reloadDupe sinkgroups. Operator uses 3 groups to perform Redshiftsink. + The topics which have never been released is part of Reload SinkGroup, + the topics which gets released moves to the Main SinkGroup. ReloadDupe + SinkGroup is used to give realtime upaates to the topics which + are reloading. Defaults are there for all sinkGroups if none is + specifed. + properties: + all: + description: All specifies a common specification for all SinkGroups + properties: + deploymentUnit: + description: 'DeploymentUnit is the unit of deployment for + the batcher or the loader. Using this user can specify + the no of topics and the amount of resources needed to + run them as one unit. Operator calculates the total units + based on this and the total number of topics it needs + to sink. This greatly solves the scaling issues described + in #167.' 
+ properties: + maxTopics: + description: MaxTopics specify the maximum number of + topics that can be part of this unit of deployment. + type: integer + podTemplate: + description: PodTemplate describes the pod specification + for the unit. + properties: + image: + description: Image for the underlying pod + type: string + resources: + description: Resources is for configuring the compute + resources required + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Limits describes the maximum amount + of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Requests describes the minimum + amount of compute resources required. If Requests + is omitted for a container, it defaults to + Limits if that is explicitly specified, otherwise + to an implementation-defined value. More info: + https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + type: object + tolerations: + description: Toleartions the underlying pods should + have + items: + description: The pod this Toleration is attached + to tolerates any taint that matches the triple + using the matching operator + . + properties: + effect: + description: Effect indicates the taint effect + to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, + PreferNoSchedule and NoExecute. + type: string + key: + description: Key is the taint key that the + toleration applies to. Empty means match + all taint keys. If the key is empty, operator + must be Exists; this combination means to + match all values and all keys. + type: string + operator: + description: Operator represents a key's relationship + to the value. Valid operators are Exists + and Equal. Defaults to Equal. Exists is + equivalent to wildcard for value, so that + a pod can tolerate all taints of a particular + category. + type: string + tolerationSeconds: + description: TolerationSeconds represents + the period of time the toleration (which + must be of effect NoExecute, otherwise this + field is ignored) tolerates the taint. By + default, it is not set, which means tolerate + the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict + immediately) by the system. + format: int64 + type: integer + value: + description: Value is the taint value the + toleration matches to. If the operator is + Exists, the value should be empty, otherwise + just a regular string. + type: string + type: object + type: array + type: object + type: object + maxConcurrency: + description: MaxConcurrency is the maximum no, of batch + processors to run concurrently. this spec is useful only + when the sink group pod operates on asynchronous mode. + loader pods does not needed this. + type: integer + maxProcessingTime: + description: MaxProcessingTime is the max time in ms required + to consume one message. 
Defaults to 1000ms + format: int32 + type: integer + maxSizePerBatch: + anyOf: + - type: integer + - type: string + description: 'MaxSizePerBatch is the maximum size of the + batch in Bytes, Ki, Mi, Gi Examples: 1000 is 1000 bytes, + 1Ki is 1 Killo byte, 100Mi 100 mega bytes, 1Gi is 1 Giga + bytes' + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + maxWaitSeconds: + description: MaxWaitSeconds is the maximum time to wait + before making a batch, make a batch if MaxSizePerBatch + is not hit during MaxWaitSeconds. + type: integer + type: object + main: + description: Main specifies the MainSinkGroup specification, + overwrites All + properties: + deploymentUnit: + description: 'DeploymentUnit is the unit of deployment for + the batcher or the loader. Using this user can specify + the no of topics and the amount of resources needed to + run them as one unit. Operator calculates the total units + based on this and the total number of topics it needs + to sink. This greatly solves the scaling issues described + in #167.' + properties: + maxTopics: + description: MaxTopics specify the maximum number of + topics that can be part of this unit of deployment. + type: integer + podTemplate: + description: PodTemplate describes the pod specification + for the unit. + properties: + image: + description: Image for the underlying pod + type: string + resources: + description: Resources is for configuring the compute + resources required + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Limits describes the maximum amount + of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Requests describes the minimum + amount of compute resources required. If Requests + is omitted for a container, it defaults to + Limits if that is explicitly specified, otherwise + to an implementation-defined value. More info: + https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + type: object + tolerations: + description: Toleartions the underlying pods should + have + items: + description: The pod this Toleration is attached + to tolerates any taint that matches the triple + using the matching operator + . + properties: + effect: + description: Effect indicates the taint effect + to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, + PreferNoSchedule and NoExecute. + type: string + key: + description: Key is the taint key that the + toleration applies to. Empty means match + all taint keys. If the key is empty, operator + must be Exists; this combination means to + match all values and all keys. + type: string + operator: + description: Operator represents a key's relationship + to the value. Valid operators are Exists + and Equal. Defaults to Equal. Exists is + equivalent to wildcard for value, so that + a pod can tolerate all taints of a particular + category. 
+ type: string + tolerationSeconds: + description: TolerationSeconds represents + the period of time the toleration (which + must be of effect NoExecute, otherwise this + field is ignored) tolerates the taint. By + default, it is not set, which means tolerate + the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict + immediately) by the system. + format: int64 + type: integer + value: + description: Value is the taint value the + toleration matches to. If the operator is + Exists, the value should be empty, otherwise + just a regular string. + type: string + type: object + type: array + type: object + type: object + maxConcurrency: + description: MaxConcurrency is the maximum no, of batch + processors to run concurrently. this spec is useful only + when the sink group pod operates on asynchronous mode. + loader pods does not needed this. + type: integer + maxProcessingTime: + description: MaxProcessingTime is the max time in ms required + to consume one message. Defaults to 1000ms + format: int32 + type: integer + maxSizePerBatch: + anyOf: + - type: integer + - type: string + description: 'MaxSizePerBatch is the maximum size of the + batch in Bytes, Ki, Mi, Gi Examples: 1000 is 1000 bytes, + 1Ki is 1 Killo byte, 100Mi 100 mega bytes, 1Gi is 1 Giga + bytes' + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + maxWaitSeconds: + description: MaxWaitSeconds is the maximum time to wait + before making a batch, make a batch if MaxSizePerBatch + is not hit during MaxWaitSeconds. + type: integer + type: object + reload: + description: Reload specifies the ReloadSinkGroup specification, + overwrites All + properties: + deploymentUnit: + description: 'DeploymentUnit is the unit of deployment for + the batcher or the loader. Using this user can specify + the no of topics and the amount of resources needed to + run them as one unit. Operator calculates the total units + based on this and the total number of topics it needs + to sink. This greatly solves the scaling issues described + in #167.' + properties: + maxTopics: + description: MaxTopics specify the maximum number of + topics that can be part of this unit of deployment. + type: integer + podTemplate: + description: PodTemplate describes the pod specification + for the unit. + properties: + image: + description: Image for the underlying pod + type: string + resources: + description: Resources is for configuring the compute + resources required + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Limits describes the maximum amount + of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Requests describes the minimum + amount of compute resources required. If Requests + is omitted for a container, it defaults to + Limits if that is explicitly specified, otherwise + to an implementation-defined value. 
More info: + https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + type: object + tolerations: + description: Toleartions the underlying pods should + have + items: + description: The pod this Toleration is attached + to tolerates any taint that matches the triple + using the matching operator + . + properties: + effect: + description: Effect indicates the taint effect + to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, + PreferNoSchedule and NoExecute. + type: string + key: + description: Key is the taint key that the + toleration applies to. Empty means match + all taint keys. If the key is empty, operator + must be Exists; this combination means to + match all values and all keys. + type: string + operator: + description: Operator represents a key's relationship + to the value. Valid operators are Exists + and Equal. Defaults to Equal. Exists is + equivalent to wildcard for value, so that + a pod can tolerate all taints of a particular + category. + type: string + tolerationSeconds: + description: TolerationSeconds represents + the period of time the toleration (which + must be of effect NoExecute, otherwise this + field is ignored) tolerates the taint. By + default, it is not set, which means tolerate + the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict + immediately) by the system. + format: int64 + type: integer + value: + description: Value is the taint value the + toleration matches to. If the operator is + Exists, the value should be empty, otherwise + just a regular string. + type: string + type: object + type: array + type: object + type: object + maxConcurrency: + description: MaxConcurrency is the maximum no, of batch + processors to run concurrently. this spec is useful only + when the sink group pod operates on asynchronous mode. + loader pods does not needed this. + type: integer + maxProcessingTime: + description: MaxProcessingTime is the max time in ms required + to consume one message. Defaults to 1000ms + format: int32 + type: integer + maxSizePerBatch: + anyOf: + - type: integer + - type: string + description: 'MaxSizePerBatch is the maximum size of the + batch in Bytes, Ki, Mi, Gi Examples: 1000 is 1000 bytes, + 1Ki is 1 Killo byte, 100Mi 100 mega bytes, 1Gi is 1 Giga + bytes' + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + maxWaitSeconds: + description: MaxWaitSeconds is the maximum time to wait + before making a batch, make a batch if MaxSizePerBatch + is not hit during MaxWaitSeconds. + type: integer + type: object + reloadDupe: + description: ReloadDupe specifies the ReloadDupeSinkGroup specification, + overwrites All + properties: + deploymentUnit: + description: 'DeploymentUnit is the unit of deployment for + the batcher or the loader. Using this user can specify + the no of topics and the amount of resources needed to + run them as one unit. Operator calculates the total units + based on this and the total number of topics it needs + to sink. This greatly solves the scaling issues described + in #167.' + properties: + maxTopics: + description: MaxTopics specify the maximum number of + topics that can be part of this unit of deployment. + type: integer + podTemplate: + description: PodTemplate describes the pod specification + for the unit. 
+ properties: + image: + description: Image for the underlying pod + type: string + resources: + description: Resources is for configuring the compute + resources required + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Limits describes the maximum amount + of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Requests describes the minimum + amount of compute resources required. If Requests + is omitted for a container, it defaults to + Limits if that is explicitly specified, otherwise + to an implementation-defined value. More info: + https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + type: object + tolerations: + description: Toleartions the underlying pods should + have + items: + description: The pod this Toleration is attached + to tolerates any taint that matches the triple + using the matching operator + . + properties: + effect: + description: Effect indicates the taint effect + to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, + PreferNoSchedule and NoExecute. + type: string + key: + description: Key is the taint key that the + toleration applies to. Empty means match + all taint keys. If the key is empty, operator + must be Exists; this combination means to + match all values and all keys. + type: string + operator: + description: Operator represents a key's relationship + to the value. Valid operators are Exists + and Equal. Defaults to Equal. Exists is + equivalent to wildcard for value, so that + a pod can tolerate all taints of a particular + category. + type: string + tolerationSeconds: + description: TolerationSeconds represents + the period of time the toleration (which + must be of effect NoExecute, otherwise this + field is ignored) tolerates the taint. By + default, it is not set, which means tolerate + the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict + immediately) by the system. + format: int64 + type: integer + value: + description: Value is the taint value the + toleration matches to. If the operator is + Exists, the value should be empty, otherwise + just a regular string. + type: string + type: object + type: array + type: object + type: object + maxConcurrency: + description: MaxConcurrency is the maximum no, of batch + processors to run concurrently. this spec is useful only + when the sink group pod operates on asynchronous mode. + loader pods does not needed this. + type: integer + maxProcessingTime: + description: MaxProcessingTime is the max time in ms required + to consume one message. 
Defaults to 1000ms + format: int32 + type: integer + maxSizePerBatch: + anyOf: + - type: integer + - type: string + description: 'MaxSizePerBatch is the maximum size of the + batch in Bytes, Ki, Mi, Gi Examples: 1000 is 1000 bytes, + 1Ki is 1 Killo byte, 100Mi 100 mega bytes, 1Gi is 1 Giga + bytes' + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + maxWaitSeconds: + description: MaxWaitSeconds is the maximum time to wait + before making a batch, make a batch if MaxSizePerBatch + is not hit during MaxWaitSeconds. + type: integer + type: object + type: object suspend: description: 'Supsend when turned on makes sure no batcher pods are running for this CRD object. Default: false' diff --git a/config/samples/tipoca_v1_redshiftsink.yaml b/config/samples/tipoca_v1_redshiftsink.yaml index 75fc7c6a7..8c1d3165e 100644 --- a/config/samples/tipoca_v1_redshiftsink.yaml +++ b/config/samples/tipoca_v1_redshiftsink.yaml @@ -14,23 +14,27 @@ spec: maxLoaderLag: 10 batcher: suspend: false - maxSize: 10 - maxWaitSeconds: 30 mask: true maskFile: "https://github.com/practo/tipoca-stream/redshiftsink/pkg/transformer/masker/database.yaml" - podTemplate: - resources: - requests: - cpu: 100m - memory: 200Mi + sinkGroup: + all: + maxSizePerBatch: 10Mi + maxWaitSeconds: 30 + podTemplate: + resources: + requests: + cpu: 100m + memory: 200Mi loader: suspend: false - maxSize: 10 - maxWaitSeconds: 30 redshiftSchema: "inventory" redshiftGroup: "sales" - podTemplate: - resources: - requests: - cpu: 100m - memory: 200Mi + sinkGroup: + all: + maxSizePerBatch: 1Gi + maxWaitSeconds: 30 + podTemplate: + resources: + requests: + cpu: 100m + memory: 200Mi diff --git a/controllers/batcher_deployment.go b/controllers/batcher_deployment.go index 2203944a8..866372cca 100644 --- a/controllers/batcher_deployment.go +++ b/controllers/batcher_deployment.go @@ -11,6 +11,7 @@ import ( yaml "gopkg.in/yaml.v2" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + resource "k8s.io/apimachinery/pkg/api/resource" ) const ( @@ -28,14 +29,17 @@ type Batcher struct { // applyBatcherSinkGroupDefaults applies the defaults for the batcher // deployments of the sink group. User does not need to specify big lengthy // configurations everytime. Defaults are optimized for maximum performance -// and is recommended to use. +// and are recommended to use. 
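One consequence of the resource.Quantity switch: patch 03 declared DefaultMaxBytesPerBatch as an int (1024), but the hunk below feeds it to resource.MustParse, which takes a string. The batcher_handler.go hunk of this patch (listed in the stat but not shown here) presumably redeclares the default as a quantity string, along the lines of (value illustrative):

    // hypothetical shape of the redeclared default; the actual value lives
    // in the pkg/redshiftbatcher/batcher_handler.go hunk not shown here
    var DefaultMaxBytesPerBatch string = "10Mi"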
func applyBatcherSinkGroupDefaults( rsk *tipocav1.RedshiftSink, sgType string, defaultImage string, ) *tipocav1.SinkGroupSpec { // defaults - maxBytesPerBatch := &redshiftbatcher.DefaultMaxBytesPerBatch + defaultMaxBytesPerBatch := resource.MustParse( + redshiftbatcher.DefaultMaxBytesPerBatch, + ) + maxSizePerBatch := &defaultMaxBytesPerBatch maxWaitSeconds := &redshiftbatcher.DefaultMaxWaitSeconds maxConcurrency := &redshiftbatcher.DefaultMaxConcurrency maxProcessingTime := &redshiftbatcher.DefaultMaxProcessingTime @@ -66,8 +70,8 @@ func applyBatcherSinkGroupDefaults( // overwrite with the defaults with the specified values if specifiedSpec != nil { - if specifiedSpec.MaxBytesPerBatch != nil { - maxBytesPerBatch = specifiedSpec.MaxBytesPerBatch + if specifiedSpec.MaxSizePerBatch != nil { + maxSizePerBatch = specifiedSpec.MaxSizePerBatch } if specifiedSpec.MaxWaitSeconds != nil { maxWaitSeconds = specifiedSpec.MaxWaitSeconds @@ -90,7 +94,7 @@ func applyBatcherSinkGroupDefaults( } return &tipocav1.SinkGroupSpec{ - MaxBytesPerBatch: maxBytesPerBatch, + MaxSizePerBatch: maxSizePerBatch, MaxWaitSeconds: maxWaitSeconds, MaxConcurrency: maxConcurrency, MaxProcessingTime: maxProcessingTime, @@ -165,10 +169,12 @@ func NewBatcher( kafkaVersion = defaultKafkaVersion } var maxSize int // Deprecated - var maxBytesPerBatch, maxWaitSeconds, maxConcurrency *int + var maxBytesPerBatch *int64 + var maxWaitSeconds, maxConcurrency *int var maxProcessingTime int32 = redshiftbatcher.DefaultMaxProcessingTime if sinkGroupSpec != nil { - maxBytesPerBatch = sinkGroupSpec.MaxBytesPerBatch + m := sinkGroupSpec.MaxSizePerBatch.Value() + maxBytesPerBatch = &m maxWaitSeconds = sinkGroupSpec.MaxWaitSeconds maxConcurrency = sinkGroupSpec.MaxConcurrency maxProcessingTime = *sinkGroupSpec.MaxProcessingTime diff --git a/controllers/loader_deployment.go b/controllers/loader_deployment.go index b84c41212..2f82d7a92 100644 --- a/controllers/loader_deployment.go +++ b/controllers/loader_deployment.go @@ -12,6 +12,7 @@ import ( yaml "gopkg.in/yaml.v2" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + resource "k8s.io/apimachinery/pkg/api/resource" ) const ( @@ -26,6 +27,84 @@ type Loader struct { config *corev1.ConfigMap } +// applyLoaderSinkGroupDefaults applies the defaults for the loader +// deployments of the sink group. User does not need to specify big lengthy +// configurations everytime. Defaults are optimized for maximum performance +// and are recommended to use. 
+func applyLoaderSinkGroupDefaults( + rsk *tipocav1.RedshiftSink, + sgType string, + defaultImage string, +) *tipocav1.SinkGroupSpec { + // defaults + defaultMaxBytesPerBatch := resource.MustParse( + redshiftloader.DefaultMaxBytesPerBatch, + ) + maxSizePerBatch := &defaultMaxBytesPerBatch + maxWaitSeconds := &redshiftloader.DefaultMaxWaitSeconds + maxProcessingTime := &redshiftloader.DefaultMaxProcessingTime + maxTopics := &DefaultMaxBatcherTopics + image := &defaultImage + var resources *corev1.ResourceRequirements + var tolerations *[]corev1.Toleration + + // apply the sinkGroup spec rules + var specifiedSpec *tipocav1.SinkGroupSpec + if rsk.Spec.Loader.SinkGroup.All != nil { + specifiedSpec = rsk.Spec.Loader.SinkGroup.All + } + switch sgType { + case MainSinkGroup: + if rsk.Spec.Loader.SinkGroup.Main != nil { + specifiedSpec = rsk.Spec.Loader.SinkGroup.Main + } + case ReloadSinkGroup: + if rsk.Spec.Loader.SinkGroup.Reload != nil { + specifiedSpec = rsk.Spec.Loader.SinkGroup.Reload + } + case ReloadDupeSinkGroup: + if rsk.Spec.Loader.SinkGroup.ReloadDupe != nil { + specifiedSpec = rsk.Spec.Loader.SinkGroup.ReloadDupe + } + } + + // overwrite with the defaults with the specified values + if specifiedSpec != nil { + if specifiedSpec.MaxSizePerBatch != nil { + maxSizePerBatch = specifiedSpec.MaxSizePerBatch + } + if specifiedSpec.MaxWaitSeconds != nil { + maxWaitSeconds = specifiedSpec.MaxWaitSeconds + } + if specifiedSpec.MaxProcessingTime != nil { + maxProcessingTime = specifiedSpec.MaxProcessingTime + } + if specifiedSpec.DeploymentUnit.PodTemplate.Image != nil { + image = specifiedSpec.DeploymentUnit.PodTemplate.Image + } + if specifiedSpec.DeploymentUnit.PodTemplate.Resources != nil { + resources = specifiedSpec.DeploymentUnit.PodTemplate.Resources + } + if specifiedSpec.DeploymentUnit.PodTemplate.Tolerations != nil { + tolerations = specifiedSpec.DeploymentUnit.PodTemplate.Tolerations + } + } + + return &tipocav1.SinkGroupSpec{ + MaxSizePerBatch: maxSizePerBatch, + MaxWaitSeconds: maxWaitSeconds, + MaxProcessingTime: maxProcessingTime, + DeploymentUnit: &tipocav1.DeploymentUnit{ + MaxTopics: maxTopics, + PodTemplate: &tipocav1.RedshiftPodTemplateSpec{ + Image: image, + Resources: resources, + Tolerations: tolerations, + }, + }, + } +} + func loaderSecret(secret map[string]string) (map[string]string, error) { s := make(map[string]string) secretKeys := []string{ @@ -53,8 +132,14 @@ func loaderSecret(secret map[string]string) (map[string]string, error) { return s, nil } -func loaderName(rskName, sinkGroup string) string { - return fmt.Sprintf("%s-%s%s", rskName, sinkGroup, LoaderSuffix) +func loaderName(rskName, sinkGroup string, id string) string { + return fmt.Sprintf( + "%s-%s%s%s", + rskName, + sinkGroup, + id, + LoaderSuffix, + ) } func redshiftConnections(rsk *tipocav1.RedshiftSink, defaultMaxOpenConns, defaultMaxIdleConns int) (int, int) { @@ -76,6 +161,7 @@ func NewLoader( tableSuffix string, secret map[string]string, sinkGroup string, + sinkGroupSpec *tipocav1.SinkGroupSpec, consumerGroups map[string]consumerGroup, defaultImage string, defaultKafkaVersion string, @@ -91,26 +177,36 @@ func NewLoader( return nil, err } - totalTopics := 0 - // defaults kafkaVersion := rsk.Spec.KafkaVersion if kafkaVersion == "" { kafkaVersion = defaultKafkaVersion } + var maxSize int // Deprecated + var maxBytesPerBatch *int64 + var maxWaitSeconds *int var maxProcessingTime int32 = redshiftloader.DefaultMaxProcessingTime - if rsk.Spec.Batcher.MaxProcessingTime != nil { - maxProcessingTime = 
*rsk.Spec.Batcher.MaxProcessingTime + if sinkGroupSpec != nil { + m := sinkGroupSpec.MaxSizePerBatch.Value() + maxBytesPerBatch = &m + maxWaitSeconds = sinkGroupSpec.MaxWaitSeconds + maxProcessingTime = *sinkGroupSpec.MaxProcessingTime + } else { // Deprecated + maxSize = rsk.Spec.Loader.MaxSize + maxWaitSeconds = &rsk.Spec.Loader.MaxWaitSeconds + if rsk.Spec.Loader.MaxProcessingTime != nil { + maxProcessingTime = *rsk.Spec.Loader.MaxProcessingTime + } } - // other defaults for the loader + // defaults which are not configurable for the user var sessionTimeoutSeconds int = 10 var hearbeatIntervalSeconds int = 2 + totalTopics := 0 var groupConfigs []kafka.ConsumerGroupConfig for groupID, group := range consumerGroups { totalTopics += len(group.topics) - groupConfigs = append(groupConfigs, kafka.ConsumerGroupConfig{ GroupID: consumerGroupID(rsk.Name, rsk.Namespace, groupID, "-loader"), TopicRegexes: expandTopicsToRegex( @@ -140,8 +236,9 @@ func NewLoader( conf := config.Config{ Loader: redshiftloader.LoaderConfig{ - MaxSize: rsk.Spec.Loader.MaxSize, - MaxWaitSeconds: rsk.Spec.Loader.MaxWaitSeconds, + MaxSize: maxSize, // Deprecated + MaxWaitSeconds: maxWaitSeconds, + MaxBytesPerBatch: maxBytesPerBatch, }, ConsumerGroups: groupConfigs, S3Sink: s3sink.Config{ diff --git a/controllers/sinkgroup_controller.go b/controllers/sinkgroup_controller.go index 85f644d2c..a4a19e066 100644 --- a/controllers/sinkgroup_controller.go +++ b/controllers/sinkgroup_controller.go @@ -247,27 +247,70 @@ func (sb *buildSinkGroup) buildLoaders( defaultMaxOpenConns int, defaultMaxIdleConns int, ) sinkGroupBuilder { - consumerGroups, err := computeConsumerGroups(sb.topicGroups, sb.topics) - if err != nil { - klog.Fatalf("Error computing consumer group from status, err: %v", err) - } - loader, err := NewLoader( - loaderName(sb.rsk.Name, sb.sgType), - sb.rsk, - tableSuffix, - secret, - sb.sgType, - consumerGroups, - defaultImage, - defaultKafkaVersion, - tlsConfig, - defaultMaxOpenConns, - defaultMaxIdleConns, - ) - if err != nil { - klog.Fatalf("Error making loader: %v", err) + loaders := []Deployment{} + if sb.rsk.Spec.Loader.SinkGroup != nil { + sinkGroupSpec := applyLoaderSinkGroupDefaults( + sb.rsk, + sb.sgType, + defaultImage, + ) + units := assignDeploymentUnits( + sb.topics, + *sinkGroupSpec.DeploymentUnit.MaxTopics, + ) + for _, unit := range units { + consumerGroups, err := computeConsumerGroups( + sb.topicGroups, unit.topics) + if err != nil { + klog.Fatalf( + "Error computing consumer group from status, err: %v", err) + } + loader, err := NewLoader( + loaderName(sb.rsk.Name, sb.sgType, unit.id), + sb.rsk, + tableSuffix, + secret, + sb.sgType, + sinkGroupSpec, + consumerGroups, + defaultImage, + defaultKafkaVersion, + tlsConfig, + defaultMaxOpenConns, + defaultMaxIdleConns, + ) + if err != nil { + klog.Fatalf("Error making loader: %v", err) + } + loaders = append(loaders, loader) + } + } else { // Deprecated + consumerGroups, err := computeConsumerGroups(sb.topicGroups, sb.topics) + if err != nil { + klog.Fatalf( + "Error computing consumer group from status, err: %v", err) + } + loader, err := NewLoader( + loaderName(sb.rsk.Name, sb.sgType, ""), + sb.rsk, + tableSuffix, + secret, + sb.sgType, + nil, + consumerGroups, + defaultImage, + defaultKafkaVersion, + tlsConfig, + defaultMaxOpenConns, + defaultMaxIdleConns, + ) + if err != nil { + klog.Fatalf("Error making loader: %v", err) + } + loaders = append(loaders, loader) } - sb.loaders = []Deployment{loader} + + sb.loaders = loaders return sb } diff --git 
a/pkg/redshiftbatcher/batch_processor.go b/pkg/redshiftbatcher/batch_processor.go index b1143be14..dc9a9e4e3 100644 --- a/pkg/redshiftbatcher/batch_processor.go +++ b/pkg/redshiftbatcher/batch_processor.go @@ -127,6 +127,7 @@ type response struct { endOffset int64 messagesProcessed int maskSchema map[string]serializer.MaskInfo + bytesProcessed int64 } func (b *batchProcessor) ctxCancelled(ctx context.Context) error { @@ -211,6 +212,7 @@ func (b *batchProcessor) signalLoad(resp *response) error { resp.batchSchemaID, // schema of upstream topic resp.maskSchema, resp.skipMerge, + resp.bytesProcessed, ) err := b.signaler.Add( @@ -251,7 +253,9 @@ func (b *batchProcessor) processMessage( message *serializer.Message, resp *response, messageID int, -) error { +) (int64, error) { + var bytesProcessed int64 + klog.V(5).Infof( "%s: batchID:%d id:%d: transforming", b.topic, resp.batchID, messageID, @@ -267,7 +271,7 @@ func (b *batchProcessor) processMessage( resp.maskSchema, ) if err != nil { - return fmt.Errorf( + return bytesProcessed, fmt.Errorf( "transforming schema:%d => inputTable failed: %v", resp.batchSchemaID, err, @@ -284,7 +288,7 @@ func (b *batchProcessor) processMessage( } if resp.batchSchemaID != message.SchemaId { - return fmt.Errorf("topic:%s, schema id mismatch in the batch, %d != %d", + return bytesProcessed, fmt.Errorf("topic:%s, schema id mismatch in the batch, %d != %d", b.topic, resp.batchSchemaID, message.SchemaId, @@ -293,7 +297,7 @@ func (b *batchProcessor) processMessage( err := b.messageTransformer.Transform(message, resp.batchSchemaTable) if err != nil { - return fmt.Errorf( + return bytesProcessed, fmt.Errorf( "Error transforming message:%+v, err:%v", message, err, ) } @@ -301,17 +305,20 @@ func (b *batchProcessor) processMessage( if b.maskMessages { err := b.msgMasker.Transform(message, resp.batchSchemaTable) if err != nil { - return fmt.Errorf("Error masking message:%+v, err:%v", message, err) + return bytesProcessed, fmt.Errorf( + "Error masking message:%+v, err:%v", message, err) } } message.Value = removeEmptyNullValues(message.Value.(map[string]*string)) messageValueBytes, err := json.Marshal(message.Value) if err != nil { - return fmt.Errorf("Error marshalling message.Value, message: %+v", message) + return bytesProcessed, fmt.Errorf( + "Error marshalling message.Value, message: %+v", message) } resp.bodyBuf.Write(messageValueBytes) resp.bodyBuf.Write([]byte{'\n'}) + bytesProcessed += message.Bytes if b.maskMessages && len(resp.maskSchema) == 0 { resp.maskSchema = message.MaskSchema @@ -325,7 +332,7 @@ func (b *batchProcessor) processMessage( ) resp.endOffset = message.Offset - return nil + return bytesProcessed, nil } // processMessages handles the batch procesing and return true if all completes @@ -335,21 +342,23 @@ func (b *batchProcessor) processMessages( ctx context.Context, msgBuf []*serializer.Message, resp *response, -) error { +) (int64, error) { + var totalBytesProcessed int64 for messageID, message := range msgBuf { select { case <-ctx.Done(): - return kafka.ErrSaramaSessionContextDone + return totalBytesProcessed, kafka.ErrSaramaSessionContextDone default: - err := b.processMessage(ctx, message, resp, messageID) + bytesProcessed, err := b.processMessage(ctx, message, resp, messageID) if err != nil { - return err + return totalBytesProcessed, err } + totalBytesProcessed += bytesProcessed } } - return nil + return totalBytesProcessed, nil } func (b *batchProcessor) processBatch( @@ -370,7 +379,7 @@ func (b *batchProcessor) processBatch( 
klog.V(4).Infof("%s: batchID:%d, size:%d: processing...", b.topic, resp.batchID, len(msgBuf), ) - err = b.processMessages(ctx, msgBuf, resp) + resp.bytesProcessed, err = b.processMessages(ctx, msgBuf, resp) if err != nil { resp.err = err return @@ -468,10 +477,12 @@ func (b *batchProcessor) Process( klog.V(2).Infof("%s: finished (%d batches)", b.topic, len(responses)) // return if there was any error in processing any of the batches - totalMessages := 0 + var totalBytesProcessed int64 = 0 + totalMessagesProcessed := 0 var errors error for _, resp := range responses { - totalMessages += resp.messagesProcessed + totalBytesProcessed += resp.bytesProcessed + totalMessagesProcessed += resp.messagesProcessed if resp.err != nil { if resp.err == kafka.ErrSaramaSessionContextDone { klog.V(2).Infof( @@ -548,10 +559,15 @@ func (b *batchProcessor) Process( last := responses[len(responses)-1] b.markOffset(session, b.topic, 0, last.endOffset, b.autoCommit) + setBytesProcessedPerSecond( + b.consumerGroupID, + b.topic, + float64(totalBytesProcessed)/time.Since(now).Seconds(), + ) setMsgsProcessedPerSecond( b.consumerGroupID, b.topic, - float64(totalMessages)/time.Since(now).Seconds(), + float64(totalMessagesProcessed)/time.Since(now).Seconds(), ) klog.V(2).Infof( diff --git a/pkg/redshiftbatcher/batcher_handler.go b/pkg/redshiftbatcher/batcher_handler.go index 180e7e7b6..c209f5e60 100644 --- a/pkg/redshiftbatcher/batcher_handler.go +++ b/pkg/redshiftbatcher/batcher_handler.go @@ -15,10 +15,10 @@ import ( ) var ( - DefaultMaxBytesPerBatch int = 1024 - DefaultMaxWaitSeconds int = 30 - DefaultMaxConcurrency int = 10 - DefaultMaxProcessingTime int32 = 180000 + DefaultMaxBytesPerBatch string = "1024" + DefaultMaxWaitSeconds int = 30 + DefaultMaxConcurrency int = 10 + DefaultMaxProcessingTime int32 = 180000 ) type BatcherConfig struct { @@ -51,9 +51,9 @@ type BatcherConfig struct { MaxConcurrency *int `yaml:"maxConcurrency,omitempty"` // MaxBytesPerBatch is the maximum bytes per batch. Default is there // if the user has not specified a default will be applied. - // If this is specified maxSize specification is not considered. - // Default woult be specified after MaxSize is gone - MaxBytesPerBatch *int `yaml:"maxBytesPerBatch,omitempty"` + // If this is specified, maxSize specification is not considered. 
+ // Default would be specified after MaxSize is gone + MaxBytesPerBatch *int64 `yaml:"maxBytesPerBatch,omitempty"` } // batcherHandler is the sarama consumer handler @@ -66,7 +66,7 @@ type batcherHandler struct { maxWaitSeconds *int maxConcurrency *int - maxBytesPerBatch *int + maxBytesPerBatch *int64 consumerGroupID string kafkaConfig kafka.KafkaConfig diff --git a/pkg/redshiftbatcher/metrics.go b/pkg/redshiftbatcher/metrics.go index e943a0f19..da76fc185 100644 --- a/pkg/redshiftbatcher/metrics.go +++ b/pkg/redshiftbatcher/metrics.go @@ -5,6 +5,15 @@ import ( ) var ( + bytesPerSecMetric = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "rsk", + Subsystem: "batcher", + Name: "bytes_processed_per_second", + Help: "bytes processed per second", + }, + []string{"consumergroup", "topic"}, + ) msgsPerSecMetric = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: "rsk", @@ -17,9 +26,17 @@ var ( ) func init() { + prometheus.MustRegister(bytesPerSecMetric) prometheus.MustRegister(msgsPerSecMetric) } +func setBytesProcessedPerSecond(consumergroup string, topic string, bytesPerSec float64) { + bytesPerSecMetric.WithLabelValues( + consumergroup, + topic, + ).Set(bytesPerSec) +} + func setMsgsProcessedPerSecond(consumergroup string, topic string, msgsPerSec float64) { msgsPerSecMetric.WithLabelValues( consumergroup, diff --git a/pkg/redshiftloader/job.go b/pkg/redshiftloader/job.go index 2493a76b7..b763539f1 100644 --- a/pkg/redshiftloader/job.go +++ b/pkg/redshiftloader/job.go @@ -17,7 +17,8 @@ var JobAvroSchema string = `{ {"name": "s3Path", "type": "string"}, {"name": "schemaId", "type": "int"}, {"name": "maskSchema", "type": "string"}, - {"name": "skipMerge", "type": "string", "default": ""} + {"name": "skipMerge", "type": "string", "default": ""}, + {"name": "batchBytes", "type": "long", "default": 0} ] }` @@ -29,13 +30,15 @@ type Job struct { S3Path string `json:"s3Path"` SchemaId int `json:"schemaId"` // schema id of debezium event MaskSchema map[string]serializer.MaskInfo `json:"maskSchema"` - SkipMerge bool `json:"skipMerge"` // to load using merge strategy or directy COPY + SkipMerge bool `json:"skipMerge"` // to load using merge strategy or directy COPY + BatchBytes int64 `json:"batchBytes"` // batch bytes store sum of all message bytes in this batch } func NewJob( upstreamTopic string, startOffset int64, endOffset int64, csvDialect string, s3Path string, schemaId int, - maskSchema map[string]serializer.MaskInfo, skipMerge bool) Job { + maskSchema map[string]serializer.MaskInfo, skipMerge bool, + batchBytes int64) Job { return Job{ UpstreamTopic: upstreamTopic, @@ -46,6 +49,7 @@ func NewJob( SchemaId: schemaId, MaskSchema: maskSchema, SkipMerge: skipMerge, + BatchBytes: batchBytes, } } @@ -94,6 +98,12 @@ func StringMapToJob(data map[string]interface{}) Job { schema = ToSchemaMap(value) } job.MaskSchema = schema + case "batchBytes": + if value, ok := v.(int64); ok { + job.BatchBytes = value + } else { // backward compatibility + job.BatchBytes = 0 + } } } @@ -190,5 +200,6 @@ func (c Job) ToStringMap() map[string]interface{} { "schemaId": c.SchemaId, "skipMerge": skipMerge, "maskSchema": ToSchemaString(c.MaskSchema), + "batchBytes": c.BatchBytes, } } diff --git a/pkg/redshiftloader/load_processor.go b/pkg/redshiftloader/load_processor.go index f1ef514b2..7fee6eae4 100644 --- a/pkg/redshiftloader/load_processor.go +++ b/pkg/redshiftloader/load_processor.go @@ -610,8 +610,11 @@ func (b *loadProcessor) migrateSchema(ctx context.Context, schemaId int, inputTa func (b 
*loadProcessor) processBatch( ctx context.Context, msgBuf []*serializer.Message, -) error { - +) ( + int64, + error, +) { + var bytesProcessed int64 if b.redshiftStats { klog.V(2).Infof("dbstats: %+v\n", b.redshifter.Stats()) } @@ -627,11 +630,13 @@ func (b *loadProcessor) processBatch( for id, message := range msgBuf { select { case <-ctx.Done(): - return fmt.Errorf("session ctx done, err: %v", ctx.Err()) + return bytesProcessed, fmt.Errorf( + "session ctx done, err: %v", ctx.Err()) default: job := StringMapToJob(message.Value.(map[string]interface{})) schemaId = job.SchemaId b.batchEndOffset = message.Offset + bytesProcessed += job.BatchBytes // this assumes all messages in a batch have same schema id if id == 0 { @@ -647,7 +652,7 @@ func (b *loadProcessor) processBatch( job.MaskSchema, ) if err != nil { - return fmt.Errorf( + return bytesProcessed, fmt.Errorf( "Transforming schema:%d => inputTable failed: %v\n", schemaId, err, @@ -660,7 +665,7 @@ func (b *loadProcessor) processBatch( inputTable.Name + b.tableSuffix) err = b.migrateSchema(ctx, schemaId, inputTable) if err != nil { - return err + return bytesProcessed, err } } entries = append( @@ -682,7 +687,7 @@ func (b *loadProcessor) processBatch( ) err = b.s3sink.UploadS3Manifest(s3ManifestKey, entries) if err != nil { - return fmt.Errorf( + return bytesProcessed, fmt.Errorf( "Error uploading manifest: %s to s3, err:%v\n", s3ManifestKey, err, @@ -693,7 +698,7 @@ func (b *loadProcessor) processBatch( klog.V(2).Infof("%s, load staging\n", b.topic) err = b.createStagingTable(ctx, schemaId, inputTable) if err != nil { - return err + return bytesProcessed, err } err = b.loadTable( ctx, @@ -702,20 +707,20 @@ func (b *loadProcessor) processBatch( s3ManifestKey, ) if err != nil { - return err + return bytesProcessed, err } // merge err = b.merge(ctx) if err != nil { - return err + return bytesProcessed, err } if b.redshiftStats { klog.V(3).Infof("endbatch dbstats: %+v\n", b.redshifter.Stats()) } - return nil + return bytesProcessed, nil } // Process implements serializer.MessageBatchSyncProcessor @@ -731,7 +736,7 @@ func (b *loadProcessor) Process(session sarama.ConsumerGroupSession, msgBuf []*s klog.Infof("%s, batchId:%d, size:%d: Processing...\n", b.topic, b.batchId, len(msgBuf), ) - err = b.processBatch(ctx, msgBuf) + bytesProcessed, err := b.processBatch(ctx, msgBuf) if err != nil { b.printCurrentState() return err @@ -755,7 +760,12 @@ func (b *loadProcessor) Process(session sarama.ConsumerGroupSession, msgBuf []*s b.topic, b.batchId, len(msgBuf), b.batchEndOffset, timeTaken, ) - setMsgsProcessedPerSecond( + setBytesLoadedPerSecond( + b.consumerGroupID, + b.topic, + float64(bytesProcessed)/secondsTaken, + ) + setMsgsLoadedPerSecond( b.consumerGroupID, b.topic, float64(len(msgBuf))/secondsTaken, diff --git a/pkg/redshiftloader/loader_handler.go b/pkg/redshiftloader/loader_handler.go index aa24d16fd..c4786100f 100644 --- a/pkg/redshiftloader/loader_handler.go +++ b/pkg/redshiftloader/loader_handler.go @@ -12,17 +12,28 @@ import ( "time" ) -const ( - DefaultMaxProcessingTime = 600000 +var ( + DefaultMaxBytesPerBatch string = "1000000" // 1 MB + DefaultMaxWaitSeconds int = 60 + DefaultMaxProcessingTime int32 = 600000 ) type LoaderConfig struct { // Maximum size of a batch, on exceeding this batch is pushed // regarless of the wait time. + // Deprecated: in favour of MaxBytesPerBatch MaxSize int `yaml:"maxSize,omitempty"` + // MaxBytesPerBatch is the maximum bytes per batch. 
It is not the size + // of kafka message but the size of all the messages that would be + // loaded in the batch. Default is there + // if the user has not specified a default will be applied. + // If this is specified, maxSize specification is not considered. + // Default would be specified after MaxSize is gone + MaxBytesPerBatch *int64 `yaml:"maxBytesPerBatch,omitempty"` + // MaxWaitSeconds after which the bash would be pushed regardless of its size. - MaxWaitSeconds int `yaml:"maxWaitSeconds,omitempty"` + MaxWaitSeconds *int `yaml:"maxWaitSeconds,omitempty"` } // loaderHandler is the sarama consumer handler @@ -33,8 +44,10 @@ type loaderHandler struct { consumerGroupID string - maxSize int - maxWaitSeconds int + maxSize int // Deprecated + + maxWaitSeconds *int + maxBytesPerBatch *int64 saramaConfig kafka.SaramaConfig redshifter *redshift.Redshift @@ -49,14 +62,21 @@ func NewHandler( saramaConfig kafka.SaramaConfig, redshifter *redshift.Redshift, ) *loaderHandler { + // apply defaults + if loaderConfig.MaxWaitSeconds == nil { + loaderConfig.MaxWaitSeconds = &DefaultMaxWaitSeconds + } + return &loaderHandler{ ready: ready, ctx: ctx, consumerGroupID: consumerGroupID, - maxSize: loaderConfig.MaxSize, - maxWaitSeconds: loaderConfig.MaxWaitSeconds, + maxSize: loaderConfig.MaxSize, // Deprecated + + maxWaitSeconds: loaderConfig.MaxWaitSeconds, + maxBytesPerBatch: loaderConfig.MaxBytesPerBatch, saramaConfig: saramaConfig, redshifter: redshifter, @@ -66,7 +86,7 @@ func NewHandler( // Setup is run at the beginning of a new session, before ConsumeClaim func (h *loaderHandler) Setup(sarama.ConsumerGroupSession) error { - klog.V(1).Info("Setting up handler") + klog.V(1).Info("setting up handler") // Mark the consumer as ready select { @@ -81,7 +101,7 @@ func (h *loaderHandler) Setup(sarama.ConsumerGroupSession) error { // Cleanup is run at the end of a session, once all ConsumeClaim goroutines have exited func (h *loaderHandler) Cleanup(sarama.ConsumerGroupSession) error { - klog.V(1).Info("Cleaning up handler") + klog.V(1).Info("cleaning up handler") return nil } @@ -91,9 +111,8 @@ func (h *loaderHandler) ConsumeClaim(session sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error { klog.V(1).Infof( - "ConsumeClaim started for topic:%s, partition:%d, initalOffset:%d\n", + "%s: consumeClaim started, initalOffset:%d\n", claim.Topic(), - claim.Partition(), claim.InitialOffset(), ) @@ -106,14 +125,20 @@ func (h *loaderHandler) ConsumeClaim(session sarama.ConsumerGroupSession, h.saramaConfig, h.redshifter, ) + maxBufSize := h.maxSize + if h.maxBytesPerBatch != nil { + maxBufSize = serializer.DefaultMessageBufferSize + } msgBatch := serializer.NewMessageSyncBatch( claim.Topic(), claim.Partition(), - h.maxSize, + h.maxSize, // Deprecated + maxBufSize, + h.maxBytesPerBatch, processor, ) maxWaitTicker := time.NewTicker( - time.Duration(h.maxWaitSeconds) * time.Second, + time.Duration(*h.maxWaitSeconds) * time.Second, ) // NOTE: @@ -126,20 +151,21 @@ func (h *loaderHandler) ConsumeClaim(session sarama.ConsumerGroupSession, select { case <-h.ctx.Done(): klog.V(2).Infof( - "ConsumeClaim returning for topic: %s (main ctx done)", + "%s: consumeClaim returning, main ctx done", claim.Topic(), ) return nil case <-session.Context().Done(): klog.V(2).Infof( - "ConsumeClaim returning for topic: %s (session ctx done)", + "%s: consumeClaim returning. 
session ctx done, ctxErr: %v", claim.Topic(), + session.Context().Err(), ) return fmt.Errorf("session ctx done, err: %v", session.Context().Err()) case message, ok := <-claimMsgChan: if !ok { klog.V(2).Infof( - "ConsumeClaim returning for topic: %s (read msg channel closed)", + "%s: consumeClaim returning. read msg channel closed", claim.Topic(), ) return nil @@ -149,14 +175,15 @@ func (h *loaderHandler) ConsumeClaim(session sarama.ConsumerGroupSession, default: case <-h.ctx.Done(): klog.V(2).Infof( - "ConsumeClaim returning for topic: %s (main ctx done)", + "%s: consumeClaim returning, main ctx done", claim.Topic(), ) return nil case <-session.Context().Done(): klog.V(2).Infof( - "ConsumeClaim returning for topic: %s (session ctx done)", + "%s: consumeClaim returning. session ctx done, ctxErr: %v", claim.Topic(), + session.Context().Err(), ) return fmt.Errorf("session ctx done, err: %v", session.Context().Err()) } @@ -178,7 +205,7 @@ func (h *loaderHandler) ConsumeClaim(session sarama.ConsumerGroupSession, lastSchemaId = new(int) } else if *lastSchemaId != upstreamJobSchemaId { klog.V(2).Infof( - "topic:%s: schema changed, %d => %d (batch flush)\n", + "%s: schema changed, %d => %d (batch flush)\n", claim.Topic(), *lastSchemaId, upstreamJobSchemaId, @@ -189,7 +216,7 @@ func (h *loaderHandler) ConsumeClaim(session sarama.ConsumerGroupSession, } } // Process the batch by size or insert in batch - err = msgBatch.Insert(session, msg) + err = msgBatch.Insert(session, msg, job.BatchBytes) if err != nil { return err } @@ -197,7 +224,7 @@ func (h *loaderHandler) ConsumeClaim(session sarama.ConsumerGroupSession, case <-maxWaitTicker.C: // Process the batch by time klog.V(2).Infof( - "topic:%s: maxWaitSeconds hit", + "%s: maxWaitSeconds hit", claim.Topic(), ) err = msgBatch.Process(session) diff --git a/pkg/redshiftloader/metrics.go b/pkg/redshiftloader/metrics.go index 6ae6691d6..0220de045 100644 --- a/pkg/redshiftloader/metrics.go +++ b/pkg/redshiftloader/metrics.go @@ -5,22 +5,39 @@ import ( ) var ( + bytesPerSecMetric = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "rsk", + Subsystem: "loader", + Name: "bytes_loaded_per_second", + Help: "bytes loaded per second", + }, + []string{"consumergroup", "topic"}, + ) msgsPerSecMetric = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: "rsk", Subsystem: "loader", - Name: "messages_processed_per_second", - Help: "number of messages processed per second", + Name: "messages_loaded_per_second", + Help: "number of messages loaded per second", }, []string{"consumergroup", "topic"}, ) ) func init() { + prometheus.MustRegister(bytesPerSecMetric) prometheus.MustRegister(msgsPerSecMetric) } -func setMsgsProcessedPerSecond(consumergroup string, topic string, msgsPerSec float64) { +func setBytesLoadedPerSecond(consumergroup string, topic string, bytesPerSec float64) { + bytesPerSecMetric.WithLabelValues( + consumergroup, + topic, + ).Set(bytesPerSec) +} + +func setMsgsLoadedPerSecond(consumergroup string, topic string, msgsPerSec float64) { msgsPerSecMetric.WithLabelValues( consumergroup, topic, diff --git a/pkg/serializer/message.go b/pkg/serializer/message.go index 074570957..04d431f1b 100644 --- a/pkg/serializer/message.go +++ b/pkg/serializer/message.go @@ -29,7 +29,7 @@ type Message struct { Offset int64 Key string Value interface{} - Bytes int + Bytes int64 Operation string MaskSchema map[string]MaskInfo @@ -40,8 +40,8 @@ type MessageAsyncBatch struct { partition int32 maxSize int msgBuf []*Message - msgBufBytes int - maxBytesPerBatch 
*int + msgBufBytes int64 + maxBytesPerBatch *int64 processChan chan []*Message } @@ -50,7 +50,7 @@ func NewMessageAsyncBatch( partition int32, maxSize int, maxBufSize int, - maxBytesPerBatch *int, + maxBytesPerBatch *int64, processChan chan []*Message, ) *MessageAsyncBatch { return &MessageAsyncBatch{ @@ -117,20 +117,30 @@ func (b *MessageAsyncBatch) Insert(ctx context.Context, msg *Message) { } type MessageSyncBatch struct { - topic string - partition int32 - maxSize int - msgBuf []*Message - processor MessageBatchSyncProcessor + topic string + partition int32 + maxSize int + msgBuf []*Message + msgBufBytes int64 + maxBytesPerBatch *int64 + processor MessageBatchSyncProcessor } -func NewMessageSyncBatch(topic string, partition int32, maxSize int, processor MessageBatchSyncProcessor) *MessageSyncBatch { +func NewMessageSyncBatch( + topic string, + partition int32, + maxSize int, + maxBufSize int, + maxBytesPerBatch *int64, + processor MessageBatchSyncProcessor, +) *MessageSyncBatch { return &MessageSyncBatch{ - topic: topic, - partition: partition, - maxSize: maxSize, - msgBuf: make([]*Message, 0, maxSize), - processor: processor, + topic: topic, + partition: partition, + maxSize: maxSize, + msgBuf: make([]*Message, 0, maxBufSize), + maxBytesPerBatch: maxBytesPerBatch, + processor: processor, } } @@ -138,7 +148,7 @@ func NewMessageSyncBatch(topic string, partition int32, maxSize int, processor M func (b *MessageSyncBatch) Process(session sarama.ConsumerGroupSession) error { if len(b.msgBuf) > 0 { klog.V(2).Infof( - "topic:%s: calling processor...", + "%s: calling processor...", b.topic, ) err := b.processor.Process(session, b.msgBuf) @@ -146,9 +156,10 @@ func (b *MessageSyncBatch) Process(session sarama.ConsumerGroupSession) error { return err } b.msgBuf = make([]*Message, 0, b.maxSize) + b.msgBufBytes = 0 } else { klog.V(2).Infof( - "topic:%s: no msgs", + "%s: no msgs", b.topic, ) } @@ -160,11 +171,26 @@ func (b *MessageSyncBatch) Process(session sarama.ConsumerGroupSession) error { func (b *MessageSyncBatch) Insert( session sarama.ConsumerGroupSession, msg *Message, + batchBytes int64, ) error { b.msgBuf = append(b.msgBuf, msg) + + if b.maxBytesPerBatch != nil && batchBytes != 0 { + b.msgBufBytes += batchBytes + if b.msgBufBytes >= *b.maxBytesPerBatch { + klog.V(2).Infof( + "%s: maxBytesPerBatch hit", + msg.Topic, + ) + return b.Process(session) + } + return nil + } + + // Deprecated if len(b.msgBuf) >= b.maxSize { klog.V(2).Infof( - "topic:%s: maxSize hit", + "%s: maxSize hit", msg.Topic, ) return b.Process(session) diff --git a/pkg/serializer/serializer.go b/pkg/serializer/serializer.go index c65345705..c3ccb64c1 100644 --- a/pkg/serializer/serializer.go +++ b/pkg/serializer/serializer.go @@ -65,7 +65,7 @@ func (c *avroSerializer) Deserialize( Offset: message.Offset, Key: string(message.Key), Value: native, - Bytes: len(message.Value), + Bytes: int64(len(message.Value)), MaskSchema: make(map[string]MaskInfo), }, nil } diff --git a/redshiftsink/README.md b/redshiftsink/README.md index cc466d5b2..56a811a05 100644 --- a/redshiftsink/README.md +++ b/redshiftsink/README.md @@ -50,28 +50,33 @@ spec: maxLoaderLag: 10 batcher: suspend: false - maxSize: 10 - maxWaitSeconds: 30 - maxConcurrency: 10 mask: true maskFile: "github.com/practo/tipoca-stream/redshiftsink/pkg/transformer/masker/database.yaml" - podTemplate: - resources: - requests: - cpu: 100m - memory: 200Mi + sinkGroup: + all: + maxSizePerBatch: 10Mi + maxWaitSeconds: 30 + maxConcurrency: 10 + podTemplate: + resources: + requests: + 
cpu: 100m + memory: 200Mi loader: suspend: false - maxSize: 10 - maxWaitSeconds: 30 - maxProcessingTime: 60000 redshiftSchema: "inventory" redshiftGroup: "sales" - podTemplate: - resources: - requests: - cpu: 100m - memory: 200Mi + sinkGroup: + all: + maxSizePerBatch: 1Gi + maxWaitSeconds: 30 + maxProcessingTime: 60000 + podTemplate: + resources: + requests: + cpu: 100m + memory: 200Mi + ``` ```bash @@ -82,11 +87,6 @@ This will start syncing all the Kakfa topics matching regex `"^db.inventory*"` f ### Configuration -### Redshiftsink Spec Documentation (TODO): -| Spec | Description | Mandatory | -| :------------ | :----------- |:------------| - - ## RedshiftSink Managed Pods Redshiftsink performs the sink by creating two pods. Creating a RedshiftSink CRD installs the batcher and loader pods. Batcher and loader pods details are below: @@ -113,6 +113,7 @@ Flags: #### Metrics ``` +rsk_batcher_bytes_processed_per_second rsk_batcher_messages_processed_per_second ``` @@ -144,7 +145,8 @@ Flags: #### Metrics ``` -rsk_loader_messages_processed_per_second +rsk_loader_bytes_loaded_per_second +rsk_loader_messages_loaded_per_second ``` ### Configuration From 718fa09bd39a1e47a6d00dfc61362dc8eacfbff9 Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Tue, 23 Mar 2021 17:01:34 +0530 Subject: [PATCH 06/49] Fix the spec --- config/samples/tipoca_v1_redshiftsink.yaml | 24 +++++++++++++--------- redshiftsink/README.md | 24 +++++++++++++--------- 2 files changed, 28 insertions(+), 20 deletions(-) diff --git a/config/samples/tipoca_v1_redshiftsink.yaml b/config/samples/tipoca_v1_redshiftsink.yaml index 8c1d3165e..d8f5a0092 100644 --- a/config/samples/tipoca_v1_redshiftsink.yaml +++ b/config/samples/tipoca_v1_redshiftsink.yaml @@ -20,11 +20,13 @@ spec: all: maxSizePerBatch: 10Mi maxWaitSeconds: 30 - podTemplate: - resources: - requests: - cpu: 100m - memory: 200Mi + deploymentUnit: + maxTopics: 30 + podTemplate: + resources: + requests: + cpu: 100m + memory: 200Mi loader: suspend: false redshiftSchema: "inventory" @@ -33,8 +35,10 @@ spec: all: maxSizePerBatch: 1Gi maxWaitSeconds: 30 - podTemplate: - resources: - requests: - cpu: 100m - memory: 200Mi + deploymentUnit: + maxTopics: 30 + podTemplate: + resources: + requests: + cpu: 100m + memory: 200Mi diff --git a/redshiftsink/README.md b/redshiftsink/README.md index 56a811a05..bcee07783 100644 --- a/redshiftsink/README.md +++ b/redshiftsink/README.md @@ -57,11 +57,13 @@ spec: maxSizePerBatch: 10Mi maxWaitSeconds: 30 maxConcurrency: 10 - podTemplate: - resources: - requests: - cpu: 100m - memory: 200Mi + deploymentUnit: + maxTopics: 30 + podTemplate: + resources: + requests: + cpu: 100m + memory: 200Mi loader: suspend: false redshiftSchema: "inventory" @@ -71,11 +73,13 @@ spec: maxSizePerBatch: 1Gi maxWaitSeconds: 30 maxProcessingTime: 60000 - podTemplate: - resources: - requests: - cpu: 100m - memory: 200Mi + deploymentUnit: + maxTopics: 30 + podTemplate: + resources: + requests: + cpu: 100m + memory: 200Mi ``` From decace8062a6adeda778a2827f714ae68ebc08a5 Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Wed, 24 Mar 2021 09:39:16 +0530 Subject: [PATCH 07/49] Fix test --- pkg/redshiftloader/job_test.go | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pkg/redshiftloader/job_test.go b/pkg/redshiftloader/job_test.go index 3e048311f..f13568d90 100644 --- a/pkg/redshiftloader/job_test.go +++ b/pkg/redshiftloader/job_test.go @@ -13,8 +13,17 @@ func TestToStringMap(t *testing.T) { "id": serializer.MaskInfo{Masked: true}, } - job := 
NewJob("upstreamTopic", 2091, 2100, ",", - "s3path", 1, maskSchema, false) + job := NewJob( + "upstreamTopic", + 2091, + 2100, + ",", + "s3path", + 1, + maskSchema, + false, + 10, + ) // fmt.Printf("job_now=%+v\n\n", job) sMap := job.ToStringMap() From 053a93b456304d7dfd7eec77c59bd0966f826c14 Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Wed, 24 Mar 2021 14:15:55 +0530 Subject: [PATCH 08/49] Fixes after self review --- api/v1/redshiftsink_types.go | 26 +-- .../tipoca.k8s.practo.dev_redshiftsinks.yaml | 160 +++++++++--------- controllers/batcher_deployment.go | 2 +- controllers/loader_deployment.go | 4 +- controllers/sinkgroup_controller.go | 5 +- pkg/redshiftbatcher/batch_processor.go | 2 +- 6 files changed, 103 insertions(+), 96 deletions(-) diff --git a/api/v1/redshiftsink_types.go b/api/v1/redshiftsink_types.go index 3bcc24b62..b8de932f1 100644 --- a/api/v1/redshiftsink_types.go +++ b/api/v1/redshiftsink_types.go @@ -48,6 +48,7 @@ type RedshiftPodTemplateSpec struct { type DeploymentUnit struct { // MaxTopics specify the maximum number of topics that // can be part of this unit of deployment. + // +optional MaxTopics *int `json:"maxTopics,omitempty"` // PodTemplate describes the pod specification for the unit. @@ -58,10 +59,10 @@ type DeploymentUnit struct { // SinkGroupSpec defines the specification for one of the three sinkgroups: // 1. MainSinkGroup 2. ReloadSinkGroup 3. ReloadDupeSinkGroup type SinkGroupSpec struct { - // MaxSizePerBatch is the maximum size of the batch in Bytes, Ki, Mi, Gi - // Examples: + // MaxSizePerBatch is the maximum size of the batch in bytes, Ki, Mi, Gi + // Example values: 1000, 1Ki, 100Mi, 1Gi // 1000 is 1000 bytes, 1Ki is 1 Killo byte, - // 100Mi 100 mega bytes, 1Gi is 1 Giga bytes + // 100Mi is 100 mega bytes, 1Gi is 1 Giga bytes // +optional MaxSizePerBatch *resource.Quantity `json:"maxSizePerBatch,omitempty"` // MaxWaitSeconds is the maximum time to wait before making a batch, // +optional MaxWaitSeconds *int `json:"maxWaitSeconds,omitempty"` // MaxConcurrency is the maximum no, of batch processors to run concurrently. - // this spec is useful only when the sink group pod operates on - // asynchronous mode. loader pods does not needed this. + // This spec is useful when the sink group pod operates in asynchronous mode. + // Loader pods do not need this as they are synchronous. // +optional MaxConcurrency *int `json:"maxConcurrency,omitempty"` // MaxProcessingTime is the max time in ms required to consume one message. - // Defaults to 1000ms + // Defaults are 180000ms for the batcher and 600000ms for the loader. // +optional MaxProcessingTime *int32 `json:"maxProcessingTime,omitempty"` // DeploymentUnit is the unit of deployment for the batcher or the loader. // Using this user can specify the no of topics and the amount of resources // needed to run them as one unit. Operator calculates the total units - // based on this and the total number of topics it needs to sink. This - // greatly solves the scaling issues described in #167. + // based on the total number of topics and this unit spec. This largely + // solves the scaling issues described in #167. // +optional DeploymentUnit *DeploymentUnit `json:"deploymentUnit,omitempty"` } @@ -90,9 +91,10 @@ // mask version, target table and the topic release status. This is the specification // to allow to have different set of SinkGroupSpec for each type of SinkGroups.
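As a quick illustration (a sketch under the same apimachinery assumption, not part of the patch), the four example values documented for MaxSizePerBatch above all parse with the standard Kubernetes quantity parser:

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// The forms documented in the MaxSizePerBatch comment above.
	for _, s := range []string{"1000", "1Ki", "100Mi", "1Gi"} {
		q := resource.MustParse(s)
		fmt.Printf("%s => %d bytes\n", s, q.Value())
	}
	// 1000 => 1000, 1Ki => 1024, 100Mi => 104857600, 1Gi => 1073741824
}
```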
// Explaining the precedence: -// The first time sink of a table requires different values for MaxSizePerBatch -// and different pod resources. -// a) If All is specified and none of the others are specified, All is used. +// The configuration required for the full sink and the realtime sink can be different. +// A SinkGroupSpec for each type of sink group lets us provide different +// configurations for each of them. The precedence is as follows: +// a) If All is specified and none of the others are specified, All is used for all SinkGroups. // b) If All and Main both are specified then Main gets used for MainSinkGroup // c) If All and Reload are specified then Reload gets used for ReloadSinkGroup // d) If All and ReloadDupe are specified then ReloadDupe gets used for ReloadDupeSinkGroup @@ -166,8 +168,10 @@ type RedshiftLoaderSpec struct { // RedshiftSchema to sink the data in RedshiftSchema string `json:"redshiftSchema"` // RedshiftMaxOpenConns is the maximum open connections allowed + // +optional RedshiftMaxOpenConns *int `json:"redshiftMaxOpenConns,omitempty"` // RedshiftMaxIdleConns is the maximum idle connections allowed + // +optional RedshiftMaxIdleConns *int `json:"redshiftMaxIdleConns,omitempty"` // RedshiftGroup to give the access to when new topics gets released RedshiftGroup *string `json:"redshiftGroup"` diff --git a/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml b/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml index 95d37f739..a3a9ff411 100644 --- a/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml +++ b/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml @@ -158,9 +158,8 @@ spec: description: 'DeploymentUnit is the unit of deployment for the batcher or the loader. 
Using this user can specify the no of topics and the amount of resources needed to run them as one unit. Operator calculates the total units - based on this and the total number of topics it needs - to sink. This greatly solves the scaling issues described - in #167.' + based on the total number of topics and this unit spec. + This majorly solves the scaling issues described in #167.' properties: maxTopics: description: MaxTopics specify the maximum number of @@ -386,13 +385,14 @@ spec: type: object maxConcurrency: description: MaxConcurrency is the maximum no, of batch - processors to run concurrently. this spec is useful only - when the sink group pod operates on asynchronous mode. - loader pods does not needed this. + processors to run concurrently. This spec is useful when + the sink group pod operates in asynchronous mode. Loader + pods does not needed this as they are synchronous. type: integer maxProcessingTime: description: MaxProcessingTime is the max time in ms required - to consume one message. Defaults to 1000ms + to consume one message. Defaults for the batcher is 180000ms + and loader is 600000ms. format: int32 type: integer maxSizePerBatch: @@ -400,9 +400,9 @@ spec: - type: integer - type: string description: 'MaxSizePerBatch is the maximum size of the - batch in Bytes, Ki, Mi, Gi Examples: 1000 is 1000 bytes, - 1Ki is 1 Killo byte, 100Mi 100 mega bytes, 1Gi is 1 Giga - bytes' + batch in bytes, Ki, Mi, Gi Example values: 1000, 1Ki, + 100Mi, 1Gi 1000 is 1000 bytes, 1Ki is 1 Killo byte, 100Mi + is 100 mega bytes, 1Gi is 1 Giga bytes' pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ x-kubernetes-int-or-string: true maxWaitSeconds: @@ -420,9 +420,8 @@ spec: the batcher or the loader. Using this user can specify the no of topics and the amount of resources needed to run them as one unit. Operator calculates the total units - based on this and the total number of topics it needs - to sink. This greatly solves the scaling issues described - in #167.' + based on the total number of topics and this unit spec. + This majorly solves the scaling issues described in #167.' properties: maxTopics: description: MaxTopics specify the maximum number of @@ -517,13 +516,14 @@ spec: type: object maxConcurrency: description: MaxConcurrency is the maximum no, of batch - processors to run concurrently. this spec is useful only - when the sink group pod operates on asynchronous mode. - loader pods does not needed this. + processors to run concurrently. This spec is useful when + the sink group pod operates in asynchronous mode. Loader + pods does not needed this as they are synchronous. type: integer maxProcessingTime: description: MaxProcessingTime is the max time in ms required - to consume one message. Defaults to 1000ms + to consume one message. Defaults for the batcher is 180000ms + and loader is 600000ms. 
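As a hedged aside, a sketch of where a maxProcessingTime value of this kind typically lands: the field name suggests the consumers wire it into sarama's consumer config (assuming the Shopify/sarama import the handlers appear to use):

```go
package main

import (
	"time"

	"github.com/Shopify/sarama"
)

func main() {
	cfg := sarama.NewConfig()
	// The spec value is in milliseconds; 180000 is the batcher default
	// quoted in the description above.
	cfg.Consumer.MaxProcessingTime = 180000 * time.Millisecond
}
```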
format: int32 type: integer maxSizePerBatch: @@ -531,9 +531,9 @@ spec: - type: integer - type: string description: 'MaxSizePerBatch is the maximum size of the - batch in Bytes, Ki, Mi, Gi Examples: 1000 is 1000 bytes, - 1Ki is 1 Killo byte, 100Mi 100 mega bytes, 1Gi is 1 Giga - bytes' + batch in bytes, Ki, Mi, Gi Example values: 1000, 1Ki, + 100Mi, 1Gi 1000 is 1000 bytes, 1Ki is 1 Killo byte, 100Mi + is 100 mega bytes, 1Gi is 1 Giga bytes' pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ x-kubernetes-int-or-string: true maxWaitSeconds: @@ -551,9 +551,8 @@ spec: the batcher or the loader. Using this user can specify the no of topics and the amount of resources needed to run them as one unit. Operator calculates the total units - based on this and the total number of topics it needs - to sink. This greatly solves the scaling issues described - in #167.' + based on the total number of topics and this unit spec. + This majorly solves the scaling issues described in #167.' properties: maxTopics: description: MaxTopics specify the maximum number of @@ -648,13 +647,14 @@ spec: type: object maxConcurrency: description: MaxConcurrency is the maximum no, of batch - processors to run concurrently. this spec is useful only - when the sink group pod operates on asynchronous mode. - loader pods does not needed this. + processors to run concurrently. This spec is useful when + the sink group pod operates in asynchronous mode. Loader + pods does not needed this as they are synchronous. type: integer maxProcessingTime: description: MaxProcessingTime is the max time in ms required - to consume one message. Defaults to 1000ms + to consume one message. Defaults for the batcher is 180000ms + and loader is 600000ms. format: int32 type: integer maxSizePerBatch: @@ -662,9 +662,9 @@ spec: - type: integer - type: string description: 'MaxSizePerBatch is the maximum size of the - batch in Bytes, Ki, Mi, Gi Examples: 1000 is 1000 bytes, - 1Ki is 1 Killo byte, 100Mi 100 mega bytes, 1Gi is 1 Giga - bytes' + batch in bytes, Ki, Mi, Gi Example values: 1000, 1Ki, + 100Mi, 1Gi 1000 is 1000 bytes, 1Ki is 1 Killo byte, 100Mi + is 100 mega bytes, 1Gi is 1 Giga bytes' pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ x-kubernetes-int-or-string: true maxWaitSeconds: @@ -817,9 +817,8 @@ spec: the batcher or the loader. Using this user can specify the no of topics and the amount of resources needed to run them as one unit. Operator calculates the total units - based on this and the total number of topics it needs - to sink. This greatly solves the scaling issues described - in #167.' + based on the total number of topics and this unit spec. + This majorly solves the scaling issues described in #167.' properties: maxTopics: description: MaxTopics specify the maximum number of @@ -914,13 +913,14 @@ spec: type: object maxConcurrency: description: MaxConcurrency is the maximum no, of batch - processors to run concurrently. this spec is useful only - when the sink group pod operates on asynchronous mode. - loader pods does not needed this. + processors to run concurrently. This spec is useful when + the sink group pod operates in asynchronous mode. Loader + pods does not needed this as they are synchronous. type: integer maxProcessingTime: description: MaxProcessingTime is the max time in ms required - to consume one message. Defaults to 1000ms + to consume one message. 
Defaults for the batcher is 180000ms + and loader is 600000ms. format: int32 type: integer maxSizePerBatch: @@ -928,9 +928,9 @@ spec: - type: integer - type: string description: 'MaxSizePerBatch is the maximum size of the - batch in Bytes, Ki, Mi, Gi Examples: 1000 is 1000 bytes, - 1Ki is 1 Killo byte, 100Mi 100 mega bytes, 1Gi is 1 Giga - bytes' + batch in bytes, Ki, Mi, Gi Example values: 1000, 1Ki, + 100Mi, 1Gi 1000 is 1000 bytes, 1Ki is 1 Killo byte, 100Mi + is 100 mega bytes, 1Gi is 1 Giga bytes' pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ x-kubernetes-int-or-string: true maxWaitSeconds: @@ -948,9 +948,8 @@ spec: the batcher or the loader. Using this user can specify the no of topics and the amount of resources needed to run them as one unit. Operator calculates the total units - based on this and the total number of topics it needs - to sink. This greatly solves the scaling issues described - in #167.' + based on the total number of topics and this unit spec. + This majorly solves the scaling issues described in #167.' properties: maxTopics: description: MaxTopics specify the maximum number of @@ -1045,13 +1044,14 @@ spec: type: object maxConcurrency: description: MaxConcurrency is the maximum no, of batch - processors to run concurrently. this spec is useful only - when the sink group pod operates on asynchronous mode. - loader pods does not needed this. + processors to run concurrently. This spec is useful when + the sink group pod operates in asynchronous mode. Loader + pods does not needed this as they are synchronous. type: integer maxProcessingTime: description: MaxProcessingTime is the max time in ms required - to consume one message. Defaults to 1000ms + to consume one message. Defaults for the batcher is 180000ms + and loader is 600000ms. format: int32 type: integer maxSizePerBatch: @@ -1059,9 +1059,9 @@ spec: - type: integer - type: string description: 'MaxSizePerBatch is the maximum size of the - batch in Bytes, Ki, Mi, Gi Examples: 1000 is 1000 bytes, - 1Ki is 1 Killo byte, 100Mi 100 mega bytes, 1Gi is 1 Giga - bytes' + batch in bytes, Ki, Mi, Gi Example values: 1000, 1Ki, + 100Mi, 1Gi 1000 is 1000 bytes, 1Ki is 1 Killo byte, 100Mi + is 100 mega bytes, 1Gi is 1 Giga bytes' pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ x-kubernetes-int-or-string: true maxWaitSeconds: @@ -1079,9 +1079,8 @@ spec: the batcher or the loader. Using this user can specify the no of topics and the amount of resources needed to run them as one unit. Operator calculates the total units - based on this and the total number of topics it needs - to sink. This greatly solves the scaling issues described - in #167.' + based on the total number of topics and this unit spec. + This majorly solves the scaling issues described in #167.' properties: maxTopics: description: MaxTopics specify the maximum number of @@ -1176,13 +1175,14 @@ spec: type: object maxConcurrency: description: MaxConcurrency is the maximum no, of batch - processors to run concurrently. this spec is useful only - when the sink group pod operates on asynchronous mode. - loader pods does not needed this. + processors to run concurrently. This spec is useful when + the sink group pod operates in asynchronous mode. Loader + pods does not needed this as they are synchronous. 
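To make the deploymentUnit description concrete, a hypothetical sketch of the unit arithmetic; assignDeploymentUnits in this series does the real chunking, which also assigns topics to units, while unitCount here is an illustration only:

```go
package main

import "fmt"

// unitCount mirrors the documented behaviour: topics are split into
// units of at most maxTopics, and each unit becomes one deployment.
func unitCount(totalTopics, maxTopics int) int {
	return (totalTopics + maxTopics - 1) / maxTopics
}

func main() {
	// e.g. 75 topics with the batcher default maxTopics of 30
	fmt.Println(unitCount(75, 30)) // prints 3, i.e. three deployments
}
```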
type: integer maxProcessingTime: description: MaxProcessingTime is the max time in ms required - to consume one message. Defaults to 1000ms + to consume one message. Defaults for the batcher is 180000ms + and loader is 600000ms. format: int32 type: integer maxSizePerBatch: @@ -1190,9 +1190,9 @@ spec: - type: integer - type: string description: 'MaxSizePerBatch is the maximum size of the - batch in Bytes, Ki, Mi, Gi Examples: 1000 is 1000 bytes, - 1Ki is 1 Killo byte, 100Mi 100 mega bytes, 1Gi is 1 Giga - bytes' + batch in bytes, Ki, Mi, Gi Example values: 1000, 1Ki, + 100Mi, 1Gi 1000 is 1000 bytes, 1Ki is 1 Killo byte, 100Mi + is 100 mega bytes, 1Gi is 1 Giga bytes' pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ x-kubernetes-int-or-string: true maxWaitSeconds: @@ -1210,9 +1210,8 @@ spec: the batcher or the loader. Using this user can specify the no of topics and the amount of resources needed to run them as one unit. Operator calculates the total units - based on this and the total number of topics it needs - to sink. This greatly solves the scaling issues described - in #167.' + based on the total number of topics and this unit spec. + This majorly solves the scaling issues described in #167.' properties: maxTopics: description: MaxTopics specify the maximum number of @@ -1307,13 +1306,14 @@ spec: type: object maxConcurrency: description: MaxConcurrency is the maximum no, of batch - processors to run concurrently. this spec is useful only - when the sink group pod operates on asynchronous mode. - loader pods does not needed this. + processors to run concurrently. This spec is useful when + the sink group pod operates in asynchronous mode. Loader + pods does not needed this as they are synchronous. type: integer maxProcessingTime: description: MaxProcessingTime is the max time in ms required - to consume one message. Defaults to 1000ms + to consume one message. Defaults for the batcher is 180000ms + and loader is 600000ms. 
format: int32 type: integer maxSizePerBatch: @@ -1321,9 +1321,9 @@ spec: - type: integer - type: string description: 'MaxSizePerBatch is the maximum size of the - batch in Bytes, Ki, Mi, Gi Examples: 1000 is 1000 bytes, - 1Ki is 1 Killo byte, 100Mi 100 mega bytes, 1Gi is 1 Giga - bytes' + batch in bytes, Ki, Mi, Gi Example values: 1000, 1Ki, + 100Mi, 1Gi 1000 is 1000 bytes, 1Ki is 1 Killo byte, 100Mi + is 100 mega bytes, 1Gi is 1 Giga bytes' pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ x-kubernetes-int-or-string: true maxWaitSeconds: diff --git a/controllers/batcher_deployment.go b/controllers/batcher_deployment.go index 866372cca..0a4acb97b 100644 --- a/controllers/batcher_deployment.go +++ b/controllers/batcher_deployment.go @@ -68,7 +68,7 @@ func applyBatcherSinkGroupDefaults( } } - // overwrite with the defaults with the specified values + // overwrite the defaults with the specified values if specifiedSpec != nil { if specifiedSpec.MaxSizePerBatch != nil { maxSizePerBatch = specifiedSpec.MaxSizePerBatch diff --git a/controllers/loader_deployment.go b/controllers/loader_deployment.go index 2f82d7a92..5dd81ce6a 100644 --- a/controllers/loader_deployment.go +++ b/controllers/loader_deployment.go @@ -43,7 +43,7 @@ func applyLoaderSinkGroupDefaults( maxSizePerBatch := &defaultMaxBytesPerBatch maxWaitSeconds := &redshiftloader.DefaultMaxWaitSeconds maxProcessingTime := &redshiftloader.DefaultMaxProcessingTime - maxTopics := &DefaultMaxBatcherTopics + maxTopics := &DefaultMaxLoaderTopics image := &defaultImage var resources *corev1.ResourceRequirements var tolerations *[]corev1.Toleration @@ -68,7 +68,7 @@ func applyLoaderSinkGroupDefaults( } } - // overwrite with the defaults with the specified values + // overwrite the defaults with the specified values if specifiedSpec != nil { if specifiedSpec.MaxSizePerBatch != nil { maxSizePerBatch = specifiedSpec.MaxSizePerBatch diff --git a/controllers/sinkgroup_controller.go b/controllers/sinkgroup_controller.go index a4a19e066..f872d0de3 100644 --- a/controllers/sinkgroup_controller.go +++ b/controllers/sinkgroup_controller.go @@ -31,7 +31,10 @@ const ( ReloadTableSuffix = "_ts_adx_reload" ) -var DefaultMaxBatcherTopics int = 30 +var ( + DefaultMaxBatcherTopics int = 30 + DefaultMaxLoaderTopics int = 300 +) type sinkGroupInterface interface { reconcile(ctx context.Context) (ctrl.Result, ReconcilerEvent, error) diff --git a/pkg/redshiftbatcher/batch_processor.go b/pkg/redshiftbatcher/batch_processor.go index dc9a9e4e3..e34945011 100644 --- a/pkg/redshiftbatcher/batch_processor.go +++ b/pkg/redshiftbatcher/batch_processor.go @@ -288,7 +288,7 @@ func (b *batchProcessor) processMessage( } if resp.batchSchemaID != message.SchemaId { - return bytesProcessed, fmt.Errorf("topic:%s, schema id mismatch in the batch, %d != %d", + return bytesProcessed, fmt.Errorf("%s: schema id mismatch in the batch, %d != %d", b.topic, resp.batchSchemaID, message.SchemaId, From c203962cabb84571a8ede8aa36c68902f70961e0 Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Wed, 24 Mar 2021 16:24:35 +0530 Subject: [PATCH 09/49] Make deprecated fields optional --- api/v1/redshiftsink_types.go | 5 ++++- config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml | 2 -- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/api/v1/redshiftsink_types.go b/api/v1/redshiftsink_types.go index b8de932f1..1f6b8786b 100644 --- a/api/v1/redshiftsink_types.go +++ b/api/v1/redshiftsink_types.go @@ -178,11 
+178,14 @@ type RedshiftLoaderSpec struct { // Deprecated all of the below spec in favour of SinkGroup #167 // Max configurations for the loader to batch the load - MaxSize int `json:"maxSize"` + // +optional + MaxSize int `json:"maxSize"` + // +optional MaxWaitSeconds int `json:"maxWaitSeconds"` // MaxProcessingTime is the sarama configuration MaxProcessingTime // It is the max time in milliseconds required to consume one message. // Defaults to 600000ms (10mins) + // +optional MaxProcessingTime *int32 `json:"maxProcessingTime,omitempty"` // PodTemplate describes the pods that will be created. // if this is not specifed, a default pod template is created diff --git a/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml b/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml index a3a9ff411..a08252840 100644 --- a/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml +++ b/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml @@ -1338,8 +1338,6 @@ spec: are running for this CRD object. Default: false' type: boolean required: - - maxSize - - maxWaitSeconds - redshiftGroup - redshiftSchema type: object From 2eea5c57f1d6da959f9a9d3b81bbd6ca9930cd2e Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Wed, 24 Mar 2021 16:44:43 +0530 Subject: [PATCH 10/49] Cleanup bug fix: name should be object name --- controllers/batcher_deployment.go | 2 +- controllers/loader_deployment.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/controllers/batcher_deployment.go b/controllers/batcher_deployment.go index 0a4acb97b..9822ae62b 100644 --- a/controllers/batcher_deployment.go +++ b/controllers/batcher_deployment.go @@ -290,7 +290,7 @@ func NewBatcher( } return &Batcher{ - name: name, + name: objectName, namespace: rsk.Namespace, deployment: deploymentFromSpec(deploySpec, configSpec), config: configFromSpec(configSpec), diff --git a/controllers/loader_deployment.go b/controllers/loader_deployment.go index 5dd81ce6a..a48160862 100644 --- a/controllers/loader_deployment.go +++ b/controllers/loader_deployment.go @@ -313,7 +313,7 @@ func NewLoader( } return &Loader{ - name: name, + name: objectName, namespace: rsk.Namespace, deployment: deploymentFromSpec(deploySpec, configSpec), config: configFromSpec(configSpec), From 749b8fa9dc5ca73436593d5217e7d518890a22bd Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Wed, 24 Mar 2021 18:45:21 +0530 Subject: [PATCH 11/49] Add omitempty; needed for deprecated particularly --- api/v1/redshiftsink_types.go | 28 +++++++++---------- .../tipoca.k8s.practo.dev_redshiftsinks.yaml | 11 -------- 2 files changed, 14 insertions(+), 25 deletions(-) diff --git a/api/v1/redshiftsink_types.go b/api/v1/redshiftsink_types.go index 1f6b8786b..bbe98d6d0 100644 --- a/api/v1/redshiftsink_types.go +++ b/api/v1/redshiftsink_types.go @@ -124,7 +124,7 @@ type RedshiftBatcherSpec struct { Mask bool `json:"mask"` // MaskFile to use to apply mask configurations // +optional - MaskFile string `json:"maskFile"` + MaskFile string `json:"maskFile,omitempty"` // +optional // SinkGroup contains the specification for main, reload and reloadDupe @@ -137,8 +137,8 @@ type RedshiftBatcherSpec struct { SinkGroup *SinkGroup `json:"sinkGroup,omitempty"` // Deprecated all of the below spec in favour of SinkGroup #167 - MaxSize int `json:"maxSize"` - MaxWaitSeconds int `json:"maxWaitSeconds"` + MaxSize int `json:"maxSize,omitempty"` + MaxWaitSeconds int `json:"maxWaitSeconds,omitempty"` MaxConcurrency *int `json:"maxConcurrency,omitempty"` // MaxProcessingTime is the 
sarama configuration MaxProcessingTime // It is the max time in milliseconds required to consume one message. @@ -179,9 +179,9 @@ type RedshiftLoaderSpec struct { // Deprecated all of the below spec in favour of SinkGroup #167 // Max configurations for the loader to batch the load // +optional - MaxSize int `json:"maxSize"` + MaxSize int `json:"maxSize,omitempty"` // +optional - MaxWaitSeconds int `json:"maxWaitSeconds"` + MaxWaitSeconds int `json:"maxWaitSeconds,omitempty"` // MaxProcessingTime is the sarama configuration MaxProcessingTime // It is the max time in milliseconds required to consume one message. // Defaults to 600000ms (10mins) @@ -213,7 +213,7 @@ type RedshiftSinkSpec struct { KafkaVersion string `json:"kafkaVersion"` KafkaTopicRegexes string `json:"kafkaTopicRegexes"` // +optional - KafkaLoaderTopicPrefix string `json:"kafkaLoaderTopicPrefix"` + KafkaLoaderTopicPrefix string `json:"kafkaLoaderTopicPrefix,omitempty"` Batcher RedshiftBatcherSpec `json:"batcher"` Loader RedshiftLoaderSpec `json:"loader"` @@ -223,25 +223,25 @@ type RedshiftSinkSpec struct { // This is relevant only if masking is turned on in mask configuration. // It is used for live mask reloading. // +optional - ReleaseCondition *ReleaseCondition `json:"releaseCondition"` + ReleaseCondition *ReleaseCondition `json:"releaseCondition,omitempty"` // TopicReleaseCondition is considered instead of ReleaseCondition // if it is defined for a topic. This is used for topics which // does not work well with central ReleaseCondition for all topics // +optional - TopicReleaseCondition map[string]ReleaseCondition `json:"topicReleaseCondition"` + TopicReleaseCondition map[string]ReleaseCondition `json:"topicReleaseCondition,omitempty"` } type ReleaseCondition struct { // MaxBatcherLag is the maximum lag the batcher consumer group // shoud have to be be considered to be operating in realtime and // to be considered for release. - MaxBatcherLag *int64 `json:"maxBatcherLag"` + MaxBatcherLag *int64 `json:"maxBatcherLag,omitempty"` // MaxLoaderLag is the maximum lag the loader consumer group // shoud have to be be considered to be operating in realtime and // to be considered for release. - MaxLoaderLag *int64 `json:"maxLoaderLag"` + MaxLoaderLag *int64 `json:"maxLoaderLag,omitempty"` } // MaskPhase is a label for the condition of a masking at the current time. @@ -299,7 +299,7 @@ type MaskStatus struct { type Group struct { // LoaderTopicPrefix stores the name of the loader topic prefix - LoaderTopicPrefix string `json:"loaderTopicPrefix"` + LoaderTopicPrefix string `json:"loaderTopicPrefix,omitempty"` // LoaderCurrentOffset stores the last read current offset of the consumer group // This is required to determine if the consumer group has performed any @@ -309,7 +309,7 @@ type Group struct { // throughput consumer groups not getting moved to realtime from reloading. 
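A small sketch (with trimmed-down stand-in fields, not the real types) of why omitempty matters for the deprecated and optional fields in this patch: without it, zero values such as maxSize: 0 would be serialized into every object even when only the new sinkGroup spec is used:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// spec is a hypothetical stand-in for the deprecated fields above.
type spec struct {
	MaxSize        int    `json:"maxSize,omitempty"`
	MaxWaitSeconds int    `json:"maxWaitSeconds,omitempty"`
	MaskFile       string `json:"maskFile,omitempty"`
}

func main() {
	b, _ := json.Marshal(spec{})
	fmt.Println(string(b)) // prints {}, the unset fields are dropped
}
```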
// TODO: This is not dead field once a group moves to released and // should be cleaned after that(status needs to be updated) - LoaderCurrentOffset *int64 `json:"currentOffset"` + LoaderCurrentOffset *int64 `json:"currentOffset,omitempty"` // ID stores the name of the consumer group for the topic // based on this batcher and loader consumer groups are made @@ -323,11 +323,11 @@ type RedshiftSinkStatus struct { // MaskStatus stores the status of masking for topics if masking is enabled // +optional - MaskStatus *MaskStatus `json:"maskStatus"` + MaskStatus *MaskStatus `json:"maskStatus,omitempty"` // TopicGroup stores the group info for the topic // +optional - TopicGroup map[string]Group `json:"topicGroups"` + TopicGroup map[string]Group `json:"topicGroups,omitempty"` } // +kubebuilder:resource:path=redshiftsinks,shortName=rsk;rsks diff --git a/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml b/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml index a08252840..31db0fb6d 100644 --- a/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml +++ b/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml @@ -678,9 +678,6 @@ spec: description: Supsend is used to suspend batcher pods. Defaults to false. type: boolean - required: - - maxSize - - maxWaitSeconds type: object kafkaBrokers: description: Kafka configurations like consumer group and topics to @@ -1359,9 +1356,6 @@ spec: and to be considered for release. format: int64 type: integer - required: - - maxBatcherLag - - maxLoaderLag type: object secretRefName: description: 'Secrets to be used Default: the secret name and namespace @@ -1386,9 +1380,6 @@ spec: and to be considered for release. format: int64 type: integer - required: - - maxBatcherLag - - maxLoaderLag type: object description: TopicReleaseCondition is considered instead of ReleaseCondition if it is defined for a topic. 
This is used for topics which does not @@ -1480,9 +1471,7 @@ spec: prefix type: string required: - - currentOffset - id - - loaderTopicPrefix type: object description: TopicGroup stores the group info for the topic type: object From 3089cfa8c5a5bc2c6868c47ab736e137e710c5d8 Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Wed, 24 Mar 2021 19:01:34 +0530 Subject: [PATCH 12/49] Bug fix for image backward compatibility --- controllers/batcher_deployment.go | 32 ++++++++++++++++--------------- controllers/loader_deployment.go | 32 ++++++++++++++++--------------- 2 files changed, 34 insertions(+), 30 deletions(-) diff --git a/controllers/batcher_deployment.go b/controllers/batcher_deployment.go index 9822ae62b..d8f3186ca 100644 --- a/controllers/batcher_deployment.go +++ b/controllers/batcher_deployment.go @@ -82,14 +82,16 @@ func applyBatcherSinkGroupDefaults( if specifiedSpec.MaxProcessingTime != nil { maxProcessingTime = specifiedSpec.MaxProcessingTime } - if specifiedSpec.DeploymentUnit.PodTemplate.Image != nil { - image = specifiedSpec.DeploymentUnit.PodTemplate.Image - } - if specifiedSpec.DeploymentUnit.PodTemplate.Resources != nil { - resources = specifiedSpec.DeploymentUnit.PodTemplate.Resources - } - if specifiedSpec.DeploymentUnit.PodTemplate.Tolerations != nil { - tolerations = specifiedSpec.DeploymentUnit.PodTemplate.Tolerations + if specifiedSpec.DeploymentUnit.PodTemplate != nil { + if specifiedSpec.DeploymentUnit.PodTemplate.Image != nil { + image = specifiedSpec.DeploymentUnit.PodTemplate.Image + } + if specifiedSpec.DeploymentUnit.PodTemplate.Resources != nil { + resources = specifiedSpec.DeploymentUnit.PodTemplate.Resources + } + if specifiedSpec.DeploymentUnit.PodTemplate.Tolerations != nil { + tolerations = specifiedSpec.DeploymentUnit.PodTemplate.Tolerations + } } } @@ -172,12 +174,14 @@ func NewBatcher( var maxBytesPerBatch *int64 var maxWaitSeconds, maxConcurrency *int var maxProcessingTime int32 = redshiftbatcher.DefaultMaxProcessingTime + var image string if sinkGroupSpec != nil { m := sinkGroupSpec.MaxSizePerBatch.Value() maxBytesPerBatch = &m maxWaitSeconds = sinkGroupSpec.MaxWaitSeconds maxConcurrency = sinkGroupSpec.MaxConcurrency maxProcessingTime = *sinkGroupSpec.MaxProcessingTime + image = *sinkGroupSpec.DeploymentUnit.PodTemplate.Image } else { // Deprecated maxSize = rsk.Spec.Batcher.MaxSize maxWaitSeconds = &rsk.Spec.Batcher.MaxWaitSeconds @@ -188,6 +192,11 @@ func NewBatcher( if rsk.Spec.Batcher.MaxProcessingTime != nil { maxProcessingTime = *rsk.Spec.Batcher.MaxProcessingTime } + if rsk.Spec.Batcher.PodTemplate.Image != nil { + image = *rsk.Spec.Batcher.PodTemplate.Image + } else { + image = defaultImage + } } // defaults which are not configurable for the user var sessionTimeoutSeconds int = 10 @@ -251,13 +260,6 @@ func NewBatcher( totalTopics, ) - var image string - if rsk.Spec.Batcher.PodTemplate.Image != nil { - image = *rsk.Spec.Batcher.PodTemplate.Image - } else { - image = defaultImage - } - confString := string(confBytes) hash, err := getHashStructure(conf) if err != nil { diff --git a/controllers/loader_deployment.go b/controllers/loader_deployment.go index a48160862..966b5cf30 100644 --- a/controllers/loader_deployment.go +++ b/controllers/loader_deployment.go @@ -79,14 +79,16 @@ func applyLoaderSinkGroupDefaults( if specifiedSpec.MaxProcessingTime != nil { maxProcessingTime = specifiedSpec.MaxProcessingTime } - if specifiedSpec.DeploymentUnit.PodTemplate.Image != nil { - image = specifiedSpec.DeploymentUnit.PodTemplate.Image - } - if 
specifiedSpec.DeploymentUnit.PodTemplate.Resources != nil {
- resources = specifiedSpec.DeploymentUnit.PodTemplate.Resources
- }
- if specifiedSpec.DeploymentUnit.PodTemplate.Tolerations != nil {
- tolerations = specifiedSpec.DeploymentUnit.PodTemplate.Tolerations
+ if specifiedSpec.DeploymentUnit.PodTemplate != nil {
+ if specifiedSpec.DeploymentUnit.PodTemplate.Image != nil {
+ image = specifiedSpec.DeploymentUnit.PodTemplate.Image
+ }
+ if specifiedSpec.DeploymentUnit.PodTemplate.Resources != nil {
+ resources = specifiedSpec.DeploymentUnit.PodTemplate.Resources
+ }
+ if specifiedSpec.DeploymentUnit.PodTemplate.Tolerations != nil {
+ tolerations = specifiedSpec.DeploymentUnit.PodTemplate.Tolerations
+ }
 }
 }

@@ -186,17 +188,24 @@ func NewLoader(
 var maxBytesPerBatch *int64
 var maxWaitSeconds *int
 var maxProcessingTime int32 = redshiftloader.DefaultMaxProcessingTime
+ var image string
 if sinkGroupSpec != nil {
 m := sinkGroupSpec.MaxSizePerBatch.Value()
 maxBytesPerBatch = &m
 maxWaitSeconds = sinkGroupSpec.MaxWaitSeconds
 maxProcessingTime = *sinkGroupSpec.MaxProcessingTime
+ image = *sinkGroupSpec.DeploymentUnit.PodTemplate.Image
 } else { // Deprecated
 maxSize = rsk.Spec.Loader.MaxSize
 maxWaitSeconds = &rsk.Spec.Loader.MaxWaitSeconds
 if rsk.Spec.Loader.MaxProcessingTime != nil {
 maxProcessingTime = *rsk.Spec.Loader.MaxProcessingTime
 }
+ if rsk.Spec.Loader.PodTemplate.Image != nil {
+ image = *rsk.Spec.Loader.PodTemplate.Image
+ } else {
+ image = defaultImage
+ }
 }

 // defaults which are not configurable for the user
@@ -274,13 +283,6 @@ func NewLoader(
 totalTopics,
 )

- var image string
- if rsk.Spec.Loader.PodTemplate.Image != nil {
- image = *rsk.Spec.Loader.PodTemplate.Image
- } else {
- image = defaultImage
- }
-
 confString := string(confBytes)
 hash, err := getHashStructure(conf)
 if err != nil {
 return nil, err

From b1b7d7c0f027d99d63bdb305822652d7f62cb4b2 Mon Sep 17 00:00:00 2001
From: Alok Kumar Singh
Date: Wed, 24 Mar 2021 19:20:46 +0530
Subject: [PATCH 13/49] Bug fix for resources and tolerations backward
 compatibility

---
 controllers/batcher_deployment.go | 10 ++++++++--
 controllers/loader_deployment.go | 10 ++++++++--
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/controllers/batcher_deployment.go b/controllers/batcher_deployment.go
index d8f3186ca..d5164345d 100644
--- a/controllers/batcher_deployment.go
+++ b/controllers/batcher_deployment.go
@@ -175,6 +175,8 @@ func NewBatcher(
 var maxWaitSeconds, maxConcurrency *int
 var maxProcessingTime int32 = redshiftbatcher.DefaultMaxProcessingTime
 var image string
+ var resources *corev1.ResourceRequirements
+ var tolerations *[]corev1.Toleration
 if sinkGroupSpec != nil {
 m := sinkGroupSpec.MaxSizePerBatch.Value()
 maxBytesPerBatch = &m
 maxWaitSeconds = sinkGroupSpec.MaxWaitSeconds
 maxConcurrency = sinkGroupSpec.MaxConcurrency
 maxProcessingTime = *sinkGroupSpec.MaxProcessingTime
 image = *sinkGroupSpec.DeploymentUnit.PodTemplate.Image
+ resources = sinkGroupSpec.DeploymentUnit.PodTemplate.Resources
+ tolerations = sinkGroupSpec.DeploymentUnit.PodTemplate.Tolerations
 } else { // Deprecated
 maxSize = rsk.Spec.Batcher.MaxSize
 maxWaitSeconds = &rsk.Spec.Batcher.MaxWaitSeconds
@@ -197,6 +201,8 @@ func NewBatcher(
 } else {
 image = defaultImage
 }
+ resources = rsk.Spec.Batcher.PodTemplate.Resources
+ tolerations = rsk.Spec.Batcher.PodTemplate.Tolerations
 }
 // defaults which are not configurable for the user
 var sessionTimeoutSeconds int = 10
@@ -285,8 +291,8 @@ func NewBatcher(
 namespace: rsk.Namespace,
 labels: labels,
 replicas: &replicas,
- resources:
rsk.Spec.Batcher.PodTemplate.Resources, - tolerations: rsk.Spec.Batcher.PodTemplate.Tolerations, + resources: resources, + tolerations: tolerations, image: image, args: []string{"-v=4", "--config=/config.yaml"}, } diff --git a/controllers/loader_deployment.go b/controllers/loader_deployment.go index 966b5cf30..86631ff3b 100644 --- a/controllers/loader_deployment.go +++ b/controllers/loader_deployment.go @@ -189,12 +189,16 @@ func NewLoader( var maxWaitSeconds *int var maxProcessingTime int32 = redshiftloader.DefaultMaxProcessingTime var image string + var resources *corev1.ResourceRequirements + var tolerations *[]corev1.Toleration if sinkGroupSpec != nil { m := sinkGroupSpec.MaxSizePerBatch.Value() maxBytesPerBatch = &m maxWaitSeconds = sinkGroupSpec.MaxWaitSeconds maxProcessingTime = *sinkGroupSpec.MaxProcessingTime image = *sinkGroupSpec.DeploymentUnit.PodTemplate.Image + resources = sinkGroupSpec.DeploymentUnit.PodTemplate.Resources + tolerations = sinkGroupSpec.DeploymentUnit.PodTemplate.Tolerations } else { // Deprecated maxSize = rsk.Spec.Loader.MaxSize maxWaitSeconds = &rsk.Spec.Loader.MaxWaitSeconds @@ -206,6 +210,8 @@ func NewLoader( } else { image = defaultImage } + resources = rsk.Spec.Loader.PodTemplate.Resources + tolerations = rsk.Spec.Loader.PodTemplate.Tolerations } // defaults which are not configurable for the user @@ -308,8 +314,8 @@ func NewLoader( namespace: rsk.Namespace, labels: labels, replicas: &replicas, - resources: rsk.Spec.Loader.PodTemplate.Resources, - tolerations: rsk.Spec.Loader.PodTemplate.Tolerations, + resources: resources, + tolerations: tolerations, image: image, args: []string{"-v=2", "--config=/config.yaml"}, } From 76ae37714085f5f43e815223b7295841ef2a900f Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Wed, 24 Mar 2021 20:16:56 +0530 Subject: [PATCH 14/49] Use counter and not gauge --- pkg/redshiftbatcher/batch_processor.go | 6 +---- pkg/redshiftbatcher/metrics.go | 37 +++++++++++++++----------- pkg/redshiftloader/load_processor.go | 18 +++++-------- pkg/redshiftloader/metrics.go | 37 +++++++++++++++----------- redshiftsink/README.md | 8 +++--- 5 files changed, 54 insertions(+), 52 deletions(-) diff --git a/pkg/redshiftbatcher/batch_processor.go b/pkg/redshiftbatcher/batch_processor.go index e34945011..c648c7b78 100644 --- a/pkg/redshiftbatcher/batch_processor.go +++ b/pkg/redshiftbatcher/batch_processor.go @@ -559,14 +559,10 @@ func (b *batchProcessor) Process( last := responses[len(responses)-1] b.markOffset(session, b.topic, 0, last.endOffset, b.autoCommit) - setBytesProcessedPerSecond( + setMetrics( b.consumerGroupID, b.topic, float64(totalBytesProcessed)/time.Since(now).Seconds(), - ) - setMsgsProcessedPerSecond( - b.consumerGroupID, - b.topic, float64(totalMessagesProcessed)/time.Since(now).Seconds(), ) diff --git a/pkg/redshiftbatcher/metrics.go b/pkg/redshiftbatcher/metrics.go index da76fc185..ab098b52e 100644 --- a/pkg/redshiftbatcher/metrics.go +++ b/pkg/redshiftbatcher/metrics.go @@ -5,41 +5,46 @@ import ( ) var ( - bytesPerSecMetric = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ + bytesProcessedMetric = prometheus.NewCounterVec( + prometheus.CounterOpts{ Namespace: "rsk", Subsystem: "batcher", - Name: "bytes_processed_per_second", - Help: "bytes processed per second", + Name: "bytes_processed", + Help: "total number of bytes processed", }, []string{"consumergroup", "topic"}, ) - msgsPerSecMetric = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ + msgsProcessedMetric = prometheus.NewCounterVec( + prometheus.CounterOpts{ 
Namespace: "rsk", Subsystem: "batcher", - Name: "messages_processed_per_second", - Help: "number of messages processed per second", + Name: "messages_processed", + Help: "total number of messages processed", }, []string{"consumergroup", "topic"}, ) ) func init() { - prometheus.MustRegister(bytesPerSecMetric) - prometheus.MustRegister(msgsPerSecMetric) + prometheus.MustRegister(bytesProcessedMetric) + prometheus.MustRegister(msgsProcessedMetric) } -func setBytesProcessedPerSecond(consumergroup string, topic string, bytesPerSec float64) { - bytesPerSecMetric.WithLabelValues( +func setBytesProcessed(consumergroup string, topic string, bytes float64) { + bytesProcessedMetric.WithLabelValues( consumergroup, topic, - ).Set(bytesPerSec) + ).Add(bytes) } -func setMsgsProcessedPerSecond(consumergroup string, topic string, msgsPerSec float64) { - msgsPerSecMetric.WithLabelValues( +func setMsgsProcessed(consumergroup string, topic string, msgs float64) { + msgsProcessedMetric.WithLabelValues( consumergroup, topic, - ).Set(msgsPerSec) + ).Add(msgs) +} + +func setMetrics(consumergroup, topic string, bytes, msgs float64) { + setBytesProcessed(consumergroup, topic, bytes) + setMsgsProcessed(consumergroup, topic, msgs) } diff --git a/pkg/redshiftloader/load_processor.go b/pkg/redshiftloader/load_processor.go index 7fee6eae4..38ce68b10 100644 --- a/pkg/redshiftloader/load_processor.go +++ b/pkg/redshiftloader/load_processor.go @@ -733,7 +733,7 @@ func (b *loadProcessor) Process(session sarama.ConsumerGroupSession, msgBuf []*s if err != nil { return err } - klog.Infof("%s, batchId:%d, size:%d: Processing...\n", + klog.Infof("%s, batchId:%d, size:%d: processing...\n", b.topic, b.batchId, len(msgBuf), ) bytesProcessed, err := b.processBatch(ctx, msgBuf) @@ -755,21 +755,17 @@ func (b *loadProcessor) Process(session sarama.ConsumerGroupSession, msgBuf []*s timeTaken = fmt.Sprintf("%.0fs", secondsTaken) } - klog.Infof( - "%s, batchId:%d, size:%d, end:%d:, Processed in %s", - b.topic, b.batchId, len(msgBuf), b.batchEndOffset, timeTaken, - ) - - setBytesLoadedPerSecond( + setMetrics( b.consumerGroupID, b.topic, float64(bytesProcessed)/secondsTaken, - ) - setMsgsLoadedPerSecond( - b.consumerGroupID, - b.topic, float64(len(msgBuf))/secondsTaken, ) + klog.Infof( + "%s, batchId:%d, size:%d, end:%d:, processed in %s", + b.topic, b.batchId, len(msgBuf), b.batchEndOffset, timeTaken, + ) + return nil } diff --git a/pkg/redshiftloader/metrics.go b/pkg/redshiftloader/metrics.go index 0220de045..aba310bd6 100644 --- a/pkg/redshiftloader/metrics.go +++ b/pkg/redshiftloader/metrics.go @@ -5,41 +5,46 @@ import ( ) var ( - bytesPerSecMetric = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ + bytesLoadedMetric = prometheus.NewCounterVec( + prometheus.CounterOpts{ Namespace: "rsk", Subsystem: "loader", - Name: "bytes_loaded_per_second", - Help: "bytes loaded per second", + Name: "bytes_loaded", + Help: "total number of bytes loaded", }, []string{"consumergroup", "topic"}, ) - msgsPerSecMetric = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ + msgsLoadedMetric = prometheus.NewCounterVec( + prometheus.CounterOpts{ Namespace: "rsk", Subsystem: "loader", - Name: "messages_loaded_per_second", - Help: "number of messages loaded per second", + Name: "messages_loaded", + Help: "total number of messages loaded", }, []string{"consumergroup", "topic"}, ) ) func init() { - prometheus.MustRegister(bytesPerSecMetric) - prometheus.MustRegister(msgsPerSecMetric) + prometheus.MustRegister(bytesLoadedMetric) + 
prometheus.MustRegister(msgsLoadedMetric)
 }

-func setBytesLoadedPerSecond(consumergroup string, topic string, bytesPerSec float64) {
- bytesPerSecMetric.WithLabelValues(
+func setBytesLoaded(consumergroup string, topic string, bytes float64) {
+ bytesLoadedMetric.WithLabelValues(
 consumergroup,
 topic,
- ).Set(bytesPerSec)
+ ).Add(bytes)
 }

-func setMsgsLoadedPerSecond(consumergroup string, topic string, msgsPerSec float64) {
- msgsPerSecMetric.WithLabelValues(
+func setMsgsLoaded(consumergroup string, topic string, msgs float64) {
+ msgsLoadedMetric.WithLabelValues(
 consumergroup,
 topic,
- ).Set(msgsPerSec)
+ ).Add(msgs)
+}
+
+func setMetrics(consumergroup, topic string, bytes, msgs float64) {
+ setBytesLoaded(consumergroup, topic, bytes)
+ setMsgsLoaded(consumergroup, topic, msgs)
 }
diff --git a/redshiftsink/README.md b/redshiftsink/README.md
index bcee07783..5275ad2ad 100644
--- a/redshiftsink/README.md
+++ b/redshiftsink/README.md
@@ -117,8 +117,8 @@ Flags:
 #### Metrics
 ```
-rsk_batcher_bytes_processed_per_second
-rsk_batcher_messages_processed_per_second
+rsk_batcher_bytes_processed
+rsk_batcher_messages_processed
 ```
 ### Configuration
@@ -149,8 +149,8 @@ Flags:
 #### Metrics
 ```
-rsk_loader_bytes_loaded_per_second
-rsk_loader_messages_loaded_per_second
+rsk_loader_bytes_loaded
+rsk_loader_messages_loaded
 ```
 ### Configuration

From b1b7d7c0f027d99d63bdb305822652d7f62cb4b2 Mon Sep 17 00:00:00 2001
From: Alok Kumar Singh
Date: Thu, 25 Mar 2021 14:00:40 +0530
Subject: [PATCH 15/49] Unit configuration is parallel now and part of rsk spec

MaxConcurrentReloading is not required anymore.
---
 controllers/redshiftsink_controller.go | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/controllers/redshiftsink_controller.go b/controllers/redshiftsink_controller.go
index a1e2e6171..abaa0e045 100644
--- a/controllers/redshiftsink_controller.go
+++ b/controllers/redshiftsink_controller.go
@@ -67,8 +67,7 @@ type RedshiftSinkReconciler struct {
 }

 const (
- MaxConcurrentReloading = 30
- MaxTopicRelease = 50
+ MaxTopicRelease = 50
 )

 // +kubebuilder:rbac:groups=tipoca.k8s.practo.dev,resources=redshiftsinks,verbs=get;list;watch;create;update;patch;delete
@@ -431,14 +430,10 @@ func (r *RedshiftSinkReconciler) reconcile(
 // tableSuffix: ""
 var reload, reloadDupe, main *sinkGroup

- allowedReloadingTopics := status.reloading
- if len(status.reloading) > MaxConcurrentReloading {
- allowedReloadingTopics = status.reloading[:MaxConcurrentReloading]
- }
 reload = sgBuilder.
 setRedshiftSink(rsk).setClient(r.Client).setScheme(r.Scheme).
 setType(ReloadSinkGroup).
- setTopics(allowedReloadingTopics).
+ setTopics(status.reloading).
 setMaskVersion(status.desiredVersion).
 setTopicGroups().
 buildBatchers(secret, r.DefaultBatcherImage, r.DefaultKafkaVersion, tlsConfig).
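A quick illustration of the counter pattern PATCH 14 above moves to. This is a minimal, self-contained sketch, not part of the patch series; the metric and label names match the batcher metrics in the diff, while the port, consumer group, and topic values are made up for the example:

```go
package main

import (
	"net/http"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

// Counters only ever increase, so the sink no longer computes a
// per-second rate itself; the rate is derived at query time, e.g.
// with PromQL: rate(rsk_batcher_bytes_processed[5m]).
// That stays correct across scrape gaps and pod restarts, which a
// gauge holding a pre-computed rate cannot guarantee.
var bytesProcessedMetric = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Namespace: "rsk",
		Subsystem: "batcher",
		Name:      "bytes_processed",
		Help:      "total number of bytes processed",
	},
	[]string{"consumergroup", "topic"},
)

func main() {
	prometheus.MustRegister(bytesProcessedMetric)

	// Hypothetical values, for illustration only.
	bytesProcessedMetric.WithLabelValues(
		"cg-main-batcher", "db.inventory.customers",
	).Add(1024)

	http.Handle("/metrics", promhttp.Handler())
	http.ListenAndServe(":2112", nil)
}
```

Dashboards that previously read `rsk_batcher_bytes_processed_per_second` directly would switch to the `rate()` form above; the same applies to the renamed loader metrics.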
From db8a13bb6af01f3f6449c252c921bd7401022807 Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Thu, 25 Mar 2021 14:01:31 +0530 Subject: [PATCH 16/49] Remove status info --- controllers/status.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/controllers/status.go b/controllers/status.go index dd54c353f..fb88c3308 100644 --- a/controllers/status.go +++ b/controllers/status.go @@ -301,9 +301,6 @@ func (s *status) info() { klog.V(2).Infof("%s reloading: %d %v", rskName, len(s.reloading), s.reloading) klog.V(2).Infof("%s rDupe: %d %v", rskName, len(s.reloadingDupe), s.reloadingDupe) klog.V(2).Infof("%s realtime: %d %v", rskName, len(s.realtime), s.realtime) - if len(s.reloading) > MaxConcurrentReloading { - klog.V(2).Infof("%s reloadingC: %d %v", rskName, MaxConcurrentReloading, s.reloading[:MaxConcurrentReloading]) - } } // manyReloading checks the percentage of reloading topics of the total topics From f32f5569fa2b8ab37a86722c7b9a0436393ce547 Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Thu, 25 Mar 2021 17:22:14 +0530 Subject: [PATCH 17/49] Fix maxTopics not being set bug --- controllers/batcher_deployment.go | 21 +++++++++++++-------- controllers/loader_deployment.go | 21 +++++++++++++-------- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/controllers/batcher_deployment.go b/controllers/batcher_deployment.go index d5164345d..b98968a6a 100644 --- a/controllers/batcher_deployment.go +++ b/controllers/batcher_deployment.go @@ -82,15 +82,20 @@ func applyBatcherSinkGroupDefaults( if specifiedSpec.MaxProcessingTime != nil { maxProcessingTime = specifiedSpec.MaxProcessingTime } - if specifiedSpec.DeploymentUnit.PodTemplate != nil { - if specifiedSpec.DeploymentUnit.PodTemplate.Image != nil { - image = specifiedSpec.DeploymentUnit.PodTemplate.Image + if specifiedSpec.DeploymentUnit != nil { + if specifiedSpec.DeploymentUnit.MaxTopics != nil { + maxTopics = specifiedSpec.DeploymentUnit.MaxTopics } - if specifiedSpec.DeploymentUnit.PodTemplate.Resources != nil { - resources = specifiedSpec.DeploymentUnit.PodTemplate.Resources - } - if specifiedSpec.DeploymentUnit.PodTemplate.Tolerations != nil { - tolerations = specifiedSpec.DeploymentUnit.PodTemplate.Tolerations + if specifiedSpec.DeploymentUnit.PodTemplate != nil { + if specifiedSpec.DeploymentUnit.PodTemplate.Image != nil { + image = specifiedSpec.DeploymentUnit.PodTemplate.Image + } + if specifiedSpec.DeploymentUnit.PodTemplate.Resources != nil { + resources = specifiedSpec.DeploymentUnit.PodTemplate.Resources + } + if specifiedSpec.DeploymentUnit.PodTemplate.Tolerations != nil { + tolerations = specifiedSpec.DeploymentUnit.PodTemplate.Tolerations + } } } } diff --git a/controllers/loader_deployment.go b/controllers/loader_deployment.go index 86631ff3b..b115474c4 100644 --- a/controllers/loader_deployment.go +++ b/controllers/loader_deployment.go @@ -79,15 +79,20 @@ func applyLoaderSinkGroupDefaults( if specifiedSpec.MaxProcessingTime != nil { maxProcessingTime = specifiedSpec.MaxProcessingTime } - if specifiedSpec.DeploymentUnit.PodTemplate != nil { - if specifiedSpec.DeploymentUnit.PodTemplate.Image != nil { - image = specifiedSpec.DeploymentUnit.PodTemplate.Image + if specifiedSpec.DeploymentUnit != nil { + if specifiedSpec.DeploymentUnit.MaxTopics != nil { + maxTopics = specifiedSpec.DeploymentUnit.MaxTopics } - if specifiedSpec.DeploymentUnit.PodTemplate.Resources != nil { - resources = specifiedSpec.DeploymentUnit.PodTemplate.Resources - } - if specifiedSpec.DeploymentUnit.PodTemplate.Tolerations != nil { 
- tolerations = specifiedSpec.DeploymentUnit.PodTemplate.Tolerations + if specifiedSpec.DeploymentUnit.PodTemplate != nil { + if specifiedSpec.DeploymentUnit.PodTemplate.Image != nil { + image = specifiedSpec.DeploymentUnit.PodTemplate.Image + } + if specifiedSpec.DeploymentUnit.PodTemplate.Resources != nil { + resources = specifiedSpec.DeploymentUnit.PodTemplate.Resources + } + if specifiedSpec.DeploymentUnit.PodTemplate.Tolerations != nil { + tolerations = specifiedSpec.DeploymentUnit.PodTemplate.Tolerations + } } } } From 7df3797a7a5fa8048b7e80c545f4528f00113182 Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Thu, 25 Mar 2021 20:18:48 +0530 Subject: [PATCH 18/49] Log improvements --- controllers/sinkgroup_controller.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/controllers/sinkgroup_controller.go b/controllers/sinkgroup_controller.go index f872d0de3..8913c6071 100644 --- a/controllers/sinkgroup_controller.go +++ b/controllers/sinkgroup_controller.go @@ -485,7 +485,7 @@ func (s *sinkGroup) reconcileConfigMap( return nil, err } - klog.V(2).Infof("Creating configMap: %v", config.Name) + klog.V(2).Infof("rsk/%s Creating configMap: %v", s.rsk.Name, config.Name) event, err := createConfigMap(ctx, s.client, config, s.rsk) if err != nil { return nil, err @@ -521,7 +521,7 @@ func (s *sinkGroup) reconcileDeployment( return nil, err } - klog.V(2).Infof("Updating deployment: %v", deployment.Name) + klog.V(2).Infof("rsk/%s Updating deployment: %v", s.rsk.Name, deployment.Name) event, err := updateDeployment(ctx, s.client, deployment, s.rsk) if err != nil { return nil, err @@ -537,7 +537,7 @@ func (s *sinkGroup) reconcileDeployment( } // create new deployment pointing to new config map - klog.V(2).Infof("Creating deployment: %v", deployment.Name) + klog.V(2).Infof("rsk/%s Creating deployment: %v", s.rsk.Name, deployment.Name) event, err := createDeployment(ctx, s.client, deployment, s.rsk) if err != nil { return nil, err From 8f61fbdddf3fae3892f325d4830096baa95ec4c5 Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Fri, 26 Mar 2021 06:15:44 +0530 Subject: [PATCH 19/49] Sort all states --- controllers/redshiftsink_controller.go | 2 +- controllers/status.go | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/controllers/redshiftsink_controller.go b/controllers/redshiftsink_controller.go index abaa0e045..966c3c9f0 100644 --- a/controllers/redshiftsink_controller.go +++ b/controllers/redshiftsink_controller.go @@ -529,7 +529,7 @@ func (r *RedshiftSinkReconciler) reconcile( if len(status.realtime) >= MaxTopicRelease { releaseCandidates = status.realtime[:MaxTopicRelease] } - klog.V(2).Infof("release candidates: %v", releaseCandidates) + klog.V(2).Infof("rsk/%s release candidates: %v", rsk.Name, releaseCandidates) var releaser *releaser if len(releaseCandidates) > 0 { diff --git a/controllers/status.go b/controllers/status.go index fb88c3308..a9ff2c9a5 100644 --- a/controllers/status.go +++ b/controllers/status.go @@ -68,11 +68,13 @@ func (sb *buildStatus) setDesiredVersion(version string) statusBuilder { func (sb *buildStatus) setAllTopics(topics []string) statusBuilder { sb.allTopics = topics + sortStringSlice(sb.allTopics) return sb } func (sb *buildStatus) setDiffTopics(topics []string) statusBuilder { sb.diffTopics = topics + sortStringSlice(sb.diffTopics) return sb } @@ -108,6 +110,7 @@ func (sb *buildStatus) computeReleased() statusBuilder { klog.V(2).Infof("rsk/%s, Status empty, released=0 ", sb.rsk.Name) } sb.released = released + 
sortStringSlice(sb.released)
 return sb
 }

@@ -116,6 +119,8 @@ func (sb *buildStatus) setRealtime() statusBuilder {
 sb.realtime = currentTopicsByMaskStatus(
 sb.rsk, tipocav1.MaskRealtime, sb.desiredVersion,
 )
+ sortStringSlice(sb.realtime)
+
 return sb
 }

@@ -124,6 +129,7 @@ func (sb *buildStatus) computeReloading() statusBuilder {
 sb.rsk.Status.MaskStatus.CurrentMaskStatus == nil {
 klog.V(2).Infof("rsk/%s, Status empty, reloading=diffTopics ", sb.rsk.Name)
 sb.reloading = sb.diffTopics
+ sortStringSlice(sb.reloading)
 return sb
 }

@@ -160,6 +166,7 @@ func (sb *buildStatus) computeReloading() statusBuilder {
 }

 sb.reloading = reConstructingReloading
+ sortStringSlice(sb.reloading)
 return sb
 }

@@ -180,6 +187,7 @@ func (sb *buildStatus) computeReloadingDupe() statusBuilder {
 }

 sb.reloadingDupe = reloadDupeTopics
+ sortStringSlice(sb.reloadingDupe)
 return sb
 }

From 50ef538fefeaaa3bd3d0a3472f712aed1b5c1e55 Mon Sep 17 00:00:00 2001
From: Alok Kumar Singh
Date: Fri, 26 Mar 2021 16:48:03 +0530
Subject: [PATCH 20/49] Realtime calculator refactored out

Separates realtime calculation from the sink group, for separation of
concerns. Main reason: batcher and loader lag are needed to
allocateDeploymentUnits.
---
 api/v1/redshiftsink_types.go | 5 +
 controllers/realtime_calculator.go | 308 +++++++++++++++++++++++++
 controllers/redshiftsink_controller.go | 81 ++++---
 controllers/sinkgroup_controller.go | 259 ---------------------
 controllers/util.go | 13 ++
 5 files changed, 365 insertions(+), 301 deletions(-)
 create mode 100644 controllers/realtime_calculator.go

diff --git a/api/v1/redshiftsink_types.go b/api/v1/redshiftsink_types.go
index bbe98d6d0..b87fa9d0a 100644
--- a/api/v1/redshiftsink_types.go
+++ b/api/v1/redshiftsink_types.go
@@ -78,6 +78,11 @@ type SinkGroupSpec struct {
 // Defaults for the batcher is 180000ms and loader is 600000ms.
 // +optional
 MaxProcessingTime *int32 `json:"maxProcessingTime,omitempty"`
+
+ // MaxUnits is the maximum number of units(pods) that can be launched
+ // based on the DeploymentUnit specification
+ // +optional
+ MaxUnits *int32 `json:"maxUnits,omitempty"`
 // DeploymentUnit is the unit of deployment for the batcher or the loader.
 // Using this user can specify the no of topics and the amount of resources
 // needed to run them as one unit.
Operator calculates the total units diff --git a/controllers/realtime_calculator.go b/controllers/realtime_calculator.go new file mode 100644 index 000000000..778987e47 --- /dev/null +++ b/controllers/realtime_calculator.go @@ -0,0 +1,308 @@ +package controllers + +import ( + "fmt" + klog "github.com/practo/klog/v2" + tipocav1 "github.com/practo/tipoca-stream/redshiftsink/api/v1" + kafka "github.com/practo/tipoca-stream/redshiftsink/pkg/kafka" + "math/rand" + "sync" + "time" +) + +var ( + DefaultMaxBatcherTopics int = 30 + DefaultMaxLoaderTopics int = 300 +) + +type realtimeCalculatorInterface interface { + calculate(reloading []string, currentRealtime []string) []string +} + +type offsetPosition struct { + last *int64 + current *int64 +} + +type topicRealtimeInfo struct { + lastUpdate *int64 + batcher *offsetPosition + loader *offsetPosition + batcherRealtime bool + loaderRealtime bool +} + +type realtimeCalculator struct { + rsk *tipocav1.RedshiftSink + watcher kafka.Watcher + topicGroups map[string]tipocav1.Group + cache *sync.Map + + batchersRealtime []string + loadersRealtime []string +} + +func newRealtimeCalculator( + rsk *tipocav1.RedshiftSink, + watcher kafka.Watcher, + topicGroups map[string]tipocav1.Group, + cache *sync.Map, +) realtimeCalculatorInterface { + + return &realtimeCalculator{ + rsk: rsk, + watcher: watcher, + topicGroups: topicGroups, + cache: cache, + } +} + +func (r *realtimeCalculator) maxLag(topic string) (int64, int64) { + var maxBatcherLag, maxLoaderLag int64 + if r.rsk.Spec.ReleaseCondition == nil { + maxBatcherLag = DefaultMaxBatcherLag + maxLoaderLag = DefautMaxLoaderLag + } else { + if r.rsk.Spec.ReleaseCondition.MaxBatcherLag != nil { + maxBatcherLag = *r.rsk.Spec.ReleaseCondition.MaxBatcherLag + } + if r.rsk.Spec.ReleaseCondition.MaxLoaderLag != nil { + maxLoaderLag = *r.rsk.Spec.ReleaseCondition.MaxLoaderLag + } + if r.rsk.Spec.TopicReleaseCondition != nil { + d, ok := r.rsk.Spec.TopicReleaseCondition[topic] + if ok { + if d.MaxBatcherLag != nil { + maxBatcherLag = *d.MaxBatcherLag + } + if d.MaxLoaderLag != nil { + maxLoaderLag = *d.MaxLoaderLag + } + } + } + } + + return maxBatcherLag, maxLoaderLag +} + +// fetchRealtimeCache tires to get the topicRealtimeInfo from cache +// if found in cache and cache is valid it returns true and the info +// else it returns no info and false +func (r *realtimeCalculator) fetchRealtimeCache( + topic string, +) ( + topicRealtimeInfo, bool, +) { + loadedInfo, ok := r.cache.Load(topic) + if !ok { + return topicRealtimeInfo{}, false + } + + // 600 to 840 seconds, randomness to prevent multiple parallel calls + validSec := rand.Intn(240) + 600 + klog.V(5).Infof( + "rsk/%s validSec: %v topic: %s", + r.rsk.Name, + validSec, + topic, + ) + + info := loadedInfo.(topicRealtimeInfo) + if cacheValid(time.Second*time.Duration(validSec), info.lastUpdate) { + klog.V(4).Infof( + "rsk/%s (realtime cache hit) topic: %s", + r.rsk.Name, + topic, + ) + return info, true + } + + return topicRealtimeInfo{}, false +} + +// fetchRealtimeInfo fetches the offset info for the topic +func (r *realtimeCalculator) fetchRealtimeInfo( + topic string, + loaderTopic *string, + group tipocav1.Group, +) ( + topicRealtimeInfo, error, +) { + klog.V(2).Infof("rsk/%s (fetching realtime) topic: %s", r.rsk.Name, topic) + + now := time.Now().UnixNano() + info := topicRealtimeInfo{ + batcher: &offsetPosition{}, + loader: &offsetPosition{}, + batcherRealtime: false, + loaderRealtime: false, + lastUpdate: &now, + } + // batcher's lag analysis: a) get last + last, 
err := r.watcher.LastOffset(topic, 0)
+ if err != nil {
+ return info, fmt.Errorf("Error getting last offset for %s", topic)
+ }
+ info.batcher.last = &last
+ klog.V(4).Infof("%s, lastOffset=%v", topic, last)
+
+ // batcher's lag analysis: b) get current
+ current, err := r.watcher.CurrentOffset(
+ consumerGroupID(r.rsk.Name, r.rsk.Namespace, group.ID, "-batcher"),
+ topic,
+ 0,
+ )
+ if err != nil {
+ return info, err
+ }
+ klog.V(4).Infof("%s, currentOffset=%v", topic, current)
+ if current == -1 {
+ info.batcher.current = nil
+ klog.V(2).Infof("%s, batcher cg 404, not realtime", topic)
+ return info, nil
+ } else {
+ info.batcher.current = &current
+ }
+
+ if loaderTopic == nil {
+ return info, nil
+ }
+
+ // loader's lag analysis: a) get last
+ last, err = r.watcher.LastOffset(*loaderTopic, 0)
+ if err != nil {
+ return info, fmt.Errorf("Error getting last offset for %s", *loaderTopic)
+ }
+ info.loader.last = &last
+ klog.V(4).Infof("%s, lastOffset=%v", *loaderTopic, last)
+
+ // loader's lag analysis: b) get current
+ current, err = r.watcher.CurrentOffset(
+ consumerGroupID(r.rsk.Name, r.rsk.Namespace, group.ID, "-loader"),
+ *loaderTopic,
+ 0,
+ )
+ if err != nil {
+ return info, err
+ }
+ klog.V(4).Infof("%s, currentOffset=%v (queried)", *loaderTopic, current)
+ if current == -1 {
+ // CurrentOffset can be -1 in two cases (this may be required in batcher also)
+ // 1. When the Consumer Group was never created in that case we return and consider the topic not realtime
+ // 2. When the Consumer Group had processed before but now is showing -1 currentOffset as it is inactive due to less throughput.
+ // In such a scenario, we consider it realtime. We find this case by saving the currentOffset for the loader topics in RedshiftSinkStatus.TopicGroup
+ if group.LoaderCurrentOffset == nil {
+ klog.V(2).Infof("%s, loader cg 404, not realtime", *loaderTopic)
+ return info, nil
+ }
+ klog.V(2).Infof("%s, currentOffset=%v (old), cg 404, try realtime", *loaderTopic, *group.LoaderCurrentOffset)
+ // give the topic the opportunity to release based on its last found currentOffset
+ info.loader.current = group.LoaderCurrentOffset
+ } else {
+ group.LoaderCurrentOffset = &current
+ // updates the new queried loader offset
+ klog.V(4).Infof("%s, cg found", *loaderTopic)
+ updateTopicGroup(r.rsk, topic, group)
+ info.loader.current = &current
+ }
+
+ return info, nil
+}
+
+// calculate computes the realtime topics and updates its realtime info
+func (r *realtimeCalculator) calculate(reloading []string, currentRealtime []string) []string {
+ if len(reloading) == 0 {
+ return currentRealtime
+ }
+
+ realtimeTopics := []string{}
+ allTopics, err := r.watcher.Topics()
+ if err != nil {
+ klog.Errorf(
+ "Ignoring realtime update. Error fetching all topics, err:%v",
+ err,
+ )
+ return currentRealtime
+ }
+ allTopicsMap := toMap(allTopics)
+
+ current := toMap(currentRealtime)
+ for _, topic := range reloading {
+
+ group, ok := r.topicGroups[topic]
+ if !ok {
+ klog.Errorf("topicGroup 404 in status for: %s", topic)
+ continue
+ }
+
+ var loaderTopic *string
+ ltopic := r.rsk.Spec.KafkaLoaderTopicPrefix + group.ID + "-" + topic
+ _, ok = allTopicsMap[ltopic]
+ if !ok {
+ klog.V(2).Infof("%s topic 404, not realtime.", ltopic)
+ } else {
+ loaderTopic = &ltopic
+ }
+
+ now := time.Now().UnixNano()
+
+ info, hit := r.fetchRealtimeCache(topic)
+ if !hit { // fetch again, cache miss
+ info, err = r.fetchRealtimeInfo(topic, loaderTopic, group)
+ if err != nil {
+ klog.Errorf(
+ "rsk/%s Error fetching realtime info for topic: %s, err: %v",
+ r.rsk.Name,
+ topic,
+ err,
+ )
+ // if there is an error in finding lag
+ // and the topic was already in realtime consider it realtime
+ // consumer groups disappear due to inactivity, hence this
+ _, ok := current[topic]
+ if ok {
+ r.cache.Store(
+ topic,
+ topicRealtimeInfo{
+ batcherRealtime: true,
+ loaderRealtime: true,
+ lastUpdate: &now,
+ },
+ )
+ realtimeTopics = append(realtimeTopics, topic)
+ r.batchersRealtime = append(r.batchersRealtime, topic)
+ r.loadersRealtime = append(r.loadersRealtime, ltopic)
+ continue
+ }
+ }
+ }
+
+ // compute realtime
+ maxBatcherLag, maxLoaderLag := r.maxLag(topic)
+ if info.batcher != nil && info.batcher.last != nil && info.batcher.current != nil {
+ if *info.batcher.last-*info.batcher.current <= maxBatcherLag {
+ klog.V(3).Infof("rsk/%s: %s, batcher realtime", r.rsk.Name, topic)
+ info.batcherRealtime = true
+ r.batchersRealtime = append(r.batchersRealtime, topic)
+ }
+ }
+ if info.loader != nil && info.loader.last != nil && info.loader.current != nil {
+ if *info.loader.last-*info.loader.current <= maxLoaderLag {
+ klog.V(3).Infof("rsk/%s: %s, loader realtime", r.rsk.Name, ltopic)
+ info.loaderRealtime = true
+ r.loadersRealtime = append(r.loadersRealtime, ltopic)
+ }
+ }
+ if info.batcherRealtime && info.loaderRealtime {
+ klog.V(2).Infof("rsk/%s: %s, realtime", r.rsk.Name, topic)
+ realtimeTopics = append(realtimeTopics, topic)
+ } else {
+ klog.V(2).Infof("%v: waiting to reach realtime", topic)
+ }
+
+ info.lastUpdate = &now
+ r.cache.Store(topic, info)
+ }
+
+ return realtimeTopics
+}
diff --git a/controllers/redshiftsink_controller.go b/controllers/redshiftsink_controller.go
index 966c3c9f0..fa7c4be2e 100644
--- a/controllers/redshiftsink_controller.go
+++ b/controllers/redshiftsink_controller.go
@@ -365,7 +365,6 @@ func (r *RedshiftSinkReconciler) reconcile(
 var currentMaskVersion string
 if rsk.Status.MaskStatus != nil &&
 rsk.Status.MaskStatus.CurrentMaskVersion != nil {
-
 currentMaskVersion = *rsk.Status.MaskStatus.CurrentMaskVersion
 } else {
 klog.V(2).Infof("rsk/%s, Status empty, currentVersion=''", rsk.Name)
@@ -414,32 +413,14 @@ func (r *RedshiftSinkReconciler) reconcile(
 klog.Fatalf("rsk/%s unexpected status, no diff but reloading", rsk.Name)
 }

- // SinkGroup are of following types:
- // 1. main: sink group which has desiredMaskVersion
- // and has topics which have been released
- // consumer group: main
- // tableSuffix: ""
- // 2. reload: sink group which has the desiredMaskVersion and is
- // is undergoing reload with new mask configurations
- // consumer group: desiredMaskVersion
- // tableSuffix: "_reload_desiredMaskVersion"
- // 3.
reloadDupe: sink group which has the currentMaskVersion - // and will be stopped when reload ones moves to realtime - // and when they are released. - // consumer group: currentMaskVersion - // tableSuffix: "" - var reload, reloadDupe, main *sinkGroup - - reload = sgBuilder. - setRedshiftSink(rsk).setClient(r.Client).setScheme(r.Scheme). - setType(ReloadSinkGroup). - setTopics(status.reloading). - setMaskVersion(status.desiredVersion). - setTopicGroups(). - buildBatchers(secret, r.DefaultBatcherImage, r.DefaultKafkaVersion, tlsConfig). - buildLoaders(secret, r.DefaultLoaderImage, ReloadTableSuffix, r.DefaultKafkaVersion, tlsConfig, r.DefaultRedshiftMaxOpenConns, r.DefaultRedshiftMaxIdleConns). - build() + // Realtime status is always calculated to keep the CurrentOffset + // info updated in the rsk status. This is required so that low throughput + // release do not get blocked due to missing consumer group currentOffset. + reloadTopicGroup := topicGroupBySinkGroup(rsk, ReloadSinkGroup, status.reloading, status.desiredVersion, rsk.Spec.KafkaLoaderTopicPrefix) + calc := newRealtimeCalculator(rsk, kafkaWatcher, reloadTopicGroup, r.KafkaRealtimeCache) + currentRealtime := calc.calculate(status.reloading, status.realtime) + // set allowShuffle reloadingRatio := status.reloadingRatio() allowShuffle := true if reloadingRatio > 0.2 { @@ -449,38 +430,54 @@ func (r *RedshiftSinkReconciler) reconcile( if cacheValid(time.Second*time.Duration(r.ReleaseWaitSeconds), cache.lastCacheRefresh) { allowShuffle = false } - // } else { - // klog.V(2).Infof("rsk/%v init release cache", rsk.Name) - // now := time.Now().UnixNano() - // r.ReleaseCache.Store( - // rsk.Namespace+rsk.Name, - // releaseCache{lastCacheRefresh: &now}, - // ) - // return resultRequeueMilliSeconds(100), nil, nil } } klog.V(2).Infof("rsk/%v allowShuffle=%v, reloadingRatio=%v", rsk.Name, allowShuffle, reloadingRatio) - - // Realtime status is always calculated to keep the CurrentOffset - // info updated in the rsk status. This is required so that low throughput - // release do not get blocked due to missing consumer group currentOffset. - currentRealtime := reload.realtimeTopics(status.realtime, kafkaWatcher, r.KafkaRealtimeCache) - - // Allow realtime update only during release window, to minimize shuffle + // allow realtime update only during release window, to minimize shuffle if allowShuffle { if !subSetSlice(currentRealtime, status.realtime) { for _, moreRealtime := range currentRealtime { status.realtime = appendIfMissing(status.realtime, moreRealtime) } klog.V(2).Infof( - "Reconcile needed, realtime topics updated: %v", status.realtime) + "rsk/%s reconcile needed, realtime topics updated: %v", + rsk.Name, + status.realtime, + ) return resultRequeueMilliSeconds(1500), nil, nil } klog.V(2).Infof("rsk/%v reconciling all sinkGroups", rsk.Name) } else { klog.V(2).Infof("rsk/%s realtime (waiting): %d %v", rsk.Name, len(currentRealtime), currentRealtime) + klog.V(2).Infof("rsk/%v reconciling all sinkGroups (still)", rsk.Name) } + // SinkGroup are of following types: + // 1. main: sink group which has desiredMaskVersion + // and has topics which have been released + // consumer group: main + // tableSuffix: "" + // 2. reload: sink group which has the desiredMaskVersion and is + // is undergoing reload with new mask configurations + // consumer group: desiredMaskVersion + // tableSuffix: "_reload_desiredMaskVersion" + // 3. 
reloadDupe: sink group which has the currentMaskVersion + // and will be stopped when reload ones moves to realtime + // and when they are released. + // consumer group: currentMaskVersion + // tableSuffix: "" + var reload, reloadDupe, main *sinkGroup + + reload = sgBuilder. + setRedshiftSink(rsk).setClient(r.Client).setScheme(r.Scheme). + setType(ReloadSinkGroup). + setTopics(status.reloading). + setMaskVersion(status.desiredVersion). + setTopicGroups(). + buildBatchers(secret, r.DefaultBatcherImage, r.DefaultKafkaVersion, tlsConfig). + buildLoaders(secret, r.DefaultLoaderImage, ReloadTableSuffix, r.DefaultKafkaVersion, tlsConfig, r.DefaultRedshiftMaxOpenConns, r.DefaultRedshiftMaxIdleConns). + build() + reloadDupe = sgBuilder. setRedshiftSink(rsk).setClient(r.Client).setScheme(r.Scheme). setType(ReloadDupeSinkGroup). diff --git a/controllers/sinkgroup_controller.go b/controllers/sinkgroup_controller.go index 8913c6071..fc91f9bd3 100644 --- a/controllers/sinkgroup_controller.go +++ b/controllers/sinkgroup_controller.go @@ -4,8 +4,6 @@ import ( "context" "fmt" "math" - "math/rand" - "sync" "time" klog "github.com/practo/klog/v2" @@ -31,14 +29,8 @@ const ( ReloadTableSuffix = "_ts_adx_reload" ) -var ( - DefaultMaxBatcherTopics int = 30 - DefaultMaxLoaderTopics int = 300 -) - type sinkGroupInterface interface { reconcile(ctx context.Context) (ctrl.Result, ReconcilerEvent, error) - realtimeTopics(currentRealtime []string, watcher kafka.Watcher, cache *sync.Map) []string } type Deployment interface { @@ -758,257 +750,6 @@ func (s *sinkGroup) reconcileLoaders( return nil, nil } -func maxLag(rsk *tipocav1.RedshiftSink, topic string) (int64, int64) { - var maxBatcherLag, maxLoaderLag int64 - if rsk.Spec.ReleaseCondition == nil { - maxBatcherLag = DefaultMaxBatcherLag - maxLoaderLag = DefautMaxLoaderLag - } else { - if rsk.Spec.ReleaseCondition.MaxBatcherLag != nil { - maxBatcherLag = *rsk.Spec.ReleaseCondition.MaxBatcherLag - } - if rsk.Spec.ReleaseCondition.MaxLoaderLag != nil { - maxLoaderLag = *rsk.Spec.ReleaseCondition.MaxLoaderLag - } - if rsk.Spec.TopicReleaseCondition != nil { - d, ok := rsk.Spec.TopicReleaseCondition[topic] - if ok { - if d.MaxBatcherLag != nil { - maxBatcherLag = *d.MaxBatcherLag - } - if d.MaxLoaderLag != nil { - maxLoaderLag = *d.MaxLoaderLag - } - } - } - } - - return maxBatcherLag, maxLoaderLag -} - -func (s *sinkGroup) lagBelowThreshold( - topic string, - batcherLag, - loaderLag, - maxBatcherLag, - maxLoaderLag int64, -) bool { - // if batcherLag <= maxBatcherLag && loaderLag == -1 { - // // TODO: this might lead to false positives, solve it - // // but without it some very low throughput topics wont go live. 
- // // may need to plugin prometheus time series data for analysis later - // // to solve it - // klog.Warningf("topic: %s assumed to have reached realtime as batcherLag<=threshold and loaderLag=-1 (consumer group not active)", topic) - // return true - // } - - klog.V(4).Infof("topic: %s lag=%v", topic, batcherLag) - klog.V(4).Infof("topic: %s lag=%v", topic, loaderLag) - - if batcherLag <= maxBatcherLag && - loaderLag <= maxLoaderLag { - - return true - } - - return false -} - -func cacheValid(validity time.Duration, lastCachedTime *int64) bool { - if lastCachedTime == nil { - return false - } - - if (*lastCachedTime + validity.Nanoseconds()) > time.Now().UnixNano() { - return true - } - - return false -} - -type kafkaRealtimeCache struct { - lastCacheRefresh *int64 - realtime bool -} - -func (s *sinkGroup) topicRealtime( - watcher kafka.Watcher, - topic string, - cache *sync.Map, - allTopics map[string]bool, -) ( - bool, *int64, error, -) { - // use cache to prevent calls to kafka - var realtimeCache kafkaRealtimeCache - cacheLoaded, ok := cache.Load(topic) - if ok { - realtimeCache = cacheLoaded.(kafkaRealtimeCache) - // 600 to 840 seconds - validitySeconds := rand.Intn(240) + 300 - klog.V(5).Infof("rsk/%s validity seconds: %v topic: %s", s.rsk.Name, validitySeconds, topic) - if cacheValid(time.Second*time.Duration(validitySeconds), realtimeCache.lastCacheRefresh) { - klog.V(4).Infof("rsk/%s (realtime cache hit) topic: %s", s.rsk.Name, topic) - if realtimeCache.realtime { - return true, realtimeCache.lastCacheRefresh, nil - } - return false, realtimeCache.lastCacheRefresh, nil - } - } - - // new cache refresh time so that topics are only checked after an interval - // reduces the request to Kafka by big factor - now := time.Now().UnixNano() - maxBatcherLag, maxLoaderLag := maxLag(s.rsk, topic) - - klog.V(2).Infof("rsk/%s (fetching realtime stats) topic: %s", s.rsk.Name, topic) - group, ok := s.topicGroups[topic] - if !ok { - return false, &now, fmt.Errorf("consumerGroupID not found for %s", topic) - } - - // batcher's lag analysis - batcherLastOffset, err := watcher.LastOffset(topic, 0) - if err != nil { - return false, &now, fmt.Errorf("Error getting current offset for %s", topic) - } - klog.V(4).Infof("%s, lastOffset=%v", topic, batcherLastOffset) - - // This won't work for topics which have lastOffset less than lag - // klog.V(2).Infof("%s, lastOffset=%v", topic, batcherLastOffset) - // if batcherLastOffset < maxBatcherLag { - // klog.V(2).Infof("%s, lastOffset < %v, not realtime", topic, maxBatcherLag) - // return false, &now, nil - // } - - batcherCGID := consumerGroupID(s.rsk.Name, s.rsk.Namespace, group.ID, "-batcher") - batcherCurrentOffset, err := watcher.CurrentOffset( - batcherCGID, - topic, - 0, - ) - if err != nil { - return false, &now, err - } - klog.V(4).Infof("%s, currentOffset=%v", topic, batcherCurrentOffset) - if batcherCurrentOffset == -1 { - klog.V(2).Infof("%s, batcher cg 404, not realtime", topic) - return false, &now, nil - } - - // loader's lag analysis - loaderTopic := s.rsk.Spec.KafkaLoaderTopicPrefix + group.ID + "-" + topic - _, ok = allTopics[loaderTopic] - if !ok { - klog.V(2).Infof("%s topic 404, not realtime.", loaderTopic) - return false, &now, nil - } - loaderLastOffset, err := watcher.LastOffset(loaderTopic, 0) - if err != nil { - return false, &now, fmt.Errorf("Error getting last offset for %s", loaderTopic) - } - klog.V(4).Infof("%s, lastOffset=%v", loaderTopic, loaderLastOffset) - - // This won't work for topics which have lastOffset less than 
lag - // if loaderLastOffset < maxLoaderLag { - // klog.V(2).Infof("%s, lastOffset < %v, not realtime", loaderTopic, maxLoaderLag) - // return false, &now, nil - // } - loaderCGID := consumerGroupID(s.rsk.Name, s.rsk.Namespace, group.ID, "-loader") - loaderCurrentOffset, err := watcher.CurrentOffset( - loaderCGID, - loaderTopic, - 0, - ) - if err != nil { - return false, &now, err - } - klog.V(4).Infof("%s, currentOffset=%v (queried)", loaderTopic, loaderCurrentOffset) - if loaderCurrentOffset == -1 { - // CurrentOffset can be -1 in two cases - // 1. When the Consumer Group was never created in that case we return and consider the topic not realtime - // 2. When the Consumer Group had processed before but now is showing -1 currentOffset as it is inactive due to less throughput, - // On such a scenario, we consider it realtime. - // we find this case by saving the currentOffset for the loader topcics in RedshiftSink Topic Group Status - if group.LoaderCurrentOffset == nil { - klog.V(2).Infof("%s, loader cg 404, not realtime", loaderTopic) - return false, &now, nil - } - klog.V(2).Infof("%s, currentOffset=%v (old), cg 404, try realtime", loaderTopic, *group.LoaderCurrentOffset) - // give the topic the opportunity to release based on its last found currentOffset - loaderCurrentOffset = *group.LoaderCurrentOffset - } else { - group.LoaderCurrentOffset = &loaderCurrentOffset - // updates the new queried lodaer offset - klog.V(4).Infof("%s, cg found", loaderTopic) - updateTopicGroup(s.rsk, topic, group) - } - klog.V(2).Infof("%s, currentOffset=%v, checking realtime", loaderTopic, *group.LoaderCurrentOffset) - - // check realtime - if s.lagBelowThreshold( - topic, - batcherLastOffset-batcherCurrentOffset, // batcher lag - loaderLastOffset-loaderCurrentOffset, // loader lag - maxBatcherLag, - maxLoaderLag, - ) { - klog.V(2).Infof("%s, realtime", topic) - return true, &now, nil - } else { - klog.V(2).Infof("%v: waiting to reach realtime", topic) - return false, &now, nil - } -} - -// realtimeTopics gives back the list of topics whose consumer lags are -// less than or equal to the specified thresholds to be considered realtime -func (s *sinkGroup) realtimeTopics( - currentRealtime []string, - watcher kafka.Watcher, - cache *sync.Map, -) []string { - current := toMap(currentRealtime) - realtimeTopics := []string{} - - allTopics, err := watcher.Topics() - if err != nil { - klog.Errorf( - "Ignoring realtime update. 
Error fetching all topics, err:%v", - err, - ) - return currentRealtime - } - - for _, topic := range s.topics { - realtime, lastRefresh, err := s.topicRealtime( - watcher, topic, cache, toMap(allTopics), - ) - if err != nil { - klog.Errorf( - "rsk/%s Error getting realtime for topic: %s, err: %v", - s.rsk.Name, - topic, - err, - ) - _, ok := current[topic] - // if there is an error in finding lag - // and the topic was already in realtime consider it realtime - if ok { - cache.Store(topic, kafkaRealtimeCache{lastCacheRefresh: lastRefresh, realtime: true}) - realtimeTopics = append(realtimeTopics, topic) - continue - } - } - if realtime { - realtimeTopics = append(realtimeTopics, topic) - } - cache.Store(topic, kafkaRealtimeCache{lastCacheRefresh: lastRefresh, realtime: realtime}) - } - - return realtimeTopics -} - func (s *sinkGroup) reconcile( ctx context.Context, ) ( diff --git a/controllers/util.go b/controllers/util.go index 6c5a4acf6..516c00ae8 100644 --- a/controllers/util.go +++ b/controllers/util.go @@ -6,6 +6,7 @@ import ( "reflect" "sort" "strings" + "time" hashstructure "github.com/mitchellh/hashstructure/v2" klog "github.com/practo/klog/v2" @@ -176,6 +177,18 @@ func getReplicas(suspend bool, totalGroups, totalTopics int) int32 { return 1 } +func cacheValid(validity time.Duration, lastCachedTime *int64) bool { + if lastCachedTime == nil { + return false + } + + if (*lastCachedTime + validity.Nanoseconds()) > time.Now().UnixNano() { + return true + } + + return false +} + func makeLoaderTopics(prefix string, topics []string) []string { var prefixedTopics []string for _, topic := range topics { From 8bee4fe373588f48d47933b92bd20724742a7987 Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Sat, 27 Mar 2021 09:27:24 +0530 Subject: [PATCH 21/49] S3 path bug; make unique using consumerGroupID Fixes https://github.com/practo/tipoca-stream/issues/151 --- pkg/redshiftbatcher/batch_processor.go | 4 ++++ pkg/redshiftloader/load_processor.go | 3 +++ 2 files changed, 7 insertions(+) diff --git a/pkg/redshiftbatcher/batch_processor.go b/pkg/redshiftbatcher/batch_processor.go index c648c7b78..b5d45f2c2 100644 --- a/pkg/redshiftbatcher/batch_processor.go +++ b/pkg/redshiftbatcher/batch_processor.go @@ -145,6 +145,7 @@ func (b *batchProcessor) ctxCancelled(ctx context.Context) error { func constructS3key( s3ucketDir string, + consumerGroupID string, topic string, partition int32, offset int64, @@ -159,6 +160,7 @@ func constructS3key( if maskFileVersion != "" { return filepath.Join( s3ucketDir, + consumerGroupID, topic, maskFileVersion, s3FileName, @@ -166,6 +168,7 @@ func constructS3key( } else { return filepath.Join( s3ucketDir, + consumerGroupID, topic, s3FileName, ) @@ -280,6 +283,7 @@ func (b *batchProcessor) processMessage( resp.batchSchemaTable = r.(redshift.Table) resp.s3Key = constructS3key( b.s3BucketDir, + b.consumerGroupID, message.Topic, message.Partition, message.Offset, diff --git a/pkg/redshiftloader/load_processor.go b/pkg/redshiftloader/load_processor.go index 38ce68b10..cac250072 100644 --- a/pkg/redshiftloader/load_processor.go +++ b/pkg/redshiftloader/load_processor.go @@ -306,6 +306,7 @@ func (b *loadProcessor) insertIntoTargetTable(ctx context.Context, tx *sql.Tx) e s3CopyDir := filepath.Join( viper.GetString("s3sink.bucketDir"), b.topic, + b.consumerGroupID, util.NewUUIDString(), "unload_", ) @@ -510,6 +511,7 @@ func (b *loadProcessor) migrateTable( s3CopyDir := filepath.Join( viper.GetString("s3sink.bucketDir"), b.topic, + b.consumerGroupID, util.NewUUIDString(), 
"migrating_unload_", ) @@ -682,6 +684,7 @@ func (b *loadProcessor) processBatch( s3ManifestKey := filepath.Join( viper.GetString("s3sink.bucketDir"), b.topic, + b.consumerGroupID, util.NewUUIDString(), "manifest.json", ) From fbaf50a5fcbb97c47f17e4449179177a651c52d9 Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Sat, 27 Mar 2021 15:25:04 +0530 Subject: [PATCH 22/49] Keep consumerGroupID the first dir Easier to debug and easier to delete this way --- pkg/redshiftloader/load_processor.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/redshiftloader/load_processor.go b/pkg/redshiftloader/load_processor.go index cac250072..bbefa6d96 100644 --- a/pkg/redshiftloader/load_processor.go +++ b/pkg/redshiftloader/load_processor.go @@ -305,8 +305,8 @@ func (b *loadProcessor) insertIntoTargetTable(ctx context.Context, tx *sql.Tx) e s3CopyDir := filepath.Join( viper.GetString("s3sink.bucketDir"), - b.topic, b.consumerGroupID, + b.topic, util.NewUUIDString(), "unload_", ) @@ -510,8 +510,8 @@ func (b *loadProcessor) migrateTable( s3CopyDir := filepath.Join( viper.GetString("s3sink.bucketDir"), - b.topic, b.consumerGroupID, + b.topic, util.NewUUIDString(), "migrating_unload_", ) @@ -683,8 +683,8 @@ func (b *loadProcessor) processBatch( // upload s3 manifest file to bulk copy data to staging table s3ManifestKey := filepath.Join( viper.GetString("s3sink.bucketDir"), - b.topic, b.consumerGroupID, + b.topic, util.NewUUIDString(), "manifest.json", ) From f27557ec7a1f30c77af5903b325ba218548a461e Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Sun, 28 Mar 2021 20:38:45 +0530 Subject: [PATCH 23/49] Unit allocation Reload topics load as a separate pod for each topic, a new spec max units for reloading comes in to control how many can concurrently reload. Any topic that gets realtime moves to the realtime group and runs as a single pod. It should use main resources and not reload resources (next PR) --- api/v1/redshiftsink_types.go | 22 +- api/v1/zz_generated.deepcopy.go | 15 +- .../tipoca.k8s.practo.dev_redshiftsinks.yaml | 200 ++++++++------ controllers/batcher_deployment.go | 40 ++- controllers/loader_deployment.go | 40 ++- controllers/realtime_calculator.go | 52 ++-- controllers/redshiftsink_controller.go | 8 + controllers/sinkgroup_controller.go | 95 ++++--- controllers/sinkgroup_controller_test.go | 115 -------- controllers/status.go | 5 + controllers/unit_allocator.go | 101 +++++++ controllers/unit_allocator_test.go | 260 ++++++++++++++++++ 12 files changed, 655 insertions(+), 298 deletions(-) delete mode 100644 controllers/sinkgroup_controller_test.go create mode 100644 controllers/unit_allocator.go create mode 100644 controllers/unit_allocator_test.go diff --git a/api/v1/redshiftsink_types.go b/api/v1/redshiftsink_types.go index b87fa9d0a..0d7126acb 100644 --- a/api/v1/redshiftsink_types.go +++ b/api/v1/redshiftsink_types.go @@ -46,11 +46,6 @@ type RedshiftPodTemplateSpec struct { // DeploymentUnit is used to specify how many topics will run together in a unit // and how much resources it needs. type DeploymentUnit struct { - // MaxTopics specify the maximum number of topics that - // can be part of this unit of deployment. - // +optional - MaxTopics *int `json:"maxTopics,omitempty"` - // PodTemplate describes the pod specification for the unit. 
// +optional PodTemplate *RedshiftPodTemplateSpec `json:"podTemplate,omitempty"` @@ -79,12 +74,13 @@ type SinkGroupSpec struct { // +optional MaxProcessingTime *int32 `json:"maxProcessingTime,omitempty"` - // MaxUnits is the maximum number of units(pods) that can be launched - // based on the DeploymentUnit specification + // MaxReloadingUnits is the maximum number of units(pods) that can be launched + // based on the DeploymentUnit specification. Only valid for Reloading SinkGroup. + // This value is at present supported to be configurable only for batcher // +optional - MaxUnits *int32 `json:"maxUnits,omitempty"` - // DeploymentUnit is the unit of deployment for the batcher or the loader. - // Using this user can specify the no of topics and the amount of resources + MaxReloadingUnits *int32 `json:"maxReloadingUnits,omitempty"` + // DeploymentUnit(pod) is the unit of deployment for the batcher or the loader. + // Using this user can specify the amount of resources // needed to run them as one unit. Operator calculates the total units // based on the total number of topics and this unit spec. This majorly // solves the scaling issues described in #167. @@ -333,6 +329,12 @@ type RedshiftSinkStatus struct { // TopicGroup stores the group info for the topic // +optional TopicGroup map[string]Group `json:"topicGroups,omitempty"` + + // BatcherReloadingTopics stores the list of topics which are currently reloading + // for the batcher deployments in the reload sink group. + // There is a limit to maximum topics that can be reloaded. (MaxReloadingUnits) + // +optional + BatcherReloadingTopics []string `json:"batcherReloadingTopics,omitempty"` } // +kubebuilder:resource:path=redshiftsinks,shortName=rsk;rsks diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index faa9e95a6..1e0a7156c 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -28,11 +28,6 @@ import ( // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *DeploymentUnit) DeepCopyInto(out *DeploymentUnit) { *out = *in - if in.MaxTopics != nil { - in, out := &in.MaxTopics, &out.MaxTopics - *out = new(int) - **out = **in - } if in.PodTemplate != nil { in, out := &in.PodTemplate, &out.PodTemplate *out = new(RedshiftPodTemplateSpec) @@ -338,6 +333,11 @@ func (in *RedshiftSinkStatus) DeepCopyInto(out *RedshiftSinkStatus) { (*out)[key] = *val.DeepCopy() } } + if in.BatcherReloadingTopics != nil { + in, out := &in.BatcherReloadingTopics, &out.BatcherReloadingTopics + *out = make([]string, len(*in)) + copy(*out, *in) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RedshiftSinkStatus. 
@@ -433,6 +433,11 @@ func (in *SinkGroupSpec) DeepCopyInto(out *SinkGroupSpec) { *out = new(int32) **out = **in } + if in.MaxReloadingUnits != nil { + in, out := &in.MaxReloadingUnits, &out.MaxReloadingUnits + *out = new(int32) + **out = **in + } if in.DeploymentUnit != nil { in, out := &in.DeploymentUnit, &out.DeploymentUnit *out = new(DeploymentUnit) diff --git a/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml b/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml index 31db0fb6d..349e5b8e1 100644 --- a/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml +++ b/config/crd/bases/tipoca.k8s.practo.dev_redshiftsinks.yaml @@ -154,17 +154,13 @@ spec: description: All specifies a common specification for all SinkGroups properties: deploymentUnit: - description: 'DeploymentUnit is the unit of deployment for - the batcher or the loader. Using this user can specify - the no of topics and the amount of resources needed to - run them as one unit. Operator calculates the total units - based on the total number of topics and this unit spec. - This majorly solves the scaling issues described in #167.' + description: 'DeploymentUnit(pod) is the unit of deployment + for the batcher or the loader. Using this user can specify + the amount of resources needed to run them as one unit. + Operator calculates the total units based on the total + number of topics and this unit spec. This majorly solves + the scaling issues described in #167.' properties: - maxTopics: - description: MaxTopics specify the maximum number of - topics that can be part of this unit of deployment. - type: integer podTemplate: description: PodTemplate describes the pod specification for the unit. @@ -264,6 +260,14 @@ spec: and loader is 600000ms. format: int32 type: integer + maxReloadingUnits: + description: MaxReloadingUnits is the maximum number of + units(pods) that can be launched based on the DeploymentUnit + specification. Only valid for Reloading SinkGroup. This + value is at present supported to be configurable only + for batcher + format: int32 + type: integer maxSizePerBatch: anyOf: - type: integer @@ -285,17 +289,13 @@ spec: overwrites All properties: deploymentUnit: - description: 'DeploymentUnit is the unit of deployment for - the batcher or the loader. Using this user can specify - the no of topics and the amount of resources needed to - run them as one unit. Operator calculates the total units - based on the total number of topics and this unit spec. - This majorly solves the scaling issues described in #167.' + description: 'DeploymentUnit(pod) is the unit of deployment + for the batcher or the loader. Using this user can specify + the amount of resources needed to run them as one unit. + Operator calculates the total units based on the total + number of topics and this unit spec. This majorly solves + the scaling issues described in #167.' properties: - maxTopics: - description: MaxTopics specify the maximum number of - topics that can be part of this unit of deployment. - type: integer podTemplate: description: PodTemplate describes the pod specification for the unit. @@ -395,6 +395,14 @@ spec: and loader is 600000ms. format: int32 type: integer + maxReloadingUnits: + description: MaxReloadingUnits is the maximum number of + units(pods) that can be launched based on the DeploymentUnit + specification. Only valid for Reloading SinkGroup. 
This + value is at present supported to be configurable only + for batcher + format: int32 + type: integer maxSizePerBatch: anyOf: - type: integer @@ -416,17 +424,13 @@ spec: overwrites All properties: deploymentUnit: - description: 'DeploymentUnit is the unit of deployment for - the batcher or the loader. Using this user can specify - the no of topics and the amount of resources needed to - run them as one unit. Operator calculates the total units - based on the total number of topics and this unit spec. - This majorly solves the scaling issues described in #167.' + description: 'DeploymentUnit(pod) is the unit of deployment + for the batcher or the loader. Using this user can specify + the amount of resources needed to run them as one unit. + Operator calculates the total units based on the total + number of topics and this unit spec. This majorly solves + the scaling issues described in #167.' properties: - maxTopics: - description: MaxTopics specify the maximum number of - topics that can be part of this unit of deployment. - type: integer podTemplate: description: PodTemplate describes the pod specification for the unit. @@ -526,6 +530,14 @@ spec: and loader is 600000ms. format: int32 type: integer + maxReloadingUnits: + description: MaxReloadingUnits is the maximum number of + units(pods) that can be launched based on the DeploymentUnit + specification. Only valid for Reloading SinkGroup. This + value is at present supported to be configurable only + for batcher + format: int32 + type: integer maxSizePerBatch: anyOf: - type: integer @@ -547,17 +559,13 @@ spec: overwrites All properties: deploymentUnit: - description: 'DeploymentUnit is the unit of deployment for - the batcher or the loader. Using this user can specify - the no of topics and the amount of resources needed to - run them as one unit. Operator calculates the total units - based on the total number of topics and this unit spec. - This majorly solves the scaling issues described in #167.' + description: 'DeploymentUnit(pod) is the unit of deployment + for the batcher or the loader. Using this user can specify + the amount of resources needed to run them as one unit. + Operator calculates the total units based on the total + number of topics and this unit spec. This majorly solves + the scaling issues described in #167.' properties: - maxTopics: - description: MaxTopics specify the maximum number of - topics that can be part of this unit of deployment. - type: integer podTemplate: description: PodTemplate describes the pod specification for the unit. @@ -657,6 +665,14 @@ spec: and loader is 600000ms. format: int32 type: integer + maxReloadingUnits: + description: MaxReloadingUnits is the maximum number of + units(pods) that can be launched based on the DeploymentUnit + specification. Only valid for Reloading SinkGroup. This + value is at present supported to be configurable only + for batcher + format: int32 + type: integer maxSizePerBatch: anyOf: - type: integer @@ -810,17 +826,13 @@ spec: description: All specifies a common specification for all SinkGroups properties: deploymentUnit: - description: 'DeploymentUnit is the unit of deployment for - the batcher or the loader. Using this user can specify - the no of topics and the amount of resources needed to - run them as one unit. Operator calculates the total units - based on the total number of topics and this unit spec. - This majorly solves the scaling issues described in #167.' 
+ description: 'DeploymentUnit(pod) is the unit of deployment + for the batcher or the loader. Using this user can specify + the amount of resources needed to run them as one unit. + Operator calculates the total units based on the total + number of topics and this unit spec. This majorly solves + the scaling issues described in #167.' properties: - maxTopics: - description: MaxTopics specify the maximum number of - topics that can be part of this unit of deployment. - type: integer podTemplate: description: PodTemplate describes the pod specification for the unit. @@ -920,6 +932,14 @@ spec: and loader is 600000ms. format: int32 type: integer + maxReloadingUnits: + description: MaxReloadingUnits is the maximum number of + units(pods) that can be launched based on the DeploymentUnit + specification. Only valid for Reloading SinkGroup. This + value is at present supported to be configurable only + for batcher + format: int32 + type: integer maxSizePerBatch: anyOf: - type: integer @@ -941,17 +961,13 @@ spec: overwrites All properties: deploymentUnit: - description: 'DeploymentUnit is the unit of deployment for - the batcher or the loader. Using this user can specify - the no of topics and the amount of resources needed to - run them as one unit. Operator calculates the total units - based on the total number of topics and this unit spec. - This majorly solves the scaling issues described in #167.' + description: 'DeploymentUnit(pod) is the unit of deployment + for the batcher or the loader. Using this user can specify + the amount of resources needed to run them as one unit. + Operator calculates the total units based on the total + number of topics and this unit spec. This majorly solves + the scaling issues described in #167.' properties: - maxTopics: - description: MaxTopics specify the maximum number of - topics that can be part of this unit of deployment. - type: integer podTemplate: description: PodTemplate describes the pod specification for the unit. @@ -1051,6 +1067,14 @@ spec: and loader is 600000ms. format: int32 type: integer + maxReloadingUnits: + description: MaxReloadingUnits is the maximum number of + units(pods) that can be launched based on the DeploymentUnit + specification. Only valid for Reloading SinkGroup. This + value is at present supported to be configurable only + for batcher + format: int32 + type: integer maxSizePerBatch: anyOf: - type: integer @@ -1072,17 +1096,13 @@ spec: overwrites All properties: deploymentUnit: - description: 'DeploymentUnit is the unit of deployment for - the batcher or the loader. Using this user can specify - the no of topics and the amount of resources needed to - run them as one unit. Operator calculates the total units - based on the total number of topics and this unit spec. - This majorly solves the scaling issues described in #167.' + description: 'DeploymentUnit(pod) is the unit of deployment + for the batcher or the loader. Using this user can specify + the amount of resources needed to run them as one unit. + Operator calculates the total units based on the total + number of topics and this unit spec. This majorly solves + the scaling issues described in #167.' properties: - maxTopics: - description: MaxTopics specify the maximum number of - topics that can be part of this unit of deployment. - type: integer podTemplate: description: PodTemplate describes the pod specification for the unit. @@ -1182,6 +1202,14 @@ spec: and loader is 600000ms. 
format: int32 type: integer + maxReloadingUnits: + description: MaxReloadingUnits is the maximum number of + units(pods) that can be launched based on the DeploymentUnit + specification. Only valid for Reloading SinkGroup. This + value is at present supported to be configurable only + for batcher + format: int32 + type: integer maxSizePerBatch: anyOf: - type: integer @@ -1203,17 +1231,13 @@ spec: overwrites All properties: deploymentUnit: - description: 'DeploymentUnit is the unit of deployment for - the batcher or the loader. Using this user can specify - the no of topics and the amount of resources needed to - run them as one unit. Operator calculates the total units - based on the total number of topics and this unit spec. - This majorly solves the scaling issues described in #167.' + description: 'DeploymentUnit(pod) is the unit of deployment + for the batcher or the loader. Using this user can specify + the amount of resources needed to run them as one unit. + Operator calculates the total units based on the total + number of topics and this unit spec. This majorly solves + the scaling issues described in #167.' properties: - maxTopics: - description: MaxTopics specify the maximum number of - topics that can be part of this unit of deployment. - type: integer podTemplate: description: PodTemplate describes the pod specification for the unit. @@ -1313,6 +1337,14 @@ spec: and loader is 600000ms. format: int32 type: integer + maxReloadingUnits: + description: MaxReloadingUnits is the maximum number of + units(pods) that can be launched based on the DeploymentUnit + specification. Only valid for Reloading SinkGroup. This + value is at present supported to be configurable only + for batcher + format: int32 + type: integer maxSizePerBatch: anyOf: - type: integer @@ -1394,6 +1426,14 @@ spec: status: description: RedshiftSinkStatus defines the observed state of RedshiftSink properties: + batcherReloadingTopics: + description: BatcherReloadingTopics stores the list of topics which + are currently reloading for the batcher deployments in the reload + sink group. There is a limit to maximum topics that can be reloaded. 
+ (MaxReloadingUnits) + items: + type: string + type: array maskStatus: description: MaskStatus stores the status of masking for topics if masking is enabled diff --git a/controllers/batcher_deployment.go b/controllers/batcher_deployment.go index b98968a6a..1ebaf8ab2 100644 --- a/controllers/batcher_deployment.go +++ b/controllers/batcher_deployment.go @@ -15,7 +15,7 @@ import ( ) const ( - BatcherSuffix = "-batcher" + BatcherTag = "batcher" BatcherLabelInstance = "redshiftbatcher" ) @@ -24,6 +24,7 @@ type Batcher struct { namespace string deployment *appsv1.Deployment config *corev1.ConfigMap + topics []string } // applyBatcherSinkGroupDefaults applies the defaults for the batcher @@ -43,7 +44,6 @@ func applyBatcherSinkGroupDefaults( maxWaitSeconds := &redshiftbatcher.DefaultMaxWaitSeconds maxConcurrency := &redshiftbatcher.DefaultMaxConcurrency maxProcessingTime := &redshiftbatcher.DefaultMaxProcessingTime - maxTopics := &DefaultMaxBatcherTopics image := &defaultImage var resources *corev1.ResourceRequirements var tolerations *[]corev1.Toleration @@ -83,9 +83,6 @@ func applyBatcherSinkGroupDefaults( maxProcessingTime = specifiedSpec.MaxProcessingTime } if specifiedSpec.DeploymentUnit != nil { - if specifiedSpec.DeploymentUnit.MaxTopics != nil { - maxTopics = specifiedSpec.DeploymentUnit.MaxTopics - } if specifiedSpec.DeploymentUnit.PodTemplate != nil { if specifiedSpec.DeploymentUnit.PodTemplate.Image != nil { image = specifiedSpec.DeploymentUnit.PodTemplate.Image @@ -106,7 +103,6 @@ func applyBatcherSinkGroupDefaults( MaxConcurrency: maxConcurrency, MaxProcessingTime: maxProcessingTime, DeploymentUnit: &tipocav1.DeploymentUnit{ - MaxTopics: maxTopics, PodTemplate: &tipocav1.RedshiftPodTemplateSpec{ Image: image, Resources: resources, @@ -140,14 +136,23 @@ func batcherSecret(secret map[string]string) (map[string]string, error) { return s, nil } -func batcherName(rskName, sinkGroup string, id string) string { - return fmt.Sprintf( - "%s-%s%s%s", - rskName, - sinkGroup, - id, - BatcherSuffix, - ) +func batcherName(rskName, sinkGroup, id string) string { + if id == "" { + return fmt.Sprintf( + "%s-%s-%s", + rskName, + BatcherTag, + sinkGroup, + ) + } else { + return fmt.Sprintf( + "%s-%s-%s-%s", + rskName, + BatcherTag, + sinkGroup, + id, + ) + } } func NewBatcher( @@ -213,9 +218,11 @@ func NewBatcher( var sessionTimeoutSeconds int = 10 var hearbeatIntervalSeconds int = 2 + topics := []string{} totalTopics := 0 var groupConfigs []kafka.ConsumerGroupConfig for groupID, group := range consumerGroups { + topics = append(topics, group.topics...) 
totalTopics += len(group.topics) groupConfigs = append(groupConfigs, kafka.ConsumerGroupConfig{ GroupID: consumerGroupID(rsk.Name, rsk.Namespace, groupID, "-batcher"), @@ -307,6 +314,7 @@ func NewBatcher( namespace: rsk.Namespace, deployment: deploymentFromSpec(deploySpec, configSpec), config: configFromSpec(configSpec), + topics: topics, }, nil } @@ -333,3 +341,7 @@ func (b Batcher) UpdateDeployment(current *appsv1.Deployment) bool { func (b Batcher) UpdateConfig(current *corev1.ConfigMap) bool { return !configSpecEqual(current, b.Config()) } + +func (b Batcher) Topics() []string { + return b.topics +} diff --git a/controllers/loader_deployment.go b/controllers/loader_deployment.go index b115474c4..06ac26408 100644 --- a/controllers/loader_deployment.go +++ b/controllers/loader_deployment.go @@ -16,7 +16,7 @@ import ( ) const ( - LoaderSuffix = "-loader" + LoaderTag = "loader" LoaderLabelInstance = "redshiftloader" ) @@ -25,6 +25,7 @@ type Loader struct { namespace string deployment *appsv1.Deployment config *corev1.ConfigMap + topics []string } // applyLoaderSinkGroupDefaults applies the defaults for the loader @@ -43,7 +44,6 @@ func applyLoaderSinkGroupDefaults( maxSizePerBatch := &defaultMaxBytesPerBatch maxWaitSeconds := &redshiftloader.DefaultMaxWaitSeconds maxProcessingTime := &redshiftloader.DefaultMaxProcessingTime - maxTopics := &DefaultMaxLoaderTopics image := &defaultImage var resources *corev1.ResourceRequirements var tolerations *[]corev1.Toleration @@ -80,9 +80,6 @@ func applyLoaderSinkGroupDefaults( maxProcessingTime = specifiedSpec.MaxProcessingTime } if specifiedSpec.DeploymentUnit != nil { - if specifiedSpec.DeploymentUnit.MaxTopics != nil { - maxTopics = specifiedSpec.DeploymentUnit.MaxTopics - } if specifiedSpec.DeploymentUnit.PodTemplate != nil { if specifiedSpec.DeploymentUnit.PodTemplate.Image != nil { image = specifiedSpec.DeploymentUnit.PodTemplate.Image @@ -102,7 +99,6 @@ func applyLoaderSinkGroupDefaults( MaxWaitSeconds: maxWaitSeconds, MaxProcessingTime: maxProcessingTime, DeploymentUnit: &tipocav1.DeploymentUnit{ - MaxTopics: maxTopics, PodTemplate: &tipocav1.RedshiftPodTemplateSpec{ Image: image, Resources: resources, @@ -139,14 +135,23 @@ func loaderSecret(secret map[string]string) (map[string]string, error) { return s, nil } -func loaderName(rskName, sinkGroup string, id string) string { - return fmt.Sprintf( - "%s-%s%s%s", - rskName, - sinkGroup, - id, - LoaderSuffix, - ) +func loaderName(rskName, sinkGroup, id string) string { + if id == "" { + return fmt.Sprintf( + "%s-%s-%s", + rskName, + LoaderTag, + sinkGroup, + ) + } else { + return fmt.Sprintf( + "%s-%s-%s-%s", + rskName, + LoaderTag, + sinkGroup, + id, + ) + } } func redshiftConnections(rsk *tipocav1.RedshiftSink, defaultMaxOpenConns, defaultMaxIdleConns int) (int, int) { @@ -223,9 +228,11 @@ func NewLoader( var sessionTimeoutSeconds int = 10 var hearbeatIntervalSeconds int = 2 + topics := []string{} totalTopics := 0 var groupConfigs []kafka.ConsumerGroupConfig for groupID, group := range consumerGroups { + topics = append(topics, group.topics...) 
totalTopics += len(group.topics) groupConfigs = append(groupConfigs, kafka.ConsumerGroupConfig{ GroupID: consumerGroupID(rsk.Name, rsk.Namespace, groupID, "-loader"), @@ -330,6 +337,7 @@ func NewLoader( namespace: rsk.Namespace, deployment: deploymentFromSpec(deploySpec, configSpec), config: configFromSpec(configSpec), + topics: topics, }, nil } @@ -356,3 +364,7 @@ func (l Loader) UpdateDeployment(current *appsv1.Deployment) bool { func (l Loader) UpdateConfig(current *corev1.ConfigMap) bool { return !configSpecEqual(current, l.Config()) } + +func (l Loader) Topics() []string { + return l.topics +} diff --git a/controllers/realtime_calculator.go b/controllers/realtime_calculator.go index 778987e47..18229482f 100644 --- a/controllers/realtime_calculator.go +++ b/controllers/realtime_calculator.go @@ -10,15 +10,6 @@ import ( "time" ) -var ( - DefaultMaxBatcherTopics int = 30 - DefaultMaxLoaderTopics int = 300 -) - -type realtimeCalculatorInterface interface { - calculate(reloading []string, currentRealtime []string) []string -} - type offsetPosition struct { last *int64 current *int64 @@ -32,6 +23,11 @@ type topicRealtimeInfo struct { loaderRealtime bool } +type topicLag struct { + topic string + lag int64 +} + type realtimeCalculator struct { rsk *tipocav1.RedshiftSink watcher kafka.Watcher @@ -40,6 +36,9 @@ type realtimeCalculator struct { batchersRealtime []string loadersRealtime []string + + batchersLag []topicLag + loadersLag []topicLag } func newRealtimeCalculator( @@ -47,13 +46,15 @@ func newRealtimeCalculator( watcher kafka.Watcher, topicGroups map[string]tipocav1.Group, cache *sync.Map, -) realtimeCalculatorInterface { +) *realtimeCalculator { return &realtimeCalculator{ rsk: rsk, watcher: watcher, topicGroups: topicGroups, cache: cache, + batchersLag: []topicLag{}, + loadersLag: []topicLag{}, } } @@ -216,6 +217,8 @@ func (r *realtimeCalculator) calculate(reloading []string, currentRealtime []str } realtimeTopics := []string{} + current := toMap(currentRealtime) + allTopics, err := r.watcher.Topics() if err != nil { klog.Errorf( @@ -226,9 +229,7 @@ func (r *realtimeCalculator) calculate(reloading []string, currentRealtime []str } allTopicsMap := toMap(allTopics) - current := toMap(currentRealtime) for _, topic := range reloading { - group, ok := r.topicGroups[topic] if !ok { klog.Errorf("topicGroup 404 in status for: %s", topic) @@ -239,7 +240,7 @@ func (r *realtimeCalculator) calculate(reloading []string, currentRealtime []str ltopic := r.rsk.Spec.KafkaLoaderTopicPrefix + group.ID + "-" + topic _, ok = allTopicsMap[ltopic] if !ok { - klog.V(2).Infof("%s topic 404, not realtime.", loaderTopic) + klog.V(2).Infof("%s topic 404, not realtime.", *loaderTopic) } else { loaderTopic = <opic } @@ -280,21 +281,38 @@ func (r *realtimeCalculator) calculate(reloading []string, currentRealtime []str // compute realtime maxBatcherLag, maxLoaderLag := r.maxLag(topic) if info.batcher != nil && info.batcher.last != nil && info.batcher.current != nil { - if *info.batcher.last-*info.batcher.current <= maxBatcherLag { - klog.V(3).Infof("rsk/s: %s, batcher realtime", r.rsk.Name, topic) + lag := *info.batcher.last - *info.batcher.current + if lag <= maxBatcherLag { + klog.V(3).Infof("rsk/%s: %s batcher realtime", r.rsk.Name, topic) info.batcherRealtime = true r.batchersRealtime = append(r.batchersRealtime, topic) } + r.batchersLag = append( + r.batchersLag, + topicLag{ + topic: topic, + lag: lag, + }, + ) } if info.loader != nil && info.loader.last != nil && info.loader.current != nil { + lag := 
*info.loader.last - *info.loader.current if *info.loader.last-*info.loader.current <= maxLoaderLag { - klog.V(3).Infof("rsk/s: %s, loader realtime", r.rsk.Name, ltopic) + klog.V(3).Infof("rsk/%s: %s loader realtime", r.rsk.Name, ltopic) info.loaderRealtime = true r.loadersRealtime = append(r.loadersRealtime, ltopic) } + r.loadersLag = append( + r.loadersLag, + topicLag{ + topic: topic, + lag: lag, + }, + ) } + if info.batcherRealtime && info.loaderRealtime { - klog.V(2).Infof("rsk/s: %s, realtime", r.rsk.Name, topic) + klog.V(2).Infof("rsk/%s: %s realtime", r.rsk.Name, topic) realtimeTopics = append(realtimeTopics, topic) } else { klog.V(2).Infof("%v: waiting to reach realtime", topic) diff --git a/controllers/redshiftsink_controller.go b/controllers/redshiftsink_controller.go index fa7c4be2e..c8c2e84f9 100644 --- a/controllers/redshiftsink_controller.go +++ b/controllers/redshiftsink_controller.go @@ -419,6 +419,10 @@ func (r *RedshiftSinkReconciler) reconcile( reloadTopicGroup := topicGroupBySinkGroup(rsk, ReloadSinkGroup, status.reloading, status.desiredVersion, rsk.Spec.KafkaLoaderTopicPrefix) calc := newRealtimeCalculator(rsk, kafkaWatcher, reloadTopicGroup, r.KafkaRealtimeCache) currentRealtime := calc.calculate(status.reloading, status.realtime) + if len(status.reloading) > 0 { + klog.V(2).Infof("rsk/%v batchers realtime: %d / %d", rsk.Name, len(calc.batchersRealtime), len(status.reloading)) + klog.V(2).Infof("rsk/%v loaders realtime: %d / %d", rsk.Name, len(calc.loadersRealtime), len(status.reloading)) + } // set allowShuffle reloadingRatio := status.reloadingRatio() @@ -474,9 +478,11 @@ func (r *RedshiftSinkReconciler) reconcile( setTopics(status.reloading). setMaskVersion(status.desiredVersion). setTopicGroups(). + setRealtimeCalculator(calc). buildBatchers(secret, r.DefaultBatcherImage, r.DefaultKafkaVersion, tlsConfig). buildLoaders(secret, r.DefaultLoaderImage, ReloadTableSuffix, r.DefaultKafkaVersion, tlsConfig, r.DefaultRedshiftMaxOpenConns, r.DefaultRedshiftMaxIdleConns). build() + status.updateBatcherReloadingTopics(reload.batcherDeploymentTopics()) reloadDupe = sgBuilder. setRedshiftSink(rsk).setClient(r.Client).setScheme(r.Scheme). @@ -484,6 +490,7 @@ func (r *RedshiftSinkReconciler) reconcile( setTopics(status.reloadingDupe). setMaskVersion(status.currentVersion). setTopicGroups(). + setRealtimeCalculator(nil). buildBatchers(secret, r.DefaultBatcherImage, r.DefaultKafkaVersion, tlsConfig). buildLoaders(secret, r.DefaultLoaderImage, "", r.DefaultKafkaVersion, tlsConfig, r.DefaultRedshiftMaxOpenConns, r.DefaultRedshiftMaxIdleConns). build() @@ -494,6 +501,7 @@ func (r *RedshiftSinkReconciler) reconcile( setTopics(status.released). setMaskVersion(status.desiredVersion). setTopicGroups(). + setRealtimeCalculator(nil). buildBatchers(secret, r.DefaultBatcherImage, r.DefaultKafkaVersion, tlsConfig). buildLoaders(secret, r.DefaultLoaderImage, "", r.DefaultKafkaVersion, tlsConfig, r.DefaultRedshiftMaxOpenConns, r.DefaultRedshiftMaxIdleConns). 
build() diff --git a/controllers/sinkgroup_controller.go b/controllers/sinkgroup_controller.go index fc91f9bd3..56e343ad0 100644 --- a/controllers/sinkgroup_controller.go +++ b/controllers/sinkgroup_controller.go @@ -3,7 +3,6 @@ package controllers import ( "context" "fmt" - "math" "time" klog "github.com/practo/klog/v2" @@ -30,6 +29,8 @@ const ( ) type sinkGroupInterface interface { + batcherDeploymentTopics() []string + loaderDeploymentTopics() []string reconcile(ctx context.Context) (ctrl.Result, ReconcilerEvent, error) } @@ -40,6 +41,7 @@ type Deployment interface { Deployment() *appsv1.Deployment UpdateConfig(current *corev1.ConfigMap) bool UpdateDeployment(current *appsv1.Deployment) bool + Topics() []string } type sinkGroup struct { @@ -49,6 +51,7 @@ type sinkGroup struct { sgType string topics []string topicGroups map[string]tipocav1.Group + calc *realtimeCalculator batchers []Deployment loaders []Deployment @@ -62,6 +65,7 @@ type sinkGroupBuilder interface { setTopics(topics []string) sinkGroupBuilder setMaskVersion(version string) sinkGroupBuilder setTopicGroups() sinkGroupBuilder + setRealtimeCalculator(calc *realtimeCalculator) sinkGroupBuilder buildBatchers(secret map[string]string, defaultImage, defaultKafkaVersion string, tlsConfig *kafka.TLSConfig) sinkGroupBuilder buildLoaders(secret map[string]string, defaultImage, tableSuffix string, defaultKafkaVersion string, tlsConfig *kafka.TLSConfig, defaultMaxOpenConns int, defaultMaxIdleConns int) sinkGroupBuilder @@ -81,6 +85,7 @@ type buildSinkGroup struct { topics []string topicGroups map[string]tipocav1.Group maskVersion string + calc *realtimeCalculator batchers []Deployment loaders []Deployment @@ -127,41 +132,10 @@ func (sb *buildSinkGroup) setTopicGroups() sinkGroupBuilder { return sb } -type deploymentUnit struct { - id string - topics []string -} - -// assignDeploymentUnits allocates the total topics into units of deployments. 
-func assignDeploymentUnits(allTopics []string, maxTopics int) []deploymentUnit { - if len(allTopics) <= maxTopics { - return []deploymentUnit{ - deploymentUnit{ - id: "0", - topics: allTopics, - }, - } - } - - units := []deploymentUnit{} - totalUnits := int(math.Ceil(float64(len(allTopics)) / float64(maxTopics))) - startIndex := 0 - lastIndex := maxTopics - for id := 0; id < totalUnits; id++ { - topics := allTopics[startIndex:lastIndex] - startIndex = lastIndex - if lastIndex+maxTopics >= len(allTopics) { - lastIndex = len(allTopics) - } else { - lastIndex = lastIndex + maxTopics - } - units = append(units, deploymentUnit{ - id: fmt.Sprintf("%d", id), - topics: topics, - }) - } +func (sb *buildSinkGroup) setRealtimeCalculator(calc *realtimeCalculator) sinkGroupBuilder { + sb.calc = calc - return units + return sb } func (sb *buildSinkGroup) buildBatchers( @@ -177,10 +151,25 @@ func (sb *buildSinkGroup) buildBatchers( sb.sgType, defaultImage, ) - units := assignDeploymentUnits( - sb.topics, - *sinkGroupSpec.DeploymentUnit.MaxTopics, - ) + var units []deploymentUnit + if sb.calc != nil { + allocator := newUnitAllocator( + sb.topics, + sb.calc.batchersRealtime, + sb.calc.batchersLag, + *sinkGroupSpec.MaxReloadingUnits, + sb.rsk.Status.BatcherReloadingTopics, + ) + allocator.allocateReloadingUnits() + units = allocator.units + } else { + units = []deploymentUnit{ + deploymentUnit{ + id: "", + topics: sb.topics, + }, + } + } for _, unit := range units { consumerGroups, err := computeConsumerGroups( sb.topicGroups, unit.topics) @@ -249,10 +238,12 @@ func (sb *buildSinkGroup) buildLoaders( sb.sgType, defaultImage, ) - units := assignDeploymentUnits( - sb.topics, - *sinkGroupSpec.DeploymentUnit.MaxTopics, - ) + units := []deploymentUnit{ + deploymentUnit{ + id: "", + topics: sb.topics, + }, + } for _, unit := range units { consumerGroups, err := computeConsumerGroups( sb.topicGroups, unit.topics) @@ -775,3 +766,21 @@ func (s *sinkGroup) reconcile( return result, nil, nil } + +func (s *sinkGroup) batcherDeploymentTopics() []string { + t := []string{} + for _, d := range s.batchers { + t = append(t, d.Topics()...) + } + + return t +} + +func (s *sinkGroup) loaderDeploymentTopics() []string { + t := []string{} + for _, d := range s.loaders { + t = append(t, d.Topics()...) 
+ } + + return t +} diff --git a/controllers/sinkgroup_controller_test.go b/controllers/sinkgroup_controller_test.go deleted file mode 100644 index c62cc36d0..000000000 --- a/controllers/sinkgroup_controller_test.go +++ /dev/null @@ -1,115 +0,0 @@ -package controllers - -import ( - "reflect" - "testing" -) - -func TestDeploymentUnitAssignment(t *testing.T) { - t.Parallel() - - tests := []struct { - name string - allTopics []string - maxTopics int - resultUnits []deploymentUnit - }{ - { - name: "single group", - allTopics: []string{"t1", "t2", "t3", "t4", "t5"}, - maxTopics: 10, - resultUnits: []deploymentUnit{ - deploymentUnit{ - id: "0", - topics: []string{"t1", "t2", "t3", "t4", "t5"}, - }, - }, - }, - { - name: "equal group", - allTopics: []string{"t1", "t2", "t3", "t4", "t5"}, - maxTopics: 1, - resultUnits: []deploymentUnit{ - deploymentUnit{ - id: "0", - topics: []string{"t1"}, - }, - deploymentUnit{ - id: "1", - topics: []string{"t2"}, - }, - deploymentUnit{ - id: "2", - topics: []string{"t3"}, - }, - deploymentUnit{ - id: "3", - topics: []string{"t4"}, - }, - deploymentUnit{ - id: "4", - topics: []string{"t5"}, - }, - }, - }, - { - name: "unequal group", - allTopics: []string{"t1", "t2", "t3", "t4", "t5"}, - maxTopics: 3, - resultUnits: []deploymentUnit{ - deploymentUnit{ - id: "0", - topics: []string{"t1", "t2", "t3"}, - }, - deploymentUnit{ - id: "1", - topics: []string{"t4", "t5"}, - }, - }, - }, - { - name: "equal group one more", - allTopics: []string{"t1", "t2", "t3", "t4", "t5", "t6"}, - maxTopics: 2, - resultUnits: []deploymentUnit{ - deploymentUnit{ - id: "0", - topics: []string{"t1", "t2"}, - }, - deploymentUnit{ - id: "1", - topics: []string{"t3", "t4"}, - }, - deploymentUnit{ - id: "2", - topics: []string{"t5", "t6"}, - }, - }, - }, - { - name: "unequal group", - allTopics: []string{"t1", "t2", "t3", "t4", "t5", "t6"}, - maxTopics: 5, - resultUnits: []deploymentUnit{ - deploymentUnit{ - id: "0", - topics: []string{"t1", "t2", "t3", "t4", "t5"}, - }, - deploymentUnit{ - id: "1", - topics: []string{"t6"}, - }, - }, - }, - } - - for _, tc := range tests { - tc := tc - t.Run(tc.name, func(t *testing.T) { - resultUnits := assignDeploymentUnits(tc.allTopics, tc.maxTopics) - if !reflect.DeepEqual(tc.resultUnits, resultUnits) { - t.Errorf("expected: %v, got: %v\n", tc.resultUnits, resultUnits) - } - }) - } -} diff --git a/controllers/status.go b/controllers/status.go index a9ff2c9a5..492cf7cf4 100644 --- a/controllers/status.go +++ b/controllers/status.go @@ -19,6 +19,7 @@ type status struct { released []string realtime []string reloading []string + reloadWaiting []string reloadingDupe []string } @@ -491,6 +492,10 @@ func (s *status) updateTopicGroup(topic string) { updateTopicGroup(s.rsk, topic, group) } +func (s *status) updateBatcherReloadingTopics(topics []string) { + s.rsk.Status.BatcherReloadingTopics = topics +} + func updateTopicGroup(rsk *tipocav1.RedshiftSink, topic string, group tipocav1.Group) { if rsk.Status.TopicGroup == nil { rsk.Status.TopicGroup = make(map[string]tipocav1.Group) diff --git a/controllers/unit_allocator.go b/controllers/unit_allocator.go new file mode 100644 index 000000000..25327c57e --- /dev/null +++ b/controllers/unit_allocator.go @@ -0,0 +1,101 @@ +package controllers + +import ( + "sort" +) + +type unitAllocator struct { + topics []string + realtime []string + + topicsLag []topicLag + maxReloadingUnits int + currentReloadingTopics []string + + units []deploymentUnit +} + +func newUnitAllocator( + topics, + realtime []string, + topicsLag 
[]topicLag, + maxReloadingUnits int32, + currentReloadingTopics []string, +) *unitAllocator { + return &unitAllocator{ + topics: topics, + realtime: realtime, + topicsLag: topicsLag, + maxReloadingUnits: int(maxReloadingUnits), + currentReloadingTopics: currentReloadingTopics, + units: []deploymentUnit{}, + } +} + +type deploymentUnit struct { + id string + topics []string +} + +func sortTopicsByLag(topicsLag []topicLag) []string { + sort.SliceStable(topicsLag, func(i, j int) bool { + return topicsLag[i].lag < topicsLag[j].lag + }) + + topics := []string{} + for _, tl := range topicsLag { + topics = append(topics, tl.topic) + } + + return topics +} + +// for the reloading sinkGroup +func (u *unitAllocator) allocateReloadingUnits() { + realtime := toMap(u.realtime) + realtimeUnit := deploymentUnit{ + id: "realtime", + topics: u.realtime, + } + + // don't shuffle the already reloading topics unless realtime + reloadingUnits := []deploymentUnit{} + for _, topic := range u.currentReloadingTopics { + _, ok := realtime[topic] + if ok { + continue + } + reloadingUnits = append(reloadingUnits, deploymentUnit{ + id: topic, + topics: []string{topic}, + }) + } + + if len(reloadingUnits) >= u.maxReloadingUnits { + u.units = reloadingUnits + if len(realtimeUnit.topics) > 0 { + u.units = append(u.units, realtimeUnit) + } + return + } + + topicsByLagAsc := sortTopicsByLag(u.topicsLag) + for _, topic := range topicsByLagAsc { + _, ok := realtime[topic] + if ok { + continue + } + if len(reloadingUnits) >= u.maxReloadingUnits { + break + } + reloadingUnits = append(reloadingUnits, deploymentUnit{ + id: topic, + topics: []string{topic}, + }) + } + + u.units = reloadingUnits + if len(realtimeUnit.topics) > 0 { + u.units = append(u.units, realtimeUnit) + } +} diff --git a/controllers/unit_allocator_test.go b/controllers/unit_allocator_test.go new file mode 100644 index 000000000..192ae6ff7 --- /dev/null +++ b/controllers/unit_allocator_test.go @@ -0,0 +1,260 @@ +package controllers + +import ( + "reflect" + "testing" +) + +func TestAllocateReloadingUnits(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + topics []string + realtime []string + topicsLag []topicLag + maxReloadingUnits int32 + currentReloadingTopics []string + units []deploymentUnit + }{ + { + name: "allReloadingFirstCase", + topics: []string{"t1", "t2", "t3", "t4"}, + realtime: []string{}, + topicsLag: []topicLag{ + topicLag{ + topic: "t1", + lag: 1500, + }, + topicLag{ + topic: "t2", + lag: 1500, + }, + topicLag{ + topic: "t3", + lag: 1400, + }, + topicLag{ + topic: "t4", + lag: 1400, + }, + }, + maxReloadingUnits: 1, + currentReloadingTopics: []string{}, + units: []deploymentUnit{ + deploymentUnit{ + id: "t3", + topics: []string{"t3"}, + }, + }, + }, + { + name: "allReloadingSecondCaseMax3", + topics: []string{"t1", "t2", "t3", "t4"}, + realtime: []string{}, + topicsLag: []topicLag{ + topicLag{ + topic: "t1", + lag: 1500, + }, + topicLag{ + topic: "t2", + lag: 1500, + }, + topicLag{ + topic: "t3", + lag: 1400, + }, + topicLag{ + topic: "t4", + lag: 1400, + }, + }, + maxReloadingUnits: 3, + currentReloadingTopics: []string{}, + units: []deploymentUnit{ + deploymentUnit{ + id: "t3", + topics: []string{"t3"}, + }, + deploymentUnit{ + id: "t4", + topics: []string{"t4"}, + }, + deploymentUnit{ + id: "t1", + topics: []string{"t1"}, + }, + }, + }, + { + name: "allReloadingThirdCaseCurrentThere", + topics: []string{"t1", "t2", "t3", "t4"}, + realtime: []string{}, + topicsLag: []topicLag{ + topicLag{ + topic: "t1", + lag: 1500, + }, 
+ topicLag{ + topic: "t2", + lag: 1500, + }, + topicLag{ + topic: "t3", + lag: 1400, + }, + topicLag{ + topic: "t4", + lag: 1400, + }, + }, + maxReloadingUnits: 3, + currentReloadingTopics: []string{"t1", "t2", "t3"}, + units: []deploymentUnit{ + deploymentUnit{ + id: "t1", + topics: []string{"t1"}, + }, + deploymentUnit{ + id: "t2", + topics: []string{"t2"}, + }, + deploymentUnit{ + id: "t3", + topics: []string{"t3"}, + }, + }, + }, + { + name: "allReloadingFourthCaseLagChangedShouldNotChangeAnything", + topics: []string{"t1", "t2", "t3", "t4"}, + realtime: []string{}, + topicsLag: []topicLag{ + topicLag{ + topic: "t1", + lag: 1500, + }, + topicLag{ + topic: "t2", + lag: 1500, + }, + topicLag{ + topic: "t3", + lag: 2, + }, + topicLag{ + topic: "t4", + lag: 1, + }, + }, + maxReloadingUnits: 3, + currentReloadingTopics: []string{"t1", "t2", "t3"}, + units: []deploymentUnit{ + deploymentUnit{ + id: "t1", + topics: []string{"t1"}, + }, + deploymentUnit{ + id: "t2", + topics: []string{"t2"}, + }, + deploymentUnit{ + id: "t3", + topics: []string{"t3"}, + }, + }, + }, + { + name: "allReloadingFifthCaseOneRealtimeOneMovesin", + topics: []string{"t1", "t2", "t3", "t4"}, + realtime: []string{"t3"}, + topicsLag: []topicLag{ + topicLag{ + topic: "t1", + lag: 1500, + }, + topicLag{ + topic: "t2", + lag: 1500, + }, + topicLag{ + topic: "t3", + lag: 2, + }, + topicLag{ + topic: "t4", + lag: 1, + }, + }, + maxReloadingUnits: 3, + currentReloadingTopics: []string{"t1", "t2", "t3"}, + units: []deploymentUnit{ + deploymentUnit{ + id: "t1", + topics: []string{"t1"}, + }, + deploymentUnit{ + id: "t2", + topics: []string{"t2"}, + }, + deploymentUnit{ + id: "t4", + topics: []string{"t4"}, + }, + deploymentUnit{ + id: "realtime", + topics: []string{"t3"}, + }, + }, + }, + { + name: "allReloadingSixthCaseAllRealtime", + topics: []string{"t1", "t2", "t3", "t4"}, + realtime: []string{"t1", "t2", "t3", "t4"}, + topicsLag: []topicLag{ + topicLag{ + topic: "t1", + lag: 1, + }, + topicLag{ + topic: "t2", + lag: 1, + }, + topicLag{ + topic: "t3", + lag: 2, + }, + topicLag{ + topic: "t4", + lag: 1, + }, + }, + maxReloadingUnits: 3, + currentReloadingTopics: []string{"t1", "t2", "t4", "t3"}, + units: []deploymentUnit{ + deploymentUnit{ + id: "realtime", + topics: []string{"t1", "t2", "t3", "t4"}, + }, + }, + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + allocator := newUnitAllocator( + tc.topics, + tc.realtime, + tc.topicsLag, + tc.maxReloadingUnits, + tc.currentReloadingTopics, + ) + allocator.allocateReloadingUnits() + if !reflect.DeepEqual(allocator.units, tc.units) { + t.Errorf("expected: %+v, got: %+v\n", tc.units, allocator.units) + } + }) + } +} From 05e113d45a893a9ad6067378d57d61776ef96fb2 Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Mon, 29 Mar 2021 08:57:22 +0530 Subject: [PATCH 24/49] Defaults by sinkgroup --- controllers/batcher_deployment.go | 44 ++++++++++++++++++++------ controllers/loader_deployment.go | 40 +++++++++++++++++------ controllers/util.go | 13 ++++++++ pkg/redshiftbatcher/batcher_handler.go | 7 ++-- pkg/redshiftloader/loader_handler.go | 5 ++- 5 files changed, 83 insertions(+), 26 deletions(-) diff --git a/controllers/batcher_deployment.go b/controllers/batcher_deployment.go index 1ebaf8ab2..54aff77ff 100644 --- a/controllers/batcher_deployment.go +++ b/controllers/batcher_deployment.go @@ -36,20 +36,40 @@ func applyBatcherSinkGroupDefaults( sgType string, defaultImage string, ) *tipocav1.SinkGroupSpec { - // defaults - defaultMaxBytesPerBatch 
:= resource.MustParse( - redshiftbatcher.DefaultMaxBytesPerBatch, - ) - maxSizePerBatch := &defaultMaxBytesPerBatch - maxWaitSeconds := &redshiftbatcher.DefaultMaxWaitSeconds - maxConcurrency := &redshiftbatcher.DefaultMaxConcurrency - maxProcessingTime := &redshiftbatcher.DefaultMaxProcessingTime - image := &defaultImage + var maxSizePerBatch *resource.Quantity + var maxWaitSeconds *int + var maxConcurrency *int + var maxProcessingTime *int32 + var image *string var resources *corev1.ResourceRequirements var tolerations *[]corev1.Toleration + var maxReloadingUnits *int32 - // apply the sinkGroup spec rules + // defaults by sinkgroup var specifiedSpec *tipocav1.SinkGroupSpec + switch sgType { + case MainSinkGroup: + maxSizePerBatch = toQuantityPtr(resource.MustParse("0.5Mi")) + maxWaitSeconds = toIntPtr(60) + maxConcurrency = toIntPtr(2) + maxProcessingTime = &redshiftbatcher.DefaultMaxProcessingTime + image = &defaultImage + case ReloadSinkGroup: + maxSizePerBatch = toQuantityPtr(resource.MustParse("0.5Mi")) + maxWaitSeconds = toIntPtr(60) + maxConcurrency = toIntPtr(10) + maxProcessingTime = &redshiftbatcher.DefaultMaxProcessingTime + image = &defaultImage + maxReloadingUnits = toInt32Ptr(10) + case ReloadDupeSinkGroup: + maxSizePerBatch = toQuantityPtr(resource.MustParse("0.5Mi")) + maxWaitSeconds = toIntPtr(60) + maxConcurrency = toIntPtr(10) + maxProcessingTime = &redshiftbatcher.DefaultMaxProcessingTime + image = &defaultImage + } + + // apply the sinkGroup spec rules if rsk.Spec.Batcher.SinkGroup.All != nil { specifiedSpec = rsk.Spec.Batcher.SinkGroup.All } @@ -82,6 +102,9 @@ func applyBatcherSinkGroupDefaults( if specifiedSpec.MaxProcessingTime != nil { maxProcessingTime = specifiedSpec.MaxProcessingTime } + if specifiedSpec.MaxReloadingUnits != nil { + maxReloadingUnits = specifiedSpec.MaxReloadingUnits + } if specifiedSpec.DeploymentUnit != nil { if specifiedSpec.DeploymentUnit.PodTemplate != nil { if specifiedSpec.DeploymentUnit.PodTemplate.Image != nil { @@ -102,6 +125,7 @@ func applyBatcherSinkGroupDefaults( MaxWaitSeconds: maxWaitSeconds, MaxConcurrency: maxConcurrency, MaxProcessingTime: maxProcessingTime, + MaxReloadingUnits: maxReloadingUnits, DeploymentUnit: &tipocav1.DeploymentUnit{ PodTemplate: &tipocav1.RedshiftPodTemplateSpec{ Image: image, diff --git a/controllers/loader_deployment.go b/controllers/loader_deployment.go index 06ac26408..f45214b8a 100644 --- a/controllers/loader_deployment.go +++ b/controllers/loader_deployment.go @@ -37,19 +37,36 @@ func applyLoaderSinkGroupDefaults( sgType string, defaultImage string, ) *tipocav1.SinkGroupSpec { - // defaults - defaultMaxBytesPerBatch := resource.MustParse( - redshiftloader.DefaultMaxBytesPerBatch, - ) - maxSizePerBatch := &defaultMaxBytesPerBatch - maxWaitSeconds := &redshiftloader.DefaultMaxWaitSeconds - maxProcessingTime := &redshiftloader.DefaultMaxProcessingTime - image := &defaultImage + var maxSizePerBatch *resource.Quantity + var maxWaitSeconds *int + var maxProcessingTime *int32 + var image *string var resources *corev1.ResourceRequirements var tolerations *[]corev1.Toleration + var maxReloadingUnits *int32 - // apply the sinkGroup spec rules + // defaults by sinkgroup var specifiedSpec *tipocav1.SinkGroupSpec + switch sgType { + case MainSinkGroup: + maxSizePerBatch = toQuantityPtr(resource.MustParse("0.5Mi")) + maxWaitSeconds = toIntPtr(60) + maxProcessingTime = &redshiftloader.DefaultMaxProcessingTime + image = &defaultImage + case ReloadSinkGroup: + maxSizePerBatch = 
toQuantityPtr(resource.MustParse("0.5Mi")) + maxWaitSeconds = toIntPtr(60) + maxProcessingTime = &redshiftloader.DefaultMaxProcessingTime + image = &defaultImage + maxReloadingUnits = toInt32Ptr(1) // loader only supports one for this at present (there is no need as of now to run multiple) + case ReloadDupeSinkGroup: + maxSizePerBatch = toQuantityPtr(resource.MustParse("0.5Mi")) + maxWaitSeconds = toIntPtr(60) + maxProcessingTime = &redshiftloader.DefaultMaxProcessingTime + image = &defaultImage + } + + // apply the sinkGroup spec rules if rsk.Spec.Loader.SinkGroup.All != nil { specifiedSpec = rsk.Spec.Loader.SinkGroup.All } @@ -79,6 +96,10 @@ func applyLoaderSinkGroupDefaults( if specifiedSpec.MaxProcessingTime != nil { maxProcessingTime = specifiedSpec.MaxProcessingTime } + // Loader does not support MaxReloadingUnits yet + // if specifiedSpec.MaxReloadingUnits != nil { + // maxReloadingUnits = specifiedSpec.MaxReloadingUnits + // } if specifiedSpec.DeploymentUnit != nil { if specifiedSpec.DeploymentUnit.PodTemplate != nil { if specifiedSpec.DeploymentUnit.PodTemplate.Image != nil { @@ -98,6 +119,7 @@ func applyLoaderSinkGroupDefaults( MaxSizePerBatch: maxSizePerBatch, MaxWaitSeconds: maxWaitSeconds, MaxProcessingTime: maxProcessingTime, + MaxReloadingUnits: maxReloadingUnits, DeploymentUnit: &tipocav1.DeploymentUnit{ PodTemplate: &tipocav1.RedshiftPodTemplateSpec{ Image: image, diff --git a/controllers/util.go b/controllers/util.go index 516c00ae8..d5cf1854c 100644 --- a/controllers/util.go +++ b/controllers/util.go @@ -14,6 +14,7 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + resource "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" types "k8s.io/apimachinery/pkg/types" client "sigs.k8s.io/controller-runtime/pkg/client" @@ -140,6 +141,18 @@ func getHashStructure(v interface{}) (string, error) { return hash[:6], nil } +func toIntPtr(i int) *int { + return &i +} + +func toInt32Ptr(i int32) *int32 { + return &i +} + +func toQuantityPtr(r resource.Quantity) *resource.Quantity { + return &r +} + func sortStringSlice(t []string) { sort.Sort(sort.StringSlice(t)) } diff --git a/pkg/redshiftbatcher/batcher_handler.go b/pkg/redshiftbatcher/batcher_handler.go index c209f5e60..dcf1a9252 100644 --- a/pkg/redshiftbatcher/batcher_handler.go +++ b/pkg/redshiftbatcher/batcher_handler.go @@ -15,10 +15,9 @@ import ( ) var ( - DefaultMaxBytesPerBatch string = "1024" - DefaultMaxWaitSeconds int = 30 - DefaultMaxConcurrency int = 10 - DefaultMaxProcessingTime int32 = 180000 + DefaultMaxWaitSeconds int = 30 + DefaultMaxConcurrency int = 10 + DefaultMaxProcessingTime int32 = 180000 ) type BatcherConfig struct { diff --git a/pkg/redshiftloader/loader_handler.go b/pkg/redshiftloader/loader_handler.go index c4786100f..85a04c5e8 100644 --- a/pkg/redshiftloader/loader_handler.go +++ b/pkg/redshiftloader/loader_handler.go @@ -13,9 +13,8 @@ import ( ) var ( - DefaultMaxBytesPerBatch string = "1000000" // 1 MB - DefaultMaxWaitSeconds int = 60 - DefaultMaxProcessingTime int32 = 600000 + DefaultMaxWaitSeconds int = 60 + DefaultMaxProcessingTime int32 = 600000 ) type LoaderConfig struct { From 162ed325ff2f2084f122c25e0f6f45ce52410739 Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Mon, 29 Mar 2021 09:11:14 +0530 Subject: [PATCH 25/49] Realtime topics should run as main sink group spec --- controllers/sinkgroup_controller.go | 19 +++++++++++++++---- controllers/unit_allocator.go | 27 +++++++++++++++++++-------- 
2 files changed, 34 insertions(+), 12 deletions(-) diff --git a/controllers/sinkgroup_controller.go b/controllers/sinkgroup_controller.go index 56e343ad0..0b6ebd9c1 100644 --- a/controllers/sinkgroup_controller.go +++ b/controllers/sinkgroup_controller.go @@ -146,11 +146,19 @@ func (sb *buildSinkGroup) buildBatchers( ) sinkGroupBuilder { batchers := []Deployment{} if sb.rsk.Spec.Batcher.SinkGroup != nil { - sinkGroupSpec := applyBatcherSinkGroupDefaults( + var sinkGroupSpec, mainSinkGroupSpec *tipocav1.SinkGroupSpec + sinkGroupSpec = applyBatcherSinkGroupDefaults( sb.rsk, sb.sgType, defaultImage, ) + if len(sb.calc.batchersRealtime) > 0 { + mainSinkGroupSpec = applyBatcherSinkGroupDefaults( + sb.rsk, + MainSinkGroup, + defaultImage, + ) + } var units []deploymentUnit if sb.calc != nil { allocator := newUnitAllocator( @@ -159,14 +167,17 @@ func (sb *buildSinkGroup) buildBatchers( sb.calc.batchersLag, *sinkGroupSpec.MaxReloadingUnits, sb.rsk.Status.BatcherReloadingTopics, + mainSinkGroupSpec, + sinkGroupSpec, ) allocator.allocateReloadingUnits() units = allocator.units } else { units = []deploymentUnit{ deploymentUnit{ - id: "", - topics: sb.topics, + id: "", + sinkGroupSpec: sinkGroupSpec, + topics: sb.topics, }, } } @@ -183,7 +194,7 @@ func (sb *buildSinkGroup) buildBatchers( sb.maskVersion, secret, sb.sgType, - sinkGroupSpec, + unit.sinkGroupSpec, consumerGroups, defaultImage, defaultKafkaVersion, diff --git a/controllers/unit_allocator.go b/controllers/unit_allocator.go index 25327c57e..832968d9d 100644 --- a/controllers/unit_allocator.go +++ b/controllers/unit_allocator.go @@ -1,6 +1,7 @@ package controllers import ( + tipocav1 "github.com/practo/tipoca-stream/redshiftsink/api/v1" "sort" ) @@ -11,6 +12,8 @@ type unitAllocator struct { topicsLag []topicLag maxReloadingUnits int currentReloadingTopics []string + mainSinkGroupSpec *tipocav1.SinkGroupSpec + reloadSinkGroupSpec *tipocav1.SinkGroupSpec units []deploymentUnit } @@ -21,6 +24,8 @@ func newUnitAllocator( topicsLag []topicLag, maxReloadingUnits int32, currentReloadingTopics []string, + main *tipocav1.SinkGroupSpec, + reload *tipocav1.SinkGroupSpec, ) *unitAllocator { return &unitAllocator{ topics: topics, @@ -29,12 +34,15 @@ func newUnitAllocator( maxReloadingUnits: int(maxReloadingUnits), currentReloadingTopics: currentReloadingTopics, units: []deploymentUnit{}, + mainSinkGroupSpec: main, + reloadSinkGroupSpec: reload, } } type deploymentUnit struct { - id string - topics []string + id string + sinkGroupSpec *tipocav1.SinkGroupSpec + topics []string } func sortTopicsByLag(topicsLag []topicLag) []string { @@ -54,8 +62,9 @@ func sortTopicsByLag(topicsLag []topicLag) []string { func (u *unitAllocator) allocateReloadingUnits() { realtime := toMap(u.realtime) realtimeUnit := deploymentUnit{ - id: "realtime", - topics: u.realtime, + id: "realtime", + sinkGroupSpec: u.mainSinkGroupSpec, + topics: u.realtime, } // don't shuffle the already reloading topics unless realtime @@ -66,8 +75,9 @@ func (u *unitAllocator) allocateReloadingUnits() { continue } reloadingUnits = append(reloadingUnits, deploymentUnit{ - id: topic, - topics: []string{topic}, + id: topic, + sinkGroupSpec: u.reloadSinkGroupSpec, + topics: []string{topic}, }) } @@ -89,8 +99,9 @@ func (u *unitAllocator) allocateReloadingUnits() { break } reloadingUnits = append(reloadingUnits, deploymentUnit{ - id: topic, - topics: []string{topic}, + id: topic, + sinkGroupSpec: u.reloadSinkGroupSpec, + topics: []string{topic}, }) } From 8f0d365a4c520e7ea484a041cac7126b557bd09b Mon Sep 17 
00:00:00 2001
From: Alok Kumar Singh
Date: Mon, 29 Mar 2021 10:02:29 +0530
Subject: [PATCH 26/49] SinkGroup name before tags

---
 controllers/batcher_deployment.go | 4 ++--
 controllers/loader_deployment.go  | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/controllers/batcher_deployment.go b/controllers/batcher_deployment.go
index 54aff77ff..78950dd71 100644
--- a/controllers/batcher_deployment.go
+++ b/controllers/batcher_deployment.go
@@ -165,15 +165,15 @@ func batcherName(rskName, sinkGroup, id string) string {
 		return fmt.Sprintf(
 			"%s-%s-%s",
 			rskName,
-			BatcherTag,
 			sinkGroup,
+			BatcherTag,
 		)
 	} else {
 		return fmt.Sprintf(
 			"%s-%s-%s-%s",
 			rskName,
-			BatcherTag,
 			sinkGroup,
+			BatcherTag,
 			id,
 		)
 	}

diff --git a/controllers/loader_deployment.go b/controllers/loader_deployment.go
index f45214b8a..519daf2f2 100644
--- a/controllers/loader_deployment.go
+++ b/controllers/loader_deployment.go
@@ -162,15 +162,15 @@ func loaderName(rskName, sinkGroup, id string) string {
 		return fmt.Sprintf(
 			"%s-%s-%s",
 			rskName,
-			LoaderTag,
 			sinkGroup,
+			LoaderTag,
 		)
 	} else {
 		return fmt.Sprintf(
 			"%s-%s-%s-%s",
 			rskName,
-			LoaderTag,
 			sinkGroup,
+			LoaderTag,
 			id,
 		)
 	}

From cd27af2572bffadcf3350a48e7debd350b569e73 Mon Sep 17 00:00:00 2001
From: Alok Kumar Singh
Date: Mon, 29 Mar 2021 10:13:26 +0530
Subject: [PATCH 27/49] rm dead conf

---
 controllers/status.go | 1 -
 1 file changed, 1 deletion(-)

diff --git a/controllers/status.go b/controllers/status.go
index 492cf7cf4..83ffb3156 100644
--- a/controllers/status.go
+++ b/controllers/status.go
@@ -19,7 +19,6 @@ type status struct {
 	released      []string
 	realtime      []string
 	reloading     []string
-	reloadWaiting []string
 	reloadingDupe []string
 }

From 8148ef365dd3464142bb38cd24bd1e6dbd5cb0c0 Mon Sep 17 00:00:00 2001
From: Alok Kumar Singh
Date: Tue, 30 Mar 2021 11:46:13 +0530
Subject: [PATCH 28/49] rm allowshuffle

Not required anymore: reload runs one pod per topic, so one topic going
realtime no longer disturbs the others.
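
For context, a rough usage sketch of the allocator that makes this safe
to drop; the topics, lags and unit cap below are made up, only the
names come from controllers/unit_allocator.go:

	// t2 is already reloading and keeps its pod; the one free slot
	// (maxReloadingUnits=2) goes to t1, the waiting topic with the
	// least lag; t3 went realtime and moves to the shared unit.
	alloc := newUnitAllocator(
		[]string{"t1", "t2", "t3"}, // topics being reloaded
		[]string{"t3"},             // topics that reached realtime
		[]topicLag{
			{topic: "t1", lag: 100},
			{topic: "t2", lag: 900},
		},
		2,              // maxReloadingUnits
		[]string{"t2"}, // currently reloading topics
		nil, nil,       // main and reload SinkGroupSpecs
	)
	alloc.allocateReloadingUnits()
	// alloc.units => [{id: "t2"}, {id: "t1"}, {id: "realtime", topics: ["t3"]}]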
--- controllers/redshiftsink_controller.go | 40 +++++++------------------- 1 file changed, 10 insertions(+), 30 deletions(-) diff --git a/controllers/redshiftsink_controller.go b/controllers/redshiftsink_controller.go index c8c2e84f9..99dfaa85b 100644 --- a/controllers/redshiftsink_controller.go +++ b/controllers/redshiftsink_controller.go @@ -61,7 +61,6 @@ type RedshiftSinkReconciler struct { DefaultSecretRefName string DefaultSecretRefNamespace string DefaultKafkaVersion string - ReleaseWaitSeconds int64 DefaultRedshiftMaxIdleConns int DefaultRedshiftMaxOpenConns int } @@ -424,37 +423,18 @@ func (r *RedshiftSinkReconciler) reconcile( klog.V(2).Infof("rsk/%v loaders realtime: %d / %d", rsk.Name, len(calc.loadersRealtime), len(status.reloading)) } - // set allowShuffle - reloadingRatio := status.reloadingRatio() - allowShuffle := true - if reloadingRatio > 0.2 { - rcloaded, ok := r.ReleaseCache.Load(rsk.Namespace + rsk.Name) - if ok { - cache := rcloaded.(releaseCache) - if cacheValid(time.Second*time.Duration(r.ReleaseWaitSeconds), cache.lastCacheRefresh) { - allowShuffle = false - } + if !subSetSlice(currentRealtime, status.realtime) { + for _, moreRealtime := range currentRealtime { + status.realtime = appendIfMissing(status.realtime, moreRealtime) } + klog.V(2).Infof( + "rsk/%s reconcile needed, realtime topics updated: %v", + rsk.Name, + status.realtime, + ) + return resultRequeueMilliSeconds(1500), nil, nil } - klog.V(2).Infof("rsk/%v allowShuffle=%v, reloadingRatio=%v", rsk.Name, allowShuffle, reloadingRatio) - // allow realtime update only during release window, to minimize shuffle - if allowShuffle { - if !subSetSlice(currentRealtime, status.realtime) { - for _, moreRealtime := range currentRealtime { - status.realtime = appendIfMissing(status.realtime, moreRealtime) - } - klog.V(2).Infof( - "rsk/%s reconcile needed, realtime topics updated: %v", - rsk.Name, - status.realtime, - ) - return resultRequeueMilliSeconds(1500), nil, nil - } - klog.V(2).Infof("rsk/%v reconciling all sinkGroups", rsk.Name) - } else { - klog.V(2).Infof("rsk/%s realtime (waiting): %d %v", rsk.Name, len(currentRealtime), currentRealtime) - klog.V(2).Infof("rsk/%v reconciling all sinkGroups (still)", rsk.Name) - } + klog.V(2).Infof("rsk/%v reconciling all sinkGroups", rsk.Name) // SinkGroup are of following types: // 1. 
main: sink group which has desiredMaskVersion From 3dbc3d3a64182d7b4cc7b873fbf52b1004933c32 Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Tue, 30 Mar 2021 12:02:34 +0530 Subject: [PATCH 29/49] Fix nil pointer bug --- controllers/sinkgroup_controller.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/controllers/sinkgroup_controller.go b/controllers/sinkgroup_controller.go index 0b6ebd9c1..7c27401eb 100644 --- a/controllers/sinkgroup_controller.go +++ b/controllers/sinkgroup_controller.go @@ -152,15 +152,15 @@ func (sb *buildSinkGroup) buildBatchers( sb.sgType, defaultImage, ) - if len(sb.calc.batchersRealtime) > 0 { - mainSinkGroupSpec = applyBatcherSinkGroupDefaults( - sb.rsk, - MainSinkGroup, - defaultImage, - ) - } var units []deploymentUnit if sb.calc != nil { + if len(sb.calc.batchersRealtime) > 0 { + mainSinkGroupSpec = applyBatcherSinkGroupDefaults( + sb.rsk, + MainSinkGroup, + defaultImage, + ) + } allocator := newUnitAllocator( sb.topics, sb.calc.batchersRealtime, From 8ea83344dae58a296c357a55d8d8449f830a5f93 Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Tue, 30 Mar 2021 12:13:44 +0530 Subject: [PATCH 30/49] Fix nil pointer bug; change default --- controllers/loader_deployment.go | 6 +++--- controllers/realtime_calculator.go | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/controllers/loader_deployment.go b/controllers/loader_deployment.go index 519daf2f2..c322c7ca1 100644 --- a/controllers/loader_deployment.go +++ b/controllers/loader_deployment.go @@ -49,18 +49,18 @@ func applyLoaderSinkGroupDefaults( var specifiedSpec *tipocav1.SinkGroupSpec switch sgType { case MainSinkGroup: - maxSizePerBatch = toQuantityPtr(resource.MustParse("0.5Mi")) + maxSizePerBatch = toQuantityPtr(resource.MustParse("1Gi")) maxWaitSeconds = toIntPtr(60) maxProcessingTime = &redshiftloader.DefaultMaxProcessingTime image = &defaultImage case ReloadSinkGroup: - maxSizePerBatch = toQuantityPtr(resource.MustParse("0.5Mi")) + maxSizePerBatch = toQuantityPtr(resource.MustParse("1Gi")) maxWaitSeconds = toIntPtr(60) maxProcessingTime = &redshiftloader.DefaultMaxProcessingTime image = &defaultImage maxReloadingUnits = toInt32Ptr(1) // loader only supports one for this at present (there is no need as of now to run multiple) case ReloadDupeSinkGroup: - maxSizePerBatch = toQuantityPtr(resource.MustParse("0.5Mi")) + maxSizePerBatch = toQuantityPtr(resource.MustParse("1Gi")) maxWaitSeconds = toIntPtr(60) maxProcessingTime = &redshiftloader.DefaultMaxProcessingTime image = &defaultImage diff --git a/controllers/realtime_calculator.go b/controllers/realtime_calculator.go index 18229482f..b0ace726e 100644 --- a/controllers/realtime_calculator.go +++ b/controllers/realtime_calculator.go @@ -240,7 +240,7 @@ func (r *realtimeCalculator) calculate(reloading []string, currentRealtime []str ltopic := r.rsk.Spec.KafkaLoaderTopicPrefix + group.ID + "-" + topic _, ok = allTopicsMap[ltopic] if !ok { - klog.V(2).Infof("%s topic 404, not realtime.", *loaderTopic) + klog.V(2).Infof("%s topic 404, not realtime.", ltopic) } else { loaderTopic = &ltopic } From 10f913b53abc0edc40bff40d7e73e13f2683611c Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Tue, 30 Mar 2021 12:56:36 +0530 Subject: [PATCH 31/49] When topic lag is empty, take any combination --- controllers/unit_allocator.go | 5 +++++ controllers/unit_allocator_test.go | 32 ++++++++++++++++++++++++------ 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/controllers/unit_allocator.go
b/controllers/unit_allocator.go index 832968d9d..45bef132b 100644 --- a/controllers/unit_allocator.go +++ b/controllers/unit_allocator.go @@ -1,6 +1,7 @@ package controllers import ( + "github.com/practo/klog/v2" tipocav1 "github.com/practo/tipoca-stream/redshiftsink/api/v1" "sort" ) @@ -90,6 +91,10 @@ func (u *unitAllocator) allocateReloadingUnits() { } topicsByLagAsc := sortTopicsByLag(u.topicsLag) + if len(topicsByLagAsc) == 0 && len(u.topics) != 0 { + klog.Infof("empty topicsLag, using %+v", u.topics) + topicsByLagAsc = u.topics + } for _, topic := range topicsByLagAsc { _, ok := realtime[topic] if ok { diff --git a/controllers/unit_allocator_test.go b/controllers/unit_allocator_test.go index 192ae6ff7..45c076929 100644 --- a/controllers/unit_allocator_test.go +++ b/controllers/unit_allocator_test.go @@ -18,7 +18,25 @@ func TestAllocateReloadingUnits(t *testing.T) { units []deploymentUnit }{ { - name: "allReloadingFirstCase", + name: "RealFirstCaseWhenTopicLagEmpty", + topics: []string{"t1", "t2"}, + realtime: []string{}, + topicsLag: []topicLag{}, + maxReloadingUnits: 3, + currentReloadingTopics: []string{}, + units: []deploymentUnit{ + deploymentUnit{ + id: "t1", + topics: []string{"t1"}, + }, + deploymentUnit{ + id: "t2", + topics: []string{"t2"}, + }, + }, + }, + { + name: "FirstCase", topics: []string{"t1", "t2", "t3", "t4"}, realtime: []string{}, topicsLag: []topicLag{ @@ -49,7 +67,7 @@ func TestAllocateReloadingUnits(t *testing.T) { }, }, { - name: "allReloadingSecondCaseMax3", + name: "SecondCaseMax3", topics: []string{"t1", "t2", "t3", "t4"}, realtime: []string{}, topicsLag: []topicLag{ @@ -88,7 +106,7 @@ func TestAllocateReloadingUnits(t *testing.T) { }, }, { - name: "allReloadingThirdCaseCurrentThere", + name: "ThirdCaseCurrentThere", topics: []string{"t1", "t2", "t3", "t4"}, realtime: []string{}, topicsLag: []topicLag{ @@ -127,7 +145,7 @@ func TestAllocateReloadingUnits(t *testing.T) { }, }, { - name: "allReloadingFourthCaseLagChangedShouldNotChangeAnything", + name: "FourthCaseLagChangedShouldNotChangeAnything", topics: []string{"t1", "t2", "t3", "t4"}, realtime: []string{}, topicsLag: []topicLag{ @@ -166,7 +184,7 @@ func TestAllocateReloadingUnits(t *testing.T) { }, }, { - name: "allReloadingFifthCaseOneRealtimeOneMovesin", + name: "FifthCaseOneRealtimeOneMovesin", topics: []string{"t1", "t2", "t3", "t4"}, realtime: []string{"t3"}, topicsLag: []topicLag{ @@ -209,7 +227,7 @@ func TestAllocateReloadingUnits(t *testing.T) { }, }, { - name: "allReloadingSixthCaseAllRealtime", + name: "SixthCaseAllRealtime", topics: []string{"t1", "t2", "t3", "t4"}, realtime: []string{"t1", "t2", "t3", "t4"}, topicsLag: []topicLag{ @@ -250,6 +268,8 @@ func TestAllocateReloadingUnits(t *testing.T) { tc.topicsLag, tc.maxReloadingUnits, tc.currentReloadingTopics, + nil, // TODO add test cases for them also + nil, ) allocator.allocateReloadingUnits() if !reflect.DeepEqual(allocator.units, tc.units) { From d49a0d14c9c3a611ad4fbf3cb7054bd9f69fcee5 Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Tue, 30 Mar 2021 13:15:28 +0530 Subject: [PATCH 32/49] UnitID from table name --- controllers/unit_allocator.go | 14 ++++++++++++-- redshiftsink/README.md | 2 -- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/controllers/unit_allocator.go b/controllers/unit_allocator.go index 45bef132b..55948b0d6 100644 --- a/controllers/unit_allocator.go +++ b/controllers/unit_allocator.go @@ -3,6 +3,7 @@ package controllers import ( "github.com/practo/klog/v2" tipocav1 
"github.com/practo/tipoca-stream/redshiftsink/api/v1" + transformer "github.com/practo/tipoca-stream/redshiftsink/pkg/transformer" "sort" ) @@ -59,6 +60,15 @@ func sortTopicsByLag(topicsLag []topicLag) []string { return topics } +func (u *unitAllocator) unitID(topic string) string { + _, _, table := transformer.ParseTopic(topic) + if len(table) > 10 { + return table[:10] + } + + return table +} + // for the reloading sinkGroup func (u *unitAllocator) allocateReloadingUnits() { realtime := toMap(u.realtime) @@ -76,7 +86,7 @@ func (u *unitAllocator) allocateReloadingUnits() { continue } reloadingUnits = append(reloadingUnits, deploymentUnit{ - id: topic, + id: u.unitID(topic), sinkGroupSpec: u.reloadSinkGroupSpec, topics: []string{topic}, }) @@ -104,7 +114,7 @@ func (u *unitAllocator) allocateReloadingUnits() { break } reloadingUnits = append(reloadingUnits, deploymentUnit{ - id: topic, + id: u.unitID(topic), sinkGroupSpec: u.reloadSinkGroupSpec, topics: []string{topic}, }) diff --git a/redshiftsink/README.md b/redshiftsink/README.md index 5275ad2ad..7e07039a8 100644 --- a/redshiftsink/README.md +++ b/redshiftsink/README.md @@ -58,7 +58,6 @@ spec: maxWaitSeconds: 30 maxConcurrency: 10 deploymentUnit: - maxTopics: 30 podTemplate: resources: requests: @@ -74,7 +73,6 @@ spec: maxWaitSeconds: 30 maxProcessingTime: 60000 deploymentUnit: - maxTopics: 30 podTemplate: resources: requests: From f5bcecc002ac31e6dd406a286e03163ba4fe5ad6 Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Tue, 30 Mar 2021 14:22:32 +0530 Subject: [PATCH 33/49] Update lastCacheUpdate time only if cache miss Bug fix for cache --- controllers/realtime_calculator.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/controllers/realtime_calculator.go b/controllers/realtime_calculator.go index b0ace726e..930501a22 100644 --- a/controllers/realtime_calculator.go +++ b/controllers/realtime_calculator.go @@ -99,13 +99,13 @@ func (r *realtimeCalculator) fetchRealtimeCache( return topicRealtimeInfo{}, false } - // 600 to 840 seconds, randomness to prevent multiple parallel calls - validSec := rand.Intn(240) + 600 + // 120 to 240 seconds, randomness to prevent multiple parallel calls + validSec := rand.Intn(120) + 120 klog.V(5).Infof( - "rsk/%s validSec: %v topic: %s", + "rsk/%s, %s, cacheValid=%ss" r.rsk.Name, - validSec, topic, + validSec, ) info := loadedInfo.(topicRealtimeInfo) @@ -318,7 +318,9 @@ func (r *realtimeCalculator) calculate(reloading []string, currentRealtime []str klog.V(2).Infof("%v: waiting to reach realtime", topic) } - info.lastUpdate = &now + if !hit { + info.lastUpdate = &now + } r.cache.Store(topic, info) } From 4e73b428aaec0dac7976471adffc8a87a119b0bb Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Tue, 30 Mar 2021 14:25:35 +0530 Subject: [PATCH 34/49] Syntax fix --- controllers/realtime_calculator.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/controllers/realtime_calculator.go b/controllers/realtime_calculator.go index 930501a22..ebdc4003d 100644 --- a/controllers/realtime_calculator.go +++ b/controllers/realtime_calculator.go @@ -102,7 +102,7 @@ func (r *realtimeCalculator) fetchRealtimeCache( // 120 to 240 seconds, randomness to prevent multiple parallel calls validSec := rand.Intn(120) + 120 klog.V(5).Infof( - "rsk/%s, %s, cacheValid=%ss" + "rsk/%s, %s, cacheValid=%ss", r.rsk.Name, topic, validSec, From da6728827ea046fa2a8b3ff800d2dcbfaccd08cd Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Tue, 30 Mar 2021 19:47:20 +0530 Subject: [PATCH 
35/49] Use lastOffset and not lag for SJF This is because the lag is not available until the consumer group exists, but the last offset can always be fetched for the topic. --- controllers/realtime_calculator.go | 60 +++++++++++++------------- controllers/sinkgroup_controller.go | 2 +- controllers/unit_allocator.go | 24 +++++------ controllers/unit_allocator_test.go | 66 ++++++++++++++--------------- 4 files changed, 77 insertions(+), 75 deletions(-) diff --git a/controllers/realtime_calculator.go b/controllers/realtime_calculator.go index ebdc4003d..d48f44ae2 100644 --- a/controllers/realtime_calculator.go +++ b/controllers/realtime_calculator.go @@ -23,9 +23,9 @@ type topicRealtimeInfo struct { loaderRealtime bool } -type topicLag struct { +type topicLast struct { topic string - lag int64 + last int64 } type realtimeCalculator struct { @@ -37,8 +37,8 @@ type realtimeCalculator struct { batchersRealtime []string loadersRealtime []string - batchersLag []topicLag - loadersLag []topicLag + batchersLast []topicLast + loadersLast []topicLast } func newRealtimeCalculator( @@ -49,12 +49,12 @@ func newRealtimeCalculator( ) *realtimeCalculator { return &realtimeCalculator{ - rsk: rsk, - watcher: watcher, - topicGroups: topicGroups, - cache: cache, - batchersLag: []topicLag{}, - loadersLag: []topicLag{}, + rsk: rsk, + watcher: watcher, + topicGroups: topicGroups, + cache: cache, + batchersLast: []topicLast{}, + loadersLast: []topicLast{}, } } @@ -281,32 +281,34 @@ func (r *realtimeCalculator) calculate(reloading []string, currentRealtime []str // compute realtime maxBatcherLag, maxLoaderLag := r.maxLag(topic) if info.batcher != nil && info.batcher.last != nil && info.batcher.current != nil { - lag := *info.batcher.last - *info.batcher.current - if lag <= maxBatcherLag { - klog.V(3).Infof("rsk/%s: %s batcher realtime", r.rsk.Name, topic) - info.batcherRealtime = true - r.batchersRealtime = append(r.batchersRealtime, topic) + if info.batcher.current != nil { + if *info.batcher.last-*info.batcher.current <= maxBatcherLag { + klog.V(3).Infof("rsk/%s: %s batcher realtime", r.rsk.Name, topic) + info.batcherRealtime = true + r.batchersRealtime = append(r.batchersRealtime, topic) + } } - r.batchersLag = append( - r.batchersLag, - topicLag{ + r.batchersLast = append( + r.batchersLast, + topicLast{ topic: topic, - lag: lag, + last: *info.batcher.last, }, ) } - if info.loader != nil && info.loader.last != nil && info.loader.current != nil { - lag := *info.loader.last - *info.loader.current - if *info.loader.last-*info.loader.current <= maxLoaderLag { - klog.V(3).Infof("rsk/%s: %s loader realtime", r.rsk.Name, ltopic) - info.loaderRealtime = true - r.loadersRealtime = append(r.loadersRealtime, ltopic) + if info.loader != nil && info.loader.last != nil { + if info.loader.current != nil { + if *info.loader.last-*info.loader.current <= maxLoaderLag { + klog.V(3).Infof("rsk/%s: %s loader realtime", r.rsk.Name, ltopic) + info.loaderRealtime = true + r.loadersRealtime = append(r.loadersRealtime, ltopic) + } } - r.loadersLag = append( - r.loadersLag, - topicLag{ + r.loadersLast = append( + r.loadersLast, + topicLast{ topic: topic, - lag: lag, + last: *info.loader.last, }, ) } diff --git a/controllers/sinkgroup_controller.go b/controllers/sinkgroup_controller.go index 7c27401eb..b223e6f87 100644 --- a/controllers/sinkgroup_controller.go +++ b/controllers/sinkgroup_controller.go @@ -164,7 +164,7 @@ func (sb *buildSinkGroup) buildBatchers( allocator := newUnitAllocator( sb.topics, sb.calc.batchersRealtime, - sb.calc.batchersLag, +
sb.calc.batchersLast, *sinkGroupSpec.MaxReloadingUnits, sb.rsk.Status.BatcherReloadingTopics, mainSinkGroupSpec, diff --git a/controllers/unit_allocator.go b/controllers/unit_allocator.go index 55948b0d6..84eb5f035 100644 --- a/controllers/unit_allocator.go +++ b/controllers/unit_allocator.go @@ -11,7 +11,7 @@ type unitAllocator struct { topics []string realtime []string - topicsLag []topicLag + topicsLast []topicLast maxReloadingUnits int currentReloadingTopics []string mainSinkGroupSpec *tipocav1.SinkGroupSpec @@ -23,7 +23,7 @@ type unitAllocator struct { func newUnitAllocator( topics, realtime []string, - topicsLag []topicLag, + topicsLast []topicLast, maxReloadingUnits int32, currentReloadingTopics []string, main *tipocav1.SinkGroupSpec, @@ -32,7 +32,7 @@ func newUnitAllocator( return &unitAllocator{ topics: topics, realtime: realtime, - topicsLag: topicsLag, + topicsLast: topicsLast, maxReloadingUnits: int(maxReloadingUnits), currentReloadingTopics: currentReloadingTopics, units: []deploymentUnit{}, @@ -47,13 +47,13 @@ type deploymentUnit struct { topics []string } -func sortTopicsByLag(topicsLag []topicLag) []string { - sort.SliceStable(topicsLag, func(i, j int) bool { - return topicsLag[i].lag < topicsLag[j].lag +func sortTopicsByLastOffset(topicsLast []topicLast) []string { + sort.SliceStable(topicsLast, func(i, j int) bool { + return topicsLast[i].last < topicsLast[j].last }) topics := []string{} - for _, tl := range topicsLag { + for _, tl := range topicsLast { topics = append(topics, tl.topic) } @@ -100,12 +100,12 @@ func (u *unitAllocator) allocateReloadingUnits() { return } - topicsByLagAsc := sortTopicsByLag(u.topicsLag) - if len(topicsByLagAsc) == 0 && len(u.topics) != 0 { - klog.Infof("empty topicsLag, using %+v", u.topics) - topicsByLagAsc = u.topics + topicsByLastAsc := sortTopicsByLastOffset(u.topicsLast) + if len(topicsByLastAsc) == 0 && len(u.topics) != 0 { + klog.Infof("empty topicsLast, using %+v", u.topics) + topicsByLastAsc = u.topics } - for _, topic := range topicsByLagAsc { + for _, topic := range topicsByLastAsc { _, ok := realtime[topic] if ok { continue diff --git a/controllers/unit_allocator_test.go b/controllers/unit_allocator_test.go index 45c076929..b9545614c 100644 --- a/controllers/unit_allocator_test.go +++ b/controllers/unit_allocator_test.go @@ -12,7 +12,7 @@ func TestAllocateReloadingUnits(t *testing.T) { name string topics []string realtime []string - topicsLag []topicLag + topicsLast []topicLast maxReloadingUnits int32 currentReloadingTopics []string units []deploymentUnit @@ -21,7 +21,7 @@ func TestAllocateReloadingUnits(t *testing.T) { name: "RealFirstCaseWhenTopicLagEmpty", topics: []string{"t1", "t2"}, realtime: []string{}, - topicsLag: []topicLag{}, + topicsLast: []topicLast{}, maxReloadingUnits: 3, currentReloadingTopics: []string{}, units: []deploymentUnit{ @@ -39,20 +39,20 @@ func TestAllocateReloadingUnits(t *testing.T) { name: "FirstCase", topics: []string{"t1", "t2", "t3", "t4"}, realtime: []string{}, - topicsLag: []topicLag{ - topicLag{ + topicsLast: []topicLast{ + topicLast{ topic: "t1", lag: 1500, }, - topicLag{ + topicLast{ topic: "t2", lag: 1500, }, - topicLag{ + topicLast{ topic: "t3", lag: 1400, }, - topicLag{ + topicLast{ topic: "t4", lag: 1400, }, @@ -70,20 +70,20 @@ func TestAllocateReloadingUnits(t *testing.T) { name: "SecondCaseMax3", topics: []string{"t1", "t2", "t3", "t4"}, realtime: []string{}, - topicsLag: []topicLag{ - topicLag{ + topicsLast: []topicLast{ + topicLast{ topic: "t1", lag: 1500, }, - topicLag{ + topicLast{ 
topic: "t2", lag: 1500, }, - topicLag{ + topicLast{ topic: "t3", lag: 1400, }, - topicLag{ + topicLast{ topic: "t4", lag: 1400, }, @@ -109,20 +109,20 @@ func TestAllocateReloadingUnits(t *testing.T) { name: "ThirdCaseCurrentThere", topics: []string{"t1", "t2", "t3", "t4"}, realtime: []string{}, - topicsLag: []topicLag{ - topicLag{ + topicsLast: []topicLast{ + topicLast{ topic: "t1", lag: 1500, }, - topicLag{ + topicLast{ topic: "t2", lag: 1500, }, - topicLag{ + topicLast{ topic: "t3", lag: 1400, }, - topicLag{ + topicLast{ topic: "t4", lag: 1400, }, @@ -148,20 +148,20 @@ func TestAllocateReloadingUnits(t *testing.T) { name: "FourthCaseLagChangedShouldNotChangeAnything", topics: []string{"t1", "t2", "t3", "t4"}, realtime: []string{}, - topicsLag: []topicLag{ - topicLag{ + topicsLast: []topicLast{ + topicLast{ topic: "t1", lag: 1500, }, - topicLag{ + topicLast{ topic: "t2", lag: 1500, }, - topicLag{ + topicLast{ topic: "t3", lag: 2, }, - topicLag{ + topicLast{ topic: "t4", lag: 1, }, @@ -187,20 +187,20 @@ func TestAllocateReloadingUnits(t *testing.T) { name: "FifthCaseOneRealtimeOneMovesin", topics: []string{"t1", "t2", "t3", "t4"}, realtime: []string{"t3"}, - topicsLag: []topicLag{ - topicLag{ + topicsLast: []topicLast{ + topicLast{ topic: "t1", lag: 1500, }, - topicLag{ + topicLast{ topic: "t2", lag: 1500, }, - topicLag{ + topicLast{ topic: "t3", lag: 2, }, - topicLag{ + topicLast{ topic: "t4", lag: 1, }, @@ -230,20 +230,20 @@ func TestAllocateReloadingUnits(t *testing.T) { name: "SixthCaseAllRealtime", topics: []string{"t1", "t2", "t3", "t4"}, realtime: []string{"t1", "t2", "t3", "t4"}, - topicsLag: []topicLag{ - topicLag{ + topicsLast: []topicLast{ + topicLast{ topic: "t1", lag: 1, }, - topicLag{ + topicLast{ topic: "t2", lag: 1, }, - topicLag{ + topicLast{ topic: "t3", lag: 2, }, - topicLag{ + topicLast{ topic: "t4", lag: 1, }, @@ -265,7 +265,7 @@ func TestAllocateReloadingUnits(t *testing.T) { allocator := newUnitAllocator( tc.topics, tc.realtime, - tc.topicsLag, + tc.topicsLast, tc.maxReloadingUnits, tc.currentReloadingTopics, nil, // TODO add test cases for them also From e4e5df1fc0f00ce26b6784121072bd91c1acbfab Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Wed, 31 Mar 2021 10:18:21 +0530 Subject: [PATCH 36/49] K8s compatible name and fix test --- controllers/realtime_calculator.go | 2 +- controllers/unit_allocator.go | 10 ++ controllers/unit_allocator_test.go | 177 ++++++++++++++++------------- 3 files changed, 109 insertions(+), 80 deletions(-) diff --git a/controllers/realtime_calculator.go b/controllers/realtime_calculator.go index d48f44ae2..4b4cafa62 100644 --- a/controllers/realtime_calculator.go +++ b/controllers/realtime_calculator.go @@ -102,7 +102,7 @@ func (r *realtimeCalculator) fetchRealtimeCache( // 120 to 240 seconds, randomness to prevent multiple parallel calls validSec := rand.Intn(120) + 120 klog.V(5).Infof( - "rsk/%s, %s, cacheValid=%ss", + "rsk/%s, %s, cacheValid=%vs", r.rsk.Name, topic, validSec, diff --git a/controllers/unit_allocator.go b/controllers/unit_allocator.go index 84eb5f035..5e1c09c31 100644 --- a/controllers/unit_allocator.go +++ b/controllers/unit_allocator.go @@ -5,6 +5,7 @@ import ( tipocav1 "github.com/practo/tipoca-stream/redshiftsink/api/v1" transformer "github.com/practo/tipoca-stream/redshiftsink/pkg/transformer" "sort" + "strings" ) type unitAllocator struct { @@ -60,8 +61,17 @@ func sortTopicsByLastOffset(topicsLast []topicLast) []string { return topics } +func k8sCompatibleName(name string) string { + // satisfy regex + // 
'[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*' + return strings.ToLower(strings.ReplaceAll(name, "_", "-")) +} + func (u *unitAllocator) unitID(topic string) string { _, _, table := transformer.ParseTopic(topic) + + table = k8sCompatibleName(table) + if len(table) > 10 { return table[:10] } diff --git a/controllers/unit_allocator_test.go b/controllers/unit_allocator_test.go index b9545614c..472038d56 100644 --- a/controllers/unit_allocator_test.go +++ b/controllers/unit_allocator_test.go @@ -6,7 +6,7 @@ import ( ) func TestAllocateReloadingUnits(t *testing.T) { - t.Parallel() + // t.Parallel() tests := []struct { name string @@ -19,7 +19,7 @@ func TestAllocateReloadingUnits(t *testing.T) { }{ { name: "RealFirstCaseWhenTopicLagEmpty", - topics: []string{"t1", "t2"}, + topics: []string{"db.inventory.t1", "db.inventory.t2"}, realtime: []string{}, topicsLast: []topicLast{}, maxReloadingUnits: 3, @@ -27,34 +27,34 @@ func TestAllocateReloadingUnits(t *testing.T) { units: []deploymentUnit{ deploymentUnit{ id: "t1", - topics: []string{"t1"}, + topics: []string{"db.inventory.t1"}, }, deploymentUnit{ id: "t2", - topics: []string{"t2"}, + topics: []string{"db.inventory.t2"}, }, }, }, { name: "FirstCase", - topics: []string{"t1", "t2", "t3", "t4"}, + topics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3", "db.inventory.t4"}, realtime: []string{}, topicsLast: []topicLast{ topicLast{ - topic: "t1", - lag: 1500, + topic: "db.inventory.t1", + last: 1500, }, topicLast{ - topic: "t2", - lag: 1500, + topic: "db.inventory.t2", + last: 1500, }, topicLast{ - topic: "t3", - lag: 1400, + topic: "db.inventory.t3", + last: 1400, }, topicLast{ - topic: "t4", - lag: 1400, + topic: "db.inventory.t4", + last: 1400, }, }, maxReloadingUnits: 1, @@ -62,30 +62,30 @@ func TestAllocateReloadingUnits(t *testing.T) { units: []deploymentUnit{ deploymentUnit{ id: "t3", - topics: []string{"t3"}, + topics: []string{"db.inventory.t3"}, }, }, }, { name: "SecondCaseMax3", - topics: []string{"t1", "t2", "t3", "t4"}, + topics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3", "db.inventory.t4"}, realtime: []string{}, topicsLast: []topicLast{ topicLast{ - topic: "t1", - lag: 1500, + topic: "db.inventory.t1", + last: 1500, }, topicLast{ - topic: "t2", - lag: 1500, + topic: "db.inventory.t2", + last: 1500, }, topicLast{ - topic: "t3", - lag: 1400, + topic: "db.inventory.t3", + last: 1400, }, topicLast{ - topic: "t4", - lag: 1400, + topic: "db.inventory.t4", + last: 1400, }, }, maxReloadingUnits: 3, @@ -93,167 +93,186 @@ func TestAllocateReloadingUnits(t *testing.T) { units: []deploymentUnit{ deploymentUnit{ id: "t3", - topics: []string{"t3"}, + topics: []string{"db.inventory.t3"}, }, deploymentUnit{ id: "t4", - topics: []string{"t4"}, + topics: []string{"db.inventory.t4"}, }, deploymentUnit{ id: "t1", - topics: []string{"t1"}, + topics: []string{"db.inventory.t1"}, }, }, }, { name: "ThirdCaseCurrentThere", - topics: []string{"t1", "t2", "t3", "t4"}, + topics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3", "db.inventory.t4"}, realtime: []string{}, topicsLast: []topicLast{ topicLast{ - topic: "t1", - lag: 1500, + topic: "db.inventory.t1", + last: 1500, }, topicLast{ - topic: "t2", - lag: 1500, + topic: "db.inventory.t2", + last: 1500, }, topicLast{ - topic: "t3", - lag: 1400, + topic: "db.inventory.t3", + last: 1400, }, topicLast{ - topic: "t4", - lag: 1400, + topic: "db.inventory.t4", + last: 1400, }, }, maxReloadingUnits: 3, - currentReloadingTopics: []string{"t1", 
"t2", "t3"}, + currentReloadingTopics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3"}, units: []deploymentUnit{ deploymentUnit{ id: "t1", - topics: []string{"t1"}, + topics: []string{"db.inventory.t1"}, }, deploymentUnit{ id: "t2", - topics: []string{"t2"}, + topics: []string{"db.inventory.t2"}, }, deploymentUnit{ id: "t3", - topics: []string{"t3"}, + topics: []string{"db.inventory.t3"}, }, }, }, { name: "FourthCaseLagChangedShouldNotChangeAnything", - topics: []string{"t1", "t2", "t3", "t4"}, + topics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3", "db.inventory.t4"}, realtime: []string{}, topicsLast: []topicLast{ topicLast{ - topic: "t1", - lag: 1500, + topic: "db.inventory.t1", + last: 1500, }, topicLast{ - topic: "t2", - lag: 1500, + topic: "db.inventory.t2", + last: 1500, }, topicLast{ - topic: "t3", - lag: 2, + topic: "db.inventory.t3", + last: 2, }, topicLast{ - topic: "t4", - lag: 1, + topic: "db.inventory.t4", + last: 1, }, }, maxReloadingUnits: 3, - currentReloadingTopics: []string{"t1", "t2", "t3"}, + currentReloadingTopics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3"}, units: []deploymentUnit{ deploymentUnit{ id: "t1", - topics: []string{"t1"}, + topics: []string{"db.inventory.t1"}, }, deploymentUnit{ id: "t2", - topics: []string{"t2"}, + topics: []string{"db.inventory.t2"}, }, deploymentUnit{ id: "t3", - topics: []string{"t3"}, + topics: []string{"db.inventory.t3"}, }, }, }, { name: "FifthCaseOneRealtimeOneMovesin", - topics: []string{"t1", "t2", "t3", "t4"}, - realtime: []string{"t3"}, + topics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3", "db.inventory.t4"}, + realtime: []string{"db.inventory.t3"}, topicsLast: []topicLast{ topicLast{ - topic: "t1", - lag: 1500, + topic: "db.inventory.t1", + last: 1500, }, topicLast{ - topic: "t2", - lag: 1500, + topic: "db.inventory.t2", + last: 1500, }, topicLast{ - topic: "t3", - lag: 2, + topic: "db.inventory.t3", + last: 2, }, topicLast{ - topic: "t4", - lag: 1, + topic: "db.inventory.t4", + last: 1, }, }, maxReloadingUnits: 3, - currentReloadingTopics: []string{"t1", "t2", "t3"}, + currentReloadingTopics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3"}, units: []deploymentUnit{ deploymentUnit{ id: "t1", - topics: []string{"t1"}, + topics: []string{"db.inventory.t1"}, }, deploymentUnit{ id: "t2", - topics: []string{"t2"}, + topics: []string{"db.inventory.t2"}, }, deploymentUnit{ id: "t4", - topics: []string{"t4"}, + topics: []string{"db.inventory.t4"}, }, deploymentUnit{ id: "realtime", - topics: []string{"t3"}, + topics: []string{"db.inventory.t3"}, }, }, }, { name: "SixthCaseAllRealtime", - topics: []string{"t1", "t2", "t3", "t4"}, - realtime: []string{"t1", "t2", "t3", "t4"}, + topics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3", "db.inventory.t4"}, + realtime: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3", "db.inventory.t4"}, topicsLast: []topicLast{ topicLast{ - topic: "t1", - lag: 1, + topic: "db.inventory.t1", + last: 1, }, topicLast{ - topic: "t2", - lag: 1, + topic: "db.inventory.t2", + last: 1, }, topicLast{ - topic: "t3", - lag: 2, + topic: "db.inventory.t3", + last: 2, }, topicLast{ - topic: "t4", - lag: 1, + topic: "db.inventory.t4", + last: 1, }, }, maxReloadingUnits: 3, - currentReloadingTopics: []string{"t1", "t2", "t4", "t3"}, + currentReloadingTopics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t4", "db.inventory.t3"}, units: []deploymentUnit{ deploymentUnit{ id: 
"realtime", - topics: []string{"t1", "t2", "t3", "t4"}, + topics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3", "db.inventory.t4"}, + }, + }, + }, + { + name: "K8sNameCompatibility", + topics: []string{"db.inventory.t1_aks"}, + realtime: []string{}, + topicsLast: []topicLast{ + topicLast{ + topic: "db.inventory.t1_aks", + last: 1, + }, + }, + maxReloadingUnits: 3, + currentReloadingTopics: []string{}, + units: []deploymentUnit{ + deploymentUnit{ + id: "t1-aks", + topics: []string{"db.inventory.t1_aks"}, }, }, }, From cb8394419a148bd6fbc3aa6865a8094275c96ffb Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Wed, 31 Mar 2021 12:18:02 +0530 Subject: [PATCH 37/49] Unit allocation debuggers --- controllers/batcher_deployment.go | 2 +- controllers/loader_deployment.go | 2 +- controllers/sinkgroup_controller.go | 19 +++++++------ controllers/status.go | 1 + controllers/unit_allocator.go | 41 ++++++++++++++++++++++------- controllers/unit_allocator_test.go | 19 +------------ 6 files changed, 44 insertions(+), 40 deletions(-) diff --git a/controllers/batcher_deployment.go b/controllers/batcher_deployment.go index 78950dd71..60258fbef 100644 --- a/controllers/batcher_deployment.go +++ b/controllers/batcher_deployment.go @@ -46,7 +46,6 @@ func applyBatcherSinkGroupDefaults( var maxReloadingUnits *int32 // defaults by sinkgroup - var specifiedSpec *tipocav1.SinkGroupSpec switch sgType { case MainSinkGroup: maxSizePerBatch = toQuantityPtr(resource.MustParse("0.5Mi")) @@ -69,6 +68,7 @@ func applyBatcherSinkGroupDefaults( image = &defaultImage } + var specifiedSpec *tipocav1.SinkGroupSpec // apply the sinkGroup spec rules if rsk.Spec.Batcher.SinkGroup.All != nil { specifiedSpec = rsk.Spec.Batcher.SinkGroup.All diff --git a/controllers/loader_deployment.go b/controllers/loader_deployment.go index c322c7ca1..e12fe4417 100644 --- a/controllers/loader_deployment.go +++ b/controllers/loader_deployment.go @@ -46,7 +46,6 @@ func applyLoaderSinkGroupDefaults( var maxReloadingUnits *int32 // defaults by sinkgroup - var specifiedSpec *tipocav1.SinkGroupSpec switch sgType { case MainSinkGroup: maxSizePerBatch = toQuantityPtr(resource.MustParse("1Gi")) @@ -66,6 +65,7 @@ func applyLoaderSinkGroupDefaults( image = &defaultImage } + var specifiedSpec *tipocav1.SinkGroupSpec // apply the sinkGroup spec rules if rsk.Spec.Loader.SinkGroup.All != nil { specifiedSpec = rsk.Spec.Loader.SinkGroup.All diff --git a/controllers/sinkgroup_controller.go b/controllers/sinkgroup_controller.go index b223e6f87..909dbb880 100644 --- a/controllers/sinkgroup_controller.go +++ b/controllers/sinkgroup_controller.go @@ -152,8 +152,14 @@ func (sb *buildSinkGroup) buildBatchers( sb.sgType, defaultImage, ) - var units []deploymentUnit - if sb.calc != nil { + units := []deploymentUnit{ + deploymentUnit{ + id: "", + sinkGroupSpec: sinkGroupSpec, + topics: sb.topics, + }, + } + if len(sb.topics) > 0 && sb.calc != nil { // overwrite units if currently reloading and calculation is available if len(sb.calc.batchersRealtime) > 0 { mainSinkGroupSpec = applyBatcherSinkGroupDefaults( sb.rsk, @@ -162,6 +168,7 @@ func (sb *buildSinkGroup) buildBatchers( ) } allocator := newUnitAllocator( + sb.rsk.Name, sb.topics, sb.calc.batchersRealtime, sb.calc.batchersLast, @@ -172,14 +179,6 @@ func (sb *buildSinkGroup) buildBatchers( ) allocator.allocateReloadingUnits() units = allocator.units - } else { - units = []deploymentUnit{ - deploymentUnit{ - id: "", - sinkGroupSpec: sinkGroupSpec, - topics: sb.topics, - }, - } } for _, unit := range 
units { consumerGroups, err := computeConsumerGroups( diff --git a/controllers/status.go b/controllers/status.go index 83ffb3156..d727c3601 100644 --- a/controllers/status.go +++ b/controllers/status.go @@ -492,6 +492,7 @@ func (s *status) updateTopicGroup(topic string) { } func (s *status) updateBatcherReloadingTopics(topics []string) { + klog.V(3).Infof("rsk/%s currentReloading: %d %v", s.rsk.Name, len(topics), topics) s.rsk.Status.BatcherReloadingTopics = topics } diff --git a/controllers/unit_allocator.go b/controllers/unit_allocator.go index 5e1c09c31..ba28c5045 100644 --- a/controllers/unit_allocator.go +++ b/controllers/unit_allocator.go @@ -9,6 +9,7 @@ import ( ) type unitAllocator struct { + rskName string topics []string realtime []string @@ -22,6 +23,7 @@ type unitAllocator struct { } func newUnitAllocator( + rskName string, topics, realtime []string, topicsLast []topicLast, @@ -31,6 +33,7 @@ func newUnitAllocator( reload *tipocav1.SinkGroupSpec, ) *unitAllocator { return &unitAllocator{ + rskName: rskName, topics: topics, realtime: realtime, topicsLast: topicsLast, @@ -82,12 +85,19 @@ func (u *unitAllocator) unitID(topic string) string { // for the reloading sinkGroup func (u *unitAllocator) allocateReloadingUnits() { realtime := toMap(u.realtime) - realtimeUnit := deploymentUnit{ - id: "realtime", - sinkGroupSpec: u.mainSinkGroupSpec, - topics: u.realtime, - } - + klog.V(3).Infof( + "rsk/%s realtime: %v, max: %v", + u.rskName, + u.realtime, + u.maxReloadingUnits, + ) + + klog.V(3).Infof( + "rsk/%s currentUnits: %v %v", + u.rskName, + len(u.currentReloadingTopics), + u.currentReloadingTopics, + ) // don't shuffle the already reloading topics unless realtime reloadingUnits := []deploymentUnit{} for _, topic := range u.currentReloadingTopics { @@ -101,20 +111,30 @@ func (u *unitAllocator) allocateReloadingUnits() { topics: []string{topic}, }) } + klog.V(3).Infof( + "rsk/%s reloadingUnits(based on current): %v %v", + u.rskName, + len(reloadingUnits), + reloadingUnits, + ) + + realtimeUnit := deploymentUnit{ + id: "realtime", + sinkGroupSpec: u.mainSinkGroupSpec, + topics: u.realtime, + } if len(reloadingUnits) >= u.maxReloadingUnits { u.units = reloadingUnits if len(realtimeUnit.topics) > 0 { u.units = append(u.units, realtimeUnit) } + klog.V(2).Infof("rsk/%s units: %v", u.rskName, len(u.units)) return } topicsByLastAsc := sortTopicsByLastOffset(u.topicsLast) - if len(topicsByLastAsc) == 0 && len(u.topics) != 0 { - klog.Infof("empty topicsLast, using %+v", u.topics) - topicsByLastAsc = u.topics - } + klog.V(4).Infof("rsk/%s sortByLast: %v", u.rskName, topicsByLastAsc) for _, topic := range topicsByLastAsc { _, ok := realtime[topic] if ok { @@ -134,4 +154,5 @@ func (u *unitAllocator) allocateReloadingUnits() { if len(realtimeUnit.topics) > 0 { u.units = append(u.units, realtimeUnit) } + klog.V(2).Infof("rsk/%s units: %v", u.rskName, len(u.units)) } diff --git a/controllers/unit_allocator_test.go b/controllers/unit_allocator_test.go index 472038d56..78ade6a0b 100644 --- a/controllers/unit_allocator_test.go +++ b/controllers/unit_allocator_test.go @@ -17,24 +17,6 @@ func TestAllocateReloadingUnits(t *testing.T) { currentReloadingTopics []string units []deploymentUnit }{ - { - name: "RealFirstCaseWhenTopicLagEmpty", - topics: []string{"db.inventory.t1", "db.inventory.t2"}, - realtime: []string{}, - topicsLast: []topicLast{}, - maxReloadingUnits: 3, - currentReloadingTopics: []string{}, - units: []deploymentUnit{ - deploymentUnit{ - id: "t1", - topics: []string{"db.inventory.t1"}, - }, 
- deploymentUnit{ - id: "t2", - topics: []string{"db.inventory.t2"}, - }, - }, - }, { name: "FirstCase", topics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3", "db.inventory.t4"}, realtime: []string{}, topicsLast: []topicLast{ topicLast{ topic: "db.inventory.t1", last: 1500, }, @@ -282,6 +264,7 @@ func TestAllocateReloadingUnits(t *testing.T) { tc := tc t.Run(tc.name, func(t *testing.T) { allocator := newUnitAllocator( + "testrsk", tc.topics, tc.realtime, tc.topicsLast, From 89138401d0e6ef0d31c5519858548b902fa6ec88 Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Wed, 31 Mar 2021 12:30:15 +0530 Subject: [PATCH 38/49] Fix the deadlock The deadlock: the current offset cannot be populated until the reload pods exist, and the reload cannot proceed until the current offset is populated. --- controllers/realtime_calculator.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/controllers/realtime_calculator.go b/controllers/realtime_calculator.go index 4b4cafa62..c0f398aef 100644 --- a/controllers/realtime_calculator.go +++ b/controllers/realtime_calculator.go @@ -280,7 +280,7 @@ func (r *realtimeCalculator) calculate(reloading []string, currentRealtime []str // compute realtime maxBatcherLag, maxLoaderLag := r.maxLag(topic) - if info.batcher != nil && info.batcher.last != nil && info.batcher.current != nil { + if info.batcher != nil && info.batcher.last != nil { if info.batcher.current != nil { if *info.batcher.last-*info.batcher.current <= maxBatcherLag { klog.V(3).Infof("rsk/%s: %s batcher realtime", r.rsk.Name, topic) From a4ea19a77cdafb7a155da1e815f0c6934caeed74 Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Wed, 31 Mar 2021 13:40:22 +0530 Subject: [PATCH 39/49] Update batcherReloadingTopics whenever the realtime status changes This is so that we only operate on the topics which are still reloading, not on the realtime ones. Not doing this makes the allocator generate duplicates, as it would also operate on the realtime topics since the current status still contains them. So whenever a realtime update happens, always fix the state of the batcher reloading topics.
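To make the duplicate concrete: if a topic turns realtime while rsk.Status.BatcherReloadingTopics still lists it, the allocator keeps a dedicated reloading unit for it and also places it in the combined realtime unit, so the topic is handled by two units. The invariant this patch enforces, as a minimal sketch (filterRealtime is an illustrative name only, not a function in this repository; the real change lives in updateBatcherReloadingTopics below):

// Sketch only: topics recorded as reloading must exclude anything already realtime.
func filterRealtime(reloading, realtime []string) []string {
	rt := make(map[string]bool, len(realtime))
	for _, t := range realtime {
		rt[t] = true
	}
	stillReloading := []string{}
	for _, t := range reloading {
		if !rt[t] {
			stillReloading = append(stillReloading, t)
		}
	}
	return stillReloading
}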
--- controllers/redshiftsink_controller.go | 1 + controllers/status.go | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/controllers/redshiftsink_controller.go b/controllers/redshiftsink_controller.go index 99dfaa85b..a358836d2 100644 --- a/controllers/redshiftsink_controller.go +++ b/controllers/redshiftsink_controller.go @@ -432,6 +432,7 @@ func (r *RedshiftSinkReconciler) reconcile( rsk.Name, status.realtime, ) + status.updateBatcherReloadingTopics(rsk.Status.BatcherReloadingTopics) return resultRequeueMilliSeconds(1500), nil, nil } klog.V(2).Infof("rsk/%v reconciling all sinkGroups", rsk.Name) diff --git a/controllers/status.go b/controllers/status.go index d727c3601..8e719838a 100644 --- a/controllers/status.go +++ b/controllers/status.go @@ -492,8 +492,19 @@ func (s *status) updateTopicGroup(topic string) { } func (s *status) updateBatcherReloadingTopics(topics []string) { - klog.V(3).Infof("rsk/%s currentReloading: %d %v", s.rsk.Name, len(topics), topics) - s.rsk.Status.BatcherReloadingTopics = topics + reloadingTopics := []string{} + + // remove topics which have become realtime + realtime := toMap(s.realtime) + for _, t := range topics { + _, ok := realtime[t] + if !ok { + reloadingTopics = append(reloadingTopics, t) + } + } + + klog.V(2).Infof("rsk/%s currentReloading: %d %v", s.rsk.Name, len(reloadingTopics), reloadingTopics) + s.rsk.Status.BatcherReloadingTopics = reloadingTopics } func updateTopicGroup(rsk *tipocav1.RedshiftSink, topic string, group tipocav1.Group) { From 64bf65bdb639053e68f10bb27b9f57f5933e0887 Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Wed, 31 Mar 2021 14:20:17 +0530 Subject: [PATCH 40/49] Fix duplicate bug for allocator --- controllers/unit_allocator.go | 13 +++++- controllers/unit_allocator_test.go | 63 ++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 2 deletions(-) diff --git a/controllers/unit_allocator.go b/controllers/unit_allocator.go index ba28c5045..e693cf67f 100644 --- a/controllers/unit_allocator.go +++ b/controllers/unit_allocator.go @@ -98,8 +98,11 @@ func (u *unitAllocator) allocateReloadingUnits() { len(u.currentReloadingTopics), u.currentReloadingTopics, ) - // don't shuffle the already reloading topics unless realtime + + reloadingTopics := make(map[string]bool) reloadingUnits := []deploymentUnit{} + + // don't shuffle the already reloading topics unless realtime for _, topic := range u.currentReloadingTopics { _, ok := realtime[topic] if ok { @@ -110,6 +113,7 @@ func (u *unitAllocator) allocateReloadingUnits() { sinkGroupSpec: u.reloadSinkGroupSpec, topics: []string{topic}, }) + reloadingTopics[topic] = true } klog.V(3).Infof( "rsk/%s reloadingUnits(based on current): %v %v", @@ -134,12 +138,16 @@ func (u *unitAllocator) allocateReloadingUnits() { } topicsByLastAsc := sortTopicsByLastOffset(u.topicsLast) - klog.V(4).Infof("rsk/%s sortByLast: %v", u.rskName, topicsByLastAsc) + klog.V(3).Infof("rsk/%s sortByLast: %v", u.rskName, topicsByLastAsc) for _, topic := range topicsByLastAsc { _, ok := realtime[topic] if ok { continue } + _, ok = reloadingTopics[topic] + if ok { + continue + } if len(reloadingUnits) >= u.maxReloadingUnits { break } @@ -148,6 +156,7 @@ func (u *unitAllocator) allocateReloadingUnits() { sinkGroupSpec: u.reloadSinkGroupSpec, topics: []string{topic}, }) + reloadingTopics[topic] = true } u.units = reloadingUnits diff --git a/controllers/unit_allocator_test.go b/controllers/unit_allocator_test.go index 78ade6a0b..952eadfc6 100644 --- 
a/controllers/unit_allocator_test.go +++ b/controllers/unit_allocator_test.go @@ -258,6 +258,69 @@ func TestAllocateReloadingUnits(t *testing.T) { }, }, }, + { + name: "UnitsGoingAboveMax", + topics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3", "db.inventory.t4", "db.inventory.t5", "db.inventory.t6", "db.inventory.t7", "db.inventory.t8", "db.inventory.t9"}, + realtime: []string{"db.inventory.t1"}, + topicsLast: []topicLast{ + topicLast{ + topic: "db.inventory.t1", + last: 1, + }, + topicLast{ + topic: "db.inventory.t2", + last: 10, + }, + topicLast{ + topic: "db.inventory.t3", + last: 100, + }, + topicLast{ + topic: "db.inventory.t4", + last: 1000, + }, + topicLast{ + topic: "db.inventory.t5", + last: 10000, + }, + topicLast{ + topic: "db.inventory.t6", + last: 20000, + }, + topicLast{ + topic: "db.inventory.t7", + last: 100000, + }, + }, + maxReloadingUnits: 5, + currentReloadingTopics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3", "db.inventory.t4", "db.inventory.t5"}, + units: []deploymentUnit{ + deploymentUnit{ + id: "t2", + topics: []string{"db.inventory.t2"}, + }, + deploymentUnit{ + id: "t3", + topics: []string{"db.inventory.t3"}, + }, + deploymentUnit{ + id: "t4", + topics: []string{"db.inventory.t4"}, + }, + deploymentUnit{ + id: "t5", + topics: []string{"db.inventory.t5"}, + }, + deploymentUnit{ + id: "t6", + topics: []string{"db.inventory.t6"}, + }, + deploymentUnit{ + id: "realtime", + topics: []string{"db.inventory.t1"}, + }, + }, + }, } for _, tc := range tests { From 8f5c3fb3a285ed85549a8e2844e729f4ae7d891b Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Wed, 31 Mar 2021 15:06:26 +0530 Subject: [PATCH 41/49] Fix bug: Batcher realtime should be removed --- controllers/redshiftsink_controller.go | 4 +- controllers/status.go | 16 +++++-- controllers/unit_allocator_test.go | 65 +++++++++++++++++++++++++- 3 files changed, 79 insertions(+), 6 deletions(-) diff --git a/controllers/redshiftsink_controller.go b/controllers/redshiftsink_controller.go index a358836d2..5f2a88807 100644 --- a/controllers/redshiftsink_controller.go +++ b/controllers/redshiftsink_controller.go @@ -432,7 +432,7 @@ func (r *RedshiftSinkReconciler) reconcile( rsk.Name, status.realtime, ) - status.updateBatcherReloadingTopics(rsk.Status.BatcherReloadingTopics) + status.updateBatcherReloadingTopics(rsk.Status.BatcherReloadingTopics, calc.batchersRealtime) return resultRequeueMilliSeconds(1500), nil, nil } klog.V(2).Infof("rsk/%v reconciling all sinkGroups", rsk.Name) @@ -463,7 +463,7 @@ func (r *RedshiftSinkReconciler) reconcile( buildBatchers(secret, r.DefaultBatcherImage, r.DefaultKafkaVersion, tlsConfig). buildLoaders(secret, r.DefaultLoaderImage, ReloadTableSuffix, r.DefaultKafkaVersion, tlsConfig, r.DefaultRedshiftMaxOpenConns, r.DefaultRedshiftMaxIdleConns). build() - status.updateBatcherReloadingTopics(reload.batcherDeploymentTopics()) + status.updateBatcherReloadingTopics(reload.batcherDeploymentTopics(), calc.batchersRealtime) reloadDupe = sgBuilder. setRedshiftSink(rsk).setClient(r.Client).setScheme(r.Scheme). 
diff --git a/controllers/status.go b/controllers/status.go index 8e719838a..6b6b1a2d9 100644 --- a/controllers/status.go +++ b/controllers/status.go @@ -491,16 +491,26 @@ func (s *status) updateTopicGroup(topic string) { updateTopicGroup(s.rsk, topic, group) } -func (s *status) updateBatcherReloadingTopics(topics []string) { +func (s *status) updateBatcherReloadingTopics(topics []string, batcherRealtime []string) { reloadingTopics := []string{} // remove topics which have become realtime realtime := toMap(s.realtime) + realtimeBatcher := toMap(batcherRealtime) for _, t := range topics { + // remove topics which have become realtime (both batcher and loader) _, ok := realtime[t] - if !ok { - reloadingTopics = append(reloadingTopics, t) + if ok { + continue + } + // remove topics which have become batcher realtime + _, ok = realtimeBatcher[t] + if ok { + continue + } + + reloadingTopics = append(reloadingTopics, t) } klog.V(2).Infof("rsk/%s currentReloading: %d %v", s.rsk.Name, len(reloadingTopics), reloadingTopics) diff --git a/controllers/unit_allocator_test.go b/controllers/unit_allocator_test.go index 952eadfc6..81eecc5ff 100644 --- a/controllers/unit_allocator_test.go +++ b/controllers/unit_allocator_test.go @@ -321,6 +321,69 @@ func TestAllocateReloadingUnits(t *testing.T) { }, }, }, + { + name: "UnitsGoingAboveMaxCase2", + topics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3", "db.inventory.t4", "db.inventory.t5", "db.inventory.t6", "db.inventory.t7", "db.inventory.t8", "db.inventory.t9"}, + realtime: []string{"db.inventory.t3", "db.inventory.t4"}, + topicsLast: []topicLast{ + topicLast{ + topic: "db.inventory.t1", + last: 1, + }, + topicLast{ + topic: "db.inventory.t2", + last: 10, + }, + topicLast{ + topic: "db.inventory.t3", + last: 100, + }, + topicLast{ + topic: "db.inventory.t4", + last: 1000, + }, + topicLast{ + topic: "db.inventory.t5", + last: 10000, + }, + topicLast{ + topic: "db.inventory.t6", + last: 20000, + }, + topicLast{ + topic: "db.inventory.t7", + last: 100000, + }, + }, + maxReloadingUnits: 5, + currentReloadingTopics: []string{"db.inventory.t1", "db.inventory.t2", "db.inventory.t3", "db.inventory.t4", "db.inventory.t5"}, + units: []deploymentUnit{ + deploymentUnit{ + id: "t1", + topics: []string{"db.inventory.t1"}, + }, + deploymentUnit{ + id: "t2", + topics: []string{"db.inventory.t2"}, + }, + deploymentUnit{ + id: "t5", + topics: []string{"db.inventory.t5"}, + }, + deploymentUnit{ + id: "t6", + topics: []string{"db.inventory.t6"}, + }, + deploymentUnit{ + id: "t7", + topics: []string{"db.inventory.t7"}, + }, + deploymentUnit{ + id: "realtime", + topics: []string{"db.inventory.t3", "db.inventory.t4"}, + }, + }, + }, } for _, tc := range tests { @@ -338,7 +401,7 @@ func TestAllocateReloadingUnits(t *testing.T) { ) allocator.allocateReloadingUnits() if !reflect.DeepEqual(allocator.units, tc.units) { - t.Errorf("expected: %+v, got: %+v\n", tc.units, allocator.units) + t.Errorf("\nexpected (%v): %+v\ngot (%v): %+v\n", len(tc.units), tc.units, len(allocator.units), allocator.units) } }) } From e4ea66ee9ad918d736ca60949323ac51313060f7 Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Wed, 31 Mar 2021 15:12:28 +0530 Subject: [PATCH 42/49] Increase the name default --- controllers/unit_allocator.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/controllers/unit_allocator.go b/controllers/unit_allocator.go index e693cf67f..a072f5be9 100644 --- a/controllers/unit_allocator.go +++ b/controllers/unit_allocator.go @@ -75,8 +75,8 
@@ func (u *unitAllocator) unitID(topic string) string { table = k8sCompatibleName(table) - if len(table) > 10 { - return table[:10] + if len(table) > 20 { + return table[:20] } return table From d2dffd33a1b15fdeb28c3096fc1361f2609fa8a3 Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Wed, 31 Mar 2021 16:13:18 +0530 Subject: [PATCH 43/49] Fix maxReloading unit decrement bug --- controllers/redshiftsink_controller.go | 6 +++--- controllers/unit_allocator.go | 5 ++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/controllers/redshiftsink_controller.go b/controllers/redshiftsink_controller.go index 5f2a88807..59e4382e1 100644 --- a/controllers/redshiftsink_controller.go +++ b/controllers/redshiftsink_controller.go @@ -419,8 +419,8 @@ func (r *RedshiftSinkReconciler) reconcile( calc := newRealtimeCalculator(rsk, kafkaWatcher, reloadTopicGroup, r.KafkaRealtimeCache) currentRealtime := calc.calculate(status.reloading, status.realtime) if len(status.reloading) > 0 { - klog.V(2).Infof("rsk/%v batchers realtime: %d / %d", rsk.Name, len(calc.batchersRealtime), len(status.reloading)) - klog.V(2).Infof("rsk/%v loaders realtime: %d / %d", rsk.Name, len(calc.loadersRealtime), len(status.reloading)) + klog.V(2).Infof("rsk/%v batchersRealtime: %d / %d (current=%d)", rsk.Name, len(calc.batchersRealtime), len(status.reloading), len(rsk.Status.BatcherReloadingTopics)) + klog.V(2).Infof("rsk/%v loadersRealtime: %d / %d", rsk.Name, len(calc.loadersRealtime), len(status.reloading)) } if !subSetSlice(currentRealtime, status.realtime) { @@ -515,7 +515,7 @@ func (r *RedshiftSinkReconciler) reconcile( if len(status.realtime) >= MaxTopicRelease { releaseCandidates = status.realtime[:MaxTopicRelease] } - klog.V(2).Infof("rsk/%s release candidates: %v", rsk.Name, releaseCandidates) + klog.V(2).Infof("rsk/%s releaseCandidates: %v", rsk.Name, releaseCandidates) var releaser *releaser if len(releaseCandidates) > 0 { diff --git a/controllers/unit_allocator.go b/controllers/unit_allocator.go index a072f5be9..293178aac 100644 --- a/controllers/unit_allocator.go +++ b/controllers/unit_allocator.go @@ -65,7 +65,7 @@ func sortTopicsByLastOffset(topicsLast []topicLast) []string { } func k8sCompatibleName(name string) string { - // satisfy regex + // satisfy k8s name regex // '[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*' return strings.ToLower(strings.ReplaceAll(name, "_", "-")) } @@ -114,6 +114,9 @@ func (u *unitAllocator) allocateReloadingUnits() { topics: []string{topic}, }) reloadingTopics[topic] = true + if len(reloadingUnits) >= u.maxReloadingUnits { + break + } } klog.V(3).Infof( "rsk/%s reloadingUnits(based on current): %v %v", From 7bb6fff0a2f512fb352c35561ed4f1f599796962 Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Wed, 31 Mar 2021 16:40:06 +0530 Subject: [PATCH 44/49] Update logs info for delete --- controllers/sinkgroup_controller.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/controllers/sinkgroup_controller.go b/controllers/sinkgroup_controller.go index 909dbb880..1c06afbfd 100644 --- a/controllers/sinkgroup_controller.go +++ b/controllers/sinkgroup_controller.go @@ -562,14 +562,14 @@ func (s *sinkGroup) cleanup( return nil, err } for _, deploy := range deploymentList.Items { - klog.V(3).Infof("Cleanup suspect deployment: %v", deploy.Name) + klog.V(4).Infof("Cleanup suspect deployment: %v", deploy.Name) labelValue, ok := deploy.Labels[InstanceName] if !ok { continue } _, ok = neededDeployments[labelValue] if !ok { - 
klog.V(3).Infof("Cleanup deployment: %v", labelValue) + klog.V(2).Infof("rsk/%s Deleting deployment: %v", s.rsk.Name, labelValue) event, err := deleteDeployment(ctx, s.client, &deploy, s.rsk) if err != nil { return nil, err @@ -602,7 +602,7 @@ func (s *sinkGroup) cleanup( } _, ok = neededConfigMaps[labelValue] if !ok { - klog.V(2).Infof("Cleanup configmap: %s", labelValue) + klog.V(2).Infof("rsk/%s Deleting configmap: %s", s.rsk.Name, labelValue) event, err := deleteConfigMap(ctx, s.client, &config, s.rsk) if err != nil { return nil, err From ebf01a79f1354d86210bc1176b7ab9764dadf45c Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Wed, 31 Mar 2021 18:00:52 +0530 Subject: [PATCH 45/49] Lag info --- controllers/realtime_calculator.go | 12 ++++++++---- pkg/kafka/manager.go | 1 + 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/controllers/realtime_calculator.go b/controllers/realtime_calculator.go index c0f398aef..81edc144c 100644 --- a/controllers/realtime_calculator.go +++ b/controllers/realtime_calculator.go @@ -282,8 +282,10 @@ func (r *realtimeCalculator) calculate(reloading []string, currentRealtime []str maxBatcherLag, maxLoaderLag := r.maxLag(topic) if info.batcher != nil && info.batcher.last != nil { if info.batcher.current != nil { - if *info.batcher.last-*info.batcher.current <= maxBatcherLag { - klog.V(3).Infof("rsk/%s: %s batcher realtime", r.rsk.Name, topic) + lag := *info.batcher.last - *info.batcher.current + klog.V(2).Infof("rsk/%s: %s lag=%v", r.rsk.Name, topic, lag) + if lag <= maxBatcherLag { + klog.V(2).Infof("rsk/%s: %s batcher realtime", r.rsk.Name, topic) info.batcherRealtime = true r.batchersRealtime = append(r.batchersRealtime, topic) } @@ -298,8 +300,10 @@ func (r *realtimeCalculator) calculate(reloading []string, currentRealtime []str } if info.loader != nil && info.loader.last != nil { if info.loader.current != nil { - if *info.loader.last-*info.loader.current <= maxLoaderLag { - klog.V(3).Infof("rsk/%s: %s loader realtime", r.rsk.Name, ltopic) + lag := *info.loader.last - *info.loader.current + klog.V(2).Infof("rsk/%s: %s lag=%v", r.rsk.Name, ltopic, lag) + if lag <= maxLoaderLag { + klog.V(2).Infof("rsk/%s: %s loader realtime", r.rsk.Name, ltopic) info.loaderRealtime = true r.loadersRealtime = append(r.loadersRealtime, ltopic) } diff --git a/pkg/kafka/manager.go b/pkg/kafka/manager.go index ee994aba3..29b8866bb 100644 --- a/pkg/kafka/manager.go +++ b/pkg/kafka/manager.go @@ -164,6 +164,7 @@ func (c *Manager) SyncTopics( ticker := time.NewTicker(time.Second * time.Duration(c.tickSeconds)) for { + klog.V(2).Info("fetching topics...") err := c.refreshTopics() if err != nil { klog.Errorf("error refreshing topic, err:%v\n", err) From 2489266c7060a631d7e09f8d0e6c9f53bbe8b6ad Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Wed, 31 Mar 2021 18:04:55 +0530 Subject: [PATCH 46/49] More log info --- controllers/realtime_calculator.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/controllers/realtime_calculator.go b/controllers/realtime_calculator.go index 81edc144c..9e081b582 100644 --- a/controllers/realtime_calculator.go +++ b/controllers/realtime_calculator.go @@ -321,7 +321,14 @@ func (r *realtimeCalculator) calculate(reloading []string, currentRealtime []str klog.V(2).Infof("rsk/%s: %s realtime", r.rsk.Name, topic) realtimeTopics = append(realtimeTopics, topic) } else { - klog.V(2).Infof("%v: waiting to reach realtime", topic) + if info.batcherRealtime == false && info.loaderRealtime == false { + klog.V(2).Infof("%v: 
waiting to reach realtime", topic) + klog.V(2).Infof("%v: waiting to reach realtime", ltopic) + } else if info.batcherRealtime == false { + klog.V(2).Infof("%v: waiting to reach realtime", topic) + } else if info.loaderRealtime == false { + klog.V(2).Infof("%v: waiting to reach realtime", ltopic) + } } if !hit { From 11494f1391ec888b38d63027300f3423c61053a8 Mon Sep 17 00:00:00 2001 From: Alok Kumar Singh Date: Thu, 1 Apr 2021 08:44:26 +0530 Subject: [PATCH 47/49] Debug flags for realtime calc --- controllers/realtime_calculator.go | 40 +++++++++++++++--------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/controllers/realtime_calculator.go b/controllers/realtime_calculator.go index 9e081b582..01d1be56c 100644 --- a/controllers/realtime_calculator.go +++ b/controllers/realtime_calculator.go @@ -10,19 +10,6 @@ import ( "time" ) -type offsetPosition struct { - last *int64 - current *int64 -} - -type topicRealtimeInfo struct { - lastUpdate *int64 - batcher *offsetPosition - loader *offsetPosition - batcherRealtime bool - loaderRealtime bool -} - type topicLast struct { topic string last int64 @@ -121,6 +108,19 @@ func (r *realtimeCalculator) fetchRealtimeCache( return topicRealtimeInfo{}, false } +type offsetPosition struct { + last *int64 + current *int64 +} + +type topicRealtimeInfo struct { + lastUpdate *int64 + batcher *offsetPosition + loader *offsetPosition + batcherRealtime bool + loaderRealtime bool +} + // fetchRealtimeInfo fetches the offset info for the topic func (r *realtimeCalculator) fetchRealtimeInfo( topic string, @@ -145,7 +145,7 @@ func (r *realtimeCalculator) fetchRealtimeInfo( return info, fmt.Errorf("Error getting last offset for %s", topic) } info.batcher.last = &last - klog.V(4).Infof("%s, lastOffset=%v", topic, last) + klog.V(4).Infof("rsk/%s %s, lastOffset=%v", r.rsk.Name, topic, last) // batcher's lag analysis: b) get current current, err := r.watcher.CurrentOffset( @@ -156,10 +156,10 @@ func (r *realtimeCalculator) fetchRealtimeInfo( if err != nil { return info, err } - klog.V(4).Infof("%s, currentOffset=%v", topic, current) + klog.V(4).Infof("rsk/%s %s, currentOffset=%v (queried)", r.rsk.Name, topic, current) if current == -1 { info.batcher.current = nil - klog.V(2).Infof("%s, batcher cg 404, not realtime", topic) + klog.V(4).Infof("rsk/%s %s, batcher cg 404, not realtime", r.rsk.Name, topic) return info, nil } else { info.batcher.current = ¤t @@ -175,7 +175,7 @@ func (r *realtimeCalculator) fetchRealtimeInfo( return info, fmt.Errorf("Error getting last offset for %s", *loaderTopic) } info.loader.last = &last - klog.V(4).Infof("%s, lastOffset=%v", *loaderTopic, last) + klog.V(4).Infof("rsk/%s %s, lastOffset=%v", r.rsk.Name, *loaderTopic, last) // loader's lag analysis: b) get current current, err = r.watcher.CurrentOffset( @@ -186,7 +186,7 @@ func (r *realtimeCalculator) fetchRealtimeInfo( if err != nil { return info, err } - klog.V(4).Infof("%s, currentOffset=%v (queried)", *loaderTopic, current) + klog.V(4).Infof("rsk/%s %s, currentOffset=%v (queried)", r.rsk.Name, *loaderTopic, current) if current == -1 { // CurrentOffset can be -1 in two cases (this may be required in batcher also) // 1. 
From 11494f1391ec888b38d63027300f3423c61053a8 Mon Sep 17 00:00:00 2001
From: Alok Kumar Singh
Date: Thu, 1 Apr 2021 08:44:26 +0530
Subject: [PATCH 47/49] Debug logs for realtime calc

---
 controllers/realtime_calculator.go | 40 ++++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/controllers/realtime_calculator.go b/controllers/realtime_calculator.go
index 9e081b582..01d1be56c 100644
--- a/controllers/realtime_calculator.go
+++ b/controllers/realtime_calculator.go
@@ -10,19 +10,6 @@ import (
 	"time"
 )
 
-type offsetPosition struct {
-	last    *int64
-	current *int64
-}
-
-type topicRealtimeInfo struct {
-	lastUpdate      *int64
-	batcher         *offsetPosition
-	loader          *offsetPosition
-	batcherRealtime bool
-	loaderRealtime  bool
-}
-
 type topicLast struct {
 	topic string
 	last  int64
@@ -121,6 +108,19 @@ func (r *realtimeCalculator) fetchRealtimeCache(
 	return topicRealtimeInfo{}, false
 }
 
+type offsetPosition struct {
+	last    *int64
+	current *int64
+}
+
+type topicRealtimeInfo struct {
+	lastUpdate      *int64
+	batcher         *offsetPosition
+	loader          *offsetPosition
+	batcherRealtime bool
+	loaderRealtime  bool
+}
+
 // fetchRealtimeInfo fetches the offset info for the topic
 func (r *realtimeCalculator) fetchRealtimeInfo(
 	topic string,
@@ -145,7 +145,7 @@ func (r *realtimeCalculator) fetchRealtimeInfo(
 		return info, fmt.Errorf("Error getting last offset for %s", topic)
 	}
 	info.batcher.last = &last
-	klog.V(4).Infof("%s, lastOffset=%v", topic, last)
+	klog.V(4).Infof("rsk/%s %s, lastOffset=%v", r.rsk.Name, topic, last)
 
 	// batcher's lag analysis: b) get current
 	current, err := r.watcher.CurrentOffset(
@@ -156,10 +156,10 @@ func (r *realtimeCalculator) fetchRealtimeInfo(
 	if err != nil {
 		return info, err
 	}
-	klog.V(4).Infof("%s, currentOffset=%v", topic, current)
+	klog.V(4).Infof("rsk/%s %s, currentOffset=%v (queried)", r.rsk.Name, topic, current)
 	if current == -1 {
 		info.batcher.current = nil
-		klog.V(2).Infof("%s, batcher cg 404, not realtime", topic)
+		klog.V(4).Infof("rsk/%s %s, batcher cg 404, not realtime", r.rsk.Name, topic)
 		return info, nil
 	} else {
 		info.batcher.current = &current
@@ -175,7 +175,7 @@ func (r *realtimeCalculator) fetchRealtimeInfo(
 		return info, fmt.Errorf("Error getting last offset for %s", *loaderTopic)
 	}
 	info.loader.last = &last
-	klog.V(4).Infof("%s, lastOffset=%v", *loaderTopic, last)
+	klog.V(4).Infof("rsk/%s %s, lastOffset=%v", r.rsk.Name, *loaderTopic, last)
 
 	// loader's lag analysis: b) get current
 	current, err = r.watcher.CurrentOffset(
@@ -186,7 +186,7 @@ func (r *realtimeCalculator) fetchRealtimeInfo(
 	if err != nil {
 		return info, err
 	}
-	klog.V(4).Infof("%s, currentOffset=%v (queried)", *loaderTopic, current)
+	klog.V(4).Infof("rsk/%s %s, currentOffset=%v (queried)", r.rsk.Name, *loaderTopic, current)
 	if current == -1 {
 		// CurrentOffset can be -1 in two cases (this may be required in batcher also)
 		// 1. When the Consumer Group was never created in that case we return and consider the topic not realtime
@@ -196,13 +196,13 @@ func (r *realtimeCalculator) fetchRealtimeInfo(
 			klog.V(2).Infof("%s, loader cg 404, not realtime", *loaderTopic)
 			return info, nil
 		}
-		klog.V(2).Infof("%s, currentOffset=%v (old), cg 404, try realtime", *loaderTopic, *group.LoaderCurrentOffset)
+		klog.V(4).Infof("rsk/%s %s, currentOffset=%v (old), cg 404, try realtime", r.rsk.Name, *loaderTopic, *group.LoaderCurrentOffset)
 		// give the topic the opportunity to release based on its last found currentOffset
 		info.loader.current = group.LoaderCurrentOffset
 	} else {
 		group.LoaderCurrentOffset = &current
 		// updates the new queried loader offset
-		klog.V(4).Infof("%s, cg found", *loaderTopic)
+		klog.V(4).Infof("rsk/%s %s, cg found", r.rsk.Name, *loaderTopic)
 		updateTopicGroup(r.rsk, topic, group)
 		info.loader.current = &current
 	}
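Patch 48 below is worth reading closely. Before it, fetchRealtimeInfo reused single last and current variables for both the batcher and the loader while storing their addresses (info.batcher.last = &last), so the later loader assignments overwrote the values the batcher pointers referred to: both sides ended up aliasing the same storage. A minimal reproduction of the pitfall (illustrative only, not code from the repo):

	package main

	import "fmt"

	func main() {
		last := int64(100)
		batcherLast := &last      // store the address, like info.batcher.last = &last
		last = 5                  // later reuse of the variable for the loader topic
		fmt.Println(*batcherLast) // prints 5, not 100: the batcher reading is clobbered
	}

Renaming to batcherLast/batcherCurrent and loaderLast/loaderCurrent gives each stored pointer its own variable, which is the whole fix.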
From e88694ed916b8679a27b1745989aceaf53bc395f Mon Sep 17 00:00:00 2001
From: Alok Kumar Singh
Date: Thu, 1 Apr 2021 09:29:34 +0530
Subject: [PATCH 48/49] Fix batcher and loader realtime calc bug

---
 controllers/realtime_calculator.go | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/controllers/realtime_calculator.go b/controllers/realtime_calculator.go
index 01d1be56c..6e74d1e90 100644
--- a/controllers/realtime_calculator.go
+++ b/controllers/realtime_calculator.go
@@ -139,16 +139,17 @@ func (r *realtimeCalculator) fetchRealtimeInfo(
 		loaderRealtime: false,
 		lastUpdate:     &now,
 	}
+
 	// batcher's lag analysis: a) get last
-	last, err := r.watcher.LastOffset(topic, 0)
+	batcherLast, err := r.watcher.LastOffset(topic, 0)
 	if err != nil {
 		return info, fmt.Errorf("Error getting last offset for %s", topic)
 	}
-	info.batcher.last = &last
-	klog.V(4).Infof("rsk/%s %s, lastOffset=%v", r.rsk.Name, topic, last)
+	info.batcher.last = &batcherLast
+	klog.V(4).Infof("rsk/%s %s, lastOffset=%v", r.rsk.Name, topic, batcherLast)
 
 	// batcher's lag analysis: b) get current
-	current, err := r.watcher.CurrentOffset(
+	batcherCurrent, err := r.watcher.CurrentOffset(
 		consumerGroupID(r.rsk.Name, r.rsk.Namespace, group.ID, "-batcher"),
 		topic,
 		0,
@@ -156,13 +157,13 @@ func (r *realtimeCalculator) fetchRealtimeInfo(
 	if err != nil {
 		return info, err
 	}
-	klog.V(4).Infof("rsk/%s %s, currentOffset=%v (queried)", r.rsk.Name, topic, current)
-	if current == -1 {
+	klog.V(4).Infof("rsk/%s %s, currentOffset=%v (queried)", r.rsk.Name, topic, batcherCurrent)
+	if batcherCurrent == -1 {
 		info.batcher.current = nil
 		klog.V(4).Infof("rsk/%s %s, batcher cg 404, not realtime", r.rsk.Name, topic)
 		return info, nil
 	} else {
-		info.batcher.current = &current
+		info.batcher.current = &batcherCurrent
 	}
 
 	if loaderTopic == nil {
@@ -170,15 +171,15 @@ func (r *realtimeCalculator) fetchRealtimeInfo(
 	}
 
 	// loader's lag analysis: a) get last
-	last, err = r.watcher.LastOffset(*loaderTopic, 0)
+	loaderLast, err := r.watcher.LastOffset(*loaderTopic, 0)
 	if err != nil {
 		return info, fmt.Errorf("Error getting last offset for %s", *loaderTopic)
 	}
-	info.loader.last = &last
-	klog.V(4).Infof("rsk/%s %s, lastOffset=%v", r.rsk.Name, *loaderTopic, last)
+	info.loader.last = &loaderLast
+	klog.V(4).Infof("rsk/%s %s, lastOffset=%v", r.rsk.Name, *loaderTopic, loaderLast)
 
 	// loader's lag analysis: b) get current
-	current, err = r.watcher.CurrentOffset(
+	loaderCurrent, err := r.watcher.CurrentOffset(
 		consumerGroupID(r.rsk.Name, r.rsk.Namespace, group.ID, "-loader"),
 		*loaderTopic,
 		0,
@@ -186,8 +187,8 @@ func (r *realtimeCalculator) fetchRealtimeInfo(
 	if err != nil {
 		return info, err
 	}
-	klog.V(4).Infof("rsk/%s %s, currentOffset=%v (queried)", r.rsk.Name, *loaderTopic, current)
-	if current == -1 {
+	klog.V(4).Infof("rsk/%s %s, currentOffset=%v (queried)", r.rsk.Name, *loaderTopic, loaderCurrent)
+	if loaderCurrent == -1 {
 		// CurrentOffset can be -1 in two cases (this may be required in batcher also)
 		// 1. When the Consumer Group was never created in that case we return and consider the topic not realtime
 		// 2. When the Consumer Group had processed before but now is showing -1 currentOffset as it is inactive due to less throughput.
@@ -200,11 +201,11 @@ func (r *realtimeCalculator) fetchRealtimeInfo(
 			// give the topic the opportunity to release based on its last found currentOffset
 			info.loader.current = group.LoaderCurrentOffset
 		} else {
-			group.LoaderCurrentOffset = &current
+			group.LoaderCurrentOffset = &loaderCurrent
 			// updates the new queried loader offset
 			klog.V(4).Infof("rsk/%s %s, cg found", r.rsk.Name, *loaderTopic)
 			updateTopicGroup(r.rsk, topic, group)
-			info.loader.current = &current
+			info.loader.current = &loaderCurrent
 		}
 
 	return info, nil

From 91d3878ddc8341cc3250ae9f442acedfea7a33fb Mon Sep 17 00:00:00 2001
From: Alok Kumar Singh
Date: Thu, 1 Apr 2021 14:04:20 +0530
Subject: [PATCH 49/49] Stop and reset ticker after processing

This is required so that big batches can form when the sink is fully
loaded: the time-based flush must not fire while a batch is still being
processed. Solves the time part of
https://github.com/practo/tipoca-stream/pull/172#issuecomment-811055840

---
 pkg/redshiftloader/loader_handler.go | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pkg/redshiftloader/loader_handler.go b/pkg/redshiftloader/loader_handler.go
index 85a04c5e8..b8206350a 100644
--- a/pkg/redshiftloader/loader_handler.go
+++ b/pkg/redshiftloader/loader_handler.go
@@ -162,6 +162,7 @@ func (h *loaderHandler) ConsumeClaim(session sarama.ConsumerGroupSession,
 			)
 			return fmt.Errorf("session ctx done, err: %v", session.Context().Err())
 		case message, ok := <-claimMsgChan:
+			maxWaitTicker.Stop()
 			if !ok {
 				klog.V(2).Infof(
 					"%s: consumeClaim returning. read msg channel closed",
@@ -220,6 +221,7 @@ func (h *loaderHandler) ConsumeClaim(session sarama.ConsumerGroupSession,
 				return err
 			}
 			*lastSchemaId = upstreamJobSchemaId
+			maxWaitTicker.Reset(time.Duration(*h.maxWaitSeconds) * time.Second)
 		case <-maxWaitTicker.C:
 			// Process the batch by time
 			klog.V(2).Infof(
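The two added lines implement a standard ticker discipline for batch flushing: stop the time-based flush the moment a message arrives, and restart the wait window only after the message has been processed. A simplified sketch of the pattern, not the loader's actual loop (msgs, handle, flushByTime are illustrative names; (*time.Ticker).Reset needs Go 1.15+):

	// consumeLoop: sketch of the stop/reset ticker pattern used above.
	func consumeLoop(msgs <-chan string, maxWait time.Duration) {
		ticker := time.NewTicker(maxWait)
		defer ticker.Stop()
		for {
			select {
			case msg := <-msgs:
				ticker.Stop()         // no time-based flush while busy
				handle(msg)           // may be slow when the sink is fully loaded
				ticker.Reset(maxWait) // the wait window restarts after processing
			case <-ticker.C:
				flushByTime() // fires only after a quiet maxWait
			}
		}
	}

One caveat worth knowing: Stop does not drain a tick already delivered to ticker.C, so an occasional early time-based flush is still possible; here that only costs a smaller batch, not correctness.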