diff --git a/controllers/apps/cluster_controller.go b/controllers/apps/cluster_controller.go index a41a3ededc2..1bc93ca6177 100644 --- a/controllers/apps/cluster_controller.go +++ b/controllers/apps/cluster_controller.go @@ -28,6 +28,7 @@ import ( corev1 "k8s.io/api/core/v1" policyv1 "k8s.io/api/policy/v1" rbacv1 "k8s.io/api/rbac/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" @@ -139,6 +140,9 @@ func (r *ClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct if re, ok := err.(intctrlutil.RequeueError); ok { return intctrlutil.RequeueAfter(re.RequeueAfter(), reqCtx.Log, re.Reason()) } + if apierrors.IsConflict(err) { + return intctrlutil.Requeue(reqCtx.Log, err.Error()) + } return intctrlutil.RequeueWithError(err, reqCtx.Log, "") } diff --git a/controllers/apps/cluster_plan_builder.go b/controllers/apps/cluster_plan_builder.go index bb04e036abd..25dbbb71daa 100644 --- a/controllers/apps/cluster_plan_builder.go +++ b/controllers/apps/cluster_plan_builder.go @@ -56,8 +56,6 @@ const ( clusterWeight ) -// TODO: cluster plan builder can be abstracted as a common flow - // clusterTransformContext a graph.TransformContext implementation for Cluster reconciliation type clusterTransformContext struct { context.Context @@ -233,21 +231,21 @@ func NewClusterPlanBuilder(ctx intctrlutil.RequestCtx, cli client.Client, req ct func (c *clusterPlanBuilder) defaultWalkFuncWithLogging(vertex graph.Vertex) error { node, ok := vertex.(*model.ObjectVertex) err := c.defaultWalkFunc(vertex) - if err != nil { - if !ok { - c.transCtx.Logger.Error(err, "") - } else { - if node.Action == nil { - c.transCtx.Logger.Error(err, fmt.Sprintf("%T", node)) - } else { - c.transCtx.Logger.Error(err, fmt.Sprintf("%s %T error", *node.Action, node.Obj)) - } - } + switch { + case err == nil: + return err + case !ok: + c.transCtx.Logger.Error(err, "") + case node.Action == nil: + c.transCtx.Logger.Error(err, fmt.Sprintf("%T", node)) + case apierrors.IsConflict(err): + return err + default: + c.transCtx.Logger.Error(err, fmt.Sprintf("%s %T error", *node.Action, node.Obj)) } return err } -// TODO: retry strategy on error func (c *clusterPlanBuilder) defaultWalkFunc(vertex graph.Vertex) error { node, ok := vertex.(*model.ObjectVertex) if !ok { @@ -281,7 +279,6 @@ func (c *clusterPlanBuilder) reconcileObject(node *model.ObjectVertex) error { case model.PATCH: patch := client.MergeFrom(node.OriObj) if err := c.cli.Patch(c.transCtx.Context, node.Obj, patch); err != nil && !apierrors.IsNotFound(err) { - c.transCtx.Logger.Error(err, fmt.Sprintf("patch %T error", node.OriObj)) return err } case model.DELETE: @@ -294,7 +291,6 @@ func (c *clusterPlanBuilder) reconcileObject(node *model.ObjectVertex) error { // delete secondary objects if _, ok := node.Obj.(*appsv1alpha1.Cluster); !ok { err := intctrlutil.BackgroundDeleteObject(c.cli, c.transCtx.Context, node.Obj) - // err := c.cli.Delete(c.transCtx.Context, node.obj) if err != nil && !apierrors.IsNotFound(err) { return err } @@ -323,20 +319,8 @@ func (c *clusterPlanBuilder) reconcileCluster(node *model.ObjectVertex) error { // cluster.meta and cluster.spec might change case model.STATUS: if !reflect.DeepEqual(cluster.ObjectMeta, origCluster.ObjectMeta) || !reflect.DeepEqual(cluster.Spec, origCluster.Spec) { - // TODO: we should Update instead of Patch cluster object, - // TODO: but Update failure happens too frequently as other controllers are updating cluster object too. - // TODO: use Patch here, revert to Update after refactoring done - // if err := c.cli.Update(c.ctx.Ctx, cluster); err != nil { - // tmpCluster := &appsv1alpha1.Cluster{} - // err = c.cli.Get(c.ctx.Ctx,client.ObjectKeyFromObject(origCluster), tmpCluster) - // c.ctx.Log.Error(err, fmt.Sprintf("update %T error, orig: %v, curr: %v, api-server: %v", origCluster, origCluster, cluster, tmpCluster)) - // return err - // } patch := client.MergeFrom(origCluster.DeepCopy()) if err := c.cli.Patch(c.transCtx.Context, cluster, patch); err != nil { - // log for debug - // TODO:(free6om) make error message smaller when refactor done. - c.transCtx.Logger.Error(err, fmt.Sprintf("patch %T error, orig: %v, curr: %v", origCluster, origCluster, cluster)) return err } } diff --git a/controllers/workloads/replicatedstatemachine_controller.go b/controllers/workloads/replicatedstatemachine_controller.go index 4c66f486748..dafccaa6483 100644 --- a/controllers/workloads/replicatedstatemachine_controller.go +++ b/controllers/workloads/replicatedstatemachine_controller.go @@ -25,6 +25,7 @@ import ( appsv1 "k8s.io/api/apps/v1" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/tools/record" ctrl "sigs.k8s.io/controller-runtime" @@ -74,6 +75,9 @@ func (r *ReplicatedStateMachineReconciler) Reconcile(ctx context.Context, req ct if re, ok := err.(model.RequeueError); ok { return intctrlutil.RequeueAfter(re.RequeueAfter(), reqCtx.Log, re.Reason()) } + if apierrors.IsConflict(err) { + return intctrlutil.Requeue(reqCtx.Log, err.Error()) + } return intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, "") } diff --git a/deploy/llm/templates/clusterversion.yaml b/deploy/llm/templates/clusterversion.yaml index 385b67f9351..4e6125644eb 100644 --- a/deploy/llm/templates/clusterversion.yaml +++ b/deploy/llm/templates/clusterversion.yaml @@ -125,3 +125,87 @@ spec: volumeMounts: - name: models mountPath: /models +--- +apiVersion: apps.kubeblocks.io/v1alpha1 +kind: ClusterVersion +metadata: + name: ggml-baichuan2-13b-q4 + labels: + {{- include "llm.labels" . | nindent 4 }} +spec: + clusterDefinitionRef: ggml + componentVersions: + - componentDefRef: ggml + versionsContext: + initContainers: + - name: download + image: infracreate-registry.cn-zhangjiakou.cr.aliyuncs.com/apecloud/baichuan2-13b-gguf:ggml-model-q4 + command: ["sh", "-c", "cp /models/ggml-model-q4.gguf /models-target/"] + volumeMounts: + - name: models + mountPath: /models-target + containers: + - name: ggml + image: {{ .Values.imageDev.registry | default "docker.io" }}/{{ .Values.imageDev.repository}}:{{ default .Chart.AppVersion .Values.imageDev.tagNew }} + env: + - name: MODEL + value: /models/ggml-model-q4.gguf + volumeMounts: + - name: models + mountPath: /models +--- +apiVersion: apps.kubeblocks.io/v1alpha1 +kind: ClusterVersion +metadata: + name: ggml-replit-code-3b-f16 + labels: + {{- include "llm.labels" . | nindent 4 }} +spec: + clusterDefinitionRef: ggml + componentVersions: + - componentDefRef: ggml + versionsContext: + initContainers: + - name: download + image: infracreate-registry.cn-zhangjiakou.cr.aliyuncs.com/apecloud/replit-code-3b:ggml-model-f16 + command: ["sh", "-c", "cp /models/ggml-model-f16.gguf /models-target/"] + volumeMounts: + - name: models + mountPath: /models-target + containers: + - name: ggml + image: {{ .Values.imageDev.registry | default "docker.io" }}/{{ .Values.imageDev.repository}}:{{ default .Chart.AppVersion .Values.imageDev.tagNew }} + env: + - name: MODEL + value: /models/ggml-model-f16.gguf + volumeMounts: + - name: models + mountPath: /models +--- +apiVersion: apps.kubeblocks.io/v1alpha1 +kind: ClusterVersion +metadata: + name: ggml-codeshell-7b-chat-q4 + labels: + {{- include "llm.labels" . | nindent 4 }} +spec: + clusterDefinitionRef: ggml + componentVersions: + - componentDefRef: ggml + versionsContext: + initContainers: + - name: download + image: infracreate-registry.cn-zhangjiakou.cr.aliyuncs.com/apecloud/codeshell-7b-chat:codeshell-chat-q4_0 + command: ["sh", "-c", "cp /models/codeshell-chat-q4_0.gguf /models-target/"] + volumeMounts: + - name: models + mountPath: /models-target + containers: + - name: ggml + image: {{ .Values.imageDev.registry | default "docker.io" }}/{{ .Values.imageDev.repository}}:{{ default .Chart.AppVersion .Values.imageDev.tagCodeShell }} + env: + - name: MODEL + value: /models/codeshell-chat-q4_0.gguf + volumeMounts: + - name: models + mountPath: /models diff --git a/deploy/llm/values.yaml b/deploy/llm/values.yaml index 9178bc4eac2..6e7ba8d12eb 100644 --- a/deploy/llm/values.yaml +++ b/deploy/llm/values.yaml @@ -10,3 +10,5 @@ imageDev: registry: infracreate-registry.cn-zhangjiakou.cr.aliyuncs.com # Overrides the image tag whose default is the chart appVersion. tag: latest + tagNew: latest-new + tagCodeShell: latest-codeshell diff --git a/deploy/nyancat/templates/clusterrole.yaml b/deploy/nyancat/templates/clusterrole.yaml index 78c8f0495ce..4a951814d5b 100644 --- a/deploy/nyancat/templates/clusterrole.yaml +++ b/deploy/nyancat/templates/clusterrole.yaml @@ -7,4 +7,7 @@ metadata: rules: - apiGroups: [""] resources: ["services", "pods", "secrets"] - verbs: ["get", "list"] + verbs: ["get", "list", "watch"] + - apiGroups: ["apps.kubeblocks.io"] + resources: ["clusters"] + verbs: ["get", "list", "watch"] diff --git a/deploy/oracle-mysql/templates/actionset-xtrabackup.yaml b/deploy/oracle-mysql/templates/actionset-xtrabackup.yaml index db5ccdc1fde..dc4afe2cac1 100644 --- a/deploy/oracle-mysql/templates/actionset-xtrabackup.yaml +++ b/deploy/oracle-mysql/templates/actionset-xtrabackup.yaml @@ -9,18 +9,18 @@ spec: backupType: Full env: - name: DATA_DIR - value: /var/lib/mysql + value: {{ .Values.dataMountPath }} backup: preBackup: [] postBackup: [] backupData: image: docker.io/perconalab/percona-xtrabackup:8.0.32 - runOnTargetPodNode: false + runOnTargetPodNode: true command: - bash - -c - | - {{- .Files.Get "dataprotection/bakcup.sh" | nindent 8 }} + {{- .Files.Get "dataprotection/backup.sh" | nindent 8 }} syncProgress: enabled: true intervalSeconds: 5 diff --git a/lorry/component/postgres/officalpostgres/manager_test.go b/lorry/component/postgres/officalpostgres/manager_test.go index 5bb80dce4f1..9e5d5a86475 100644 --- a/lorry/component/postgres/officalpostgres/manager_test.go +++ b/lorry/component/postgres/officalpostgres/manager_test.go @@ -165,7 +165,7 @@ func TestGetMemberAddrs(t *testing.T) { ctx := context.TODO() manager, mock, _ := MockDatabase(t) defer mock.Close() - cluster := &dcs.Cluster{} + cluster := &dcs.Cluster{Namespace: "default"} t.Run("get empty addrs", func(t *testing.T) { addrs := manager.GetMemberAddrs(ctx, cluster) @@ -182,7 +182,7 @@ func TestGetMemberAddrs(t *testing.T) { addrs := manager.GetMemberAddrs(ctx, cluster) assert.Equal(t, 1, len(addrs)) - assert.Equal(t, "test.pg-headless:5432", addrs[0]) + assert.Equal(t, "test.pg-headless.default.svc.cluster.local:5432", addrs[0]) }) } diff --git a/lorry/dcs/types.go b/lorry/dcs/types.go index a9410f532fa..f3b4dd92d9f 100644 --- a/lorry/dcs/types.go +++ b/lorry/dcs/types.go @@ -21,6 +21,9 @@ package dcs import ( "fmt" + + "github.com/apecloud/kubeblocks/pkg/constant" + viper "github.com/apecloud/kubeblocks/pkg/viperx" ) type Cluster struct { @@ -91,7 +94,8 @@ func (c *Cluster) GetMemberAddrWithPort(member Member) string { } func (c *Cluster) GetMemberAddr(member Member) string { - return fmt.Sprintf("%s.%s-headless", member.Name, c.ClusterCompName) + clusterDomain := viper.GetString(constant.KubernetesClusterDomainEnv) + return fmt.Sprintf("%s.%s-headless.%s.svc.%s", member.Name, c.ClusterCompName, c.Namespace, clusterDomain) } func (c *Cluster) GetMemberAddrWithName(name string) string { diff --git a/pkg/controller/rsm/plan_builder.go b/pkg/controller/rsm/plan_builder.go index c3c1e6d6196..eb028fadc33 100644 --- a/pkg/controller/rsm/plan_builder.go +++ b/pkg/controller/rsm/plan_builder.go @@ -120,7 +120,6 @@ func (b *PlanBuilder) rsmWalkFunc(v graph.Vertex) error { case model.UPDATE: err := b.cli.Update(b.transCtx.Context, vertex.Obj) if err != nil && !apierrors.IsNotFound(err) { - b.transCtx.Logger.Error(err, fmt.Sprintf("update %T error: %s", vertex.Obj, vertex.OriObj.GetName())) return err } case model.DELETE: