diff --git a/addons/qdrant/Chart.yaml b/addons/qdrant/Chart.yaml index 6af508b90..3329092a5 100644 --- a/addons/qdrant/Chart.yaml +++ b/addons/qdrant/Chart.yaml @@ -10,19 +10,24 @@ version: 1.0.0-alpha.0 # This is the version number of qdrant. appVersion: "1.10.0" +# Add a dependency to the kubeblocks definition library chart +dependencies: + - name: kblib + version: 0.1.0 + repository: file://../kblib + alias: extra + home: https://qdrant.tech/ icon: https://qdrant.tech/images/logo_with_text.svg - maintainers: - name: iziang url: https://github.com/apecloud/kubeblocks/ - sources: - https://github.com/apecloud/kubeblocks/ annotations: - addon.kubeblocks.io/kubeblocks-version: ">=0.9.0" + addon.kubeblocks.io/kubeblocks-version: ">=1.0.0" addon.kubeblocks.io/model: "vector" addon.kubeblocks.io/provider: "community" diff --git a/addons/qdrant/scripts-ut-spec/qdrant_member_leave_spec.sh b/addons/qdrant/scripts-ut-spec/qdrant_member_leave_spec.sh new file mode 100644 index 000000000..ce65ca914 --- /dev/null +++ b/addons/qdrant/scripts-ut-spec/qdrant_member_leave_spec.sh @@ -0,0 +1,204 @@ +# shellcheck shell=bash +# shellcheck disable=SC2034 + +# validate_shell_type_and_version defined in shellspec/spec_helper.sh used to validate the expected shell type and version this script needs to run. +if ! validate_shell_type_and_version "bash" 4 &>/dev/null; then + echo "qdrant_member_leave_spec.sh skip cases because dependency bash version 4 or higher is not installed." + exit 0 +fi + +source ./utils.sh + +# The unit test needs to rely on the common library functions defined in kblib. +# Therefore, we first dynamically generate the required common library files from the kblib library chart. 
+common_library_file="./common.sh" +generate_common_library $common_library_file + +Describe "Qdrant Member Leave Script Tests" + # Load the script to be tested + Include $common_library_file + Include ../scripts/qdrant-member-leave.sh + + init() { + # set ut_mode to true to hack control flow in the script + ut_mode="true" + } + BeforeAll "init" + + cleanup() { + rm -f $common_library_file; + } + AfterAll 'cleanup' + + un_setup() { + # Reset environment variables before each test + unset KB_LEAVE_MEMBER_POD_IP + leave_peer_uri="http://test-ip:6333" + } + + # Mock jq to simulate its output + jq() { + case $2 in + *".result.peer_id"*) + echo "leave-peer-id" + ;; + *".result.raft_info.leader"*) + echo "leader-peer-id" + ;; + *".result.collections | length"*) + echo 2 + ;; + *".result.collections[].name"*) + echo "collection1" "collection2" + ;; + *".result.local_shards[] | length"*) + echo 2 + ;; + *".result.local_shards[].shard_id"*) + echo "shard1" "shard2" + ;; + *".result.local_shards"*) + echo '["shard1","shard2"]' + ;; + *) + echo "unknown jq filter" + ;; + esac + } + + Describe "move_shards()" + It "moves shards from leave peer to leader" + un_setup + KB_LEAVE_MEMBER_POD_IP="test-ip" + cluster_info='{"result":{"peer_id":"leave-peer-id","raft_info":{"leader":"leader-peer-id"}}}' + leave_peer_id="leave-peer-id" + leader_peer_id="leader-peer-id" + + curl() { + # Mock the response for collections + if [[ $1 == *"/collections"* ]]; then + echo '{"result":{"collections":[{"name":"collection1"},{"name":"collection2"}]}}' + elif [[ $1 == *"/collections/collection1/cluster"* ]]; then + echo '{"result":{"local_shards":[{"shard_id":"shard1"},{"shard_id":"shard2"}]}}' + elif [[ $1 == *"/collections/collection2/cluster"* ]]; then + echo '{"result":{"local_shards":[{"shard_id":"shard3"}]}}' + fi + return 0 # Simulate successful curl + } + + check_leave_shard_ids() { + echo "mock check_leave_shard_ids called" + return 0 + } + + When run move_shards + The output should include 
"move shard shard1 in col_name collection1 from leave-peer-id to leader-peer-id" + The output should include "move shard shard2 in col_name collection1 from leave-peer-id to leader-peer-id" + The status should be success + End + + It "handles no collections found" + un_setup + KB_LEAVE_MEMBER_POD_IP="test-ip" + curl() { + echo '{"result":{"collections":[]}}' + return 0 + } + + jq() { + case $2 in + *".result.collections | length"*) + echo 0 + ;; + *) + echo "unknown jq filter" + ;; + esac + } + + When run move_shards + The output should include "no collections found in the cluster" + The status should be success + End + + It "handles no shards found in a collection" + un_setup + KB_LEAVE_MEMBER_POD_IP="test-ip" + curl() { + if [[ $1 == *"/collections"* ]]; then + echo '{"result":{"collections":[{"name":"collection1"}]}}' + elif [[ $1 == *"/collections/collection1/cluster"* ]]; then + echo '{"result":{"local_shards":[]}}' + fi + return 0 + } + + jq() { + case $2 in + *".result.peer_id"*) + echo "leave-peer-id" + ;; + *".result.raft_info.leader"*) + echo "leader-peer-id" + ;; + *".result.collections | length"*) + echo 2 + ;; + *".result.collections[].name"*) + echo "collection1" "collection2" + ;; + *".result.local_shards[] | length"*) + echo 0 + ;; + *) + echo "unknown jq filter" + ;; + esac + } + + When run move_shards + The output should include "no shards found in collection collection1" + The output should include "no shards found in collection collection2" + The status should be success + End + End + + Describe "remove_peer()" + It "removes the peer from the cluster" + un_setup + KB_LEAVE_MEMBER_POD_IP="test-ip" + leave_peer_id="leave-peer-id" + + curl() { + return 0 # Simulate successful delete + } + + When run remove_peer + The output should include "remove peer leave-peer-id from cluster" + The status should be success + End + End + + Describe "leave_member()" + It "executes the leave member process" + un_setup + KB_LEAVE_MEMBER_POD_IP="test-ip" + 
cluster_info='{"result":{"peer_id":"leave-peer-id","raft_info":{"leader":"leader-peer-id"}}}' + leave_peer_id="leave-peer-id" + leader_peer_id="leader-peer-id" + + move_shards() { + echo "mock move_shards called" + } + remove_peer() { + echo "mock remove_peer called" + } + + When run leave_member + The output should include "scaling in, we need to move local shards to other peers and remove local peer from the cluster" + The output should include "mock move_shards called" + The output should include "mock remove_peer called" + The status should be success + End + End +End \ No newline at end of file diff --git a/addons/qdrant/scripts-ut-spec/qdrant_setup_spec.sh b/addons/qdrant/scripts-ut-spec/qdrant_setup_spec.sh new file mode 100644 index 000000000..93da1b15b --- /dev/null +++ b/addons/qdrant/scripts-ut-spec/qdrant_setup_spec.sh @@ -0,0 +1,110 @@ +# shellcheck shell=bash +# shellcheck disable=SC2034 + +# validate_shell_type_and_version defined in shellspec/spec_helper.sh used to validate the expected shell type and version this script needs to run. +if ! validate_shell_type_and_version "bash" 4 &>/dev/null; then + echo "qdrant_setup_spec.sh skip cases because dependency bash version 4 or higher is not installed." + exit 0 +fi + +source ./utils.sh + +# The unit test needs to rely on the common library functions defined in kblib. +# Therefore, we first dynamically generate the required common library files from the kblib library chart. 
+common_library_file="./common.sh" +generate_common_library $common_library_file + +Describe "Qdrant Server Setup Script Tests" + # Load the script to be tested + Include $common_library_file + Include ../scripts/qdrant-setup.sh + + init() { + # set ut_mode to true to hack control flow in the script + ut_mode="true" + } + BeforeAll "init" + + cleanup() { + rm -f $common_library_file; + } + AfterAll 'cleanup' + + un_setup() { + # Reset environment variables before each test + unset QDRANT_POD_NAME_LIST + unset QDRANT_POD_FQDN_LIST + unset CURRENT_POD_NAME + } + + Describe "get_boostrap_node()" + It "returns the minimum lexicographical pod fqdn" + un_setup + QDRANT_POD_NAME_LIST="pod-a,pod-b,pod-c" + QDRANT_POD_FQDN_LIST="pod-a.example.com,pod-b.example.com,pod-c.example.com" + When run get_boostrap_node + The output should equal "pod-a.example.com" + The status should be success + End + + It "returns an error if the fqdn cannot be found" + un_setup + QDRANT_POD_NAME_LIST="pod-x,pod-y,pod-z" + QDRANT_POD_FQDN_LIST="pod-a.example.com,pod-y.example.com,pod-z.example.com" + When run get_boostrap_node + The stderr should include "Error: Failed to get pod: pod-x fqdn from pod fqdn list:" + The status should be failure + End + End + + Describe "start_server()" + It "starts server with bootstrap node when current pod is not bootstrap" + un_setup + CURRENT_POD_NAME="pod-b" + QDRANT_POD_NAME_LIST="pod-a,pod-b,pod-c" + QDRANT_POD_FQDN_LIST="pod-a.example.com,pod-b.example.com,pod-c.example.com" + ./tools/curl() { + echo "mock func get params: $1" + return 0 # Simulate successful curl + } + ./qdrant() { + echo "mock qdrant func get params: $1 $2 $3 $4" + return 0 # Simulate successful curl + } + When run start_server + The output should include "mock func get params: http://pod-a.example.com:6333/cluster" + The output should include "mock qdrant func get params: --bootstrap http://pod-a.example.com:6335 --uri http://pod-b.example.com:6335" + The status should be success + End + 
+ It "starts server with bootstrap node when current pod is bootstrap" + un_setup + CURRENT_POD_NAME="pod-a" + QDRANT_POD_NAME_LIST="pod-a,pod-b,pod-c" + QDRANT_POD_FQDN_LIST="pod-a.example.com,pod-b.example.com,pod-c.example.com" + ./qdrant() { + echo "mock qdrant func get params: $1 $2" + return 0 # Simulate successful curl + } + When run start_server + The output should include "mock qdrant func get params: --uri http://pod-a.example.com:6335" + The status should be success + End + + It "exits with error if QDRANT_POD_NAME_LIST is not set" + un_setup + QDRANT_POD_FQDN_LIST="pod-a.example.com,pod-b.example.com,pod-c.example.com" + When run start_server + The stderr should include "QDRANT_POD_NAME_LIST or QDRANT_POD_FQDN_LIST is not set, please check." + The status should be failure + End + + It "exits with error if QDRANT_POD_FQDN_LIST is not set" + un_setup + QDRANT_POD_NAME_LIST="pod-a,pod-b,pod-c" + When run start_server + The stderr should include "QDRANT_POD_NAME_LIST or QDRANT_POD_FQDN_LIST is not set, please check." 
+ The status should be failure + End + End +End \ No newline at end of file diff --git a/addons/qdrant/scripts-ut-spec/utils.sh b/addons/qdrant/scripts-ut-spec/utils.sh new file mode 100644 index 000000000..5b2506969 --- /dev/null +++ b/addons/qdrant/scripts-ut-spec/utils.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +# utils functions for shellspec unit tests + +convert_tpl_to_bash() { + local input_file="$1" + local output_file="$2" + + sed -e '/^{{\/\*$/,/^\*\/}}$/d' \ + -e '/^{{-.*}}/d' \ + -e 's/{{- define ".*" }}//' \ + -e 's/{{- end }}//' \ + "$input_file" >> "$output_file" +} + +generate_common_library() { + local library_file="$1" + + libcommons_tpl_file="../../kblib/templates/_libcommons.tpl" + libpods_tpl_file="../../kblib/templates/_libpods.tpl" + libstrings_tpl_file="../../kblib/templates/_libstrings.tpl" + libenvs_tpl_file="../../kblib/templates/_libenvs.tpl" + libcompvars_tpl_file="../../kblib/templates/_libcompvars.tpl" + libututils_tpl_file="../../kblib/templates/_libututils.tpl" + + convert_tpl_to_bash $libcommons_tpl_file "$library_file" + convert_tpl_to_bash $libpods_tpl_file "$library_file" + convert_tpl_to_bash $libstrings_tpl_file "$library_file" + convert_tpl_to_bash $libenvs_tpl_file "$library_file" + convert_tpl_to_bash $libcompvars_tpl_file "$library_file" + convert_tpl_to_bash $libututils_tpl_file "$library_file" +} \ No newline at end of file diff --git a/addons/qdrant/scripts/qdrant-backup.sh b/addons/qdrant/scripts/qdrant-backup.sh index e7bc47582..ecf8e7792 100644 --- a/addons/qdrant/scripts/qdrant-backup.sh +++ b/addons/qdrant/scripts/qdrant-backup.sh @@ -1,13 +1,24 @@ #!/usr/bin/env bash -set -e -set -o pipefail -export PATH="$PATH:$DP_DATASAFED_BIN_PATH" -export DATASAFED_BACKEND_BASE_PATH="$DP_BACKUP_BASE_PATH" +# shellcheck disable=SC2034 +ut_mode="false" +test || __() { + # when running in non-unit test mode, set the options "set -ex". 
+ set -ex; +} + +init_env() { + PATH="$PATH:$DP_DATASAFED_BIN_PATH" + export PATH + DATASAFED_BACKEND_BASE_PATH="$DP_BACKUP_BASE_PATH" + export DATASAFED_BACKEND_BASE_PATH + + endpoint="http://${DP_DB_HOST}:6333" +} # if the script exits with a non-zero exit code, touch a file to indicate that the backup failed, # the sync progress container will check this file and exit if it exists -function handle_exit() { +handle_exit() { exit_code=$? if [ $exit_code -ne 0 ]; then echo "failed with exit code $exit_code" @@ -15,33 +26,96 @@ function handle_exit() { exit 1 fi } -trap handle_exit EXIT -function save_backup_size() { - export DATASAFED_BACKEND_BASE_PATH="$(dirname $DP_BACKUP_BASE_PATH)" - TOTAL_SIZE=$(datasafed stat / | grep TotalSize | awk '{print $2}') - echo "{\"totalSize\":\"$TOTAL_SIZE\"}" >"${DP_BACKUP_INFO_FILE}" -} - -endpoint=http://${DP_DB_HOST}:6333 -collectionRes=$(curl ${endpoint}/collections) -collections=$(echo ${collectionRes} | jq -r '.result.collections[].name') -if [ -z $collections ]; then - save_backup_size - exit 0 -fi -# snapshot all collections -for c in ${collections}; do - echo "INFO: start to snapshot collection ${c}..." 
- snapshot=$(curl -XPOST ${endpoint}/collections/${c}/snapshots) - status=$(echo ${snapshot} | jq '.status') +save_backup_size() { + DATASAFED_BACKEND_BASE_PATH="$(dirname "$DP_BACKUP_BASE_PATH")" + export DATASAFED_BACKEND_BASE_PATH + + TOTAL_SIZE=$(datasafed stat / | grep TotalSize | awk '{print $2}') + echo "{\"totalSize\":\"$TOTAL_SIZE\"}" > "${DP_BACKUP_INFO_FILE}" +} + +get_collections() { + collections_response=$(curl "${endpoint}/collections") + echo "${collections_response}" | jq -r '.result.collections[].name' +} + +create_snapshot() { + collection="$1" + snapshot_response=$(curl -XPOST "${endpoint}/collections/${collection}/snapshots") + echo "${snapshot_response}" +} + +validate_snapshot_status() { + snapshot_response="$1" + status=$(echo "${snapshot_response}" | jq '.status') + if [ "${status}" != "ok" ] && [ "${status}" != "\"ok\"" ]; then - echo "backup failed, status: ${status}" - exit 1 + echo "backup failed, status: ${status}" >&2 + return 1 + fi + return 0 +} + +upload_snapshot() { + collection="$1" + name="$2" + + curl -v --fail-with-body \ + "${endpoint}/collections/${collection}/snapshots/${name}" | \ + datasafed push - "/${collection}.snapshot" +} + +delete_snapshot() { + collection="$1" + name="$2" + + curl -XDELETE "${endpoint}/collections/${collection}/snapshots/${name}" +} + +backup_collection() { + collection="$1" + echo "INFO: start to snapshot collection ${collection}..." + + snapshot_response=$(create_snapshot "${collection}") + validate_snapshot_status "${snapshot_response}" || return 1 + + name=$(echo "${snapshot_response}" | jq -r '.result.name') + upload_snapshot "${collection}" "${name}" + delete_snapshot "${collection}" "${name}" + + echo "INFO: snapshot collection ${collection} successfully." 
+} + +backup_all_collections() { + collections=$(get_collections) + if [ -z "${collections}" ]; then + save_backup_size + exit 0 fi - name=$(echo ${snapshot} | jq -r '.result.name') - curl -v --fail-with-body ${endpoint}/collections/${c}/snapshots/${name} | datasafed push - "/${c}.snapshot" - curl -XDELETE ${endpoint}/collections/${c}/snapshots/${name} - echo "INFO: snapshot collection ${c} successfully." -done -save_backup_size + + for collection in ${collections}; do + if ! backup_collection "${collection}"; then + echo "backup failed for collection ${collection}" >&2 + exit 1 + fi + done + + save_backup_size +} + +do_backup() { + init_env + backup_all_collections +} + +# This is magic for shellspec ut framework. +# Sometime, functions are defined in a single shell script. +# You will want to test it. but you do not want to run the script. +# When included from shellspec, __SOURCED__ variable defined and script +# end here. The script path is assigned to the __SOURCED__ variable. +${__SOURCED__:+false} : || return 0 + +# main +trap handle_exit EXIT +do_backup diff --git a/addons/qdrant/scripts/qdrant-member-leave.sh b/addons/qdrant/scripts/qdrant-member-leave.sh index 5670417a4..48c94a7b1 100644 --- a/addons/qdrant/scripts/qdrant-member-leave.sh +++ b/addons/qdrant/scripts/qdrant-member-leave.sh @@ -1,68 +1,90 @@ #!/usr/bin/env sh -set -x -set -o errtrace -set -o nounset -set -o pipefail +# shellcheck disable=SC2034 +ut_mode="false" +test || __() { + # when running in non-unit test mode, set the options "set -ex". 
+ set -ex; +} -leave_peer_uri=http://${KB_LEAVE_MEMBER_POD_IP}:6333 -cluster_info=`curl -s ${leave_peer_uri}/cluster` -leave_peer_id=`echo "${cluster_info}"| jq -r .result.peer_id` -leader_peer_id=`echo "${cluster_info}" | jq -r .result.raft_info.leader` +init_cluster_info() { + leave_peer_uri="http://${KB_LEAVE_MEMBER_POD_IP}:6333" + cluster_info=$(curl -s "${leave_peer_uri}/cluster") + leave_peer_id=$(echo "${cluster_info}" | jq -r .result.peer_id) + leader_peer_id=$(echo "${cluster_info}" | jq -r .result.raft_info.leader) +} move_shards() { - cols=`curl -s ${leave_peer_uri}/collections` - col_count=`echo ${cols} | jq -r '.result.collections | length'` - if [[ ${col_count} -eq 0 ]]; then - echo "no collections found in the cluster" - return - fi - col_names=`echo ${cols} | jq -r '.result.collections[].name'` - for col_name in ${col_names}; do - col_cluster_info=`curl -s ${leave_peer_uri}/collections/${col_name}/cluster` - col_shard_count=`echo ${col_cluster_info} | jq -r '.result.local_shards[] | length'` - if [[ ${col_shard_count} -eq 0 ]]; then - echo "no shards found in collection ${col_name}" - continue - fi + cols=$(curl -s "${leave_peer_uri}/collections") + col_count=$(echo "${cols}" | jq -r '.result.collections | length') - leave_shard_ids=`echo ${col_cluster_info} | jq -r '.result.local_shards[].shard_id'` - for shard_id in ${leave_shard_ids}; do - echo "move shard ${shard_id} in col_name ${col_name} from ${leave_peer_id} to ${leader_peer_id}" - curl -s -X POST -H "Content-Type: application/json" \ - -d '{"move_shard":{"shard_id": '${shard_id}',"to_peer_id": '${leader_peer_id}',"from_peer_id": '${leave_peer_id}}}'' \ - ${leave_peer_uri}/collections/${col_name}/cluster - done + if [ "${col_count}" -eq 0 ]; then + echo "no collections found in the cluster" + return + fi + + col_names=$(echo "${cols}" | jq -r '.result.collections[].name') + for col_name in ${col_names}; do + col_cluster_info=$(curl -s "${leave_peer_uri}/collections/${col_name}/cluster") + 
col_shard_count=$(echo "${col_cluster_info}" | jq -r '.result.local_shards[] | length') + + if [ "${col_shard_count}" -eq 0 ]; then + echo "no shards found in collection ${col_name}" + continue + fi - while true; do - col_cluster_info=`curl -s ${leave_peer_uri}/collections/${col_name}/cluster` - leave_shard_ids=`echo ${col_cluster_info} | jq -r '.result.local_shards[].shard_id'` - if [ -z "${leave_shard_ids}" ]; then - echo "all shards in collection ${col_name} has been moved" - break - fi - sleep 1 - done + leave_shard_ids=$(echo "${col_cluster_info}" | jq -r '.result.local_shards[].shard_id') + for shard_id in ${leave_shard_ids}; do + echo "move shard ${shard_id} in col_name ${col_name} from ${leave_peer_id} to ${leader_peer_id}" + curl -s -X POST -H "Content-Type: application/json" \ + -d "{\"move_shard\":{\"shard_id\": ${shard_id},\"to_peer_id\": ${leader_peer_id},\"from_peer_id\": ${leave_peer_id}}}" \ + "${leave_peer_uri}/collections/${col_name}/cluster" done + + check_leave_shard_ids "${leave_peer_uri}" "${col_name}" + done +} + +check_leave_shard_ids() { + leave_peer_uri=$1 + col_name=$2 + + while true; do + col_cluster_info=$(curl -s "${leave_peer_uri}/collections/${col_name}/cluster") + leave_shard_ids=$(echo "${col_cluster_info}" | jq -r '.result.local_shards[].shard_id') + if [ -z "${leave_shard_ids}" ]; then + echo "all shards in collection ${col_name} has been moved" + break + fi + echo "shards ${leave_shard_ids} in collection ${col_name} are still moving..." 
+ sleep 1 + done } remove_peer() { - echo "remove peer ${leave_peer_id} from cluster" - curl -v -XDELETE ${leave_peer_uri}/cluster/peer/${leave_peer_id} + echo "remove peer ${leave_peer_id} from cluster" + curl -v -XDELETE "${leave_peer_uri}/cluster/peer/${leave_peer_id}" } leave_member() { - echo "scaling in, we need to move local shards to other peers and remove local peer from the cluster" - echo "cluster info: ${cluster_info}" - move_shards - remove_peer + echo "scaling in, we need to move local shards to other peers and remove local peer from the cluster" + echo "cluster info: ${cluster_info}" + move_shards + remove_peer } +# This is magic for shellspec ut framework. +# Sometime, functions are defined in a single shell script. +# You will want to test it. but you do not want to run the script. +# When included from shellspec, __SOURCED__ variable defined and script +# end here. The script path is assigned to the __SOURCED__ variable. +${__SOURCED__:+false} : || return 0 + # lock file to prevent concurrent leave_member # flock will return 1 if the lock is already held by another process, this is expected +init_cluster_info ( - flock -n -x 9 - if [ $? != 0 ]; then + if ! 
flock -n -x 9; then echo "member is already in leaving" exit 1 fi diff --git a/addons/qdrant/scripts/qdrant-pre-stop.sh b/addons/qdrant/scripts/qdrant-pre-stop.sh deleted file mode 100644 index c3266111e..000000000 --- a/addons/qdrant/scripts/qdrant-pre-stop.sh +++ /dev/null @@ -1,79 +0,0 @@ -#!/usr/bin/env bash - -set -x -set -o errexit -set -o errtrace -set -o nounset -set -o pipefail - -curl=/qdrant/tools/curl -jq=/qdrant/tools/jq - -idx=${KB_POD_NAME##*-} -current_component_replicas=$(cat /etc/annotations/component-replicas) -local_uri=http://localhost:6333 - -cluster_info=`$curl -s ${local_uri}/cluster` -local_peer_id=`echo "${cluster_info}"| $jq -r .result.peer_id` -leader_peer_id=`echo "${cluster_info}" | $jq -r .result.raft_info.leader` - -move_shards() { - cols=`$curl -s ${local_uri}/collections` - col_count=`echo ${cols} | $jq -r '.result.collections | length'` - if [[ ${col_count} -eq 0 ]]; then - echo "no collections found in the cluster" - return - fi - col_names=`echo ${cols} | $jq -r '.result.collections[].name'` - for col_name in ${col_names}; do - col_cluster_info=`$curl -s ${local_uri}/collections/${col_name}/cluster` - col_shard_count=`echo ${col_cluster_info} | $jq -r '.result.local_shards[] | length'` - if [[ ${col_shard_count} -eq 0 ]]; then - echo "no shards found in collection ${col_name}" - continue - fi - - local_shard_ids=`echo ${col_cluster_info} | $jq -r '.result.local_shards[].shard_id'` - for shard_id in ${local_shard_ids}; do - echo "move shard ${shard_id} in col_name ${col_name} from ${local_peer_id} to ${leader_peer_id}" - $curl -s -X POST -H "Content-Type: application/json" \ - -d '{"move_shard":{"shard_id": '${shard_id}',"to_peer_id": '${leader_peer_id}',"from_peer_id": '${local_peer_id}}}'' \ - ${local_uri}/collections/${col_name}/cluster - done - - while true; do - col_cluster_info=`$curl -s ${local_uri}/collections/${col_name}/cluster` - local_shard_ids=`echo ${col_cluster_info} | $jq -r '.result.local_shards[].shard_id'` - if 
[ -z "${local_shard_ids}" ]; then - echo "all shards in collection ${col_name} has been moved" - break - fi - sleep 1 - done - done -} - -remove_peer() { -# declare -A peer_to_uri=() -# peer_ids="`echo ${cluster_info} | jq -r '.result.peers | keys'`" -# for peer_id in "${peer_ids[@]}"; do -# peer_uri=`echo ${cluster_info} | jq -r ".result.peers.${peer_id}.uri"` -# peer_to_uri[peer_id]=peer_uri -# done - - echo "remove local peer ${local_peer_id} from cluster" - $curl -v -XDELETE ${local_uri}/cluster/peer/${local_peer_id} -} - -if [ ! "$idx" -lt "$current_component_replicas" ] && [ "$current_component_replicas" -ne 0 ]; then - echo "scaling in, we need to move local shards to other peers and remove local peer from the cluster" - - echo "cluster info: ${cluster_info}" - - move_shards - - remove_peer -else - # stop, do nothing. - echo "stop, do nothing" -fi \ No newline at end of file diff --git a/addons/qdrant/scripts/qdrant-restore.sh b/addons/qdrant/scripts/qdrant-restore.sh index 8d458f92c..ce4f76b98 100644 --- a/addons/qdrant/scripts/qdrant-restore.sh +++ b/addons/qdrant/scripts/qdrant-restore.sh @@ -1,20 +1,50 @@ -#!/usr/bin/env bash +#!/usr/bin/env sh -set -e -set -o pipefail -export PATH="$PATH:$DP_DATASAFED_BIN_PATH" -export DATASAFED_BACKEND_BASE_PATH="$DP_BACKUP_BASE_PATH" +# shellcheck disable=SC2034 +ut_mode="false" +test || __() { + # when running in non-unit test mode, set the options "set -ex". + set -ex; +} + +init_restore() { + PATH="$PATH:$DP_DATASAFED_BIN_PATH" + export PATH + DATASAFED_BACKEND_BASE_PATH="$DP_BACKUP_BASE_PATH" + export DATASAFED_BACKEND_BASE_PATH + + SNAPSHOT_DIR="${DATA_DIR}/_dp_snapshots" + mkdir -p "${SNAPSHOT_DIR}" +} + +restore_snapshot() { + snapshot="$1" + collection_name="${snapshot%.*}" -SNAPSHOT_DIR="${DATA_DIR}/_dp_snapshots" -mkdir -p "${SNAPSHOT_DIR}" -for snapshot in $(datasafed list /) ; do - collection_name=${snapshot%.*} echo "INFO: start to restore collection ${collection_name}..." 
- # download snapshot file datasafed pull "${snapshot}" "${SNAPSHOT_DIR}/${snapshot}" - curl -X POST "http://${DP_DB_HOST}:6333/collections/${collection_name}/snapshots/upload?priority=snapshot" \ + + curl -X POST \ + "http://${DP_DB_HOST}:6333/collections/${collection_name}/snapshots/upload?priority=snapshot" \ -H 'Content-Type:multipart/form-data' \ -F "snapshot=@${SNAPSHOT_DIR}/${snapshot}" + echo "upload collection ${collection_name} successfully" -done +} + +restore_all() { + datasafed list / | while read -r snapshot; do + [ -n "${snapshot}" ] && restore_snapshot "${snapshot}" + done +} + +# This is magic for shellspec ut framework. +# Sometime, functions are defined in a single shell script. +# You will want to test it. but you do not want to run the script. +# When included from shellspec, __SOURCED__ variable defined and script +# end here. The script path is assigned to the __SOURCED__ variable. +${__SOURCED__:+false} : || return 0 +# main +init_restore +restore_all diff --git a/addons/qdrant/scripts/qdrant-setup.sh b/addons/qdrant/scripts/qdrant-setup.sh index 4b1c4a375..bd8695578 100644 --- a/addons/qdrant/scripts/qdrant-setup.sh +++ b/addons/qdrant/scripts/qdrant-setup.sh @@ -1,16 +1,63 @@ #!/usr/bin/env bash -IDX=${KB_POD_NAME##*-} -HOSTNAME=$(eval echo \$KB_QDRANT_"${IDX}"_HOSTNAME).${KB_NAMESPACE}.svc${CLUSTER_DOMAIN} -BOOTSTRAP_HOSTNAME=$(eval echo \$KB_QDRANT_0_HOSTNAME).${KB_NAMESPACE}.svc${CLUSTER_DOMAIN} - -if [ "$IDX" == "0" ]; then - ./qdrant --uri "http://${HOSTNAME}:6335" -else - echo "BOOTSTRAP_HOSTNAME: ${BOOTSTRAP_HOSTNAME}" - until ./tools/curl http://${BOOTSTRAP_HOSTNAME}:6333/cluster; do - echo "INFO: wait for bootstrap node starting..." 
- sleep 1; - done - ./qdrant --bootstrap "http://${BOOTSTRAP_HOSTNAME}:6335" --uri "http://${HOSTNAME}:6335" -fi +load_common_library() { + # the common.sh scripts is mounted to the same path which is defined in the cmpd.spec.scripts + common_library_file="/qdrant/scripts/common.sh" + # shellcheck disable=SC1090 + source "${common_library_file}" +} + +# get the min lexicographical order pod fqdn as the bootstrap node +get_boostrap_node() { + min_lexicographical_pod_name=$(min_lexicographical_order_pod "$QDRANT_POD_NAME_LIST") + min_lexicographical_pod_fqdn=$(get_target_pod_fqdn_from_pod_fqdn_vars "$QDRANT_POD_FQDN_LIST" "$min_lexicographical_pod_name") + if is_empty "$min_lexicographical_pod_fqdn"; then + echo "Error: Failed to get pod: $min_lexicographical_pod_name fqdn from pod fqdn list: $QDRANT_POD_FQDN_LIST. Exiting." >&2 + return 1 + fi + echo $min_lexicographical_pod_fqdn + return 0 +} + +start_server() { + # check QDRANT_POD_NAME_LIST and QDRANT_POD_FQDN_LIST are set + if is_empty "$QDRANT_POD_NAME_LIST" || is_empty "$QDRANT_POD_FQDN_LIST"; then + echo "QDRANT_POD_NAME_LIST or QDRANT_POD_FQDN_LIST is not set, please check." >&2 + return 1 + fi + + current_pod_fqdn=$(get_target_pod_fqdn_from_pod_fqdn_vars "$QDRANT_POD_FQDN_LIST" "$CURRENT_POD_NAME") + if is_empty "$current_pod_fqdn"; then + echo "Error: Failed to get current pod: $CURRENT_POD_NAME fqdn from qdrant pod fqdn list: $QDRANT_POD_FQDN_LIST. Exiting." >&2 + exit 1 + fi + + # get the min lexicographical order pod fqdn as the bootstrap node + boostrap_node_fqdn=$(get_boostrap_node) + status=$? + if [ $status -ne 0 ]; then + echo "Error: Failed to get bootstrap node fqdn. Exiting." >&2 + exit 1 + fi + + if [ "$current_pod_fqdn" == "$boostrap_node_fqdn" ]; then + ./qdrant --uri "http://${current_pod_fqdn}:6335" + else + until ./tools/curl http://${boostrap_node_fqdn}:6333/cluster; do + echo "INFO: wait for bootstrap node: $boostrap_node_fqdn starting..." 
+ sleep 1; + done + ./qdrant --bootstrap "http://${boostrap_node_fqdn}:6335" --uri "http://${current_pod_fqdn}:6335" + fi +} + +# This is magic for shellspec ut framework. +# Sometime, functions are defined in a single shell script. +# You will want to test it. but you do not want to run the script. +# When included from shellspec, __SOURCED__ variable defined and script +# end here. The script path is assigned to the __SOURCED__ variable. +${__SOURCED__:+false} : || return 0 + +# main +load_common_library +start_server \ No newline at end of file diff --git a/addons/qdrant/templates/_helpers.tpl b/addons/qdrant/templates/_helpers.tpl index d8772d725..61ef891d4 100644 --- a/addons/qdrant/templates/_helpers.tpl +++ b/addons/qdrant/templates/_helpers.tpl @@ -48,4 +48,46 @@ Selector labels {{- define "qdrant.selectorLabels" -}} app.kubernetes.io/name: {{ include "qdrant.name" . }} app.kubernetes.io/instance: {{ .Release.Name }} -{{- end }} \ No newline at end of file +{{- end }} + +{{/* +Common annotations +*/}} +{{- define "qdrant.annotations" -}} +helm.sh/resource-policy: keep +{{- end }} + +{{/* +Define qdrant component definition name +*/}} +{{- define "qdrant.cmpdName" -}} +qdrant-{{ .Chart.Version }} +{{- end -}} + +{{/* +Define qdrant component definition regex pattern +*/}} +{{- define "qdrant.cmpdRegexPattern" -}} +^qdrant- +{{- end -}} + +{{/* +Define qdrant scripts tpl name +*/}} +{{- define "qdrant.scriptsTplName" -}} +qdrant-scripts-template +{{- end -}} + +{{/* +Define qdrant configuration tpl name +*/}} +{{- define "qdrant.configTplName" -}} +qdrant-config-template +{{- end -}} + +{{/* +Define qdrant config constraint name +*/}} +{{- define "qdrant.configConstraintName" -}} +qdrant-config-constraints +{{- end -}} \ No newline at end of file diff --git a/addons/qdrant/templates/backuppolicytemplate.yaml b/addons/qdrant/templates/backuppolicytemplate.yaml index 1435da12a..94bff3b05 100644 --- a/addons/qdrant/templates/backuppolicytemplate.yaml +++ 
b/addons/qdrant/templates/backuppolicytemplate.yaml @@ -5,9 +5,9 @@ metadata: labels: {{- include "qdrant.labels" . | nindent 4 }} spec: - serviceKind: Qdrant + serviceKind: qdrant compDefs: - - qdrant + - {{ include "qdrant.cmpdRegexPattern" . }} target: role: "" strategy: All diff --git a/addons/qdrant/templates/clusterdefinition.yaml b/addons/qdrant/templates/clusterdefinition.yaml index ee0fad70e..cc64ae417 100644 --- a/addons/qdrant/templates/clusterdefinition.yaml +++ b/addons/qdrant/templates/clusterdefinition.yaml @@ -10,4 +10,4 @@ spec: - name: cluster components: - name: qdrant - compDef: qdrant + compDef: {{ include "qdrant.cmpdRegexPattern" . }} diff --git a/addons/qdrant/templates/componentdefinition.yaml b/addons/qdrant/templates/componentdefinition.yaml index 7f84632c3..14f7ed86a 100644 --- a/addons/qdrant/templates/componentdefinition.yaml +++ b/addons/qdrant/templates/componentdefinition.yaml @@ -1,9 +1,11 @@ apiVersion: apps.kubeblocks.io/v1 kind: ComponentDefinition metadata: - name: qdrant + name: {{ include "qdrant.cmpdName" . }} labels: {{- include "qdrant.labels" . | nindent 4 }} + annotations: + {{- include "qdrant.annotations" . | nindent 4 }} spec: provider: kubeblocks description: High-performance, massive-scale Vector Database for the next generation of AI. @@ -21,32 +23,42 @@ spec: - name: grpc-qdrant port: 6334 targetPort: grpc-qdrant - lifecycleActions: - memberLeave: - exec: - image: {{ .Values.image.registry | default "docker.io" }}/{{ .Values.image.tools.repository }}:{{ .Values.image.tools.tag | default "latest" }} - command: - - /bin/sh - - -c - - | - {{- .Files.Get "scripts/qdrant-member-leave.sh" | nindent 12 }} - targetPodSelector: Any - container: qdrant configs: - name: qdrant-config-template - templateRef: qdrant-config-template - constraintRef: qdrant-config-constraints + templateRef: {{ include "qdrant.configTplName" . }} + constraintRef: {{ include "qdrant.configConstraintName" . 
}} volumeName: qdrant-config namespace: {{ .Release.Namespace }} scripts: - name: qdrant-scripts - templateRef: qdrant-scripts + templateRef: {{ include "qdrant.scriptsTplName" . }} namespace: {{ .Release.Namespace }} volumeName: scripts defaultMode: 0555 volumes: - name: data needSnapshot: true + vars: + - name: QDRANT_POD_NAME_LIST + valueFrom: + componentVarRef: + optional: false + podNames: Required + - name: QDRANT_POD_FQDN_LIST + valueFrom: + componentVarRef: + optional: false + podFQDNs: Required + lifecycleActions: + memberLeave: + exec: + image: {{ .Values.image.registry | default "docker.io" }}/{{ .Values.image.tools.repository }}:{{ .Values.image.tools.tag | default "latest" }} + command: + - /bin/sh + - -c + - /qdrant/scripts/member-leave.sh # file name must match the "member-leave.sh" key of the scripts ConfigMap + targetPodSelector: Any + container: qdrant runtime: securityContext: fsGroup: 1001 @@ -130,8 +142,11 @@ spec: value: "true" - name: SERVICE_PORT value: "6333" - - name: CLUSTER_DOMAIN - value: "{{ .Values.clusterDomain }}" + - name: CURRENT_POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name dnsPolicy: ClusterFirst enableServiceLinks: true volumes: diff --git a/addons/qdrant/templates/componentversion.yaml b/addons/qdrant/templates/componentversion.yaml index a6deb03f1..a7e812ae8 100644 --- a/addons/qdrant/templates/componentversion.yaml +++ b/addons/qdrant/templates/componentversion.yaml @@ -7,7 +7,7 @@ metadata: spec: compatibilityRules: - compDefs: - - qdrant + - {{ include "qdrant.cmpdRegexPattern" . 
}} releases: - 1.5.0 - 1.7.3 @@ -20,23 +20,28 @@ spec: images: qdrant: {{ .Values.image.registry | default "docker.io" }}/{{ .Values.image.repository}}:v1.5.0 qdrant-tools: {{ .Values.image.registry | default "docker.io" }}/{{ .Values.image.tools.repository }}:{{ .Values.image.tools.tag | default "latest" }} + memberleave: {{ .Values.image.registry | default "docker.io" }}/{{ .Values.image.tools.repository }}:{{ .Values.image.tools.tag | default "latest" }} - name: 1.7.3 serviceVersion: 1.7.3 images: qdrant: {{ .Values.image.registry | default "docker.io" }}/{{ .Values.image.repository}}:v1.7.3 qdrant-tools: {{ .Values.image.registry | default "docker.io" }}/{{ .Values.image.tools.repository }}:{{ .Values.image.tools.tag | default "latest" }} + memberleave: {{ .Values.image.registry | default "docker.io" }}/{{ .Values.image.tools.repository }}:{{ .Values.image.tools.tag | default "latest" }} - name: 1.8.1 serviceVersion: 1.8.1 images: qdrant: {{ .Values.image.registry | default "docker.io" }}/{{ .Values.image.repository}}:v1.8.1 qdrant-tools: {{ .Values.image.registry | default "docker.io" }}/{{ .Values.image.tools.repository }}:{{ .Values.image.tools.tag | default "latest" }} + memberleave: {{ .Values.image.registry | default "docker.io" }}/{{ .Values.image.tools.repository }}:{{ .Values.image.tools.tag | default "latest" }} - name: 1.8.4 serviceVersion: 1.8.4 images: qdrant: {{ .Values.image.registry | default "docker.io" }}/{{ .Values.image.repository}}:v1.8.4 qdrant-tools: {{ .Values.image.registry | default "docker.io" }}/{{ .Values.image.tools.repository }}:{{ .Values.image.tools.tag | default "latest" }} + memberleave: {{ .Values.image.registry | default "docker.io" }}/{{ .Values.image.tools.repository }}:{{ .Values.image.tools.tag | default "latest" }} - name: 1.10.0 serviceVersion: 1.10.0 images: qdrant: {{ .Values.image.registry | default "docker.io" }}/{{ .Values.image.repository}}:v1.10.0 qdrant-tools: {{ .Values.image.registry | default "docker.io" 
}}/{{ .Values.image.tools.repository }}:{{ .Values.image.tools.tag | default "latest" }} + memberleave: {{ .Values.image.registry | default "docker.io" }}/{{ .Values.image.tools.repository }}:{{ .Values.image.tools.tag | default "latest" }} diff --git a/addons/qdrant/templates/configconstraint.yaml b/addons/qdrant/templates/configconstraint.yaml index 73f11aaa1..43b030066 100644 --- a/addons/qdrant/templates/configconstraint.yaml +++ b/addons/qdrant/templates/configconstraint.yaml @@ -1,7 +1,7 @@ apiVersion: apps.kubeblocks.io/v1beta1 kind: ConfigConstraint metadata: - name: qdrant-config-constraints + name: {{ include "qdrant.configConstraintName" . }} labels: {{- include "qdrant.labels" . | nindent 4 }} spec: diff --git a/addons/qdrant/templates/configmap.yaml b/addons/qdrant/templates/configuration-template.yaml similarity index 99% rename from addons/qdrant/templates/configmap.yaml rename to addons/qdrant/templates/configuration-template.yaml index a19856d68..462d43c58 100644 --- a/addons/qdrant/templates/configmap.yaml +++ b/addons/qdrant/templates/configuration-template.yaml @@ -1,11 +1,10 @@ apiVersion: v1 kind: ConfigMap metadata: - name: qdrant-config-template + name: {{ include "qdrant.configTplName" . }} namespace: {{ .Release.Namespace | quote }} labels: {{- include "qdrant.labels" . | nindent 4 }} - data: production.yaml: |- debug: false diff --git a/addons/qdrant/templates/scripts-template.yaml b/addons/qdrant/templates/scripts-template.yaml new file mode 100644 index 000000000..65620546f --- /dev/null +++ b/addons/qdrant/templates/scripts-template.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "qdrant.scriptsTplName" . }} + labels: + {{- include "qdrant.labels" . 
| nindent 4 }} +data: + common.sh: |- + #!/bin/bash + {{- include "kblib.compvars.get_target_pod_fqdn_from_pod_fqdn_vars" $ | nindent 4 }} + {{- include "kblib.pods.min_lexicographical_order_pod" $ | nindent 4 }} + {{- include "kblib.strings.is_empty" $ | nindent 4 }} + setup.sh: |- + {{- .Files.Get "scripts/qdrant-setup.sh" | nindent 4 }} + member-leave.sh: |- + {{- .Files.Get "scripts/qdrant-member-leave.sh" | nindent 4 }} diff --git a/addons/qdrant/templates/scripts.yaml b/addons/qdrant/templates/scripts.yaml deleted file mode 100644 index bcdc26004..000000000 --- a/addons/qdrant/templates/scripts.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: qdrant-scripts - labels: - {{- include "qdrant.labels" . | nindent 4 }} -data: - setup.sh: |- - {{- .Files.Get "scripts/qdrant-setup.sh" | nindent 4 }} - pre-stop.sh: |- - {{- .Files.Get "scripts/qdrant-pre-stop.sh" | nindent 4 }} - member-leave.sh: |- - {{- .Files.Get "scripts/qdrant-member-leave.sh" | nindent 4 }} diff --git a/addons/qdrant/values.yaml b/addons/qdrant/values.yaml index 9dfdb8591..38ed3e0c3 100644 --- a/addons/qdrant/values.yaml +++ b/addons/qdrant/values.yaml @@ -2,14 +2,9 @@ # This is a YAML-formatted file. # Declare variables to be passed into your templates. - nameOverride: "" -fullnameOverride: "" - -## @param commonLabels Labels to add to all deployed objects -## -commonLabels: {} +fullnameOverride: "" ## @param application images ## @@ -22,10 +17,5 @@ image: repository: apecloud/curl-jq tag: 0.1.0 -## @param debugEnabled enables containers' debug logging -## -debugEnabled: true - +## @param dataMountPath data volume mount path dataMountPath: /qdrant/storage - -clusterDomain: ".cluster.local"