Skip to content

Commit

Permalink
feat(cluster): Add support for request proxying for scale out (#2385)
Browse files Browse the repository at this point in the history
* feat(cluster): initial commit for scale-out cluster

Signed-off-by: Ramkumar Chinchani <[email protected]>

* feat(cluster): support shared storage scale out

This change introduces support for shared storage backed
zot cluster scale out.

New feature
Multiple stateless zot instances can run using the same shared
storage backend where each instance looks at a specific set
of repositories based on a siphash of the repository name to improve
scale as the load is distributed across multiple instances.
For a given config, there will only be one instance that can perform
dist-spec read/write on a given repository.

What's changed?
- introduced a transparent request proxy for dist-spec endpoints based on
siphash of repository name.
- new config for scale out cluster that specifies list of
cluster members.

Signed-off-by: Vishwas Rajashekar <[email protected]>

---------

Signed-off-by: Ramkumar Chinchani <[email protected]>
Signed-off-by: Vishwas Rajashekar <[email protected]>
Co-authored-by: Ramkumar Chinchani <[email protected]>
  • Loading branch information
vrajashkr and rchincha authored May 20, 2024
1 parent be5ad66 commit 5ae7a02
Show file tree
Hide file tree
Showing 30 changed files with 2,320 additions and 24 deletions.
35 changes: 34 additions & 1 deletion .github/workflows/ecosystem-tools.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
go install github.com/swaggo/swag/cmd/swag@v1.16.2
go mod download
sudo apt-get update
sudo apt-get install libgpgme-dev libassuan-dev libbtrfs-dev libdevmapper-dev pkg-config rpm uidmap
sudo apt-get install libgpgme-dev libassuan-dev libbtrfs-dev libdevmapper-dev pkg-config rpm uidmap haproxy jq
# install skopeo
git clone -b v1.12.0 https://github.com/containers/skopeo.git
cd skopeo
Expand Down Expand Up @@ -80,4 +80,37 @@ jobs:
env:
AWS_ACCESS_KEY_ID: fake
AWS_SECRET_ACCESS_KEY: fake
# Scale-out functional tests.
# The test step and the multi-hop check both use continue-on-error so that
# the log, upload and teardown steps below still run; their outcomes are
# evaluated by the final "fail job if error" step.
- name: Run cloud scale-out tests
  id: scale
  run: |
    make run-cloud-scale-out-tests
  env:
    AWS_ACCESS_KEY_ID: fake
    AWS_SECRET_ACCESS_KEY: fake
  continue-on-error: true
- name: print service logs for scale-out
  run: |
    find /tmp/zot-ft-logs -name '*.log' -print0 | xargs -0 cat
# A request that was already proxied by one cluster member must never be
# proxied again; this log message indicates that it happened.
- name: multi-hop detection
  id: multihop
  run: |
    if find /tmp/zot-ft-logs -name '*.log' -print0 | xargs -0 cat | grep 'cannot proxy an already proxied request'; then
      echo "detected multi-hop"
      exit 1
    else
      exit 0
    fi
  continue-on-error: true
- name: clean up scale-out logs
  run: |
    rm -r /tmp/zot-ft-logs
# Upload and teardown are placed before the failure gate: a failed step
# causes later steps without a status-check condition to be skipped, which
# previously dropped the results artifact and left localstack running
# whenever the tests failed.
- name: Upload zb test results zip as build artifact
  uses: actions/upload-artifact@v4
  with:
    name: zb-cloud-scale-out-functional-results-${{ github.sha }}
    path: ./zb-results/
- uses: ./.github/actions/teardown-localstack
  if: always()
# Failure gate: must stay the last step of the job.
- name: fail job if error
  if: ${{ steps.scale.outcome != 'success' || steps.multihop.outcome != 'success' }}
  run: |
    exit 1
83 changes: 82 additions & 1 deletion .github/workflows/nightly.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@ on:

permissions: read-all

# Here we are running two tests:
# The following tests are run:
# 1. run zot with local storage and dedupe disabled, push images, restart zot with dedupe enabled
# task scheduler will start a dedupe all blobs process at zot startup and it shouldn't interfere with clients.
# 2. run zot with s3 storage and dynamodb and dedupe enabled, push images, restart zot with dedupe false and no cache
# task scheduler will start a restore all blobs process at zot startup, after it finishes all blobs should be restored to their original state (have content)
# 3. run a large number of zot instances with shared storage and metadata, front-ended by HAProxy. Start a long-running zb run with high concurrency and a high number of requests
# to put a long-running, sustained load on the system. The system is expected to perform well without errors and return performance data after the test.
jobs:
dedupe:
name: Dedupe/restore blobs
Expand Down Expand Up @@ -195,3 +197,82 @@ jobs:
- name: Run tests
run: |
./examples/kind/kind-ci.sh
# Nightly high-scale scale-out job: builds zot, starts localstack (s3 +
# dynamodb), runs the high-scale bats suite, then inspects the service logs.
# The test step and the multi-hop check use continue-on-error so that the
# log, upload and teardown steps still run; their outcomes are evaluated by
# the final "fail job if error" step.
cloud-scale-out:
  name: s3+dynamodb scale-out
  runs-on: ubuntu-latest-16-cores
  steps:
    - uses: actions/checkout@v4
    - uses: actions/setup-go@v5
      with:
        cache: false
        go-version: 1.22.x
    - name: Install dependencies
      run: |
        cd $GITHUB_WORKSPACE
        # NOTE(review): the pinned swag version was garbled in the reviewed
        # source; v1.16.2 is assumed here - confirm against the other workflows.
        go install github.com/swaggo/swag/cmd/swag@v1.16.2
        go mod download
        sudo apt-get update
        sudo apt-get install libgpgme-dev libassuan-dev libbtrfs-dev libdevmapper-dev pkg-config rpm uidmap haproxy jq
        # install skopeo
        git clone -b v1.12.0 https://github.com/containers/skopeo.git
        cd skopeo
        make bin/skopeo
        sudo cp bin/skopeo /usr/bin
        skopeo -v
        cd $GITHUB_WORKSPACE
    - name: Log in to GitHub Docker Registry
      uses: docker/login-action@v3
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ github.token }}
    - uses: actions/setup-python@v5
      with:
        python-version: '3.11'
    - name: Install localstack
      run: |
        pip install --upgrade pyopenssl
        pip install localstack==3.3.0 awscli-local[ver1] # install LocalStack cli and awslocal
        docker pull ghcr.io/project-zot/ci-images/localstack:3.3.0 # Make sure to pull a working version of the image
        localstack start -d # Start LocalStack in the background
        echo "Waiting for LocalStack startup..." # Wait 30 seconds for the LocalStack container
        localstack wait -t 30 # to become ready before timing out
        echo "Startup complete"
    - name: Run cloud scale-out high scale performance tests
      id: scale
      run: |
        make run-cloud-scale-out-high-scale-tests
      env:
        AWS_ACCESS_KEY_ID: fake
        AWS_SECRET_ACCESS_KEY: fake
      continue-on-error: true
    - name: print service logs
      run: |
        sudo dmesg
        cat /tmp/zot-logs/*.log
    # A request that was already proxied by one cluster member must never be
    # proxied again; this log message indicates that it happened.
    - name: multi-hop detection
      id: multihop
      run: |
        if cat /tmp/zot-logs/*.log | grep 'cannot proxy an already proxied request'; then
          echo "detected multi-hop"
          exit 1
        else
          exit 0
        fi
      continue-on-error: true
    - name: clean up logs
      run: |
        rm -r /tmp/zot-logs
    # Upload and teardown are placed before the failure gate: a failed step
    # causes later steps without a status-check condition to be skipped,
    # which previously dropped the perf results when only the multi-hop check
    # failed and left localstack running on any failure.
    - name: Upload zb test results zip as build artifact
      if: steps.scale.outcome == 'success'
      uses: actions/upload-artifact@v4
      with:
        name: zb-cloud-scale-out-perf-results-${{ github.sha }}
        path: ./zb-results/
    - uses: ./.github/actions/teardown-localstack
      if: always()
    # Failure gate: must stay the last step of the job.
    - name: fail job if error
      if: ${{ steps.scale.outcome != 'success' || steps.multihop.outcome != 'success' }}
      run: |
        exit 1
11 changes: 11 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -489,6 +489,17 @@ run-blackbox-tests: $(BATS_TEST_FILE_PATH) check-blackbox-prerequisites binary b
echo running bats test "$(BATS_TEST_FILE_PATH)"; \
$(BATS) $(BATS_FLAGS) $(BATS_TEST_FILE_PATH)

# Runs the functional scale-out bats suites (no-auth, then basic-auth + TLS).
# Both suites are always executed, and the target fails if either of them
# fails. Joining the two invocations with a plain ';' made the recipe's exit
# status that of the last suite only, hiding a failure of the first one.
.PHONY: run-cloud-scale-out-tests
run-cloud-scale-out-tests: check-blackbox-prerequisites check-awslocal binary bench test-prereq
	echo running scale out bats test; \
	rc=0; \
	$(BATS) $(BATS_FLAGS) test/scale-out/cloud_scale_out_no_auth.bats || rc=$$?; \
	$(BATS) $(BATS_FLAGS) test/scale-out/cloud_scale_out_basic_auth_tls.bats || rc=$$?; \
	exit $$rc

# Runs the high-scale scale-out bats suite (basic-auth + TLS); the recipe's
# exit status is the exit status of the bats run.
.PHONY: run-cloud-scale-out-high-scale-tests
run-cloud-scale-out-high-scale-tests: check-blackbox-prerequisites check-awslocal binary bench test-prereq
	echo running cloud scale out bats high scale test && \
	$(BATS) $(BATS_FLAGS) test/scale-out/cloud_scale_out_basic_auth_tls_scale.bats

.PHONY: run-blackbox-ci
run-blackbox-ci: check-blackbox-prerequisites binary binary-minimal cli
echo running CI bats tests concurently
Expand Down
44 changes: 44 additions & 0 deletions examples/scale-out-cluster-cloud/config-cluster-member0.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
{
"distSpecVersion": "1.1.0",
"storage": {
"rootDirectory": "/tmp/zot",
"dedupe": false,
"remoteCache": true,
"storageDriver": {
"name": "s3",
"rootdirectory": "/zot",
"region": "us-east-1",
"regionendpoint": "localhost:4566",
"bucket": "zot-storage",
"secure": false,
"skipverify": false
},
"cacheDriver": {
"name": "dynamodb",
"endpoint": "http://localhost:4566",
"region": "us-east-1",
"cacheTablename": "ZotBlobTable",
"repoMetaTablename": "ZotRepoMetadataTable",
"imageMetaTablename": "ZotImageMetaTable",
"repoBlobsInfoTablename": "ZotRepoBlobsInfoTable",
"userDataTablename": "ZotUserDataTable",
"versionTablename": "ZotVersion",
"apiKeyTablename": "ZotApiKeyTable"
}
},
"http": {
"address": "127.0.0.1",
"port": "9000"
},
"log": {
"level": "debug"
},
"cluster": {
"members": [
"127.0.0.1:9000",
"127.0.0.1:9001",
"127.0.0.1:9002"
],
"hashKey": "loremipsumdolors"
}
}
44 changes: 44 additions & 0 deletions examples/scale-out-cluster-cloud/config-cluster-member1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
{
"distSpecVersion": "1.1.0",
"storage": {
"rootDirectory": "/tmp/zot",
"dedupe": false,
"remoteCache": true,
"storageDriver": {
"name": "s3",
"rootdirectory": "/zot",
"region": "us-east-1",
"regionendpoint": "localhost:4566",
"bucket": "zot-storage",
"secure": false,
"skipverify": false
},
"cacheDriver": {
"name": "dynamodb",
"endpoint": "http://localhost:4566",
"region": "us-east-1",
"cacheTablename": "ZotBlobTable",
"repoMetaTablename": "ZotRepoMetadataTable",
"imageMetaTablename": "ZotImageMetaTable",
"repoBlobsInfoTablename": "ZotRepoBlobsInfoTable",
"userDataTablename": "ZotUserDataTable",
"versionTablename": "ZotVersion",
"apiKeyTablename": "ZotApiKeyTable"
}
},
"http": {
"address": "127.0.0.1",
"port": "9001"
},
"log": {
"level": "debug"
},
"cluster": {
"members": [
"127.0.0.1:9000",
"127.0.0.1:9001",
"127.0.0.1:9002"
],
"hashKey": "loremipsumdolors"
}
}
44 changes: 44 additions & 0 deletions examples/scale-out-cluster-cloud/config-cluster-member2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
{
"distSpecVersion": "1.1.0",
"storage": {
"rootDirectory": "/tmp/zot",
"dedupe": false,
"remoteCache": true,
"storageDriver": {
"name": "s3",
"rootdirectory": "/zot",
"region": "us-east-1",
"regionendpoint": "localhost:4566",
"bucket": "zot-storage",
"secure": false,
"skipverify": false
},
"cacheDriver": {
"name": "dynamodb",
"endpoint": "http://localhost:4566",
"region": "us-east-1",
"cacheTablename": "ZotBlobTable",
"repoMetaTablename": "ZotRepoMetadataTable",
"imageMetaTablename": "ZotImageMetaTable",
"repoBlobsInfoTablename": "ZotRepoBlobsInfoTable",
"userDataTablename": "ZotUserDataTable",
"versionTablename": "ZotVersion",
"apiKeyTablename": "ZotApiKeyTable"
}
},
"http": {
"address": "127.0.0.1",
"port": "9002"
},
"log": {
"level": "debug"
},
"cluster": {
"members": [
"127.0.0.1:9000",
"127.0.0.1:9001",
"127.0.0.1:9002"
],
"hashKey": "loremipsumdolors"
}
}
26 changes: 26 additions & 0 deletions examples/scale-out-cluster-cloud/haproxy.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Example HAProxy front end for a three-member zot scale-out cluster.
# Clients connect to port 8080 and are balanced across the zot instances
# listening on 127.0.0.1:9000-9002 (the same addresses that the example
# cluster member configs list under "cluster.members").

# Process-wide settings.
global
# Log targets: facility local0, and facility local1 limited to level
# "notice" and more severe. /tmp/log is presumably a syslog UNIX socket
# path - verify against the local syslog setup.
log /tmp/log local0
log /tmp/log local1 notice
# Upper bound on concurrent connections for the process.
maxconn 2000
stats timeout 30s
# Run in the background.
daemon

# Defaults inherited by the frontend and backend sections below.
defaults
# Reuse the log targets declared in the global section.
log global
mode http
option httplog
option dontlognull
# Timeouts without a unit suffix are in milliseconds (5s connect, 50s idle).
timeout connect 5000
timeout client 50000
timeout server 50000

# Single entry point for clients; everything is sent to the zot cluster.
frontend zot
bind *:8080
default_backend zot-cluster

backend zot-cluster
balance roundrobin
# Cookie-based persistence: HAProxy inserts a SERVER cookie so that a client
# keeps hitting the same zot instance; "indirect" hides the cookie from the
# backend server and "nocache" marks responses carrying it as non-cacheable.
cookie SERVER insert indirect nocache
# NOTE(review): no "check" keyword is set on the servers, so HAProxy health
# checks are not enabled for them - confirm this is intended for the example.
server zot0 127.0.0.1:9000 cookie zot0
server zot1 127.0.0.1:9001 cookie zot1
server zot2 127.0.0.1:9002 cookie zot2
51 changes: 51 additions & 0 deletions examples/scale-out-cluster-cloud/tls/config-cluster-member0.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
{
"distSpecVersion": "1.1.0",
"storage": {
"rootDirectory": "/tmp/zot",
"dedupe": false,
"remoteCache": true,
"storageDriver": {
"name": "s3",
"rootdirectory": "/zot",
"region": "us-east-1",
"regionendpoint": "localhost:4566",
"bucket": "zot-storage",
"secure": false,
"skipverify": false
},
"cacheDriver": {
"name": "dynamodb",
"endpoint": "http://localhost:4566",
"region": "us-east-1",
"cacheTablename": "ZotBlobTable",
"repoMetaTablename": "ZotRepoMetadataTable",
"imageMetaTablename": "ZotImageMetaTable",
"repoBlobsInfoTablename": "ZotRepoBlobsInfoTable",
"userDataTablename": "ZotUserDataTable",
"versionTablename": "ZotVersion",
"apiKeyTablename": "ZotApiKeyTable"
}
},
"http": {
"address": "127.0.0.1",
"port": "9000",
"tls": {
"cert": "test/data/server.cert",
"key": "test/data/server.key"
}
},
"log": {
"level": "debug"
},
"cluster": {
"members": [
"127.0.0.1:9000",
"127.0.0.1:9001",
"127.0.0.1:9002"
],
"hashKey": "loremipsumdolors",
"tls": {
"cacert": "test/data/ca.crt"
}
}
}
Loading

0 comments on commit 5ae7a02

Please sign in to comment.