From d26df510ee3b2b6879f0cf4e4d8de6a35b646f4b Mon Sep 17 00:00:00 2001 From: Christos Hadjiaslanis Date: Mon, 20 Jan 2025 21:50:28 +0000 Subject: [PATCH 1/5] Reduced the number of validators in the Local Kube tests to 1. --- linera-service/src/cli_wrappers/local_kubernetes_net.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linera-service/src/cli_wrappers/local_kubernetes_net.rs b/linera-service/src/cli_wrappers/local_kubernetes_net.rs index 92c9af39f44..da3c45ec88a 100644 --- a/linera-service/src/cli_wrappers/local_kubernetes_net.rs +++ b/linera-service/src/cli_wrappers/local_kubernetes_net.rs @@ -97,7 +97,7 @@ impl SharedLocalKubernetesNetTestingConfig { testing_prng_seed: Some(37), num_other_initial_chains: 2, initial_amount: Amount::from_tokens(2000), - num_initial_validators: 4, + num_initial_validators: 1, num_shards: 4, binaries, no_build: false, From 48e03149eae72b66ffbb07b26bfe462874738e01 Mon Sep 17 00:00:00 2001 From: Christos Hadjiaslanis Date: Wed, 22 Jan 2025 14:34:16 +0000 Subject: [PATCH 2/5] WIP: debugging --- .github/workflows/kubernetes.yml | 96 ++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 .github/workflows/kubernetes.yml diff --git a/.github/workflows/kubernetes.yml b/.github/workflows/kubernetes.yml new file mode 100644 index 00000000000..5d9d4202772 --- /dev/null +++ b/.github/workflows/kubernetes.yml @@ -0,0 +1,96 @@ +name: Kubernetes + +on: + push: + branches: [ main ] + pull_request: + branches: + - "**" + paths: + - '.github/workflows/kubernetes.yml' + - 'toolchains/**' + - 'configuration/**' + - 'docker/**' + - 'kubernetes/**' + - 'linera-service/**' + - 'linera-rpc/**' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs on pull-requests +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.run_id }}' + cancel-in-progress: true + +env: + CARGO_TERM_COLOR: always + CARGO_INCREMENTAL: 0 + CARGO_NET_RETRY: 10 + RUST_BACKTRACE: short + RUSTFLAGS: "-D warnings" + RUSTUP_MAX_RETRIES: 10 + LINERA_TRY_RELEASE_BINARIES: "true" + +permissions: + contents: read + +jobs: + kind-deployment-e2e-tests: + runs-on: ubuntu-latest-16-cores + timeout-minutes: 90 + + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - uses: actions-rust-lang/setup-rust-toolchain@v1 + - name: Install Protoc + uses: arduino/setup-protoc@v1 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + - name: Build client binary + run: | + cargo build --release --locked --bin linera --bin linera-proxy --bin linera-server --features scylladb,rocksdb,kubernetes,metrics + strip target/release/linera + strip target/release/linera-proxy + strip target/release/linera-server + - name: Setup helmfile + uses: mamezou-tech/setup-helmfile@v1.3.0 + - name: Run e2e tests + uses: nick-fields/retry@v2 + with: + # Port forwarding sometimes dies, which makes all requests timeout + # Which is why we need retries + max_attempts: 1 + timeout_minutes: 20 + command: | + kind get clusters | xargs -I {} kind delete cluster --name {} + RUST_LOG=linera=info cargo test --locked -p linera-service --features scylladb,kubernetes --test linera_net_tests -- kubernetes --nocapture + - name: Setup upterm session + uses: lhotari/action-upterm@v1 + with: + ## If no one connects after 5 minutes, shut down server. + wait-timeout-minutes: 20 + - name: Port forward Prometheus + run: | + kubectl port-forward prometheus-linera-core-kube-prometheu-prometheus-0 9090 & + - name: Check Proxy metric + # Check one random proxy metric that we expect to be logged after running the e2e tests + run: | + curl -s 'http://127.0.0.1:9090/api/v1/query?query=linera_proxy_request_latency_bucket' | jq -r '.data.result[]' | grep -q . + - name: Check Server metric + # Check one random server metric that we expect to be logged after running the e2e tests + run: | + curl -s 'http://127.0.0.1:9090/api/v1/query?query=linera_server_request_latency_bucket' | jq -r '.data.result[]' | grep -q . + - name: Check Scylla metric + # Check one random server metric that we expect to be logged after running the e2e tests + run: | + curl -s 'http://127.0.0.1:9090/api/v1/query?query=scylla_database_total_reads' | jq -r '.data.result[]' | grep -q . + - name: Check Scylla Manager metric + # Check one random server metric that we expect to be logged after running the e2e tests + run: | + curl -s 'http://127.0.0.1:9090/api/v1/query?query=scylla_manager_agent_rclone_bytes_transferred_total' | jq -r '.data.result[]' | grep -q . + - name: Destroy the kind clusters + if: always() + shell: bash + run: >- + kind get clusters | xargs -I {} kind delete cluster --name {} From c87e0c0ca2cc6d073bbd1f1f4daee58fa5fa838e Mon Sep 17 00:00:00 2001 From: Christos Hadjiaslanis Date: Wed, 22 Jan 2025 15:03:52 +0000 Subject: [PATCH 3/5] Added failure condition for upterm session. --- .github/workflows/kubernetes.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/kubernetes.yml b/.github/workflows/kubernetes.yml index 5d9d4202772..f60921b6d50 100644 --- a/.github/workflows/kubernetes.yml +++ b/.github/workflows/kubernetes.yml @@ -67,9 +67,10 @@ jobs: RUST_LOG=linera=info cargo test --locked -p linera-service --features scylladb,kubernetes --test linera_net_tests -- kubernetes --nocapture - name: Setup upterm session uses: lhotari/action-upterm@v1 + if: ${{ failure() }} with: ## If no one connects after 5 minutes, shut down server. - wait-timeout-minutes: 20 + wait-timeout-minutes: 10 - name: Port forward Prometheus run: | kubectl port-forward prometheus-linera-core-kube-prometheu-prometheus-0 9090 & From dad0afebe744a47f60fe1b26cb984f630ca5a0a5 Mon Sep 17 00:00:00 2001 From: Christos Hadjiaslanis Date: Wed, 22 Jan 2025 16:52:30 +0000 Subject: [PATCH 4/5] Bumped Docker image version. --- .github/workflows/docker_image.yml | 2 +- docker/Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docker_image.yml b/.github/workflows/docker_image.yml index 85fc610f441..9bdafcbb435 100644 --- a/.github/workflows/docker_image.yml +++ b/.github/workflows/docker_image.yml @@ -54,7 +54,7 @@ jobs: -t ${{ env.DOCKER_IMAGE }}:${{ env.BRANCH_NAME }} \ -t ${{ env.DOCKER_IMAGE }}:${{ env.GIT_COMMIT_SHORT }} \ -t ${{ env.DOCKER_IMAGE }}:${{ env.GIT_COMMIT_LONG }} - + docker run --rm linera ./linera --version - name: Push Docker image to Google Artifact Registry run: | docker push ${{ env.DOCKER_IMAGE }}:${{ env.BRANCH_NAME }} diff --git a/docker/Dockerfile b/docker/Dockerfile index c46ca94e85e..c72f7a2f72c 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -21,7 +21,7 @@ ARG target=x86_64-unknown-linux-gnu ARG binaries= ARG copy=${binaries:+_copy} -FROM rust:1.74-slim-bookworm AS builder +FROM rust:1.81-slim-bookworm AS builder ARG git_commit ARG target From 7feffbcbc1f78dd08038faee8b0aa994bbf37bef Mon Sep 17 00:00:00 2001 From: Christos Hadjiaslanis Date: Thu, 23 Jan 2025 14:17:27 +0000 Subject: [PATCH 5/5] WIP --- .github/workflows/kubernetes.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/kubernetes.yml b/.github/workflows/kubernetes.yml index f60921b6d50..85861ae2a23 100644 --- a/.github/workflows/kubernetes.yml +++ b/.github/workflows/kubernetes.yml @@ -69,8 +69,7 @@ jobs: uses: lhotari/action-upterm@v1 if: ${{ failure() }} with: - ## If no one connects after 5 minutes, shut down server. - wait-timeout-minutes: 10 + wait-timeout-minutes: 120 - name: Port forward Prometheus run: | kubectl port-forward prometheus-linera-core-kube-prometheu-prometheus-0 9090 &