diff --git a/.github/workflows/auto-assign.yaml b/.github/workflows/auto-assign.yaml
index fce7665e5f6..907e702af40 100644
--- a/.github/workflows/auto-assign.yaml
+++ b/.github/workflows/auto-assign.yaml
@@ -11,7 +11,8 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: take the issue
- uses: bdougie/take-action@main
+ # yamllint disable-line rule:line-length
+ uses: bdougie/take-action@1439165ac45a7461c2d89a59952cd7d941964b87 # main
with:
message: >
Thanks for taking this issue!
diff --git a/.github/workflows/build-multi-stage.yaml b/.github/workflows/build-multi-stage.yaml
index 333bcc13bab..55549c6e657 100644
--- a/.github/workflows/build-multi-stage.yaml
+++ b/.github/workflows/build-multi-stage.yaml
@@ -13,7 +13,8 @@ jobs:
name: multi-arch-build
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v4
+ # yamllint disable-line rule:line-length
+ - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: multi-arch-build
# yamllint disable-line rule:line-length
if: ${{ ! contains(github.event.pull_request.labels.*.name, 'ci/skip/multi-arch-build') }}
diff --git a/.github/workflows/codespell.yaml b/.github/workflows/codespell.yaml
index ece48c9f122..2b0de44297f 100644
--- a/.github/workflows/codespell.yaml
+++ b/.github/workflows/codespell.yaml
@@ -15,6 +15,7 @@ jobs:
name: codespell
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v4
+ # yamllint disable-line rule:line-length
+ - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: codespell
run: make containerized-test TARGET=codespell
diff --git a/.github/workflows/commitlint.yaml b/.github/workflows/commitlint.yaml
index 7b7b653ba8c..877151a0f1b 100644
--- a/.github/workflows/commitlint.yaml
+++ b/.github/workflows/commitlint.yaml
@@ -14,7 +14,8 @@ jobs:
if: ${{ github.event.pull_request.user.login != 'dependabot[bot]' }}
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v4
+ # yamllint disable-line rule:line-length
+ - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
ref: ${{ github.event.pull_request.head.sha }}
- name: commitlint
diff --git a/.github/workflows/dependency-review.yaml b/.github/workflows/dependency-review.yaml
index fd204ebee64..867c2674d34 100644
--- a/.github/workflows/dependency-review.yaml
+++ b/.github/workflows/dependency-review.yaml
@@ -15,8 +15,10 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: 'Checkout Repository'
- uses: actions/checkout@v4
+ # yamllint disable-line rule:line-length
+ uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: 'Dependency Review'
- uses: actions/dependency-review-action@v4
+ # yamllint disable-line rule:line-length
+ uses: actions/dependency-review-action@5a2ce3f5b92ee19cbb1541a4984c76d921601d7c # v4.3.4
with:
allow-ghsas: GHSA-f4w6-3rh6-6q4q
diff --git a/.github/workflows/go-test.yaml b/.github/workflows/go-test.yaml
index da1adca2d28..1818375de48 100644
--- a/.github/workflows/go-test.yaml
+++ b/.github/workflows/go-test.yaml
@@ -14,7 +14,8 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout the repo
- uses: actions/checkout@v4
+ # yamllint disable-line rule:line-length
+ uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: Check generated deploy code
run: make generate-deploy
@@ -29,20 +30,23 @@ jobs:
name: e2e-build-test
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v4
+ # yamllint disable-line rule:line-length
+ - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: e2e-build-test
run: make containerized-build TARGET=e2e.test
go-test:
name: go-test
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v4
+ # yamllint disable-line rule:line-length
+ - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: go-test
run: make containerized-test TARGET=go-test
go-test-api:
name: go-test-api
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v4
+ # yamllint disable-line rule:line-length
+ - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: go-test-api
run: make containerized-test TARGET=go-test-api
diff --git a/.github/workflows/golangci-lint.yaml b/.github/workflows/golangci-lint.yaml
index 7d18eafb00f..f0473ef8d09 100644
--- a/.github/workflows/golangci-lint.yaml
+++ b/.github/workflows/golangci-lint.yaml
@@ -13,6 +13,7 @@ jobs:
name: golangci-lint
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v4
+ # yamllint disable-line rule:line-length
+ - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: golangci-lint
run: make containerized-test TARGET=go-lint
diff --git a/.github/workflows/lint-extras.yaml b/.github/workflows/lint-extras.yaml
index de6fcd363fa..8c04d7eea6c 100644
--- a/.github/workflows/lint-extras.yaml
+++ b/.github/workflows/lint-extras.yaml
@@ -13,6 +13,7 @@ jobs:
name: lint-extras
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v4
+ # yamllint disable-line rule:line-length
+ - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: lint-extras
run: make containerized-test TARGET=lint-extras
diff --git a/.github/workflows/mergify-copy-labels.yaml b/.github/workflows/mergify-copy-labels.yaml
index 3323b438c14..dbcdd5c44d8 100644
--- a/.github/workflows/mergify-copy-labels.yaml
+++ b/.github/workflows/mergify-copy-labels.yaml
@@ -12,7 +12,8 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Copying labels
- uses: Mergifyio/gha-mergify-merge-queue-labels-copier@main
+ # yamllint disable-line rule:line-length
+ uses: Mergifyio/gha-mergify-merge-queue-labels-copier@1d2b277f94d52987008ec05b571fb68f2357e63f # main
with:
additional-labels: 'ok-to-test'
token: ${{ secrets.CEPH_CSI_BOT_TOKEN }}
diff --git a/.github/workflows/mod-check.yaml b/.github/workflows/mod-check.yaml
index b94460ad38f..3809d9e393e 100644
--- a/.github/workflows/mod-check.yaml
+++ b/.github/workflows/mod-check.yaml
@@ -13,6 +13,7 @@ jobs:
name: mod-check
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v4
+ # yamllint disable-line rule:line-length
+ - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: mod-check
run: make containerized-test TARGET=mod-check
diff --git a/.github/workflows/publish-artifacts.yaml b/.github/workflows/publish-artifacts.yaml
index b29f7cf2f4b..066a9681d57 100644
--- a/.github/workflows/publish-artifacts.yaml
+++ b/.github/workflows/publish-artifacts.yaml
@@ -18,10 +18,12 @@ jobs:
runs-on: ubuntu-latest
if: github.repository == 'ceph/ceph-csi'
steps:
- - uses: actions/checkout@v4
+ # yamllint disable-line rule:line-length
+ - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: Login to Quay
- uses: docker/login-action@v3
+ # yamllint disable-line rule:line-length
+ uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
with:
registry: quay.io
username: ${{ secrets.QUAY_IO_USERNAME }}
diff --git a/.github/workflows/pull-request-commentor.yaml b/.github/workflows/pull-request-commentor.yaml
index 4941651a53f..d64d74cdd0d 100644
--- a/.github/workflows/pull-request-commentor.yaml
+++ b/.github/workflows/pull-request-commentor.yaml
@@ -51,7 +51,8 @@ jobs:
Add comment to trigger external storage tests for Kubernetes
${{ matrix.k8s }}
if: ${{ github.base_ref == matrix.branch }}
- uses: peter-evans/create-or-update-comment@v4
+ # yamllint disable-line rule:line-length
+ uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
with:
token: ${{ secrets.CEPH_CSI_BOT_TOKEN }}
issue-number: ${{ github.event.pull_request.number }}
@@ -62,7 +63,8 @@ jobs:
Add comment to trigger helm E2E tests for Kubernetes
${{ matrix.k8s }}
if: ${{ github.base_ref == matrix.branch }}
- uses: peter-evans/create-or-update-comment@v4
+ # yamllint disable-line rule:line-length
+ uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
with:
token: ${{ secrets.CEPH_CSI_BOT_TOKEN }}
issue-number: ${{ github.event.pull_request.number }}
@@ -70,7 +72,8 @@ jobs:
/test ci/centos/mini-e2e-helm/k8s-${{ matrix.k8s }}
- name: Add comment to trigger E2E tests for Kubernetes ${{ matrix.k8s }}
- uses: peter-evans/create-or-update-comment@v4
+ # yamllint disable-line rule:line-length
+ uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
if: ${{ github.base_ref == matrix.branch }}
with:
token: ${{ secrets.CEPH_CSI_BOT_TOKEN }}
@@ -87,7 +90,8 @@ jobs:
steps:
- name: Add comment to trigger cephfs upgrade tests
- uses: peter-evans/create-or-update-comment@v4
+ # yamllint disable-line rule:line-length
+ uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
with:
token: ${{ secrets.CEPH_CSI_BOT_TOKEN }}
issue-number: ${{ github.event.pull_request.number }}
@@ -95,7 +99,8 @@ jobs:
/test ci/centos/upgrade-tests-cephfs
- name: Add comment to trigger rbd upgrade tests
- uses: peter-evans/create-or-update-comment@v4
+ # yamllint disable-line rule:line-length
+ uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
with:
token: ${{ secrets.CEPH_CSI_BOT_TOKEN }}
issue-number: ${{ github.event.pull_request.number }}
@@ -116,7 +121,8 @@ jobs:
steps:
- name: remove ok-to-test-label after commenting
- uses: actions/github-script@v7
+ # yamllint disable-line rule:line-length
+ uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
with:
github-token: ${{ secrets.CEPH_CSI_BOT_TOKEN }}
script: |
diff --git a/.github/workflows/retest.yaml b/.github/workflows/retest.yaml
index 81879627590..f99d3f59ee1 100644
--- a/.github/workflows/retest.yaml
+++ b/.github/workflows/retest.yaml
@@ -15,7 +15,8 @@ jobs:
runs-on: ubuntu-latest
steps:
# path to the retest action
- - uses: ceph/ceph-csi/actions/retest@devel
+ # yamllint disable-line rule:line-length
+ - uses: ceph/ceph-csi/actions/retest@28dc64dcae3cec8d11d84bdf525bda0ef757c688 # devel
with:
GITHUB_TOKEN: ${{ secrets.CEPH_CSI_BOT_TOKEN }}
required-label: "ci/retry/e2e"
diff --git a/.github/workflows/snyk-container-image.yaml b/.github/workflows/snyk-container-image.yaml
index 74b9b55d2b6..eabfca77b86 100644
--- a/.github/workflows/snyk-container-image.yaml
+++ b/.github/workflows/snyk-container-image.yaml
@@ -26,18 +26,21 @@ jobs:
if: github.repository == 'ceph/ceph-csi'
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v4
+ # yamllint disable-line rule:line-length
+ - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: Build a Docker image
run: make image-cephcsi
- name: Run Snyk to check Docker image for vulnerabilities
continue-on-error: true
- uses: snyk/actions/docker@master
+ # yamllint disable-line rule:line-length
+ uses: snyk/actions/docker@cdb760004ba9ea4d525f2e043745dfe85bb9077e # master
env:
SNYK_TOKEN: ${{ secrets.SYNK_TOKEN }}
with:
image: quay.io/cephcsi/cephcsi:${{ github.base_ref }}
args: --file=Dockerfilei
- name: Upload result to GitHub Code Scanning
- uses: github/codeql-action/upload-sarif@v3
+ # yamllint disable-line rule:line-length
+ uses: github/codeql-action/upload-sarif@8214744c546c1e5c8f03dde8fab3a7353211988d # v3.26.7
with:
sarif_file: snyk.sarif
diff --git a/.github/workflows/snyk.yaml b/.github/workflows/snyk.yaml
index dd1ed75f7e7..8ee96f14d5b 100644
--- a/.github/workflows/snyk.yaml
+++ b/.github/workflows/snyk.yaml
@@ -20,11 +20,13 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: checkout
- uses: actions/checkout@v4
+ # yamllint disable-line rule:line-length
+ uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
fetch-depth: 0
- name: run Snyk to check for code vulnerabilities
- uses: snyk/actions/golang@master
+ # yamllint disable-line rule:line-length
+ uses: snyk/actions/golang@cdb760004ba9ea4d525f2e043745dfe85bb9077e # master
env:
SNYK_TOKEN: ${{ secrets.SYNK_TOKEN }}
diff --git a/.github/workflows/stale.yaml b/.github/workflows/stale.yaml
index 01fa31fe3e9..629919591e8 100644
--- a/.github/workflows/stale.yaml
+++ b/.github/workflows/stale.yaml
@@ -18,7 +18,8 @@ jobs:
runs-on: ubuntu-latest
if: github.repository == 'ceph/ceph-csi'
steps:
- - uses: actions/stale@v9
+ # yamllint disable-line rule:line-length
+ - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9.0.0
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
days-before-issue-stale: 30
diff --git a/.github/workflows/test-retest-action.yaml b/.github/workflows/test-retest-action.yaml
index e8578179bce..183bda6336e 100644
--- a/.github/workflows/test-retest-action.yaml
+++ b/.github/workflows/test-retest-action.yaml
@@ -15,7 +15,8 @@ jobs:
build:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v4
+ # yamllint disable-line rule:line-length
+ - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: Docker build
# Run cd to avoid loading complete cephcsi directory in docker context
diff --git a/.github/workflows/tickgit.yaml b/.github/workflows/tickgit.yaml
index 2b49b48eb1d..106e380fdbb 100644
--- a/.github/workflows/tickgit.yaml
+++ b/.github/workflows/tickgit.yaml
@@ -14,5 +14,6 @@ jobs:
name: tickgit
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v4
+ # yamllint disable-line rule:line-length
+ - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- run: make containerized-test TARGET=tickgit
diff --git a/PendingReleaseNotes.md b/PendingReleaseNotes.md
index a77eb58d405..43c886283ef 100644
--- a/PendingReleaseNotes.md
+++ b/PendingReleaseNotes.md
@@ -12,5 +12,7 @@
- deploy: radosNamespaceCephFS can be configured for ceph-csi-cephfs chart in [PR](https://github.com/ceph/ceph-csi/pull/4652)
- build: update ceph release to squid in [PR](https://github.com/ceph/ceph-csi/pull/4735)
- build: CentOS Stream 9 is used as OS in the container-images [PR](https://github.com/ceph/ceph-csi/pull/4735)
+- util: a log message "Slow GRPC" is now emitted when
+ a CSI gRPC call outlives its deadline [PR](https://github.com/ceph/ceph-csi/pull/4847)
## NOTE
diff --git a/charts/ceph-csi-cephfs/README.md b/charts/ceph-csi-cephfs/README.md
index 2033ca3ad09..1be352f3f65 100644
--- a/charts/ceph-csi-cephfs/README.md
+++ b/charts/ceph-csi-cephfs/README.md
@@ -118,6 +118,7 @@ charts and their default values.
| `commonLabels` | Labels to apply to all resources | `{}` |
| `logLevel` | Set logging level for csi containers. Supported values from 0 to 5. 0 for general useful logs, 5 for trace level verbosity. | `5` |
| `sidecarLogLevel` | Set logging level for csi sidecar containers. Supported values from 0 to 5. 0 for general useful logs, 5 for trace level verbosity. | `1` |
+| `logSlowOperationInterval` | Log slow operations at the specified interval. An operation is considered slow if it outlives its deadline. | `30s` |
| `nodeplugin.name` | Specifies the nodeplugin name | `nodeplugin` |
| `nodeplugin.updateStrategy` | Specifies the update Strategy. If you are using ceph-fuse client set this value to OnDelete | `RollingUpdate` |
| `nodeplugin.priorityClassName` | Set user created priorityClassName for csi plugin pods. default is system-node-critical which is highest priority | `system-node-critical` |
diff --git a/charts/ceph-csi-cephfs/templates/nodeplugin-daemonset.yaml b/charts/ceph-csi-cephfs/templates/nodeplugin-daemonset.yaml
index b91b8047a00..5be4632799a 100644
--- a/charts/ceph-csi-cephfs/templates/nodeplugin-daemonset.yaml
+++ b/charts/ceph-csi-cephfs/templates/nodeplugin-daemonset.yaml
@@ -72,6 +72,7 @@ spec:
{{- if and .Values.readAffinity .Values.readAffinity.enabled }}
- "--crush-location-labels={{ .Values.readAffinity.crushLocationLabels | join "," }}"
{{- end }}
+ - "--logslowopinterval={{ .Values.logSlowOperationInterval }}"
env:
- name: POD_IP
valueFrom:
diff --git a/charts/ceph-csi-cephfs/templates/provisioner-deployment.yaml b/charts/ceph-csi-cephfs/templates/provisioner-deployment.yaml
index 3257705af59..14b0f2c0cdf 100644
--- a/charts/ceph-csi-cephfs/templates/provisioner-deployment.yaml
+++ b/charts/ceph-csi-cephfs/templates/provisioner-deployment.yaml
@@ -92,6 +92,7 @@ spec:
- "--clustername={{ .Values.provisioner.clustername }}"
{{- end }}
- "--setmetadata={{ .Values.provisioner.setmetadata }}"
+ - "--logslowopinterval={{ .Values.logSlowOperationInterval }}"
env:
- name: POD_IP
valueFrom:
diff --git a/charts/ceph-csi-cephfs/values.yaml b/charts/ceph-csi-cephfs/values.yaml
index 4b52084641c..7d73851e61b 100644
--- a/charts/ceph-csi-cephfs/values.yaml
+++ b/charts/ceph-csi-cephfs/values.yaml
@@ -40,6 +40,9 @@ commonLabels: {}
logLevel: 5
# sidecarLogLevel is the variable for Kubernetes sidecar container's log level
sidecarLogLevel: 1
+# Log slow operations at the specified interval.
+# An operation is considered slow if it outlives its deadline.
+logSlowOperationInterval: 30s
# Set fsGroupPolicy for CSI Driver object spec
# https://kubernetes-csi.github.io/docs/support-fsgroup.html
diff --git a/charts/ceph-csi-rbd/README.md b/charts/ceph-csi-rbd/README.md
index c516d410664..ba5fe684be8 100644
--- a/charts/ceph-csi-rbd/README.md
+++ b/charts/ceph-csi-rbd/README.md
@@ -120,6 +120,7 @@ charts and their default values.
| `commonLabels` | Labels to apply to all resources | `{}` |
| `logLevel` | Set logging level for csi containers. Supported values from 0 to 5. 0 for general useful logs, 5 for trace level verbosity. | `5` |
| `sidecarLogLevel` | Set logging level for csi sidecar containers. Supported values from 0 to 5. 0 for general useful logs, 5 for trace level verbosity. | `1` |
+| `logSlowOperationInterval` | Log slow operations at the specified interval. An operation is considered slow if it outlives its deadline. | `30s` |
| `nodeplugin.name` | Specifies the nodeplugins name | `nodeplugin` |
| `nodeplugin.updateStrategy` | Specifies the update Strategy. If you are using ceph-fuse client set this value to OnDelete | `RollingUpdate` |
| `nodeplugin.priorityClassName` | Set user created priorityclassName for csi plugin pods. default is system-node-critical which is highest priority | `system-node-critical` |
diff --git a/charts/ceph-csi-rbd/templates/nodeplugin-daemonset.yaml b/charts/ceph-csi-rbd/templates/nodeplugin-daemonset.yaml
index cf9b201cedc..e640031b7e2 100644
--- a/charts/ceph-csi-rbd/templates/nodeplugin-daemonset.yaml
+++ b/charts/ceph-csi-rbd/templates/nodeplugin-daemonset.yaml
@@ -70,6 +70,7 @@ spec:
{{- if and .Values.readAffinity .Values.readAffinity.enabled }}
- "--crush-location-labels={{ .Values.readAffinity.crushLocationLabels | join "," }}"
{{- end }}
+ - "--logslowopinterval={{ .Values.logSlowOperationInterval }}"
env:
- name: POD_IP
valueFrom:
diff --git a/charts/ceph-csi-rbd/templates/provisioner-deployment.yaml b/charts/ceph-csi-rbd/templates/provisioner-deployment.yaml
index 70c393c00cb..fe61b56bff8 100644
--- a/charts/ceph-csi-rbd/templates/provisioner-deployment.yaml
+++ b/charts/ceph-csi-rbd/templates/provisioner-deployment.yaml
@@ -97,6 +97,7 @@ spec:
- "--clustername={{ .Values.provisioner.clustername }}"
{{- end }}
- "--setmetadata={{ .Values.provisioner.setmetadata }}"
+ - "--logslowopinterval={{ .Values.logSlowOperationInterval }}"
env:
- name: POD_IP
valueFrom:
diff --git a/charts/ceph-csi-rbd/values.yaml b/charts/ceph-csi-rbd/values.yaml
index 88bad96dd69..40f6f84ff0a 100644
--- a/charts/ceph-csi-rbd/values.yaml
+++ b/charts/ceph-csi-rbd/values.yaml
@@ -69,6 +69,9 @@ commonLabels: {}
logLevel: 5
# sidecarLogLevel is the variable for Kubernetes sidecar container's log level
sidecarLogLevel: 1
+# Log slow operations at the specified interval.
+# An operation is considered slow if it outlives its deadline.
+logSlowOperationInterval: 30s
# Set fsGroupPolicy for CSI Driver object spec
# https://kubernetes-csi.github.io/docs/support-fsgroup.html
diff --git a/cmd/cephcsi.go b/cmd/cephcsi.go
index a43ac276362..bc6e1c7a3a0 100644
--- a/cmd/cephcsi.go
+++ b/cmd/cephcsi.go
@@ -120,6 +120,11 @@ func init() {
"path of prometheus endpoint where metrics will be available")
flag.DurationVar(&conf.PollTime, "polltime", time.Second*pollTime, "time interval in seconds between each poll")
flag.DurationVar(&conf.PoolTimeout, "timeout", time.Second*probeTimeout, "probe timeout in seconds")
+ flag.DurationVar(
+ &conf.LogSlowOpInterval,
+ "logslowopinterval",
+ time.Second*30,
+ "how often to inform about slow gRPC calls")
flag.UintVar(
&conf.RbdHardMaxCloneDepth,
diff --git a/docs/deploy-cephfs.md b/docs/deploy-cephfs.md
index da798cbae08..ba052b44b4a 100644
--- a/docs/deploy-cephfs.md
+++ b/docs/deploy-cephfs.md
@@ -50,6 +50,7 @@ make image-cephcsi
| `--enable-read-affinity` | `false` | enable read affinity |
| `--crush-location-labels`| _empty_ | Kubernetes node labels that determine the CRUSH location the node belongs to, separated by ','.
`Note: These labels will be replaced if crush location labels are defined in the ceph-csi-config ConfigMap for the specific cluster.` |
| `--radosnamespacecephfs`| _empty_ | CephFS RadosNamespace used to store CSI specific objects and keys. |
+| `--logslowopinterval` | `30s` | Log slow operations at the specified interval. An operation is considered slow if it outlives its deadline. |
**NOTE:** The parameter `-forcecephkernelclient` enables the Kernel
CephFS mounter on kernels < 4.17.
diff --git a/docs/deploy-rbd.md b/docs/deploy-rbd.md
index e493113b99f..f27f32df022 100644
--- a/docs/deploy-rbd.md
+++ b/docs/deploy-rbd.md
@@ -48,6 +48,7 @@ make image-cephcsi
| `--setmetadata` | `false` | Set metadata on volume |
| `--enable-read-affinity` | `false` | enable read affinity |
| `--crush-location-labels`| _empty_ | Kubernetes node labels that determine the CRUSH location the node belongs to, separated by ','.
`Note: These labels will be replaced if crush location labels are defined in the ceph-csi-config ConfigMap for the specific cluster.` |
+| `--logslowopinterval` | `30s` | Log slow operations at the specified interval. An operation is considered slow if it outlives its deadline. |
**Available volume parameters:**
diff --git a/go.mod b/go.mod
index 6b730194448..3c137c4dad5 100644
--- a/go.mod
+++ b/go.mod
@@ -28,7 +28,7 @@ require (
golang.org/x/crypto v0.27.0
golang.org/x/net v0.29.0
golang.org/x/sys v0.25.0
- google.golang.org/grpc v1.66.0
+ google.golang.org/grpc v1.66.2
google.golang.org/protobuf v1.34.2
//
// when updating k8s.io/kubernetes, make sure to update the replace section too
diff --git a/go.sum b/go.sum
index 49b5410524b..df63d1e416c 100644
--- a/go.sum
+++ b/go.sum
@@ -3362,8 +3362,8 @@ google.golang.org/grpc v1.63.0/go.mod h1:WAX/8DgncnokcFUldAxq7GeB5DXHDbMF+lLvDom
google.golang.org/grpc v1.63.2/go.mod h1:WAX/8DgncnokcFUldAxq7GeB5DXHDbMF+lLvDomNkRA=
google.golang.org/grpc v1.64.0/go.mod h1:oxjF8E3FBnjp+/gVFYdWacaLDx9na1aqy9oovLpxQYg=
google.golang.org/grpc v1.65.0/go.mod h1:WgYC2ypjlB0EiQi6wdKixMqukr6lBc0Vo+oOgjrM5ZQ=
-google.golang.org/grpc v1.66.0 h1:DibZuoBznOxbDQxRINckZcUvnCEvrW9pcWIE2yF9r1c=
-google.golang.org/grpc v1.66.0/go.mod h1:s3/l6xSSCURdVfAnL+TqCNMyTDAGN6+lZeVxnZR128Y=
+google.golang.org/grpc v1.66.2 h1:3QdXkuq3Bkh7w+ywLdLvM56cmGvQHUMZpiCzt6Rqaoo=
+google.golang.org/grpc v1.66.2/go.mod h1:s3/l6xSSCURdVfAnL+TqCNMyTDAGN6+lZeVxnZR128Y=
google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.1.0/go.mod h1:6Kw0yEErY5E/yWrBtf03jp27GLLJujG4z/JK95pnjjw=
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
diff --git a/internal/cephfs/driver.go b/internal/cephfs/driver.go
index 97509956d77..8023bb1a798 100644
--- a/internal/cephfs/driver.go
+++ b/internal/cephfs/driver.go
@@ -199,7 +199,9 @@ func (fs *Driver) Run(conf *util.Config) {
NS: fs.ns,
GS: fs.cs,
}
- server.Start(conf.Endpoint, srv)
+ server.Start(conf.Endpoint, srv, csicommon.MiddlewareServerOptionConfig{
+ LogSlowOpInterval: conf.LogSlowOpInterval,
+ })
if conf.EnableProfiling {
go util.StartMetricsServer(conf)
@@ -230,7 +232,9 @@ func (fs *Driver) setupCSIAddonsServer(conf *util.Config) error {
}
// start the server, this does not block, it runs a new go-routine
- err = fs.cas.Start()
+ err = fs.cas.Start(csicommon.MiddlewareServerOptionConfig{
+ LogSlowOpInterval: conf.LogSlowOpInterval,
+ })
if err != nil {
return fmt.Errorf("failed to start CSI-Addons server: %w", err)
}
diff --git a/internal/csi-addons/server/server.go b/internal/csi-addons/server/server.go
index e5c6310eec3..c41e27100e0 100644
--- a/internal/csi-addons/server/server.go
+++ b/internal/csi-addons/server/server.go
@@ -85,9 +85,9 @@ func (cas *CSIAddonsServer) RegisterService(svc CSIAddonsService) {
// Start creates the internal gRPC server, and registers the CSIAddonsServices.
// The internal gRPC server is started in it's own go-routine when no error is
// returned.
-func (cas *CSIAddonsServer) Start() error {
+func (cas *CSIAddonsServer) Start(middlewareConfig csicommon.MiddlewareServerOptionConfig) error {
// create the gRPC server and register services
- cas.server = grpc.NewServer(csicommon.NewMiddlewareServerOption())
+ cas.server = grpc.NewServer(csicommon.NewMiddlewareServerOption(middlewareConfig))
for _, svc := range cas.services {
svc.RegisterService(cas.server)
diff --git a/internal/csi-common/server.go b/internal/csi-common/server.go
index 727ef57f80f..b758f8d4a10 100644
--- a/internal/csi-common/server.go
+++ b/internal/csi-common/server.go
@@ -31,7 +31,7 @@ import (
// NonBlockingGRPCServer defines Non blocking GRPC server interfaces.
type NonBlockingGRPCServer interface {
// Start services at the endpoint
- Start(endpoint string, srv Servers)
+ Start(endpoint string, srv Servers, middlewareConfig MiddlewareServerOptionConfig)
// Waits for the service to stop
Wait()
// Stops the service gracefully
@@ -60,9 +60,13 @@ type nonBlockingGRPCServer struct {
}
// Start start service on endpoint.
-func (s *nonBlockingGRPCServer) Start(endpoint string, srv Servers) {
+func (s *nonBlockingGRPCServer) Start(
+ endpoint string,
+ srv Servers,
+ middlewareConfig MiddlewareServerOptionConfig,
+) {
s.wg.Add(1)
- go s.serve(endpoint, srv)
+ go s.serve(endpoint, srv, middlewareConfig)
}
// Wait blocks until the WaitGroup counter.
@@ -80,7 +84,11 @@ func (s *nonBlockingGRPCServer) ForceStop() {
s.server.Stop()
}
-func (s *nonBlockingGRPCServer) serve(endpoint string, srv Servers) {
+func (s *nonBlockingGRPCServer) serve(
+ endpoint string,
+ srv Servers,
+ middlewareConfig MiddlewareServerOptionConfig,
+) {
proto, addr, err := parseEndpoint(endpoint)
if err != nil {
klog.Fatal(err.Error())
@@ -98,7 +106,7 @@ func (s *nonBlockingGRPCServer) serve(endpoint string, srv Servers) {
klog.Fatalf("Failed to listen: %v", err)
}
- server := grpc.NewServer(NewMiddlewareServerOption())
+ server := grpc.NewServer(NewMiddlewareServerOption(middlewareConfig))
s.server = server
if srv.IS != nil {
diff --git a/internal/csi-common/utils.go b/internal/csi-common/utils.go
index 91db95f9324..b541e68f6f4 100644
--- a/internal/csi-common/utils.go
+++ b/internal/csi-common/utils.go
@@ -23,6 +23,7 @@ import (
"runtime/debug"
"strings"
"sync/atomic"
+ "time"
"github.com/ceph/ceph-csi/internal/util"
"github.com/ceph/ceph-csi/internal/util/log"
@@ -108,10 +109,35 @@ func NewGroupControllerServiceCapability(ctrlCap csi.GroupControllerServiceCapab
}
}
+// MiddlewareServerOptionConfig contains configuration parameters
+// that are passed to the respective middleware interceptors that
+// are instantiated when starting gRPC servers.
+type MiddlewareServerOptionConfig struct {
+ LogSlowOpInterval time.Duration // interval between slow-call log messages; the slow-call interceptor is only added when > 0
+}
+
// NewMiddlewareServerOption creates a new grpc.ServerOption that configures a
// common format for log messages and other gRPC related handlers.
-func NewMiddlewareServerOption() grpc.ServerOption {
- middleWare := []grpc.UnaryServerInterceptor{contextIDInjector, logGRPC, panicHandler}
+func NewMiddlewareServerOption(config MiddlewareServerOptionConfig) grpc.ServerOption {
+ middleWare := []grpc.UnaryServerInterceptor{
+ contextIDInjector,
+ logGRPC,
+ }
+
+ if config.LogSlowOpInterval > 0 {
+ middleWare = append(middleWare, func(
+ ctx context.Context,
+ req interface{},
+ info *grpc.UnaryServerInfo,
+ handler grpc.UnaryHandler,
+ ) (interface{}, error) {
+ return logSlowGRPC(
+ config.LogSlowOpInterval, ctx, req, info, handler,
+ )
+ })
+ }
+
+ middleWare = append(middleWare, panicHandler)
return grpc.UnaryInterceptor(grpc_middleware.ChainUnaryServer(middleWare...))
}
@@ -250,6 +276,53 @@ func logGRPC(
return resp, err
}
+func logSlowGRPC(
+ logInterval time.Duration,
+ ctx context.Context,
+ req interface{},
+ info *grpc.UnaryServerInfo,
+ handler grpc.UnaryHandler,
+) (interface{}, error) { // runs handler; once ctx is done and handler is still running, logs every logInterval
+ handlerFinished := make(chan struct{}) // closed after handler returns
+ callStartTime := time.Now() // reference point for the elapsed time in the log message
+
+ // Logs a slow-call message on every logInterval tick until the handler finishes.
+ // Invoked by the goroutine below once ctx is done before handlerFinished is closed.
+ doLogSlowGRPC := func() {
+ ticker := time.NewTicker(logInterval)
+ defer ticker.Stop()
+
+ for {
+ select {
+ case t := <-ticker.C:
+ timePassed := t.Sub(callStartTime).Truncate(time.Second) // elapsed since call start, truncated to whole seconds
+ log.ExtendedLog(ctx,
+ "Slow GRPC call %s (%s)", info.FullMethod, timePassed)
+ log.TraceLog(ctx,
+ "Slow GRPC request: %s", protosanitizer.StripSecrets(req))
+ case <-handlerFinished:
+ return
+ }
+ }
+ }
+
+ go func() {
+ select {
+ case <-ctx.Done():
+ // ctx is done (deadline expired or cancelled); the call most likely outlived it. Start logging slow messages.
+ doLogSlowGRPC()
+ case <-handlerFinished:
+ // The handler returned before ctx was done; nothing to log, exit.
+ return
+ }
+ }()
+
+ resp, err := handler(ctx, req)
+ close(handlerFinished) // signals the goroutine above (and any running ticker loop) to stop
+
+ return resp, err
+}
+
//nolint:nonamedreturns // named return used to send recovered panic error.
func panicHandler(
ctx context.Context,
diff --git a/internal/nfs/driver/driver.go b/internal/nfs/driver/driver.go
index 51eefc568e4..890fba815b3 100644
--- a/internal/nfs/driver/driver.go
+++ b/internal/nfs/driver/driver.go
@@ -77,7 +77,10 @@ func (fs *Driver) Run(conf *util.Config) {
srv.CS = controller.NewControllerServer(cd)
}
- server.Start(conf.Endpoint, srv)
+ server.Start(conf.Endpoint, srv, csicommon.MiddlewareServerOptionConfig{
+ LogSlowOpInterval: conf.LogSlowOpInterval,
+ })
+
if conf.EnableProfiling {
go util.StartMetricsServer(conf)
log.DebugLogMsg("Registering profiling handler")
diff --git a/internal/rbd/driver/driver.go b/internal/rbd/driver/driver.go
index cebfc53cf6b..0ad8109f81a 100644
--- a/internal/rbd/driver/driver.go
+++ b/internal/rbd/driver/driver.go
@@ -179,7 +179,9 @@ func (r *Driver) Run(conf *util.Config) {
CS: r.cs,
NS: r.ns,
}
- s.Start(conf.Endpoint, srv)
+ s.Start(conf.Endpoint, srv, csicommon.MiddlewareServerOptionConfig{
+ LogSlowOpInterval: conf.LogSlowOpInterval,
+ })
r.startProfiling(conf)
@@ -233,7 +235,9 @@ func (r *Driver) setupCSIAddonsServer(conf *util.Config) error {
}
// start the server, this does not block, it runs a new go-routine
- err = r.cas.Start()
+ err = r.cas.Start(csicommon.MiddlewareServerOptionConfig{
+ LogSlowOpInterval: conf.LogSlowOpInterval,
+ })
if err != nil {
return fmt.Errorf("failed to start CSI-Addons server: %w", err)
}
diff --git a/internal/util/util.go b/internal/util/util.go
index 869df991b76..1d62c650223 100644
--- a/internal/util/util.go
+++ b/internal/util/util.go
@@ -131,6 +131,9 @@ type Config struct {
MetricsPort int // TCP port for liveness/grpc metrics requests
PollTime time.Duration // time interval in seconds between each poll
PoolTimeout time.Duration // probe timeout in seconds
+ // Log interval for slow GRPC calls. Calls that outlive their context deadline
+ // are considered slow.
+ LogSlowOpInterval time.Duration
EnableProfiling bool // flag to enable profiling
IsControllerServer bool // if set to true start provisioner server
diff --git a/vendor/google.golang.org/grpc/experimental/stats/metricregistry.go b/vendor/google.golang.org/grpc/experimental/stats/metricregistry.go
index 930140f57ed..1d827dd5d9d 100644
--- a/vendor/google.golang.org/grpc/experimental/stats/metricregistry.go
+++ b/vendor/google.golang.org/grpc/experimental/stats/metricregistry.go
@@ -20,7 +20,6 @@ package stats
import (
"maps"
- "testing"
"google.golang.org/grpc/grpclog"
"google.golang.org/grpc/internal"
@@ -250,9 +249,9 @@ func RegisterInt64Gauge(descriptor MetricDescriptor) *Int64GaugeHandle {
}
// snapshotMetricsRegistryForTesting snapshots the global data of the metrics
-// registry. Registers a cleanup function on the provided testing.T that sets
-// the metrics registry to its original state. Only called in testing functions.
-func snapshotMetricsRegistryForTesting(t *testing.T) {
+// registry. Returns a cleanup function that sets the metrics registry to its
+// original state.
+func snapshotMetricsRegistryForTesting() func() {
oldDefaultMetrics := DefaultMetrics
oldRegisteredMetrics := registeredMetrics
oldMetricsRegistry := metricsRegistry
@@ -262,9 +261,9 @@ func snapshotMetricsRegistryForTesting(t *testing.T) {
maps.Copy(registeredMetrics, registeredMetrics)
maps.Copy(metricsRegistry, metricsRegistry)
- t.Cleanup(func() {
+ return func() {
DefaultMetrics = oldDefaultMetrics
registeredMetrics = oldRegisteredMetrics
metricsRegistry = oldMetricsRegistry
- })
+ }
}
diff --git a/vendor/google.golang.org/grpc/internal/internal.go b/vendor/google.golang.org/grpc/internal/internal.go
index 65f936a623a..73fa407b6c8 100644
--- a/vendor/google.golang.org/grpc/internal/internal.go
+++ b/vendor/google.golang.org/grpc/internal/internal.go
@@ -217,10 +217,9 @@ var (
SetConnectedAddress any // func(scs *SubConnState, addr resolver.Address)
// SnapshotMetricRegistryForTesting snapshots the global data of the metric
- // registry. Registers a cleanup function on the provided testing.T that
- // sets the metric registry to its original state. Only called in testing
- // functions.
- SnapshotMetricRegistryForTesting any // func(t *testing.T)
+ // registry. Returns a cleanup function that sets the metric registry to its
+ // original state. Only called in testing functions.
+ SnapshotMetricRegistryForTesting func() func()
// SetDefaultBufferPoolForTesting updates the default buffer pool, for
// testing purposes.
diff --git a/vendor/google.golang.org/grpc/mem/buffer_slice.go b/vendor/google.golang.org/grpc/mem/buffer_slice.go
index d7775cea623..228e9c2f20f 100644
--- a/vendor/google.golang.org/grpc/mem/buffer_slice.go
+++ b/vendor/google.golang.org/grpc/mem/buffer_slice.go
@@ -19,7 +19,6 @@
package mem
import (
- "compress/flate"
"io"
)
@@ -92,9 +91,11 @@ func (s BufferSlice) Materialize() []byte {
}
// MaterializeToBuffer functions like Materialize except that it writes the data
-// to a single Buffer pulled from the given BufferPool. As a special case, if the
-// input BufferSlice only actually has one Buffer, this function has nothing to
-// do and simply returns said Buffer.
+// to a single Buffer pulled from the given BufferPool.
+//
+// As a special case, if the input BufferSlice only actually has one Buffer, this
+// function simply increases the refcount before returning said Buffer. Freeing this
+// buffer won't release it until the BufferSlice is itself released.
func (s BufferSlice) MaterializeToBuffer(pool BufferPool) Buffer {
if len(s) == 1 {
s[0].Ref()
@@ -124,7 +125,8 @@ func (s BufferSlice) Reader() Reader {
// Remaining(), which returns the number of unread bytes remaining in the slice.
// Buffers will be freed as they are read.
type Reader interface {
- flate.Reader
+ io.Reader
+ io.ByteReader
// Close frees the underlying BufferSlice and never returns an error. Subsequent
// calls to Read will return (0, io.EOF).
Close() error
diff --git a/vendor/google.golang.org/grpc/server.go b/vendor/google.golang.org/grpc/server.go
index 457d27338f7..d1e1415a40f 100644
--- a/vendor/google.golang.org/grpc/server.go
+++ b/vendor/google.golang.org/grpc/server.go
@@ -1359,6 +1359,7 @@ func (s *Server) processUnaryRPC(ctx context.Context, t transport.ServerTranspor
}
return err
}
+ defer d.Free()
if channelz.IsOn() {
t.IncrMsgRecv()
}
diff --git a/vendor/google.golang.org/grpc/version.go b/vendor/google.golang.org/grpc/version.go
index 1ffec6e2cee..7c70005d083 100644
--- a/vendor/google.golang.org/grpc/version.go
+++ b/vendor/google.golang.org/grpc/version.go
@@ -19,4 +19,4 @@
package grpc
// Version is the current grpc version.
-const Version = "1.66.0"
+const Version = "1.66.2"
diff --git a/vendor/modules.txt b/vendor/modules.txt
index 0ffd4315e24..d82242ba610 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -834,7 +834,7 @@ google.golang.org/genproto/googleapis/api/httpbody
## explicit; go 1.20
google.golang.org/genproto/googleapis/rpc/errdetails
google.golang.org/genproto/googleapis/rpc/status
-# google.golang.org/grpc v1.66.0
+# google.golang.org/grpc v1.66.2
## explicit; go 1.21
google.golang.org/grpc
google.golang.org/grpc/attributes