Merge pull request #208 from red-hat-storage/sync_us--devel
Syncing latest changes from devel for ceph-csi
openshift-ci[bot] authored Nov 6, 2023
2 parents ef69b84 + 9f75388 commit 817ac33
Showing 30 changed files with 936 additions and 203 deletions.
16 changes: 15 additions & 1 deletion .github/pull_request_template.md
@@ -34,6 +34,20 @@ Fixes: #issue_number
List items that are not part of the PR and do not impact its
functionality, but are work items that can be taken up subsequently.

+ **Checklist:**
+
+ * [ ] **Commit Message Formatting**: Commit titles and messages follow
+ guidelines in the [developer
+ guide](https://github.com/ceph/ceph-csi/blob/devel/docs/development-guide.md#commit-messages).
+ * [ ] Reviewed the developer guide on [Submitting a Pull
+ Request](https://github.com/ceph/ceph-csi/blob/devel/docs/development-guide.md#development-workflow)
+ * [ ] [Pending release
+ notes](https://github.com/ceph/ceph-csi/blob/devel/PendingReleaseNotes.md)
+ updated with breaking and/or notable changes for the next major release.
+ * [ ] Documentation has been updated, if necessary.
+ * [ ] Unit tests have been added, if necessary.
+ * [ ] Integration tests have been added, if necessary.

---

<details>
@@ -42,7 +56,7 @@ functionality, but are work items that can be taken up subsequently.
These commands are normally not required, but in case of issues, leave any of
the following bot commands in an otherwise empty comment in this PR:

- - `/retest ci/centos/<job-name>`: retest the `<job-name>` after unrelated
+ * `/retest ci/centos/<job-name>`: retest the `<job-name>` after unrelated
failure (please report the failure too!)

</details>
4 changes: 1 addition & 3 deletions Makefile
@@ -47,9 +47,7 @@ endif
GO_PROJECT=github.com/ceph/ceph-csi

CEPH_VERSION ?= $(shell . $(CURDIR)/build.env ; echo $${CEPH_VERSION})
- # TODO: ceph_preview tag may be removed with go-ceph 0.17.0
- # TODO: ceph_ci_untested is added for subvolume metadata (go-ceph#691) and snapshot metadata management (go-ceph#698)
- GO_TAGS_LIST ?= $(CEPH_VERSION) ceph_preview ceph_ci_untested ceph_pre_quincy
+ GO_TAGS_LIST ?= $(CEPH_VERSION)

# go build flags
LDFLAGS ?=
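For context on what the dropped tags did: `GO_TAGS_LIST` is passed to `go build -tags`, and each tag enables Go files that carry a matching build constraint. A minimal, hypothetical illustration — the package and function names are invented:

```go
//go:build ceph_preview

// Hypothetical example of how a build tag such as ceph_preview gates
// code: this file only compiles when "-tags ceph_preview" is passed,
// which is exactly what dropping the tag from GO_TAGS_LIST turns off.
package example

// PreviewOnly exists in the binary only for tagged builds.
func PreviewOnly() string {
	return "built with the ceph_preview tag"
}
```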
5 changes: 5 additions & 0 deletions PendingReleaseNotes.md
@@ -0,0 +1,5 @@
# v3.10 Pending Release Notes

## Breaking Changes

## Features
14 changes: 3 additions & 11 deletions cmd/cephcsi.go
@@ -106,8 +106,8 @@ func init() {
"",
"Comma separated string of mount options accepted by ceph-fuse mounter")

- // liveness/grpc metrics related flags
- flag.IntVar(&conf.MetricsPort, "metricsport", 8080, "TCP port for liveness/grpc metrics requests")
+ // liveness/profile metrics related flags
+ flag.IntVar(&conf.MetricsPort, "metricsport", 8080, "TCP port for liveness/profile metrics requests")
flag.StringVar(
&conf.MetricsPath,
"metricspath",
@@ -116,14 +116,6 @@ func init() {
flag.DurationVar(&conf.PollTime, "polltime", time.Second*pollTime, "time interval in seconds between each poll")
flag.DurationVar(&conf.PoolTimeout, "timeout", time.Second*probeTimeout, "probe timeout in seconds")

- flag.BoolVar(&conf.EnableGRPCMetrics, "enablegrpcmetrics", false, "[DEPRECATED] enable grpc metrics")
- flag.StringVar(
- &conf.HistogramOption,
- "histogramoption",
- "0.5,2,6",
- "[DEPRECATED] Histogram option for grpc metrics, should be comma separated value, "+
- "ex:= 0.5,2,6 where start=0.5 factor=2, count=6")

flag.UintVar(
&conf.RbdHardMaxCloneDepth,
"rbdhardmaxclonedepth",
@@ -210,7 +202,7 @@ func main() {

setPIDLimit(&conf)

- if conf.EnableGRPCMetrics || conf.Vtype == livenessType {
+ if conf.EnableProfiling || conf.Vtype == livenessType {
// validate metrics endpoint
conf.MetricsIP = os.Getenv("POD_IP")

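Condensed, the new gating reads as follows — a sketch with simplified stand-ins for `conf.EnableProfiling` and `conf.Vtype`, not the actual cephcsi.go wiring:

```go
package main

import (
	"flag"
	"log"
	"net"
	"os"
)

func main() {
	// Simplified stand-ins for conf.EnableProfiling and conf.Vtype.
	enableProfiling := flag.Bool("enableprofiling", false, "enable Go profiling endpoints")
	vtype := flag.String("type", "", "driver type (rbd, cephfs, liveness, ...)")
	flag.Parse()

	// The metrics endpoint is validated only when profiling is enabled
	// or the binary runs as the liveness sidecar.
	if *enableProfiling || *vtype == "liveness" {
		metricsIP := os.Getenv("POD_IP")
		if net.ParseIP(metricsIP) == nil {
			log.Fatalf("invalid POD_IP %q for the metrics endpoint", metricsIP)
		}
	}
}
```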
10 changes: 5 additions & 5 deletions deploy.sh
@@ -14,10 +14,10 @@ GITHUB_EMAIL=${GITHUB_EMAIL:-"[email protected]"}

# Build and push images. Steps as below:
# 1. get base image from ./build.env (BASE_IMAGE=ceph/ceph:v14.2)
- # 2. parse manifest to get image digest per arch (sha256:XXX, sha256:YYY)
- # 3. patch Dockerfile with amd64 base image (FROM ceph/ceph:v14.2@sha256:XXX)
+ # 2. parse manifest to get image digest per arch (sha256:XYZ, sha256:ZYX)
+ # 3. patch Dockerfile with amd64 base image (FROM ceph/ceph:v14.2@sha256:XYZ)
# 4. build and push amd64 image
- # 5. patch Dockerfile with arm64 base image (FROM ceph/ceph:v14.2@sha256:YYY)
+ # 5. patch Dockerfile with arm64 base image (FROM ceph/ceph:v14.2@sha256:ZYX)
# 6. build and push arm64 image
build_push_images() {
# "docker manifest" requires experimental feature enabled
@@ -29,11 +29,11 @@ build_push_images() {
# get image digest per architecture
# {
# "arch": "amd64",
# "digest": "sha256:XXX"
# "digest": "sha256:XYZ"
# }
# {
# "arch": "arm64",
# "digest": "sha256:YYY"
# "digest": "sha256:ZYX"
# }
manifests=$(docker manifest inspect "${baseimg}" | jq '.manifests[] | {arch: .platform.architecture, digest: .digest}')
# qemu-user-static is to enable an execution of different multi-architecture containers by QEMU
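The digest-per-architecture extraction done above with `jq` can also be pictured in Go; this is a hedged illustration of the same data flow, not part of the script:

```go
package main

import (
	"encoding/json"
	"fmt"
	"os/exec"
)

// manifestList models the fields of "docker manifest inspect" output
// that the script cares about.
type manifestList struct {
	Manifests []struct {
		Digest   string `json:"digest"`
		Platform struct {
			Architecture string `json:"architecture"`
		} `json:"platform"`
	} `json:"manifests"`
}

// digestsPerArch mirrors the jq pipeline: one digest per architecture.
func digestsPerArch(image string) (map[string]string, error) {
	out, err := exec.Command("docker", "manifest", "inspect", image).Output()
	if err != nil {
		return nil, fmt.Errorf("docker manifest inspect failed: %w", err)
	}
	var ml manifestList
	if err := json.Unmarshal(out, &ml); err != nil {
		return nil, err
	}
	digests := make(map[string]string, len(ml.Manifests))
	for _, m := range ml.Manifests {
		digests[m.Platform.Architecture] = m.Digest
	}
	return digests, nil
}

func main() {
	digests, err := digestsPerArch("ceph/ceph:v14.2")
	if err != nil {
		panic(err)
	}
	fmt.Println(digests["amd64"], digests["arm64"])
}
```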
2 changes: 0 additions & 2 deletions docs/deploy-cephfs.md
@@ -40,11 +40,9 @@ make image-cephcsi
| `--pidlimit` | _0_ | Configure the PID limit in cgroups. The container runtime can restrict the number of processes/tasks which can cause problems while provisioning (or deleting) a large number of volumes. A value of `-1` configures the limit to the maximum, `0` does not configure limits at all. |
| `--metricsport` | `8080` | TCP port for liveness metrics requests |
| `--metricspath` | `/metrics` | Path of prometheus endpoint where metrics will be available |
- | `--enablegrpcmetrics` | `false` | [Deprecated] Enable grpc metrics collection and start prometheus server |
| `--polltime` | `60s` | Time interval in between each poll |
| `--timeout` | `3s` | Probe timeout in seconds |
| `--clustername` | _empty_ | Cluster name to set on subvolume |
- | `--histogramoption` | `0.5,2,6` | [Deprecated] Histogram option for grpc metrics, should be comma separated value (ex:= "0.5,2,6" where start=0.5 factor=2, count=6) |
| `--forcecephkernelclient` | `false` | Force enabling Ceph Kernel clients for mounting on kernels < 4.17 |
| `--kernelmountoptions` | _empty_ | Comma separated string of mount options accepted by cephfs kernel mounter |
| `--fusemountoptions` | _empty_ | Comma separated string of mount options accepted by ceph-fuse mounter |
2 changes: 0 additions & 2 deletions docs/deploy-rbd.md
@@ -37,11 +37,9 @@ make image-cephcsi
| `--pidlimit` | _0_ | Configure the PID limit in cgroups. The container runtime can restrict the number of processes/tasks which can cause problems while provisioning (or deleting) a large number of volumes. A value of `-1` configures the limit to the maximum, `0` does not configure limits at all. |
| `--metricsport` | `8080` | TCP port for liveness metrics requests |
| `--metricspath` | `"/metrics"` | Path of prometheus endpoint where metrics will be available |
- | `--enablegrpcmetrics` | `false` | [Deprecated] Enable grpc metrics collection and start prometheus server |
| `--polltime` | `"60s"` | Time interval in between each poll |
| `--timeout` | `"3s"` | Probe timeout in seconds |
| `--clustername` | _empty_ | Cluster name to set on RBD image |
- | `--histogramoption` | `0.5,2,6` | [Deprecated] Histogram option for grpc metrics, should be comma separated value (ex:= "0.5,2,6" where start=0.5 factor=2, count=6) |
| `--domainlabels` | _empty_ | Kubernetes node labels to use as CSI domain labels for topology aware provisioning, should be a comma separated value (ex:= "failure-domain/region,failure-domain/zone") |
| `--rbdhardmaxclonedepth` | `8` | Hard limit for maximum number of nested volume clones that are taken before a flatten occurs |
| `--rbdsoftmaxclonedepth` | `4` | Soft limit for maximum number of nested volume clones that are taken before a flatten occurs |
25 changes: 25 additions & 0 deletions docs/design/proposals/volume-condition.md
@@ -0,0 +1,25 @@
# Support for CSI `VolumeCondition` aka Volume Health Checker

## health-checker API

The Manager for health-checking is implemented under `internal/health-checker`.
It can start a checking process for a given path, report the (un)healthy
state, and stop the checking process when the volume is no longer needed.

The Manager is responsible for creating a suitable checker for the requested
path. If the path is a block device, the BlockChecker should be created; for a
filesystem path (directory), the FileChecker is appropriate.
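A minimal sketch of what such a Manager and its checkers could look like (the method names here are illustrative, not the actual `internal/health-checker` API):

```go
package healthchecker

// ConditionChecker is the behaviour shared by FileChecker and
// BlockChecker in this sketch.
type ConditionChecker interface {
	Start()                   // begin periodic checks of the path
	Stop()                    // stop checking once the volume is unstaged
	IsHealthy() (bool, error) // report the last observed state
}

// Manager tracks one checker per staged volume path and picks a
// FileChecker or BlockChecker based on the kind of path.
type Manager interface {
	StartChecker(path string) error
	StopChecker(path string)
	IsHealthy(path string) (bool, error)
}
```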

## CephFS

The health-checker writes to the file `csi-volume-condition.ts` in the root of
the volume. This file contains a JSON-formatted timestamp.
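For instance, a FileChecker along these lines could produce that file — the filename follows this proposal, the rest is a sketch:

```go
package healthchecker

import (
	"encoding/json"
	"os"
	"path/filepath"
	"time"
)

// writeTimestamp stores the current time, JSON-encoded, in the
// csi-volume-condition.ts file at the root of the volume; a stale
// timestamp on a later check would indicate an unhealthy volume.
func writeTimestamp(volumeRoot string) error {
	ts, err := json.Marshal(time.Now())
	if err != nil {
		return err
	}

	return os.WriteFile(filepath.Join(volumeRoot, "csi-volume-condition.ts"), ts, 0o644)
}
```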

A new `data` directory is introduced for newly created volumes. During the
`NodeStageVolume` call the root of the volume is mounted, and the `data`
directory is bind-mounted inside the container when `NodePublishVolume` is
called.

The `data` directory makes it possible to place Ceph-CSI internal files in the
root of the volume without giving the user/application access to them.
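The bind-mount step might look roughly like this during `NodePublishVolume` (illustrative only; the real staging/publish plumbing is more involved):

```go
package cephfs

import (
	"path/filepath"

	"golang.org/x/sys/unix"
)

// publishDataDir bind-mounts the "data" subdirectory of the staged
// volume onto the target path, so the application never sees the
// csi-volume-condition.ts file in the volume root.
func publishDataDir(stagingPath, targetPath string) error {
	return unix.Mount(filepath.Join(stagingPath, "data"), targetPath, "", unix.MS_BIND, "")
}
```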
17 changes: 13 additions & 4 deletions e2e/node.go
@@ -26,29 +26,38 @@ import (
"k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/test/e2e/framework"
e2enode "k8s.io/kubernetes/test/e2e/framework/node"
testutils "k8s.io/kubernetes/test/utils"
)

- func createNodeLabel(f *framework.Framework, labelKey, labelValue string) error {
+ func addLabelsToNodes(f *framework.Framework, labels map[string]string) error {
// NOTE: This makes all nodes (in a multi-node setup) in the test take
// the same label values, which is fine for the test
nodes, err := f.ClientSet.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
if err != nil {
return fmt.Errorf("failed to list node: %w", err)
}
for i := range nodes.Items {
- e2enode.AddOrUpdateLabelOnNode(f.ClientSet, nodes.Items[i].Name, labelKey, labelValue)
+ if err := testutils.AddLabelsToNode(f.ClientSet, nodes.Items[i].Name, labels); err != nil {
+ return fmt.Errorf("failed to add labels to node: %w", err)
+ }
}

return nil
}

- func deleteNodeLabel(c kubernetes.Interface, labelKey string) error {
+ func deleteNodeLabels(c kubernetes.Interface, labelKeys []string) error {
nodes, err := c.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
if err != nil {
return fmt.Errorf("failed to list node: %w", err)
}
for i := range nodes.Items {
- e2enode.RemoveLabelOffNode(c, nodes.Items[i].Name, labelKey)
+ if err := testutils.RemoveLabelOffNode(c, nodes.Items[i].Name, labelKeys); err != nil {
+ return fmt.Errorf("failed to remove label off node: %w", err)
+ }
+
+ if err := testutils.VerifyLabelsRemoved(c, nodes.Items[i].Name, labelKeys); err != nil {
+ return fmt.Errorf("failed to verify label removed from node: %w", err)
+ }
}

return nil
54 changes: 16 additions & 38 deletions e2e/rbd.go
@@ -275,21 +275,14 @@ var _ = Describe("RBD", func() {
}
c = f.ClientSet
if deployRBD {
- err := createNodeLabel(f, nodeRegionLabel, regionValue)
- if err != nil {
- framework.Failf("failed to create node label: %v", err)
- }
- err = createNodeLabel(f, nodeZoneLabel, zoneValue)
- if err != nil {
- framework.Failf("failed to create node label: %v", err)
- }
- err = createNodeLabel(f, crushLocationRegionLabel, crushLocationRegionValue)
- if err != nil {
- framework.Failf("failed to create node label: %v", err)
- }
- err = createNodeLabel(f, crushLocationZoneLabel, crushLocationZoneValue)
+ err := addLabelsToNodes(f, map[string]string{
+ nodeRegionLabel: regionValue,
+ nodeZoneLabel: zoneValue,
+ crushLocationRegionLabel: crushLocationRegionValue,
+ crushLocationZoneLabel: crushLocationZoneValue,
+ })
if err != nil {
framework.Failf("failed to create node label: %v", err)
framework.Failf("failed to add node labels: %v", err)
}
if cephCSINamespace != defaultNs {
err = createNamespace(c, cephCSINamespace)
@@ -408,31 +401,16 @@ var _ = Describe("RBD", func() {
}
}
}
- err = deleteNodeLabel(c, nodeRegionLabel)
- if err != nil {
- framework.Failf("failed to delete node label: %v", err)
- }
- err = deleteNodeLabel(c, nodeZoneLabel)
- if err != nil {
- framework.Failf("failed to delete node label: %v", err)
- }
- // Remove the CSI labels that get added
- err = deleteNodeLabel(c, nodeCSIRegionLabel)
- if err != nil {
- framework.Failf("failed to delete node label: %v", err)
- }
- err = deleteNodeLabel(c, nodeCSIZoneLabel)
- if err != nil {
- framework.Failf("failed to delete node label: %v", err)
- }
- // Remove the CRUSH Location labels
- err = deleteNodeLabel(c, crushLocationRegionLabel)
- if err != nil {
- framework.Failf("failed to delete node label: %v", err)
- }
- err = deleteNodeLabel(c, crushLocationZoneLabel)
+ err = deleteNodeLabels(c, []string{
+ nodeRegionLabel,
+ nodeZoneLabel,
+ nodeCSIRegionLabel,
+ nodeCSIZoneLabel,
+ crushLocationRegionLabel,
+ crushLocationZoneLabel,
+ })
if err != nil {
framework.Failf("failed to delete node label: %v", err)
framework.Failf("failed to delete node labels: %v", err)
}
})

23 changes: 10 additions & 13 deletions e2e/upgrade-rbd.go
@@ -107,14 +107,12 @@ var _ = Describe("RBD Upgrade Testing", func() {
if err != nil {
framework.Failf("failed to create snapshotclass: %v", err)
}

- err = createNodeLabel(f, nodeRegionLabel, regionValue)
- if err != nil {
- framework.Failf("failed to create node label: %v", err)
- }
- err = createNodeLabel(f, nodeZoneLabel, zoneValue)
+ err = addLabelsToNodes(f, map[string]string{
+ nodeRegionLabel: regionValue,
+ nodeZoneLabel: zoneValue,
+ })
if err != nil {
framework.Failf("failed to create node label: %v", err)
framework.Failf("failed to add node labels: %v", err)
}
})
AfterEach(func() {
@@ -167,13 +165,12 @@ var _ = Describe("RBD Upgrade Testing", func() {
}
}
}
- err = deleteNodeLabel(c, nodeRegionLabel)
- if err != nil {
- framework.Failf("failed to delete node label: %v", err)
- }
- err = deleteNodeLabel(c, nodeZoneLabel)
+ err = deleteNodeLabels(c, []string{
+ nodeRegionLabel,
+ nodeZoneLabel,
+ })
if err != nil {
framework.Failf("failed to delete node label: %v", err)
framework.Failf("failed to delete node labels: %v", err)
}
})

1 change: 0 additions & 1 deletion go.mod
@@ -7,7 +7,6 @@ require (
github.com/aws/aws-sdk-go v1.46.4
github.com/aws/aws-sdk-go-v2/service/sts v1.23.2
github.com/ceph/ceph-csi/api v0.0.0-00010101000000-000000000000
- // TODO: API for managing subvolume metadata and snapshot metadata requires `ceph_ci_untested` build-tag
github.com/ceph/go-ceph v0.24.0
github.com/container-storage-interface/spec v1.9.0
github.com/csi-addons/replication-lib-utils v0.2.0
13 changes: 5 additions & 8 deletions internal/cephfs/driver.go
@@ -25,6 +25,7 @@ import (
casceph "github.com/ceph/ceph-csi/internal/csi-addons/cephfs"
csiaddons "github.com/ceph/ceph-csi/internal/csi-addons/server"
csicommon "github.com/ceph/ceph-csi/internal/csi-common"
hc "github.com/ceph/ceph-csi/internal/health-checker"
"github.com/ceph/ceph-csi/internal/journal"
"github.com/ceph/ceph-csi/internal/util"
"github.com/ceph/ceph-csi/internal/util/log"
@@ -82,6 +83,7 @@ func NewNodeServer(
VolumeLocks: util.NewVolumeLocks(),
kernelMountOptions: kernelMountOptions,
fuseMountOptions: fuseMountOptions,
+ healthChecker: hc.NewHealthCheckManager(),
}
}

@@ -167,15 +169,10 @@ func (fs *Driver) Run(conf *util.Config) {
// passing nil for replication server as cephFS does not support mirroring.
RS: nil,
}
- server.Start(conf.Endpoint, conf.HistogramOption, srv, conf.EnableGRPCMetrics)
- if conf.EnableGRPCMetrics {
- log.WarningLogMsg("EnableGRPCMetrics is deprecated")
- go util.StartMetricsServer(conf)
- }
+ server.Start(conf.Endpoint, srv)

if conf.EnableProfiling {
- if !conf.EnableGRPCMetrics {
- go util.StartMetricsServer(conf)
- }
+ go util.StartMetricsServer(conf)
log.DebugLogMsg("Registering profiling handler")
go util.EnableProfiling()
}
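With the gRPC metrics path gone, profiling is the remaining reason to start the metrics server here. What `util.EnableProfiling` achieves can be sketched with Go's standard pprof handlers — an assumption about the mechanism, not the actual util implementation:

```go
package main

import (
	"log"
	"net/http"
	_ "net/http/pprof" // registers /debug/pprof/... on http.DefaultServeMux
)

// serveProfiling is illustrative only: exposing the default mux makes
// the standard Go profiling endpoints (heap, goroutine, cpu) reachable.
func serveProfiling(addr string) {
	log.Printf("profiling available at http://%s/debug/pprof/", addr)
	log.Fatal(http.ListenAndServe(addr, nil))
}

func main() {
	go serveProfiling("127.0.0.1:8080")
	select {} // block forever, standing in for the driver's main loop
}
```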