Improve NFS home directory restore documentation #5291
base: main
Changes from all commits
```diff
@@ -37,6 +37,15 @@ def root_homes(
     extra_nfs_mount_path: str = typer.Option(
         None, help="Mount point for the extra NFS share"
     ),
+    restore_volume_id: str = typer.Option(
+        None,
+        help="ID of volume to mount (e.g. vol-1234567890abcdef0) "
+        "to restore data from",
+    ),
+    restore_mount_path: str = typer.Option(
+        "/restore-volume", help="Mount point for the volume"
+    ),
+    restore_volume_size: str = typer.Option("100Gi", help="Size of the volume"),
 ):
     """
     Pop an interactive shell with the entire nfs file system of the given cluster mounted on /root-homes
```

Review comment (on `restore_mount_path`): See comment on `restore_volume_id` about using 'restore' in the name. I'd say this could be

Review comment (on `restore_volume_size`): See comment later about specifying the size.
```diff
@@ -82,6 +91,57 @@ def root_homes(
         }
     ]
 
+    if restore_volume_id:
+        # Create PV for volume
+        pv_name = f"restore-{restore_volume_id}"
+        pv = {
+            "apiVersion": "v1",
+            "kind": "PersistentVolume",
+            "metadata": {"name": pv_name},
+            "spec": {
+                "capacity": {"storage": restore_volume_size},
+                "volumeMode": "Filesystem",
+                "accessModes": ["ReadWriteOnce"],
+                "persistentVolumeReclaimPolicy": "Retain",
+                "storageClassName": "",
+                "csi": {
+                    "driver": "ebs.csi.aws.com",
+                    "fsType": "xfs",
+                    "volumeHandle": restore_volume_id,
+                },
+                "mountOptions": [
+                    "rw",
+                    "relatime",
+                    "nouuid",
+                    "attr2",
+                    "inode64",
+                    "logbufs=8",
+                    "logbsize=32k",
+                    "pquota",
+                ],
+            },
+        }
+
+        # Create PVC to bind to the PV
+        pvc = {
+            "apiVersion": "v1",
+            "kind": "PersistentVolumeClaim",
+            "metadata": {"name": pv_name, "namespace": hub_name},
+            "spec": {
+                "accessModes": ["ReadWriteOnce"],
+                "volumeName": pv_name,
+                "storageClassName": "",
+                "resources": {"requests": {"storage": restore_volume_size}},
+            },
+        }
+
+        volumes.append(
+            {"name": "restore-volume", "persistentVolumeClaim": {"claimName": pv_name}}
+        )
+        volume_mounts.append(
+            {"name": "restore-volume", "mountPath": restore_mount_path}
+        )
+
     if extra_nfs_server and extra_nfs_base_path and extra_nfs_mount_path:
         volumes.append(
             {
```

Review comment (on the PV `capacity` field): Is this necessary for us to specify? I worry that if we get this wrong, it will trigger the Kubernetes volume resizing feature.

Reply: I suspect storage capacity is a required field; I'll try to confirm whether that's required or not. Reading this warning note, it looks like volume expansion is triggered only when the storage capacity is updated for an existing PVC that is already bound to a PV, creating a mismatch between the specified storage capacity of the PVC and the bound PV. I will confirm what happens when the specified volume on the PV and the PVC is larger than the actual size of the disk and

Reply: I did some testing and storage capacity is indeed a required field. I also tried passing a larger size than the actual disk size as the argument to see if that triggers a resize. The PV and PVC were labeled with the wrong capacity, but the underlying disk was not resized. @yuvipanda How would you like to move forward?

Reply: @sunu ah, if it doesn't trigger a resize I would say:
This would allow us to not have the engineer doing this need to know the filesize.

Review comment (on the PVC `resources.requests.storage` line): See comment about specifying sizes.
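If the maintainers take that route, a minimal sketch of the change, reusing the `pv` and `pvc` dicts built in the hunk above (the `PLACEHOLDER_CAPACITY` constant is hypothetical, not something in this PR):

```python
# Hypothetical sketch of the suggestion above: the capacity field is
# required by the Kubernetes API, but a wrong value only mislabels the
# PV/PVC -- per the testing described above, it does not resize the
# underlying disk -- so a hardcoded placeholder would spare the engineer
# from having to look up the real disk size.
PLACEHOLDER_CAPACITY = "100Gi"  # label only, not the actual disk size

pv["spec"]["capacity"] = {"storage": PLACEHOLDER_CAPACITY}
pvc["spec"]["resources"] = {"requests": {"storage": PLACEHOLDER_CAPACITY}}
```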
```diff
@@ -141,6 +201,23 @@ def root_homes(
 
     with cluster.auth():
         try:
+            # If we have a volume to restore from, create PV and PVC first
+            if restore_volume_id:
+                with tempfile.NamedTemporaryFile(
+                    mode="w", suffix=".json"
+                ) as pv_tmpf:
+                    json.dump(pv, pv_tmpf)
+                    pv_tmpf.flush()
+                    subprocess.check_call(["kubectl", "create", "-f", pv_tmpf.name])
+
+                with tempfile.NamedTemporaryFile(
+                    mode="w", suffix=".json"
+                ) as pvc_tmpf:
+                    json.dump(pvc, pvc_tmpf)
+                    pvc_tmpf.flush()
+                    subprocess.check_call(
+                        ["kubectl", "create", "-f", pvc_tmpf.name]
+                    )
             # We are splitting the previous `kubectl run` command into a
             # create and exec couplet, because using run meant that the bash
             # process we start would be assigned PID 1. This is a 'special'
```
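The tempfile-plus-`kubectl create -f` pattern above works because `kubectl` accepts JSON manifests as well as YAML. For comparison, a hedged alternative sketch that avoids the temporary files by piping the manifest over stdin (`kubectl create -f -`); this is not what the PR does, just an equivalent formulation:

```python
import json
import subprocess


def kubectl_create(manifest: dict) -> None:
    # kubectl reads a manifest from stdin when the filename is "-",
    # so the JSON never needs to touch the filesystem.
    subprocess.run(
        ["kubectl", "create", "-f", "-"],
        input=json.dumps(manifest),
        text=True,
        check=True,
    )


# e.g. kubectl_create(pv); kubectl_create(pvc), with the dicts built above
```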
@@ -152,11 +229,32 @@ def root_homes( | |
# | ||
# Ask api-server to create a pod | ||
subprocess.check_call(["kubectl", "create", "-f", tmpf.name]) | ||
|
||
# wait for the pod to be ready | ||
print_colour("Waiting for the pod to be ready...", "yellow") | ||
subprocess.check_call( | ||
[ | ||
"kubectl", | ||
"wait", | ||
"-n", | ||
hub_name, | ||
f"pod/{pod_name}", | ||
"--for=condition=Ready", | ||
"--timeout=90s", | ||
] | ||
) | ||
|
||
# Exec into pod | ||
subprocess.check_call(exec_cmd) | ||
finally: | ||
if not persist: | ||
delete_pod(pod_name, hub_name) | ||
# Clean up PV and PVC if we created them | ||
if restore_volume_id: | ||
subprocess.check_call( | ||
["kubectl", "-n", hub_name, "delete", "pvc", pv_name] | ||
) | ||
subprocess.check_call(["kubectl", "delete", "pv", pv_name]) | ||
|
||
|
||
@exec_app.command() | ||
|
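For reference, once these options are wired into the typer app, an engineer would presumably invoke something like `deployer exec root-homes <cluster_name> <hub_name> --restore-volume-id vol-1234567890abcdef0` (the `deployer` entrypoint name is an assumption based on this repository's CLI; typer derives the `--restore-volume-id` flag from the parameter name), landing in an interactive shell with the extra volume mounted at `/restore-volume` alongside `/root-homes`.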
Review comment (on `restore_volume_id`): Let's call this `additional_ebs_volume_id` or something like that, as the code itself doesn't do anything to do with restoring - just mounting additional ebs volumes.

Reply: Should we use `additional_volume_id` to keep it platform agnostic? I can add in the help text that only EBS volumes are supported for now.

Reply: That sounds reasonable to me for now. Of course, we can always update things as we begin work on other platforms.

Reply: Ya, I leave it to you both for now!
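Given where this thread landed, a hedged sketch of how the options might be declared after the rename (parameter names taken from the discussion above; the surrounding signature and mount-path default are illustrative, not the merged code):

```python
import typer

exec_app = typer.Typer()


@exec_app.command()
def root_homes(
    # Hypothetical rename per the review: the code only mounts an extra
    # volume, so the option name avoids 'restore' and stays platform
    # agnostic, with the EBS-only limitation called out in the help text.
    additional_volume_id: str = typer.Option(
        None,
        help="ID of an additional volume to mount "
        "(e.g. vol-1234567890abcdef0). Only AWS EBS volumes are "
        "supported for now.",
    ),
    additional_volume_mount_path: str = typer.Option(
        "/additional-volume", help="Mount point for the additional volume"
    ),
):
    ...
```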