From bab48c806c60111666d4a0f6aa1995d2cda298bd Mon Sep 17 00:00:00 2001 From: Ben Wheatley Date: Thu, 3 Feb 2022 13:37:30 +0000 Subject: [PATCH] Fix order of operations when destroying instance In the implementation prior to this patch, we remove the instance from the "store", i.e. the `instances` database table, and _then_ proceed to actually remove the instance and its data. This isn't good, because if the removal of the actual instance fails, then we've already removed the pointer to that instance in the DB, meaning that it's not possible to retry the removal manually via the API *and* the process that occurs when a deletion of an _image_ is requested, which in turn deletes all instances which are referencing that image, will skip removing the instance data. Removing the instance from the database table is actually far more likely to succeed than the removal of the instance itself, as this involves stopping the Postgres processs and removing the btrfs subvolume - both of which take some time. If in that time the HTTP request for `DELETE /instances/{id}` is cancelled (i.e. connection terminated by the user and the context cancellation passed back through to the thread), then we end up in the state described above. The biggest problem in leaving instance data on disk is that the instance is using a copy-on-write snapshot of the entire *image*, meaning that even if that image is deleted from the `image_uploads` directory there's still no way for btrfs to reclaim the disk space, and therefore we end up with a disk too full to prepare any new images. --- pkg/server/api/routes/instances.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/server/api/routes/instances.go b/pkg/server/api/routes/instances.go index b11792ca..d27bf59d 100644 --- a/pkg/server/api/routes/instances.go +++ b/pkg/server/api/routes/instances.go @@ -257,14 +257,14 @@ func (i Instances) Destroy(w http.ResponseWriter, r *http.Request) error { } logger.With("instance", id).Info("destroying instance") - err = i.InstanceStore.Destroy(instance) + err = i.Executor.DestroyInstance(r.Context(), instance.ID) if err != nil { - return errors.Wrap(err, "failed to destroy instance") + return errors.Wrap(err, "failed to destroy instance on disk") } - err = i.Executor.DestroyInstance(r.Context(), instance.ID) + err = i.InstanceStore.Destroy(instance) if err != nil { - return errors.Wrap(err, "failed to destroy instance") + return errors.Wrap(err, "failed to remove instance from table") } // Destroying the instance will cascade and destroy any linked whitelisted