Skip to content

Commit

Permalink
fix: move log collector creation logic (#1870)
Browse files Browse the repository at this point in the history
## Description:
Fixes: #1833

The resource leak was being caused by a state where if a failure
occurred while creating an enclave (eg. ctrl+C while running `kurtosis
enclave add`) at a point where the log collector container was created
BEFORE it had been connected to the network AND before the necessary
`defer undo`s to clean up the log collector had been queued THEN the
network would get cleaned up by the `defer undo` from `CreateNetwork`,
but the log collector container would remain.

Any attempt to do a `kurtosis clean -a` would fail to clean log
collector container because the network(and thus enclave) the container
was created for had already been cleaned up.

After digging - I realized the log collector was being created in
`container-engine-lib` as opposed to in the `engine` Moving the logic to
the engine AFTER the enclave is created fixes the issue. If there is an
error at ANY point in the creation of the log collector container (even
if the log collectors `defer undo` hasn't been queued), the `defer undo`
from the `CreateEnclave` call will clean the log collector because it
has a label with the `enclaveUUID`.

## Is this change user facing?
NO

## References:
#1833
  • Loading branch information
tedim52 authored Nov 30, 2023
1 parent d18f20e commit b695e27
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,6 @@ const (

shouldFetchStoppedContainersWhenDumpingEnclave = true

defaultHttpLogsCollectorPortNum = uint16(9712)
defaultTcpLogsCollectorPortNum = uint16(9713)

serializedArgs = "SERIALIZED_ARGS"
)

Expand Down Expand Up @@ -148,21 +145,7 @@ func (backend *DockerKurtosisBackend) CreateEnclave(ctx context.Context, enclave

// TODO: return production mode for create enclave request as well
newEnclave := enclave.NewEnclave(enclaveUuid, enclaveName, enclave.EnclaveStatus_Empty, &creationTime, false)
// TODO the logs collector has a random private ip address in the enclave network that must be tracked
if _, err := backend.CreateLogsCollectorForEnclave(ctx, enclaveUuid, defaultTcpLogsCollectorPortNum, defaultHttpLogsCollectorPortNum); err != nil {
return nil, stacktrace.Propagate(err, "An error occurred creating the logs collector with TCP port number '%v' and HTTP port number '%v'", defaultTcpLogsCollectorPortNum, defaultHttpLogsCollectorPortNum)
}
shouldDeleteLogsCollector := true
defer func() {
if shouldDeleteLogsCollector {
err = backend.DestroyLogsCollectorForEnclave(ctx, enclaveUuid)
if err != nil {
logrus.Errorf("Couldn't cleanup logs collector for enclave '%v' as the following error was thrown:\n%v", enclaveUuid, err)
}
}
}()

shouldDeleteLogsCollector = false
shouldDeleteNetwork = false
shouldDeleteVolume = false
return newEnclave, nil
Expand Down
28 changes: 26 additions & 2 deletions engine/server/engine/enclave_manager/enclave_creator.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,17 @@ import (
"github.com/kurtosis-tech/kurtosis/container-engine-lib/lib/backend_interface/objects/enclave"
"github.com/kurtosis-tech/kurtosis/container-engine-lib/lib/uuid_generator"
"github.com/kurtosis-tech/kurtosis/core/launcher/api_container_launcher"
"github.com/kurtosis-tech/kurtosis/engine/launcher/args"
"github.com/kurtosis-tech/kurtosis/metrics-library/golang/lib/metrics_client"
"github.com/kurtosis-tech/stacktrace"
"github.com/sirupsen/logrus"
)

const (
defaultHttpLogsCollectorPortNum = uint16(9712)
defaultTcpLogsCollectorPortNum = uint16(9713)
)

type EnclaveCreator struct {
kurtosisBackend backend_interface.KurtosisBackend
apiContainerKurtosisBackendConfigSupplier api_container_launcher.KurtosisBackendConfigSupplier
Expand Down Expand Up @@ -43,6 +49,7 @@ func (creator *EnclaveCreator) CreateEnclave(
isCI bool,
cloudUserID metrics_client.CloudUserID,
cloudInstanceID metrics_client.CloudInstanceID,
kurtosisBackendType args.KurtosisBackendType,
) (*kurtosis_engine_rpc_api_bindings.EnclaveInfo, error) {

uuid, err := uuid_generator.GenerateUUIDString()
Expand Down Expand Up @@ -75,6 +82,23 @@ func (creator *EnclaveCreator) CreateEnclave(
}
}()

// only create log collector for backend as
shouldDeleteLogsCollector := true
if kurtosisBackendType == args.KurtosisBackendType_Docker {
// TODO the logs collector has a random private ip address in the enclave network that must be tracked
if _, err := creator.kurtosisBackend.CreateLogsCollectorForEnclave(setupCtx, enclaveUuid, defaultTcpLogsCollectorPortNum, defaultHttpLogsCollectorPortNum); err != nil {
return nil, stacktrace.Propagate(err, "An error occurred creating the logs collector with TCP port number '%v' and HTTP port number '%v'", defaultTcpLogsCollectorPortNum, defaultHttpLogsCollectorPortNum)
}
defer func() {
if shouldDeleteLogsCollector {
err = creator.kurtosisBackend.DestroyLogsCollectorForEnclave(teardownCtx, enclaveUuid)
if err != nil {
logrus.Errorf("Couldn't cleanup logs collector for enclave '%v' as the following error was thrown:\n%v", enclaveUuid, err)
}
}
}()
}

apiContainer, err := creator.launchApiContainer(setupCtx,
apiContainerImageVersionTag,
apiContainerLogLevel,
Expand All @@ -88,7 +112,6 @@ func (creator *EnclaveCreator) CreateEnclave(
cloudUserID,
cloudInstanceID,
)

if err != nil {
return nil, stacktrace.Propagate(err, "An error occurred launching the API container")
}
Expand Down Expand Up @@ -155,8 +178,9 @@ func (creator *EnclaveCreator) CreateEnclave(
}

// Everything started successfully, so the responsibility of deleting the enclave is now transferred to the caller
shouldDestroyEnclave = false
shouldStopApiContainer = false
shouldDeleteLogsCollector = false
shouldDestroyEnclave = false
return newEnclaveInfo, nil
}

Expand Down
7 changes: 5 additions & 2 deletions engine/server/engine/enclave_manager/enclave_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ type EnclaveManager struct {
mutex *sync.Mutex

kurtosisBackend backend_interface.KurtosisBackend
kurtosisBackendType args.KurtosisBackendType
apiContainerKurtosisBackendConfigSupplier api_container_launcher.KurtosisBackendConfigSupplier

// this is a stop gap solution, this would be stored and retrieved from the DB in the future
Expand Down Expand Up @@ -104,8 +105,9 @@ func CreateEnclaveManager(
}

enclaveManager := &EnclaveManager{
mutex: &sync.Mutex{},
kurtosisBackend: kurtosisBackend,
mutex: &sync.Mutex{},
kurtosisBackend: kurtosisBackend,
kurtosisBackendType: kurtosisBackendType,
apiContainerKurtosisBackendConfigSupplier: apiContainerKurtosisBackendConfigSupplier,
allExistingAndHistoricalIdentifiers: []*kurtosis_engine_rpc_api_bindings.EnclaveIdentifiers{},
enclaveCreator: enclaveCreator,
Expand Down Expand Up @@ -189,6 +191,7 @@ func (manager *EnclaveManager) CreateEnclave(
manager.isCI,
manager.cloudUserID,
manager.cloudInstanceID,
manager.kurtosisBackendType,
)
if err != nil {
return nil, stacktrace.Propagate(
Expand Down
3 changes: 3 additions & 0 deletions engine/server/engine/enclave_manager/enclave_pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"github.com/kurtosis-tech/kurtosis/api/golang/engine/kurtosis_engine_rpc_api_bindings"
"github.com/kurtosis-tech/kurtosis/container-engine-lib/lib/backend_interface"
"github.com/kurtosis-tech/kurtosis/container-engine-lib/lib/backend_interface/objects/enclave"
"github.com/kurtosis-tech/kurtosis/engine/launcher/args"
"github.com/kurtosis-tech/kurtosis/metrics-library/golang/lib/metrics_client"
"github.com/kurtosis-tech/stacktrace"
"github.com/sirupsen/logrus"
Expand Down Expand Up @@ -45,6 +46,7 @@ type EnclavePool struct {
// 3- Will start a subroutine in charge of filling the pool
func CreateEnclavePool(
kurtosisBackend backend_interface.KurtosisBackend,

enclaveCreator *EnclaveCreator,
poolSize uint8,
engineVersion string,
Expand Down Expand Up @@ -289,6 +291,7 @@ func (pool *EnclavePool) createNewIdleEnclave(ctx context.Context) (*kurtosis_en
pool.isCI,
pool.cloudUserID,
pool.cloudInstanceID,
args.KurtosisBackendType_Kubernetes, // enclave pool only available for k8s
)
if err != nil {
return nil, stacktrace.Propagate(
Expand Down

0 comments on commit b695e27

Please sign in to comment.