From 185ce0d46fa335bce84616bf441f6808e93d0da9 Mon Sep 17 00:00:00 2001 From: Vitaly Antonenko Date: Fri, 12 Jul 2024 14:39:48 +0300 Subject: [PATCH 1/2] fix(application): handle storage details conversion error - Added error handling for "cannot convert storage details" in `applicationsClient.ReadApplication`. - Improved retry logic for storage readiness in `ReadApplicationWithRetryOnNotFound`. This commit ensures that the application correctly handles storage conversion errors and retries appropriately until the storage is ready. --- internal/juju/applications.go | 27 ++++---- .../provider/resource_application_test.go | 61 ++++++++++++++++++- 2 files changed, 73 insertions(+), 15 deletions(-) diff --git a/internal/juju/applications.go b/internal/juju/applications.go index c8ba9813..f9b101ce 100644 --- a/internal/juju/applications.go +++ b/internal/juju/applications.go @@ -762,6 +762,18 @@ func (c applicationsClient) ReadApplicationWithRetryOnNotFound(ctx context.Conte c.Errorf(err, fmt.Sprintf("ReadApplicationWithRetryOnNotFound %q", input.AppName)) return nil } + + // NOTE: Applications can always have storage. However, they + // will not be listed right after the application is created. So + // we need to wait for the storage to be ready. And we need to + // check if all storage constraints have pool equal "" and size equal 0 + // to drop the error. + for _, storage := range output.Storage { + if storage.Pool == "" || storage.Size == 0 { + return fmt.Errorf("ReadApplicationWithRetryOnNotFound: no storage found in output") + } + } + // NOTE: An IAAS subordinate should also have machines. However, they // will not be listed until after the relation has been created. // Those happen with the integration resource which will not be @@ -779,17 +791,6 @@ func (c applicationsClient) ReadApplicationWithRetryOnNotFound(ctx context.Conte return fmt.Errorf("ReadApplicationWithRetryOnNotFound: need %d machines, have %d", output.Units, len(machines)) } - // NOTE: Applications can always have storage. However, they - // will not be listed right after the application is created. So - // we need to wait for the storage to be ready. And we need to - // check if all storage constraints have pool equal "" and size equal 0 - // to drop the error. - for _, storage := range output.Storage { - if storage.Pool == "" || storage.Size == 0 { - return fmt.Errorf("ReadApplicationWithRetryOnNotFound: no storage found in output") - } - } - // NOTE: An IAAS subordinate should also have machines. However, they // will not be listed until after the relation has been created. // Those happen with the integration resource which will not be @@ -899,7 +900,9 @@ func (c applicationsClient) ReadApplication(input *ReadApplicationInput) (*ReadA IncludeStorage: true, }) if err != nil { - if strings.Contains(err.Error(), "filesystem for storage instance") || strings.Contains(err.Error(), "volume for storage instance") { + if strings.Contains(err.Error(), "filesystem for storage instance") || + strings.Contains(err.Error(), "volume for storage instance") || + strings.Contains(err.Error(), "cannot convert storage details") { // Retry if we get this error. It means the storage is not ready yet. return nil, &storageNotFoundError{input.AppName} } diff --git a/internal/provider/resource_application_test.go b/internal/provider/resource_application_test.go index 02f1f989..2df8e0d9 100644 --- a/internal/provider/resource_application_test.go +++ b/internal/provider/resource_application_test.go @@ -575,7 +575,7 @@ func TestAcc_ResourceApplication_UpdateEndpointBindings(t *testing.T) { }) } -func TestAcc_ResourceApplication_Storage(t *testing.T) { +func TestAcc_ResourceApplication_StorageLXD(t *testing.T) { if testingCloud != LXDCloudTesting { t.Skip(t.Name() + " only runs with LXD") } @@ -589,7 +589,7 @@ func TestAcc_ResourceApplication_Storage(t *testing.T) { ProtoV6ProviderFactories: frameworkProviderFactories, Steps: []resource.TestStep{ { - Config: testAccResourceApplicationStorage(modelName, appName, storageConstraints), + Config: testAccResourceApplicationStorageLXD(modelName, appName, storageConstraints), Check: resource.ComposeTestCheckFunc( resource.TestCheckResourceAttr("juju_application."+appName, "model", modelName), resource.TestCheckResourceAttr("juju_application."+appName, "storage_directives.runner", "2G"), @@ -603,6 +603,34 @@ func TestAcc_ResourceApplication_Storage(t *testing.T) { }) } +func TestAcc_ResourceApplication_StorageK8s(t *testing.T) { + if testingCloud != MicroK8sTesting { + t.Skip(t.Name() + " only runs with Microk8s") + } + modelName := acctest.RandomWithPrefix("tf-test-application-storage") + appName := "test-app-storage" + + storageConstraints := map[string]string{"label": "pgdata", "size": "2G"} + + resource.Test(t, resource.TestCase{ + PreCheck: func() { testAccPreCheck(t) }, + ProtoV6ProviderFactories: frameworkProviderFactories, + Steps: []resource.TestStep{ + { + Config: testAccResourceApplicationStorageK8s(modelName, appName, storageConstraints), + Check: resource.ComposeTestCheckFunc( + resource.TestCheckResourceAttr("juju_application."+appName, "model", modelName), + resource.TestCheckResourceAttr("juju_application."+appName, "storage_directives.pgdata", "2G"), + resource.TestCheckResourceAttr("juju_application."+appName, "storage.0.label", "pgdata"), + resource.TestCheckResourceAttr("juju_application."+appName, "storage.0.count", "1"), + resource.TestCheckResourceAttr("juju_application."+appName, "storage.0.size", "2G"), + resource.TestCheckResourceAttr("juju_application."+appName, "storage.0.pool", "kubernetes"), + ), + }, + }, + }) +} + func testAccResourceApplicationBasic_Minimal(modelName, charmName string) string { return fmt.Sprintf(` resource "juju_model" "testmodel" { @@ -950,7 +978,7 @@ resource "juju_application" "{{.AppName}}" { }) } -func testAccResourceApplicationStorage(modelName, appName string, storageConstraints map[string]string) string { +func testAccResourceApplicationStorageLXD(modelName, appName string, storageConstraints map[string]string) string { return internaltesting.GetStringFromTemplateWithData("testAccResourceApplicationStorage", ` resource "juju_model" "{{.ModelName}}" { name = "{{.ModelName}}" @@ -978,6 +1006,33 @@ resource "juju_application" "{{.AppName}}" { }) } +func testAccResourceApplicationStorageK8s(modelName, appName string, storageConstraints map[string]string) string { + return internaltesting.GetStringFromTemplateWithData("testAccResourceApplicationStorage", ` +resource "juju_model" "{{.ModelName}}" { + name = "{{.ModelName}}" +} + +resource "juju_application" "{{.AppName}}" { + model = juju_model.{{.ModelName}}.name + name = "{{.AppName}}" + charm { + name = "postgresql-k8s" + channel = "14/stable" + } + + storage_directives = { + {{.StorageConstraints.label}} = "{{.StorageConstraints.size}}" + } + + units = 1 +} +`, internaltesting.TemplateData{ + "ModelName": modelName, + "AppName": appName, + "StorageConstraints": storageConstraints, + }) +} + func testCheckEndpointsAreSetToCorrectSpace(modelName, appName, defaultSpace string, configuredEndpoints map[string]string) resource.TestCheckFunc { return func(s *terraform.State) error { conn, err := TestClient.Models.GetConnection(&modelName) From 98952a1d2ed0f0981f1b53ba4c72482f9f5d37b1 Mon Sep 17 00:00:00 2001 From: Heather Lanigan Date: Fri, 12 Jul 2024 16:41:14 -0400 Subject: [PATCH 2/2] fix(application): don't return nil if there's an error Related to #520, panic on nil pointer from ReadApplicationWithRetryOnNotFound. Add IsFatalError func to retry. Allow for known types or "connection refused" which is intermittent. Doesn't guarentee that everything works as expected, but doesn't panic either. --- internal/juju/applications.go | 42 ++++++++++++++++++------------ internal/juju/applications_test.go | 19 ++++++++++++++ 2 files changed, 45 insertions(+), 16 deletions(-) diff --git a/internal/juju/applications.go b/internal/juju/applications.go index f9b101ce..b01e01f2 100644 --- a/internal/juju/applications.go +++ b/internal/juju/applications.go @@ -71,6 +71,17 @@ func (se *storageNotFoundError) Error() string { return fmt.Sprintf("storage %s not found", se.storageName) } +var RetryReadError = &retryReadError{} + +// retryReadError +type retryReadError struct { + msg string +} + +func (e *retryReadError) Error() string { + return fmt.Sprintf("retrying: %s", e.msg) +} + type applicationsClient struct { SharedClient controllerVersion version.Number @@ -756,11 +767,7 @@ func (c applicationsClient) ReadApplicationWithRetryOnNotFound(ctx context.Conte if errors.As(err, &ApplicationNotFoundError) || errors.As(err, &StorageNotFoundError) { return err } else if err != nil { - // Log the error to the terraform Diagnostics to be - // caught in the provider. Return nil so we stop - // retrying. - c.Errorf(err, fmt.Sprintf("ReadApplicationWithRetryOnNotFound %q", input.AppName)) - return nil + return err } // NOTE: Applications can always have storage. However, they @@ -768,9 +775,9 @@ func (c applicationsClient) ReadApplicationWithRetryOnNotFound(ctx context.Conte // we need to wait for the storage to be ready. And we need to // check if all storage constraints have pool equal "" and size equal 0 // to drop the error. - for _, storage := range output.Storage { + for label, storage := range output.Storage { if storage.Pool == "" || storage.Size == 0 { - return fmt.Errorf("ReadApplicationWithRetryOnNotFound: no storage found in output") + return &retryReadError{msg: fmt.Sprintf("storage label %q missing detail", label)} } } @@ -784,21 +791,25 @@ func (c applicationsClient) ReadApplicationWithRetryOnNotFound(ctx context.Conte return nil } if output.Placement == "" { - return fmt.Errorf("ReadApplicationWithRetryOnNotFound: no machines found in output") + return &retryReadError{msg: "no machines found in output"} } machines := strings.Split(output.Placement, ",") if len(machines) != output.Units { - return fmt.Errorf("ReadApplicationWithRetryOnNotFound: need %d machines, have %d", output.Units, len(machines)) + return &retryReadError{msg: fmt.Sprintf("need %d machines, have %d", output.Units, len(machines))} } - // NOTE: An IAAS subordinate should also have machines. However, they - // will not be listed until after the relation has been created. - // Those happen with the integration resource which will not be - // run by terraform before the application resource finishes. Thus - // do not block here for subordinates. c.Tracef("Have machines - returning", map[string]interface{}{"output": *output}) return nil }, + IsFatalError: func(err error) bool { + if errors.As(err, &ApplicationNotFoundError) || + errors.As(err, &StorageNotFoundError) || + errors.As(err, &RetryReadError) || + strings.Contains(err.Error(), "connection refused") { + return false + } + return true + }, NotifyFunc: func(err error, attempt int) { if attempt%4 == 0 { message := fmt.Sprintf("waiting for application %q", input.AppName) @@ -874,8 +885,7 @@ func (c applicationsClient) ReadApplication(input *ReadApplicationInput) (*ReadA apps, err := applicationAPIClient.ApplicationsInfo([]names.ApplicationTag{names.NewApplicationTag(input.AppName)}) if err != nil { - c.Errorf(err, "found when querying the applications info") - return nil, err + return nil, jujuerrors.Annotate(err, "when querying the applications info") } if len(apps) > 1 { return nil, fmt.Errorf("more than one result for application: %s", input.AppName) diff --git a/internal/juju/applications_test.go b/internal/juju/applications_test.go index ec7b996f..0235cc8f 100644 --- a/internal/juju/applications_test.go +++ b/internal/juju/applications_test.go @@ -6,6 +6,7 @@ package juju // Basic imports import ( "context" + "fmt" "testing" "github.com/juju/juju/api" @@ -168,6 +169,24 @@ func (s *ApplicationSuite) TestReadApplicationRetry() { s.Assert().Equal("ubuntu@22.04", resp.Base) } +func (s *ApplicationSuite) TestReadApplicationRetryDoNotPanic() { + defer s.setupMocks(s.T()).Finish() + s.mockSharedClient.EXPECT().ModelType(gomock.Any()).Return(model.IAAS, nil).AnyTimes() + + appName := "testapplication" + aExp := s.mockApplicationClient.EXPECT() + + aExp.ApplicationsInfo(gomock.Any()).Return(nil, fmt.Errorf("don't panic")) + + client := s.getApplicationsClient() + _, err := client.ReadApplicationWithRetryOnNotFound(context.Background(), + &ReadApplicationInput{ + ModelName: s.testModelName, + AppName: appName, + }) + s.Require().Error(err, "don't panic") +} + func (s *ApplicationSuite) TestReadApplicationRetryWaitForMachines() { defer s.setupMocks(s.T()).Finish() s.mockSharedClient.EXPECT().ModelType(gomock.Any()).Return(model.IAAS, nil).AnyTimes()