From e3a907966ee130f2f56d1e5187589157482bca12 Mon Sep 17 00:00:00 2001 From: Piotr Truszkowski Date: Thu, 18 Apr 2024 10:14:44 +0200 Subject: [PATCH] feat: wait for run in resource `spacelift_run` (#535) * feat: added generic functions AsError and IsErrorType for working with error types inline in if-statements * refactor: added github.com/hashicorp/terraform-plugin-log as direct requirement to go.mod * feat: implemented wait logic for resource spacelift_run * test: added TestRunResourceWait to spacelift/resource_run_test.go * docs: updated docs for spacelift_run resource * refactor: using continue_on_state for a list of possible end states to wait spacelift/resource_run.go * refactor: change "enabled" to "disabled" in schema and waitConfiguration struct resource_run.go * fix test: should be "disabled" instead of "enabled" * let's treat unconfirmed as the target state. * use RunResourceState (global scope) instead of Run (user scope) * added two more tests for "wait_for_run" * fix logs * fix tests: same WP name for different tests * fix field description; move piece of code into separate method --------- Co-authored-by: Thomas --- docs/resources/run.md | 19 +++ go.mod | 2 +- spacelift/internal/error.go | 14 +++ spacelift/resource_run.go | 207 ++++++++++++++++++++++++++++++- spacelift/resource_run_test.go | 160 ++++++++++++++++++++++++ spacelift/resource_stack_test.go | 8 +- 6 files changed, 402 insertions(+), 8 deletions(-) diff --git a/docs/resources/run.md b/docs/resources/run.md index 26f1cbc0..471305b7 100644 --- a/docs/resources/run.md +++ b/docs/resources/run.md @@ -40,7 +40,26 @@ resource "spacelift_run" "this" { - `commit_sha` (String) The commit SHA for which to trigger a run. - `keepers` (Map of String) Arbitrary map of values that, when changed, will trigger recreation of the resource. - `proposed` (Boolean) Whether the run is a proposed run. Defaults to `false`. 
+- `timeouts` (Block, Optional) (see [below for nested schema](#nestedblock--timeouts)) +- `wait` (Block List, Max: 1) Wait for the run to finish (see [below for nested schema](#nestedblock--wait)) ### Read-Only - `id` (String) The ID of the triggered run. + + +### Nested Schema for `timeouts` + +Optional: + +- `create` (String) + + + +### Nested Schema for `wait` + +Optional: + +- `continue_on_state` (Set of String) Continue on the specified states of a finished run. If not specified, the default is `[ 'finished' ]`. You can use following states: `applying`, `canceled`, `confirmed`, `destroying`, `discarded`, `failed`, `finished`, `initializing`, `pending_review`, `performing`, `planning`, `preparing_apply`, `preparing_replan`, `preparing`, `queued`, `ready`, `replan_requested`, `skipped`, `stopped`, `unconfirmed`. +- `continue_on_timeout` (Boolean) Continue if run timed out, i.e. did not reach any defined end state in time. Default: `false` +- `disabled` (Boolean) Whether waiting for a job is disabled or not. 
Default: `false` diff --git a/go.mod b/go.mod index 34f0c96d..e1806895 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require ( github.com/dgrijalva/jwt-go/v4 v4.0.0-preview1 github.com/hashicorp/go-cty v1.4.1-0.20200414143053-d3edf31b6320 github.com/hashicorp/go-retryablehttp v0.7.4 + github.com/hashicorp/terraform-plugin-log v0.9.0 github.com/hashicorp/terraform-plugin-sdk/v2 v2.29.0 github.com/kelseyhightower/envconfig v1.4.0 github.com/pkg/errors v0.9.1 @@ -36,7 +37,6 @@ require ( github.com/hashicorp/terraform-exec v0.19.0 // indirect github.com/hashicorp/terraform-json v0.17.1 // indirect github.com/hashicorp/terraform-plugin-go v0.19.0 // indirect - github.com/hashicorp/terraform-plugin-log v0.9.0 // indirect github.com/hashicorp/terraform-registry-address v0.2.2 // indirect github.com/hashicorp/terraform-svchost v0.1.1 // indirect github.com/hashicorp/yamux v0.0.0-20181012175058-2f1d1f20f75d // indirect diff --git a/spacelift/internal/error.go b/spacelift/internal/error.go index d1d8ca66..1ec8c66a 100644 --- a/spacelift/internal/error.go +++ b/spacelift/internal/error.go @@ -46,3 +46,17 @@ func parseExtensions(ext map[string]interface{}) string { return strings.Join(errorParts, ", ") } + +// AsError is an inline form of errors.As. +func AsError[TError error](err error) (TError, bool) { + var as TError + ok := errors.As(err, &as) + return as, ok +} + +// IsErrorType reports whether or not the type of any error in err's chain matches +// the Error type. 
+func IsErrorType[TError error](err error) bool { + _, ok := AsError[TError](err) + return ok +} diff --git a/spacelift/resource_run.go b/spacelift/resource_run.go index 4eb39e05..80137f5b 100644 --- a/spacelift/resource_run.go +++ b/spacelift/resource_run.go @@ -2,9 +2,16 @@ package spacelift import ( "context" + "fmt" + "slices" + "strings" + "time" + "github.com/hashicorp/terraform-plugin-log/tflog" "github.com/hashicorp/terraform-plugin-sdk/v2/diag" + "github.com/hashicorp/terraform-plugin-sdk/v2/helper/retry" "github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema" + "github.com/pkg/errors" "github.com/shurcooL/graphql" "github.com/spacelift-io/terraform-provider-spacelift/spacelift/internal" @@ -20,6 +27,11 @@ func resourceRun() *schema.Resource { CreateContext: resourceRunCreate, ReadContext: schema.NoopContext, Delete: schema.RemoveFromState, + UpdateContext: schema.NoopContext, + + Timeouts: &schema.ResourceTimeout{ + Create: schema.DefaultTimeout(30 * time.Minute), + }, Schema: map[string]*schema.Schema{ "stack_id": { @@ -55,16 +67,144 @@ func resourceRun() *schema.Resource { Type: schema.TypeString, Computed: true, }, + "wait": { + Type: schema.TypeList, + Optional: true, + Description: "Wait for the run to finish", + MaxItems: 1, + Elem: &schema.Resource{ + Schema: map[string]*schema.Schema{ + "disabled": { + Type: schema.TypeBool, + Description: "Whether waiting for a job is disabled or not. Default: `false`", + Optional: true, + Default: false, + }, + "continue_on_state": { + Type: schema.TypeSet, + Elem: &schema.Schema{ + Type: schema.TypeString, + }, + Description: "Continue on the specified states of a finished run. If not specified, the default is `[ 'finished' ]`. 
You can use following states: `applying`, `canceled`, `confirmed`, `destroying`, `discarded`, `failed`, `finished`, `initializing`, `pending_review`, `performing`, `planning`, `preparing_apply`, `preparing_replan`, `preparing`, `queued`, `ready`, `replan_requested`, `skipped`, `stopped`, `unconfirmed`.", + Optional: true, + }, + "continue_on_timeout": { + Type: schema.TypeBool, + Description: "Continue if run timed out, i.e. did not reach any defined end state in time. Default: `false`", + Optional: true, + Default: false, + }, + }, + }, + }, }, } } +type waitConfiguration struct { + disabled bool + continueOnState []string + continueOnTimeout bool +} + +func expandWaitConfiguration(input []interface{}) *waitConfiguration { + if len(input) == 0 { + return nil + } + v := input[0].(map[string]interface{}) + cfg := &waitConfiguration{ + disabled: v["disabled"].(bool), + continueOnState: []string{}, + continueOnTimeout: v["continue_on_timeout"].(bool), + } + + if v, ok := v["continue_on_state"]; ok { + for _, item := range v.(*schema.Set).List() { + str, ok := item.(string) + if !ok { + panic(fmt.Sprintf("continue_on_state contains a non-string element %+v", str)) + } + cfg.continueOnState = append(cfg.continueOnState, str) + } + } + if len(cfg.continueOnState) == 0 { + cfg.continueOnState = append(cfg.continueOnState, "finished") + } + return cfg +} + +func (wait *waitConfiguration) Wait(ctx context.Context, d *schema.ResourceData, client *internal.Client, stackID, mutationID string) diag.Diagnostics { + if wait.disabled { + return nil + } + + stateConf := &retry.StateChangeConf{ + ContinuousTargetOccurence: 1, + Delay: 10 * time.Second, + MinTimeout: 10 * time.Second, + Pending: []string{ + "running", + }, + Target: []string{ + "finished", + "unconfirmed", // Let's treat unconfirmed as the target state. + // It's not finished, but we don't want to wait for it because it requires confirmation from someone. 
+ }, + Refresh: checkStackStatusFunc(ctx, client, stackID, mutationID), + Timeout: d.Timeout(schema.TimeoutCreate), + } + + finalState, err := stateConf.WaitForStateContext(ctx) + if err != nil { + if timeoutErr, ok := internal.AsError[*retry.TimeoutError](err); ok { + tflog.Debug(ctx, "received retry.TimeoutError from WaitForStateContext", map[string]any{ + "stackID": stackID, + "runID": mutationID, + "lastState": timeoutErr.LastState, + "expectedState": timeoutErr.ExpectedState, + }) + finalState = "__timeout__" + } else if err == context.DeadlineExceeded { + tflog.Debug(ctx, "received context.DeadlineExceeded from WaitForStateContext", map[string]any{ + "stackID": stackID, + "runID": mutationID, + }) + finalState = "__timeout__" + } else { + return diag.Errorf("failed waiting for run %s on stack %s to finish. error(%T): %+v ", mutationID, stackID, err, err) + } + } + + switch finalState.(string) { + case "__timeout__": + if !wait.continueOnTimeout { + return diag.Errorf("run %s on stack %s has timed out", mutationID, stackID) + } + tflog.Info(ctx, "run timed out but continue_on_timeout=true", + map[string]any{ + "stackID": stackID, + "runID": mutationID, + }) + default: + if !slices.Contains[[]string](wait.continueOnState, finalState.(string)) { + return diag.Errorf("run %s on stack %s has ended with status %s. 
expected %v", mutationID, stackID, finalState, wait.continueOnState) + } + tflog.Debug(ctx, "run finished", map[string]any{ + "stackID": stackID, + "runID": mutationID, + "finalState": finalState, + }) + } + + return nil +} + func resourceRunCreate(ctx context.Context, d *schema.ResourceData, meta interface{}) diag.Diagnostics { var mutation struct { ID string `graphql:"runResourceCreate(stack: $stack, commitSha: $sha, proposed: $proposed)"` } - stackID := d.Get("stack_id") + stackID := d.Get("stack_id").(string) variables := map[string]interface{}{ "stack": toID(stackID), @@ -80,11 +220,72 @@ func resourceRunCreate(ctx context.Context, d *schema.ResourceData, meta interfa variables["proposed"] = graphql.NewBoolean(graphql.Boolean(proposed.(bool))) } - if err := meta.(*internal.Client).Mutate(ctx, "ResourceRunCreate", &mutation, variables); err != nil { + client := meta.(*internal.Client) + if err := client.Mutate(ctx, "ResourceRunCreate", &mutation, variables); err != nil { return diag.Errorf("could not trigger run for stack %s: %v", stackID, internal.FromSpaceliftError(err)) } - d.SetId(mutation.ID) + if waitRaw, ok := d.GetOk("wait"); ok { + wait := expandWaitConfiguration(waitRaw.([]interface{})) + if diag := wait.Wait(ctx, d, client, stackID, mutation.ID); len(diag) > 0 { + return diag + } + } + d.SetId(mutation.ID) return nil } + +func checkStackStatusFunc(ctx context.Context, client *internal.Client, stackID string, runID string) retry.StateRefreshFunc { + return func() (result any, state string, err error) { + // instead of a resource handle we return the current state as result + // Makes it easier to detect which end state has been reached. + // Otherwise we would need another GraphQL query + result, finished, err := getStackRunStateByID(ctx, client, stackID, runID) + if err != nil { + return + } + state = "running" + if finished { + state = "finished" + } + // Let's treat unconfirmed as the target state. 
+ // It's not finished, but we don't want to wait for it because it requires confirmation from someone. + if result == "unconfirmed" { + state = "unconfirmed" + } + return + } +} + +func getStackRunStateByID(ctx context.Context, client *internal.Client, stackID string, runID string) (string, bool, error) { + var query struct { + Stack struct { + RunResourceState struct { + ID graphql.String + State graphql.String + Finished graphql.Boolean + } `graphql:"runResourceState(id: $runId)"` + } `graphql:"stack(id: $stackId)"` + } + + variables := map[string]interface{}{ + "stackId": graphql.ID(stackID), + "runId": graphql.ID(runID), + } + + if err := client.Query(ctx, "StackRunRead", &query, variables); err != nil { + return "", false, errors.Wrap(err, fmt.Sprintf("could not query for run %s of stack %s", runID, stackID)) + } + + rrs := query.Stack.RunResourceState + + currentState := strings.ToLower(string(rrs.State)) + tflog.Debug(ctx, "current state of run", map[string]interface{}{ + "stackID": stackID, + "runID": runID, + "currentState": currentState, + "finished": rrs.Finished, + }) + return currentState, bool(rrs.Finished), nil +} diff --git a/spacelift/resource_run_test.go b/spacelift/resource_run_test.go index e2150ae5..aeb99baa 100644 --- a/spacelift/resource_run_test.go +++ b/spacelift/resource_run_test.go @@ -2,6 +2,7 @@ package spacelift import ( "fmt" + "regexp" "testing" "github.com/hashicorp/terraform-plugin-sdk/v2/helper/acctest" @@ -47,3 +48,162 @@ func TestRunResource(t *testing.T) { }) }) } + +func TestRunResourceWait(t *testing.T) { + + t.Run("on a new stack", func(t *testing.T) { + const resourceName = "spacelift_run.test" + + randomID := acctest.RandStringFromCharSet(5, acctest.CharSetAlphaNum) + randomIDwp := acctest.RandStringFromCharSet(5, acctest.CharSetAlphaNum) + + testSteps(t, []resource.TestStep{ + { + Config: fmt.Sprintf(` + resource "spacelift_worker_pool" "test" { + name = "Let's create a dummy worker pool to avoid running the job %s" + } 
+ + resource "spacelift_stack" "test" { + name = "Test stack %s" + repository = "demo" + branch = "feat_wait_for_run" + worker_pool_id = spacelift_worker_pool.test.id + } + + resource "spacelift_run" "test" { + stack_id = spacelift_stack.test.id + + keepers = { "bacon" = "tasty" } + + timeouts { + create = "10s" + } + + wait { + disabled = false + continue_on_timeout = true + } + }`, randomIDwp, randomID), + Check: Resource( + resourceName, + Attribute("id", IsNotEmpty()), + Attribute("stack_id", Contains(randomID)), + ), + }, + }) + }) + + t.Run("timed out run", func(t *testing.T) { + const resourceName = "spacelift_run.test" + + randomID := acctest.RandStringFromCharSet(5, acctest.CharSetAlphaNum) + randomIDwp := acctest.RandStringFromCharSet(5, acctest.CharSetAlphaNum) + + testSteps(t, []resource.TestStep{ + { + Config: fmt.Sprintf(` + resource "spacelift_worker_pool" "test" { + name = "Let's create a dummy worker pool to avoid running the job %s" + } + + resource "spacelift_stack" "test" { + name = "Test stack %s" + repository = "demo" + branch = "feat_wait_for_run" + worker_pool_id = spacelift_worker_pool.test.id + } + + resource "spacelift_run" "test" { + stack_id = spacelift_stack.test.id + + keepers = { "bacon" = "tasty" } + + timeouts { + create = "10s" + } + + wait { + disabled = false + continue_on_timeout = false + } + }`, randomIDwp, randomID), + ExpectError: regexp.MustCompile("run [0-9A-Z]* on stack test-stack-[a-z0-9]* has timed out"), + }, + }) + }) + + t.Run("continue on unconfirmed", func(t *testing.T) { + const resourceName = "spacelift_run.test" + + randomID := acctest.RandStringFromCharSet(5, acctest.CharSetAlphaNum) + + testSteps(t, []resource.TestStep{ + { + Config: fmt.Sprintf(` + resource "spacelift_stack" "test" { + name = "Test stack %s" + repository = "demo" + branch = "feat_wait_for_run" + } + + resource "spacelift_run" "test" { + stack_id = spacelift_stack.test.id + + keepers = { "bacon" = "tasty" } + + timeouts { + create = "120s" + 
} + + wait { + disabled = false + continue_on_state = ["unconfirmed"] + } + }`, randomID), + Check: Resource( + resourceName, + Attribute("id", IsNotEmpty()), + Attribute("stack_id", Contains(randomID)), + ), + }, + }) + }) + + t.Run("finished with autodeploy", func(t *testing.T) { + const resourceName = "spacelift_run.test" + + randomID := acctest.RandStringFromCharSet(5, acctest.CharSetAlphaNum) + + testSteps(t, []resource.TestStep{ + { + Config: fmt.Sprintf(` + resource "spacelift_stack" "test" { + name = "Test stack %s" + repository = "demo" + branch = "feat_wait_for_run" + autodeploy = true + } + + resource "spacelift_run" "test" { + stack_id = spacelift_stack.test.id + + keepers = { "bacon" = "tasty" } + + timeouts { + create = "180s" + } + + wait { + disabled = false + } + }`, randomID), + Check: Resource( + resourceName, + Attribute("id", IsNotEmpty()), + Attribute("stack_id", Contains(randomID)), + ), + }, + }) + }) +} diff --git a/spacelift/resource_stack_test.go b/spacelift/resource_stack_test.go index af30da40..9759aafc 100644 --- a/spacelift/resource_stack_test.go +++ b/spacelift/resource_stack_test.go @@ -146,10 +146,10 @@ func TestStackResource(t *testing.T) { worker_pool_id = spacelift_worker_pool.test.id } resource "spacelift_worker_pool" "test" { - name = "Autoretryable worker pool." + name = "Autoretryable worker pool (%s)." description = "test worker pool" } - `, description, randomID, randomID) + `, description, randomID, randomID, randomID) } testSteps(t, []resource.TestStep{ @@ -801,10 +801,10 @@ func TestStackResourceSpace(t *testing.T) { } resource "spacelift_worker_pool" "test" { - name = "Autoretryable worker pool." + name = "Autoretryable worker pool (%s)." description = "test worker pool" } - `, description, randomID, randomID) + `, description, randomID, randomID, randomID) } testSteps(t, []resource.TestStep{