Skip to content

Commit

Permalink
feat: wait for run in resource spacelift_run (#535)
Browse files Browse the repository at this point in the history
* feat: added generic functions AsError and IsErrorType for working with error types inline in if-statements
* refactor: added github.com/hashicorp/terraform-plugin-log as direct requirement to go.mod
* feat: implemented wait logic for resource spacelift_run
* test: added TestRunResourceWait to spacelift/resource_run_test.go
* docs: updated docs for spacelift_run resource
* refactor: use continue_on_state for the list of possible end states to wait for in spacelift/resource_run.go
* refactor: change "enabled" to "disabled" in schema and waitConfiguration struct resource_run.go
* fix test: should be "disabled" instead of "enabled"
* let's treat unconfirmed as the target state.
* use RunResourceState (global scope) instead of Run (user scope)
* added two more tests for "wait_for_run"
* fix logs
* fix tests: same WP name for different tests
* fix field description; move piece of code into separate method

---------

Co-authored-by: Thomas <[email protected]>
  • Loading branch information
truszkowski and tmeckel authored Apr 18, 2024
1 parent dec9cce commit e3a9079
Show file tree
Hide file tree
Showing 6 changed files with 402 additions and 8 deletions.
19 changes: 19 additions & 0 deletions docs/resources/run.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,26 @@ resource "spacelift_run" "this" {
- `commit_sha` (String) The commit SHA for which to trigger a run.
- `keepers` (Map of String) Arbitrary map of values that, when changed, will trigger recreation of the resource.
- `proposed` (Boolean) Whether the run is a proposed run. Defaults to `false`.
- `timeouts` (Block, Optional) (see [below for nested schema](#nestedblock--timeouts))
- `wait` (Block List, Max: 1) Wait for the run to finish (see [below for nested schema](#nestedblock--wait))

### Read-Only

- `id` (String) The ID of the triggered run.

<a id="nestedblock--timeouts"></a>
### Nested Schema for `timeouts`

Optional:

- `create` (String)


<a id="nestedblock--wait"></a>
### Nested Schema for `wait`

Optional:

- `continue_on_state` (Set of String) Continue on the specified states of a finished run. If not specified, the default is `[ 'finished' ]`. You can use following states: `applying`, `canceled`, `confirmed`, `destroying`, `discarded`, `failed`, `finished`, `initializing`, `pending_review`, `performing`, `planning`, `preparing_apply`, `preparing_replan`, `preparing`, `queued`, `ready`, `replan_requested`, `skipped`, `stopped`, `unconfirmed`.
- `continue_on_timeout` (Boolean) Continue if run timed out, i.e. did not reach any defined end state in time. Default: `false`
- `disabled` (Boolean) Whether waiting for a job is disabled or not. Default: `false`
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ require (
github.com/dgrijalva/jwt-go/v4 v4.0.0-preview1
github.com/hashicorp/go-cty v1.4.1-0.20200414143053-d3edf31b6320
github.com/hashicorp/go-retryablehttp v0.7.4
github.com/hashicorp/terraform-plugin-log v0.9.0
github.com/hashicorp/terraform-plugin-sdk/v2 v2.29.0
github.com/kelseyhightower/envconfig v1.4.0
github.com/pkg/errors v0.9.1
Expand Down Expand Up @@ -36,7 +37,6 @@ require (
github.com/hashicorp/terraform-exec v0.19.0 // indirect
github.com/hashicorp/terraform-json v0.17.1 // indirect
github.com/hashicorp/terraform-plugin-go v0.19.0 // indirect
github.com/hashicorp/terraform-plugin-log v0.9.0 // indirect
github.com/hashicorp/terraform-registry-address v0.2.2 // indirect
github.com/hashicorp/terraform-svchost v0.1.1 // indirect
github.com/hashicorp/yamux v0.0.0-20181012175058-2f1d1f20f75d // indirect
Expand Down
14 changes: 14 additions & 0 deletions spacelift/internal/error.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,17 @@ func parseExtensions(ext map[string]interface{}) string {

return strings.Join(errorParts, ", ")
}

// AsError searches err's chain for an error that can be assigned to TError.
// It wraps errors.As so the lookup can be written inline in an if-statement:
// the matched error and true are returned on success, otherwise the untouched
// target (normally the zero value of TError) and false.
func AsError[TError error](err error) (TError, bool) {
	var target TError
	if !errors.As(err, &target) {
		return target, false
	}
	return target, true
}

// IsErrorType tells whether any error in err's chain has the type TError.
// It is the boolean-only counterpart of AsError for callers that do not need
// the matched error value.
func IsErrorType[TError error](err error) bool {
	var target TError
	return errors.As(err, &target)
}
207 changes: 204 additions & 3 deletions spacelift/resource_run.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,16 @@ package spacelift

import (
"context"
"fmt"
"slices"
"strings"
"time"

"github.com/hashicorp/terraform-plugin-log/tflog"
"github.com/hashicorp/terraform-plugin-sdk/v2/diag"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/retry"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
"github.com/pkg/errors"
"github.com/shurcooL/graphql"

"github.com/spacelift-io/terraform-provider-spacelift/spacelift/internal"
Expand All @@ -20,6 +27,11 @@ func resourceRun() *schema.Resource {
CreateContext: resourceRunCreate,
ReadContext: schema.NoopContext,
Delete: schema.RemoveFromState,
UpdateContext: schema.NoopContext,

Timeouts: &schema.ResourceTimeout{
Create: schema.DefaultTimeout(30 * time.Minute),
},

Schema: map[string]*schema.Schema{
"stack_id": {
Expand Down Expand Up @@ -55,16 +67,144 @@ func resourceRun() *schema.Resource {
Type: schema.TypeString,
Computed: true,
},
"wait": {
Type: schema.TypeList,
Optional: true,
Description: "Wait for the run to finish",
MaxItems: 1,
Elem: &schema.Resource{
Schema: map[string]*schema.Schema{
"disabled": {
Type: schema.TypeBool,
Description: "Whether waiting for a job is disabled or not. Default: `false`",
Optional: true,
Default: false,
},
"continue_on_state": {
Type: schema.TypeSet,
Elem: &schema.Schema{
Type: schema.TypeString,
},
Description: "Continue on the specified states of a finished run. If not specified, the default is `[ 'finished' ]`. You can use following states: `applying`, `canceled`, `confirmed`, `destroying`, `discarded`, `failed`, `finished`, `initializing`, `pending_review`, `performing`, `planning`, `preparing_apply`, `preparing_replan`, `preparing`, `queued`, `ready`, `replan_requested`, `skipped`, `stopped`, `unconfirmed`.",
Optional: true,
},
"continue_on_timeout": {
Type: schema.TypeBool,
Description: "Continue if run timed out, i.e. did not reach any defined end state in time. Default: `false`",
Optional: true,
Default: false,
},
},
},
},
},
}
}

// waitConfiguration is the expanded form of the resource's "wait" block. It
// controls whether and how run creation waits for the triggered run.
type waitConfiguration struct {
// disabled turns waiting off; Wait returns immediately when it is set.
disabled bool
// continueOnState lists the final run states that count as success.
// expandWaitConfiguration falls back to "finished" when none are given.
continueOnState []string
// continueOnTimeout makes a timed-out wait succeed instead of failing.
continueOnTimeout bool
}

// expandWaitConfiguration converts the raw value of the "wait" block (a list
// holding at most one element) into a waitConfiguration.
//
// It returns nil when the list is empty, i.e. no "wait" block is configured.
// A block that is present but does not carry a usable attribute map is treated
// like an empty block, so every attribute takes its default. When
// "continue_on_state" is missing or empty, "finished" is the only accepted
// end state.
//
// It panics if "continue_on_state" holds a non-string element, because the
// schema declares a set of strings and anything else is a programming error.
func expandWaitConfiguration(input []interface{}) *waitConfiguration {
	if len(input) == 0 {
		return nil
	}

	// A failed assertion leaves attrs as a nil map; reading from a nil map is
	// safe and yields zero values, which is exactly the "all defaults" case.
	attrs, _ := input[0].(map[string]interface{})

	cfg := &waitConfiguration{}
	// Comma-ok assertions keep a missing or mistyped key from panicking; the
	// resulting zero value (false) equals the schema default of both flags.
	cfg.disabled, _ = attrs["disabled"].(bool)
	cfg.continueOnTimeout, _ = attrs["continue_on_timeout"].(bool)

	if states, ok := attrs["continue_on_state"].(*schema.Set); ok && states != nil {
		for _, item := range states.List() {
			state, isString := item.(string)
			if !isString {
				// Report the offending element itself; the asserted string is
				// always empty when the assertion fails.
				panic(fmt.Sprintf("continue_on_state contains a non-string element %+v", item))
			}
			cfg.continueOnState = append(cfg.continueOnState, state)
		}
	}

	if len(cfg.continueOnState) == 0 {
		cfg.continueOnState = []string{"finished"}
	}
	return cfg
}

// Wait blocks until the run mutationID on stack stackID reaches a target
// state, the resource's create timeout elapses, or polling fails.
//
// It returns nil without waiting when the configuration is nil or disabled.
// Otherwise the outcome is judged as follows:
//   - on a timeout an error diagnostic is returned unless continueOnTimeout
//     is set, in which case the timeout is logged and nil is returned;
//   - on any other polling failure an error diagnostic is returned;
//   - otherwise the final run state must be listed in continueOnState, or an
//     error diagnostic naming the state is returned.
func (w *waitConfiguration) Wait(ctx context.Context, d *schema.ResourceData, client *internal.Client, stackID, mutationID string) diag.Diagnostics {
	// No configuration at all and an explicit opt-out both mean "do not wait".
	if w == nil || w.disabled {
		return nil
	}

	stateConf := &retry.StateChangeConf{
		ContinuousTargetOccurence: 1,
		Delay:                     10 * time.Second,
		MinTimeout:                10 * time.Second,
		Pending: []string{
			"running",
		},
		Target: []string{
			"finished",
			// Let's treat unconfirmed as the target state. It's not finished,
			// but we don't want to wait for it because it requires
			// confirmation from someone.
			"unconfirmed",
		},
		Refresh: checkStackStatusFunc(ctx, client, stackID, mutationID),
		Timeout: d.Timeout(schema.TimeoutCreate),
	}

	result, err := stateConf.WaitForStateContext(ctx)
	if err != nil {
		timeoutErr, isTimeout := internal.AsError[*retry.TimeoutError](err)
		switch {
		case isTimeout:
			tflog.Debug(ctx, "received retry.TimeoutError from WaitForStateContext", map[string]any{
				"stackID":       stackID,
				"runID":         mutationID,
				"lastState":     timeoutErr.LastState,
				"expectedState": timeoutErr.ExpectedState,
			})
		case errors.Is(err, context.DeadlineExceeded):
			// errors.Is rather than ==: the refresh function wraps query
			// errors, so a deadline hit during a query arrives wrapped and
			// must still count as a timeout.
			tflog.Debug(ctx, "received context.DeadlineExceeded from WaitForStateContext", map[string]any{
				"stackID": stackID,
				"runID":   mutationID,
			})
		default:
			return diag.Errorf("failed waiting for run %s on stack %s to finish. error(%T): %+v ", mutationID, stackID, err, err)
		}

		// Both remaining cases are timeouts.
		if !w.continueOnTimeout {
			return diag.Errorf("run %s on stack %s has timed out", mutationID, stackID)
		}
		tflog.Info(ctx, "run timed out but continue_on_timeout=true",
			map[string]any{
				"stackID": stackID,
				"runID":   mutationID,
			})
		return nil
	}

	// The refresh function hands back the run's state string as its result;
	// anything else is reported instead of panicking on a bad assertion.
	finalState, ok := result.(string)
	if !ok {
		return diag.Errorf("run %s on stack %s reported a state of unexpected type %T", mutationID, stackID, result)
	}

	if !slices.Contains(w.continueOnState, finalState) {
		return diag.Errorf("run %s on stack %s has ended with status %s. expected %v", mutationID, stackID, finalState, w.continueOnState)
	}
	tflog.Debug(ctx, "run finished", map[string]any{
		"stackID":    stackID,
		"runID":      mutationID,
		"finalState": finalState,
	})

	return nil
}

func resourceRunCreate(ctx context.Context, d *schema.ResourceData, meta interface{}) diag.Diagnostics {
var mutation struct {
ID string `graphql:"runResourceCreate(stack: $stack, commitSha: $sha, proposed: $proposed)"`
}

stackID := d.Get("stack_id")
stackID := d.Get("stack_id").(string)

variables := map[string]interface{}{
"stack": toID(stackID),
Expand All @@ -80,11 +220,72 @@ func resourceRunCreate(ctx context.Context, d *schema.ResourceData, meta interfa
variables["proposed"] = graphql.NewBoolean(graphql.Boolean(proposed.(bool)))
}

if err := meta.(*internal.Client).Mutate(ctx, "ResourceRunCreate", &mutation, variables); err != nil {
client := meta.(*internal.Client)
if err := client.Mutate(ctx, "ResourceRunCreate", &mutation, variables); err != nil {
return diag.Errorf("could not trigger run for stack %s: %v", stackID, internal.FromSpaceliftError(err))
}

d.SetId(mutation.ID)
if waitRaw, ok := d.GetOk("wait"); ok {
wait := expandWaitConfiguration(waitRaw.([]interface{}))
if diag := wait.Wait(ctx, d, client, stackID, mutation.ID); len(diag) > 0 {
return diag
}
}

d.SetId(mutation.ID)
return nil
}

// checkStackStatusFunc builds the refresh function used while waiting for run
// runID on stack stackID.
//
// The refresh function returns the run's current state string as its result
// instead of a resource handle. That lets the caller see which end state was
// reached without issuing another GraphQL query. The reported polling state is
// "running", "finished" or "unconfirmed".
func checkStackStatusFunc(ctx context.Context, client *internal.Client, stackID string, runID string) retry.StateRefreshFunc {
	return func() (any, string, error) {
		runState, done, err := getStackRunStateByID(ctx, client, stackID, runID)
		if err != nil {
			return runState, "", err
		}

		switch {
		case runState == "unconfirmed":
			// Let's treat unconfirmed as the target state. It's not finished,
			// but we don't want to wait for it because it requires
			// confirmation from someone.
			return runState, "unconfirmed", nil
		case done:
			return runState, "finished", nil
		default:
			return runState, "running", nil
		}
	}
}

// getStackRunStateByID queries the state of run runID on stack stackID.
//
// It returns the run state converted to lower case and the run's finished
// flag. If the query fails, it returns an empty state, false and the wrapped
// error.
func getStackRunStateByID(ctx context.Context, client *internal.Client, stackID string, runID string) (string, bool, error) {
	var query struct {
		Stack struct {
			RunResourceState struct {
				ID       graphql.String
				State    graphql.String
				Finished graphql.Boolean
			} `graphql:"runResourceState(id: $runId)"`
		} `graphql:"stack(id: $stackId)"`
	}

	err := client.Query(ctx, "StackRunRead", &query, map[string]interface{}{
		"stackId": graphql.ID(stackID),
		"runId":   graphql.ID(runID),
	})
	if err != nil {
		return "", false, errors.Wrapf(err, "could not query for run %s of stack %s", runID, stackID)
	}

	run := query.Stack.RunResourceState
	state := strings.ToLower(string(run.State))

	tflog.Debug(ctx, "current state of run", map[string]interface{}{
		"stackID":      stackID,
		"runID":        runID,
		"currentState": state,
		"finished":     run.Finished,
	})

	return state, bool(run.Finished), nil
}
Loading

0 comments on commit e3a9079

Please sign in to comment.