Skip to content

Commit

Permalink
Merge pull request #641 from qu1queee/one_shot_br
Browse files Browse the repository at this point in the history
Enhance BuildRun reconciles failure scenarios
  • Loading branch information
openshift-merge-robot authored Mar 22, 2021
2 parents c086190 + 239f470 commit 023e050
Show file tree
Hide file tree
Showing 17 changed files with 890 additions and 259 deletions.
19 changes: 14 additions & 5 deletions docs/buildrun.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,20 @@ The following table illustrates the different states a BuildRun can have under i

| Status | Reason | CompletionTime is set | Description |
| --- | --- | --- | --- |
| Unknown | Pending | No | The BuildRun is waiting on a Pod in status Pending. |
| Unknown | Running | No | The BuildRun has been validate and started to perform its work. |
| True | Succeeded | Yes | The BuildRun Pod is done. |
| False | Failed | Yes | The BuildRun failed in one of the steps. |
| False | BuildRunTimeout | Yes | The BuildRun timed out. |
| Unknown | Pending | No | The BuildRun is waiting on a Pod in status Pending. |
| Unknown | Running | No | The BuildRun has been validate and started to perform its work. |
| True | Succeeded | Yes | The BuildRun Pod is done. |
| False | Failed | Yes | The BuildRun failed in one of the steps. |
| False | BuildRunTimeout | Yes | The BuildRun timed out. |
| False | UnknownStrategyKind | Yes | The Build specified strategy Kind is unknown. (_options: ClusterBuildStrategy or BuildStrategy_) |
| False | ClusterBuildStrategyNotFound | Yes | The referenced cluster strategy was not found in the cluster. |
| False | BuildStrategyNotFound | Yes | The referenced namespaced strategy was not found in the cluster. |
| False | SetOwnerReferenceFailed | Yes | Setting ownerreferences from the BuildRun to the related TaskRun failed. |
| False | TaskRunIsMissing | Yes | The BuildRun related TaskRun was not found. |
| False | TaskRunGenerationFailed | Yes | The generation of a TaskRun spec failed. |
| False | ServiceAccountNotFound | Yes | The referenced service account was not found in the cluster. |
| False | BuildRegistrationFailed | Yes | The related Build in the BuildRun is on a Failed state. |
| False | BuildNotFound | Yes | The related Build in the BuildRun was not found. |

_Note_: We heavily rely on the Tekton TaskRun [Conditions](https://github.com/tektoncd/pipeline/blob/main/docs/taskruns.md#monitoring-execution-status) for populating the BuildRun ones, with some exceptions.

Expand Down
11 changes: 11 additions & 0 deletions pkg/apis/build/v1alpha1/buildrun_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,17 @@ func (brs *BuildRunStatus) GetCondition(t Type) *Condition {
return nil
}

// IsFailed returns a condition with a False Status
// based on a type from a list of Conditions.
func (brs *BuildRunStatus) IsFailed(t Type) bool {
for _, c := range brs.Conditions {
if c.Type == t {
return c.Status == corev1.ConditionFalse
}
}
return false
}

// SetCondition updates a list of conditions with the provided condition
func (brs *BuildRunStatus) SetCondition(condition *Condition) {
var idx = -1
Expand Down
178 changes: 102 additions & 76 deletions pkg/reconciler/buildrun/buildrun.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ import (

corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand Down Expand Up @@ -92,16 +91,33 @@ func (r *ReconcileBuildRun) Reconcile(request reconcile.Request) (reconcile.Resu
}

build = &buildv1alpha1.Build{}
if err = resources.GetBuildObject(ctx, r.client, buildRun.Spec.BuildRef.Name, buildRun.Namespace, build); err != nil {
updateErr := r.updateBuildRunErrorStatus(ctx, buildRun, err.Error())
return reconcile.Result{}, resources.HandleError("Failed to fetch the Build instance", err, updateErr)
err = resources.GetBuildObject(ctx, r.client, buildRun, build)
if err != nil {
if !resources.IsClientStatusUpdateError(err) && buildRun.Status.IsFailed(buildv1alpha1.Succeeded) {
return reconcile.Result{}, nil
}
// system call failure, reconcile again
return reconcile.Result{}, err
}

// Validate if the Build was successfully registered
if err := r.ValidateBuildRegistration(ctx, build, buildRun); err != nil {
if build.Status.Registered == "" {
err := fmt.Errorf("the Build is not yet validated, build: %s", build.Name)
// reconcile again until it gets a registration value
return reconcile.Result{}, err
}

if build.Status.Registered != corev1.ConditionTrue {
// stop reconciling and mark the BuildRun as Failed
// we only reconcile again if the status.Update call fails
message := fmt.Sprintf("the Build is not registered correctly, build: %s, registered status: %s, reason: %s", build.Name, build.Status.Registered, build.Status.Reason)
if updateErr := resources.UpdateConditionWithFalseStatus(ctx, r.client, buildRun, message, resources.ConditionBuildRegistrationFailed); updateErr != nil {
return reconcile.Result{}, updateErr
}

return reconcile.Result{}, nil
}

// Ensure the build-related labels on the BuildRun
if buildRun.GetLabels() == nil {
buildRun.Labels = make(map[string]string)
Expand Down Expand Up @@ -142,16 +158,39 @@ func (r *ReconcileBuildRun) Reconcile(request reconcile.Request) (reconcile.Resu
return reconcile.Result{}, err
}

// Choose a service account to use
svcAccount, err := resources.RetrieveServiceAccount(ctx, r.client, build, buildRun)
if err != nil {
if !resources.IsClientStatusUpdateError(err) && buildRun.Status.IsFailed(buildv1alpha1.Succeeded) {
return reconcile.Result{}, nil
}
// system call failure, reconcile again
return reconcile.Result{}, err
}

strategy, err := r.getReferencedStrategy(ctx, build, buildRun)
if err != nil {
if !resources.IsClientStatusUpdateError(err) && buildRun.Status.IsFailed(buildv1alpha1.Succeeded) {
return reconcile.Result{}, nil
}
return reconcile.Result{}, err
}

// Create the TaskRun, this needs to be the last step in this block to be idempotent
generatedTaskRun, err := r.createTaskRun(ctx, build, buildRun)
generatedTaskRun, err := r.createTaskRun(ctx, svcAccount, strategy, build, buildRun)
if err != nil {
if !resources.IsClientStatusUpdateError(err) && buildRun.Status.IsFailed(buildv1alpha1.Succeeded) {
ctxlog.Info(ctx, "taskRun generation failed", namespace, request.Namespace, name, request.Name)
return reconcile.Result{}, nil
}
// system call failure, reconcile again
return reconcile.Result{}, err
}

ctxlog.Info(ctx, "creating TaskRun from BuildRun", namespace, request.Namespace, name, generatedTaskRun.GenerateName, "BuildRun", buildRun.Name)
if err = r.client.Create(ctx, generatedTaskRun); err != nil {
updateErr := r.updateBuildRunErrorStatus(ctx, buildRun, err.Error())
return reconcile.Result{}, resources.HandleError("Failed to create TaskRun if no TaskRun for that BuildRun exists", err, updateErr)
// system call failure, reconcile again
return reconcile.Result{}, err
}

// Set the LastTaskRunRef in the BuildRun status
Expand Down Expand Up @@ -326,92 +365,79 @@ func (r *ReconcileBuildRun) VerifyRequestName(ctx context.Context, request recon
// We ignore the errors from the following call, because the parent call of this function will always
// return back a reconcile.Result{}, nil. This is done to avoid infinite reconcile loops when a BuildRun
// does not longer exists
r.updateBuildRunErrorStatus(ctx, buildRun, fmt.Sprintf("taskRun %s doesn't exist", request.Name))
_ = resources.UpdateConditionWithFalseStatus(ctx, r.client, buildRun, fmt.Sprintf("taskRun %s doesn't exist", request.Name), resources.ConditionTaskRunIsMissing)
}
}
}
}

func (r *ReconcileBuildRun) createTaskRun(ctx context.Context, build *buildv1alpha1.Build, buildRun *buildv1alpha1.BuildRun) (*v1beta1.TaskRun, error) {
var generatedTaskRun *v1beta1.TaskRun
// Choose a service account to use
serviceAccount, err := resources.RetrieveServiceAccount(ctx, r.client, build, buildRun)
if err != nil {
updateErr := r.updateBuildRunErrorStatus(ctx, buildRun, err.Error())
return nil, resources.HandleError("Failed to choose a service account to use", err, updateErr)
}
func (r *ReconcileBuildRun) getReferencedStrategy(ctx context.Context, build *buildv1alpha1.Build, buildRun *buildv1alpha1.BuildRun) (strategy buildv1alpha1.BuilderStrategy, err error) {

if build.Spec.StrategyRef.Kind == nil || *build.Spec.StrategyRef.Kind == buildv1alpha1.NamespacedBuildStrategyKind {
buildStrategy, err := resources.RetrieveBuildStrategy(ctx, r.client, build)
if build.Spec.StrategyRef.Kind == nil {
// If the strategy Kind is not specified, we default to a namespaced-scope strategy
ctxlog.Info(ctx, "missing strategy Kind, defaulting to a namespaced-scope one", buildRun.Name, build.Name, namespace)
strategy, err = resources.RetrieveBuildStrategy(ctx, r.client, build)
if err != nil {
return nil, err
}
if buildStrategy != nil {
generatedTaskRun, err = resources.GenerateTaskRun(r.config, build, buildRun, serviceAccount.Name, buildStrategy)
if err != nil {
updateErr := r.updateBuildRunErrorStatus(ctx, buildRun, err.Error())
return nil, resources.HandleError("Failed to generate the taskrun with buildStrategy", err, updateErr)
if apierrors.IsNotFound(err) {
if updateErr := resources.UpdateConditionWithFalseStatus(ctx, r.client, buildRun, err.Error(), resources.BuildStrategyNotFound); updateErr != nil {
return nil, resources.HandleError("failed to get referenced strategy", err, updateErr)
}
}
}
} else if *build.Spec.StrategyRef.Kind == buildv1alpha1.ClusterBuildStrategyKind {
clusterBuildStrategy, err := resources.RetrieveClusterBuildStrategy(ctx, r.client, build)
return strategy, err
}

switch *build.Spec.StrategyRef.Kind {
case buildv1alpha1.NamespacedBuildStrategyKind:
strategy, err = resources.RetrieveBuildStrategy(ctx, r.client, build)
if err != nil {
return nil, err
if apierrors.IsNotFound(err) {
if updateErr := resources.UpdateConditionWithFalseStatus(ctx, r.client, buildRun, err.Error(), resources.BuildStrategyNotFound); updateErr != nil {
return nil, resources.HandleError("failed to get referenced strategy", err, updateErr)
}
}
}
if clusterBuildStrategy != nil {
generatedTaskRun, err = resources.GenerateTaskRun(r.config, build, buildRun, serviceAccount.Name, clusterBuildStrategy)
if err != nil {
updateErr := r.updateBuildRunErrorStatus(ctx, buildRun, err.Error())
return nil, resources.HandleError("Failed to generate the taskrun with clusterBuildStrategy", err, updateErr)
case buildv1alpha1.ClusterBuildStrategyKind:
strategy, err = resources.RetrieveClusterBuildStrategy(ctx, r.client, build)
if err != nil {
if apierrors.IsNotFound(err) {
if updateErr := resources.UpdateConditionWithFalseStatus(ctx, r.client, buildRun, err.Error(), resources.ClusterBuildStrategyNotFound); updateErr != nil {
return nil, resources.HandleError("failed to get referenced strategy", err, updateErr)
}
}
}
} else {
err := fmt.Errorf("unknown strategy %s", string(*build.Spec.StrategyRef.Kind))
updateErr := r.updateBuildRunErrorStatus(ctx, buildRun, err.Error())
return nil, resources.HandleError(fmt.Sprintf("Unsupported BuildStrategy Kind: %v", build.Spec.StrategyRef.Kind), err, updateErr)
}

// Set OwnerReference for BuildRun and TaskRun
if err := r.setOwnerReferenceFunc(buildRun, generatedTaskRun, r.scheme); err != nil {
updateErr := r.updateBuildRunErrorStatus(ctx, buildRun, err.Error())
return nil, resources.HandleError("failed to set OwnerReference for BuildRun and TaskRun", err, updateErr)
default:
err = fmt.Errorf("unknown strategy %s", string(*build.Spec.StrategyRef.Kind))
if updateErr := resources.UpdateConditionWithFalseStatus(ctx, r.client, buildRun, err.Error(), resources.ConditionUnknownStrategyKind); updateErr != nil {
return nil, resources.HandleError("failed to get referenced strategy", err, updateErr)
}
}

return generatedTaskRun, nil
return strategy, err
}

// updateBuildRunErrorStatus updates buildRun status fields
func (r *ReconcileBuildRun) updateBuildRunErrorStatus(ctx context.Context, buildRun *buildv1alpha1.BuildRun, errorMessage string) error {
// these two fields are deprecated and will be removed soon
//lint:ignore SA1019 should be set until removed
buildRun.Status.Succeeded = corev1.ConditionFalse
//lint:ignore SA1019 should be set until removed
buildRun.Status.Reason = errorMessage

now := metav1.Now()
buildRun.Status.CompletionTime = &now
buildRun.Status.SetCondition(&buildv1alpha1.Condition{
LastTransitionTime: now,
Type: buildv1alpha1.Succeeded,
Status: corev1.ConditionFalse,
Reason: "Failed",
Message: errorMessage,
})
ctxlog.Debug(ctx, "updating buildRun status", namespace, buildRun.Namespace, name, buildRun.Name)
updateErr := r.client.Status().Update(ctx, buildRun)
return updateErr
}
func (r *ReconcileBuildRun) createTaskRun(ctx context.Context, serviceAccount *corev1.ServiceAccount, strategy buildv1alpha1.BuilderStrategy, build *buildv1alpha1.Build, buildRun *buildv1alpha1.BuildRun) (*v1beta1.TaskRun, error) {
var (
generatedTaskRun *v1beta1.TaskRun
)

// ValidateBuildRegistration verifies that a referenced Build is properly registered
func (r *ReconcileBuildRun) ValidateBuildRegistration(ctx context.Context, build *buildv1alpha1.Build, buildRun *buildv1alpha1.BuildRun) error {
if build.Status.Registered == "" {
err := fmt.Errorf("the Build is not yet validated, build: %s", build.Name)
return err
generatedTaskRun, err := resources.GenerateTaskRun(r.config, build, buildRun, serviceAccount.Name, strategy)
if err != nil {
if updateErr := resources.UpdateConditionWithFalseStatus(ctx, r.client, buildRun, err.Error(), resources.ConditionTaskRunGenerationFailed); updateErr != nil {
return nil, resources.HandleError("failed to create taskrun runtime object", err, updateErr)
}

return nil, err
}
if build.Status.Registered != corev1.ConditionTrue {
err := fmt.Errorf("the Build is not registered correctly, build: %s, registered status: %s, reason: %s", build.Name, build.Status.Registered, build.Status.Reason)
updateErr := r.updateBuildRunErrorStatus(ctx, buildRun, err.Error())
return resources.HandleError("Build is not ready", err, updateErr)

// Set OwnerReference for BuildRun and TaskRun
if err := r.setOwnerReferenceFunc(buildRun, generatedTaskRun, r.scheme); err != nil {
if updateErr := resources.UpdateConditionWithFalseStatus(ctx, r.client, buildRun, err.Error(), resources.ConditionSetOwnerReferenceFailed); updateErr != nil {
return nil, resources.HandleError("failed to create taskrun runtime object", err, updateErr)
}

return nil, err
}
return nil

return generatedTaskRun, nil
}
Loading

0 comments on commit 023e050

Please sign in to comment.