Skip to content

Commit abd6644

Browse files
committed
Set failure only on instance error when no nodeRef
Setting a failure message is permanent and requires user action to get rid of. We cannot know if the error is permanent though. The idea with this commit is that if the machine has a nodeRef, we know that the instance was working at some point, so the error could well be temporary. Therefore we do not set failure in this case. On the other hand, if the machine does not have a nodeRef, we set failure since it is more likely that there is some configuration error that will not go away. Remove dead code and improve logging slightly in similar cases.
1 parent 01b5263 commit abd6644

File tree

1 file changed

+8
-11
lines changed

1 file changed

+8
-11
lines changed

controllers/openstackmachine_controller.go

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -334,14 +334,6 @@ func (r *OpenStackMachineReconciler) reconcileNormal(ctx context.Context, scope
334334
return ctrl.Result{}, err
335335
}
336336

337-
// Set an error message if we couldn't find the instance.
338-
if instanceStatus == nil {
339-
err = errors.New("OpenStack instance not found")
340-
openStackMachine.SetFailure(capierrors.UpdateMachineError, err)
341-
conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.InstanceNotFoundReason, clusterv1.ConditionSeverityError, "")
342-
return ctrl.Result{}, nil
343-
}
344-
345337
// TODO(sbueringer) From CAPA: TODO(ncdc): move this validation logic into a validating webhook (for us: create validation logic in webhook)
346338

347339
openStackMachine.Spec.ProviderID = pointer.String(fmt.Sprintf("openstack:///%s", instanceStatus.ID()))
@@ -364,10 +356,13 @@ func (r *OpenStackMachineReconciler) reconcileNormal(ctx context.Context, scope
364356
conditions.MarkTrue(openStackMachine, infrav1.InstanceReadyCondition)
365357
openStackMachine.Status.Ready = true
366358
case infrav1.InstanceStateError:
367-
// Error is unexpected, thus we report error and never retry
359+
// If the machine has a NodeRef the error could be something temporary.
360+
// If not, it is more likely a configuration error so we set failure and never retry.
368361
scope.Logger().Info("Machine instance state is ERROR", "id", instanceStatus.ID())
369-
err = fmt.Errorf("instance state %q is unexpected", instanceStatus.State())
370-
openStackMachine.SetFailure(capierrors.UpdateMachineError, err)
362+
if machine.Status.NodeRef == nil {
363+
err = fmt.Errorf("instance state %q is unexpected", instanceStatus.State())
364+
openStackMachine.SetFailure(capierrors.UpdateMachineError, err)
365+
}
371366
conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.InstanceStateErrorReason, clusterv1.ConditionSeverityError, "")
372367
return ctrl.Result{}, nil
373368
case infrav1.InstanceStateDeleted:
@@ -429,6 +424,8 @@ func (r *OpenStackMachineReconciler) reconcileNormal(ctx context.Context, scope
429424
func (r *OpenStackMachineReconciler) getOrCreate(logger logr.Logger, cluster *clusterv1.Cluster, openStackCluster *infrav1.OpenStackCluster, machine *clusterv1.Machine, openStackMachine *infrav1.OpenStackMachine, computeService *compute.Service, userData string) (*compute.InstanceStatus, error) {
430425
instanceStatus, err := computeService.GetInstanceStatusByName(openStackMachine, openStackMachine.Name)
431426
if err != nil {
427+
logger.Info("Unable to get OpenStack instance", "Machine", openStackMachine.Name)
428+
conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.InstanceNotFoundReason, clusterv1.ConditionSeverityError, err.Error())
432429
return nil, err
433430
}
434431

0 commit comments

Comments
 (0)