diff --git a/cmd/machine-remediation/main.go b/cmd/machine-remediation/main.go index f0cfa90a..e78901d2 100644 --- a/cmd/machine-remediation/main.go +++ b/cmd/machine-remediation/main.go @@ -6,6 +6,7 @@ import ( "github.com/golang/glog" bmov1 "github.com/metal3-io/baremetal-operator/pkg/apis/metal3/v1alpha1" + mrv1 "kubevirt.io/machine-remediation-operator/pkg/apis/machineremediation/v1alpha1" "kubevirt.io/machine-remediation-operator/pkg/baremetal/remediator" "kubevirt.io/machine-remediation-operator/pkg/controllers" @@ -65,9 +66,9 @@ func main() { glog.Fatal(err) } - bareMetalRemediator := remediator.NewBareMetalRemediator(mgr) + remediator := remediator.NewBareMetalRemediator(mgr) addController := func(m manager.Manager, opts manager.Options) error { - return machineremediation.AddWithRemediator(m, bareMetalRemediator, opts) + return machineremediation.AddWithRemediator(m, remediator, opts) } // Setup all Controllers diff --git a/pkg/baremetal/remediator/remediator.go b/pkg/baremetal/remediator/remediator.go index 38cc125a..d1fca5b6 100644 --- a/pkg/baremetal/remediator/remediator.go +++ b/pkg/baremetal/remediator/remediator.go @@ -64,10 +64,10 @@ func (bmr *BareMetalRemediator) Reboot(ctx context.Context, machineRemediation * } // Copy the BareMetalHost object to prevent modification of the original one - newBmh := bmh.DeepCopy() + bmhCopy := bmh.DeepCopy() // Copy the MachineRemediation object to prevent modification of the original one - newMachineRemediation := machineRemediation.DeepCopy() + mrCopy := machineRemediation.DeepCopy() now := time.Now() switch machineRemediation.Status.State { @@ -77,30 +77,30 @@ func (bmr *BareMetalRemediator) Reboot(ctx context.Context, machineRemediation * // it can mean that an user power off the machine by purpose if !bmh.Spec.Online { glog.V(4).Infof("Skip the remediation, machine %q has power off state before the remediation action", machine.Name) - newMachineRemediation.Status.State = mrv1.RemediationStateSucceeded - newMachineRemediation.Status.Reason = "Skip the reboot, the machine power off by an user" - newMachineRemediation.Status.EndTime = &metav1.Time{Time: now} + mrCopy.Status.State = mrv1.RemediationStateSucceeded + mrCopy.Status.Reason = "Skip the reboot, the machine power off by an user" + mrCopy.Status.EndTime = &metav1.Time{Time: now} } else { // power off the machine glog.V(4).Infof("Power off machine %q", machine.Name) - newBmh.Spec.Online = false - if err := bmr.client.Update(context.TODO(), newBmh); err != nil { + bmhCopy.Spec.Online = false + if err := bmr.client.Update(context.TODO(), bmhCopy); err != nil { return err } - newMachineRemediation.Status.State = mrv1.RemediationStatePowerOff - newMachineRemediation.Status.Reason = "Starts the reboot process" + mrCopy.Status.State = mrv1.RemediationStatePowerOff + mrCopy.Status.Reason = "Starts the reboot process" } - return bmr.client.Update(context.TODO(), newMachineRemediation) + return bmr.client.Status().Update(context.TODO(), mrCopy) case mrv1.RemediationStatePowerOff: // failed the remediation on timeout if machineRemediation.Status.StartTime.Time.Add(rebootDefaultTimeout * time.Minute).Before(now) { glog.Errorf("Remediation of machine %q failed on timeout", machine.Name) - newMachineRemediation.Status.State = mrv1.RemediationStateFailed - newMachineRemediation.Status.Reason = "Reboot failed on timeout" - newMachineRemediation.Status.EndTime = &metav1.Time{Time: now} - return bmr.client.Update(context.TODO(), newMachineRemediation) + mrCopy.Status.State = mrv1.RemediationStateFailed + mrCopy.Status.Reason = "Reboot failed on timeout" + mrCopy.Status.EndTime = &metav1.Time{Time: now} + return bmr.client.Status().Update(context.TODO(), mrCopy) } // host still has state on, we need to reconcile @@ -116,23 +116,23 @@ func (bmr *BareMetalRemediator) Reboot(ctx context.Context, machineRemediation * // power on the machine glog.V(4).Infof("Power on machine %q", machine.Name) - newBmh.Spec.Online = true - if err := bmr.client.Update(context.TODO(), newBmh); err != nil { + bmhCopy.Spec.Online = true + if err := bmr.client.Update(context.TODO(), bmhCopy); err != nil { return err } - newMachineRemediation.Status.State = mrv1.RemediationStatePowerOn - newMachineRemediation.Status.Reason = "Reboot in progress" - return bmr.client.Update(context.TODO(), newMachineRemediation) + mrCopy.Status.State = mrv1.RemediationStatePowerOn + mrCopy.Status.Reason = "Reboot in progress" + return bmr.client.Status().Update(context.TODO(), mrCopy) case mrv1.RemediationStatePowerOn: // failed the remediation on timeout if machineRemediation.Status.StartTime.Time.Add(rebootDefaultTimeout * time.Minute).Before(now) { glog.Errorf("Remediation of machine %q failed on timeout", machine.Name) - newMachineRemediation.Status.State = mrv1.RemediationStateFailed - newMachineRemediation.Status.Reason = "Reboot failed on timeout" - newMachineRemediation.Status.EndTime = &metav1.Time{Time: now} - return bmr.client.Update(context.TODO(), newMachineRemediation) + mrCopy.Status.State = mrv1.RemediationStateFailed + mrCopy.Status.Reason = "Reboot failed on timeout" + mrCopy.Status.EndTime = &metav1.Time{Time: now} + return bmr.client.Status().Update(context.TODO(), mrCopy) } node, err := getNodeByMachine(bmr.client, machine) @@ -149,10 +149,10 @@ func (bmr *BareMetalRemediator) Reboot(ctx context.Context, machineRemediation * // Node back to Ready under the cluster if conditions.NodeHasCondition(node, corev1.NodeReady, corev1.ConditionTrue) { glog.V(4).Infof("Remediation of machine %q succeeded", machine.Name) - newMachineRemediation.Status.State = mrv1.RemediationStateSucceeded - newMachineRemediation.Status.Reason = "Reboot succeeded" - newMachineRemediation.Status.EndTime = &metav1.Time{Time: now} - return bmr.client.Update(context.TODO(), newMachineRemediation) + mrCopy.Status.State = mrv1.RemediationStateSucceeded + mrCopy.Status.Reason = "Reboot succeeded" + mrCopy.Status.EndTime = &metav1.Time{Time: now} + return bmr.client.Status().Update(context.TODO(), mrCopy) } return nil diff --git a/pkg/controllers/machinedisruptionbudget/BUILD.bazel b/pkg/controllers/machinedisruptionbudget/BUILD.bazel index e996abe9..8f1c43dd 100644 --- a/pkg/controllers/machinedisruptionbudget/BUILD.bazel +++ b/pkg/controllers/machinedisruptionbudget/BUILD.bazel @@ -13,7 +13,6 @@ go_library( "//vendor/k8s.io/apimachinery/pkg/api/equality:go_default_library", "//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library", "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", - "//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library", "//vendor/k8s.io/apimachinery/pkg/types:go_default_library", "//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library", "//vendor/k8s.io/client-go/tools/record:go_default_library", diff --git a/pkg/controllers/machinedisruptionbudget/machinedisruptionbudget_controller.go b/pkg/controllers/machinedisruptionbudget/machinedisruptionbudget_controller.go index 830aa846..c2763c47 100644 --- a/pkg/controllers/machinedisruptionbudget/machinedisruptionbudget_controller.go +++ b/pkg/controllers/machinedisruptionbudget/machinedisruptionbudget_controller.go @@ -11,7 +11,6 @@ import ( apiequality "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/tools/record" @@ -66,7 +65,6 @@ func Add(mgr manager.Manager, opts manager.Options) error { func newReconciler(mgr manager.Manager, opts manager.Options) *ReconcileMachineDisruption { return &ReconcileMachineDisruption{ client: mgr.GetClient(), - scheme: mgr.GetScheme(), recorder: mgr.GetEventRecorderFor("machine-disruption-controller"), } } @@ -94,7 +92,6 @@ type ReconcileMachineDisruption struct { // that reads objects from the cache and writes to the apiserver client client.Client recorder record.EventRecorder - scheme *runtime.Scheme } // Reconcile reads that state of the cluster for MachineDisruptionBudget and machine objects and makes changes based on labels under diff --git a/pkg/controllers/machinedisruptionbudget/machinedisruptionbudget_controller_test.go b/pkg/controllers/machinedisruptionbudget/machinedisruptionbudget_controller_test.go index 574def97..2730c358 100644 --- a/pkg/controllers/machinedisruptionbudget/machinedisruptionbudget_controller_test.go +++ b/pkg/controllers/machinedisruptionbudget/machinedisruptionbudget_controller_test.go @@ -37,7 +37,6 @@ func newFakeReconciler(recorder record.EventRecorder, initObjects ...runtime.Obj return &ReconcileMachineDisruption{ client: fakeClient, recorder: recorder, - scheme: scheme.Scheme, } } diff --git a/pkg/controllers/machinehealthcheck/BUILD.bazel b/pkg/controllers/machinehealthcheck/BUILD.bazel index a4312789..13c1b715 100644 --- a/pkg/controllers/machinehealthcheck/BUILD.bazel +++ b/pkg/controllers/machinehealthcheck/BUILD.bazel @@ -14,7 +14,6 @@ go_library( "//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library", "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", "//vendor/k8s.io/apimachinery/pkg/labels:go_default_library", - "//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library", "//vendor/k8s.io/apimachinery/pkg/types:go_default_library", "//vendor/k8s.io/client-go/tools/cache:go_default_library", "//vendor/sigs.k8s.io/cluster-api/pkg/apis/machine/v1beta1:go_default_library", diff --git a/pkg/controllers/machinehealthcheck/machinehealthcheck_controller.go b/pkg/controllers/machinehealthcheck/machinehealthcheck_controller.go index 88eeb23d..506bc284 100644 --- a/pkg/controllers/machinehealthcheck/machinehealthcheck_controller.go +++ b/pkg/controllers/machinehealthcheck/machinehealthcheck_controller.go @@ -11,7 +11,6 @@ import ( "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/cache" @@ -29,8 +28,8 @@ import ( ) const ( - machineAnnotationKey = "machine.openshift.io/machine" - ownerControllerKind = "MachineSet" + machineAnnotationKey = "machine.openshift.io/machine" + ownerControllerKind = "MachineSet" disableRemediationAnotationKey = "healthchecking.openshift.io/disabled" ) @@ -44,7 +43,6 @@ func Add(mgr manager.Manager, opts manager.Options) error { func newReconciler(mgr manager.Manager, opts manager.Options) reconcile.Reconciler { return &ReconcileMachineHealthCheck{ client: mgr.GetClient(), - scheme: mgr.GetScheme(), namespace: opts.Namespace, } } @@ -66,7 +64,6 @@ type ReconcileMachineHealthCheck struct { // This client, initialized using mgr.Client() above, is a split client // that reads objects from the cache and writes to the apiserver client client.Client - scheme *runtime.Scheme namespace string } @@ -122,6 +119,11 @@ func (r *ReconcileMachineHealthCheck) Reconcile(request reconcile.Request) (reco return reconcile.Result{}, err } + if hasRemediationDisabledAnnotation(*machine) { + glog.Infof("Machine %q has a matching %s annotation set to , remediation is skipped.", machine.Name, disableRemediationAnotationKey) + return reconcile.Result{}, nil + } + // If the current machine matches any existing MachineHealthCheck CRD allMachineHealthChecks := &mrv1.MachineHealthCheckList{} err = r.client.List(context.Background(), allMachineHealthChecks) @@ -130,11 +132,6 @@ func (r *ReconcileMachineHealthCheck) Reconcile(request reconcile.Request) (reco return reconcile.Result{}, err } - if hasRemediationDisabledAnnotation(*machine) { - glog.Infof("Machine %s has a matching %s annotation set to , remediation is skipped.", - machineKey, disableRemediationAnotationKey) - return reconcile.Result{}, nil - } for _, hc := range allMachineHealthChecks.Items { if hasMatchingLabels(&hc, machine) { @@ -280,11 +277,6 @@ func (r *ReconcileMachineHealthCheck) remediationStrategyReboot(machine *mapiv1. MachineName: machine.Name, Type: mrv1.RemediationTypeReboot, }, - Status: mrv1.MachineRemediationStatus{ - State: mrv1.RemediationStateStarted, - Reason: "Machine remediation started", - StartTime: &metav1.Time{Time: time.Now()}, - }, } glog.Infof("Machine %s has been unhealthy for too long, creating machine remediation", machine.Name) @@ -338,7 +330,6 @@ func hasRemediationDisabledAnnotation(machine mapiv1.Machine) bool { return skipRemediation == "true" } - func hasMatchingLabels(machineHealthCheck *mrv1.MachineHealthCheck, machine *mapiv1.Machine) bool { selector, err := metav1.LabelSelectorAsSelector(&machineHealthCheck.Spec.Selector) if err != nil { diff --git a/pkg/controllers/machinehealthcheck/machinehealthcheck_controller_test.go b/pkg/controllers/machinehealthcheck/machinehealthcheck_controller_test.go index 4f599001..493db71d 100644 --- a/pkg/controllers/machinehealthcheck/machinehealthcheck_controller_test.go +++ b/pkg/controllers/machinehealthcheck/machinehealthcheck_controller_test.go @@ -120,7 +120,6 @@ func newFakeReconciler(initObjects ...runtime.Object) *ReconcileMachineHealthChe fakeClient := fake.NewFakeClient(initObjects...) return &ReconcileMachineHealthCheck{ client: fakeClient, - scheme: scheme.Scheme, namespace: consts.NamespaceOpenshiftMachineAPI, } } diff --git a/pkg/controllers/machineremediation/BUILD.bazel b/pkg/controllers/machineremediation/BUILD.bazel index 852ecac8..e4dfdd1e 100644 --- a/pkg/controllers/machineremediation/BUILD.bazel +++ b/pkg/controllers/machineremediation/BUILD.bazel @@ -12,7 +12,7 @@ go_library( "//pkg/apis/machineremediation/v1alpha1:go_default_library", "//vendor/github.com/golang/glog:go_default_library", "//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library", - "//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", "//vendor/sigs.k8s.io/controller-runtime/pkg/client:go_default_library", "//vendor/sigs.k8s.io/controller-runtime/pkg/controller:go_default_library", "//vendor/sigs.k8s.io/controller-runtime/pkg/handler:go_default_library", diff --git a/pkg/controllers/machineremediation/machineremediation_controller.go b/pkg/controllers/machineremediation/machineremediation_controller.go index 3e5768b0..9717b36f 100644 --- a/pkg/controllers/machineremediation/machineremediation_controller.go +++ b/pkg/controllers/machineremediation/machineremediation_controller.go @@ -7,7 +7,7 @@ import ( "github.com/golang/glog" "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/runtime" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" mrv1 "kubevirt.io/machine-remediation-operator/pkg/apis/machineremediation/v1alpha1" @@ -27,7 +27,6 @@ type ReconcileMachineRemediation struct { // that reads objects from the cache and writes to the apiserver client client.Client remediator Remediator - scheme *runtime.Scheme namespace string } @@ -44,7 +43,6 @@ func AddWithRemediator(mgr manager.Manager, remediator Remediator, opts manager. func newReconciler(mgr manager.Manager, remediator Remediator, opts manager.Options) (reconcile.Reconciler, error) { return &ReconcileMachineRemediation{ client: mgr.GetClient(), - scheme: mgr.GetScheme(), remediator: remediator, namespace: opts.Namespace, }, nil @@ -88,6 +86,19 @@ func (r *ReconcileMachineRemediation) Reconcile(request reconcile.Request) (reco return reconcile.Result{}, nil } + if mr.Status.State == "" { + mrCopy := mr.DeepCopy() + mrCopy.Status = mrv1.MachineRemediationStatus{ + State: mrv1.RemediationStateStarted, + Reason: "Machine remediation started", + StartTime: &metav1.Time{Time: time.Now()}, + } + if err := r.client.Status().Update(context.TODO(), mrCopy); err != nil { + glog.Errorf("failed to update MR %q status: %v", mr.Name, err) + return reconcile.Result{}, err + } + } + switch mr.Spec.Type { case mrv1.RemediationTypeReboot: glog.V(4).Infof("Run remediation reboot acion for MachineRemediation %s", mr.Name) diff --git a/pkg/operator/components/rbac.go b/pkg/operator/components/rbac.go index 5a383ba1..4ec598d8 100644 --- a/pkg/operator/components/rbac.go +++ b/pkg/operator/components/rbac.go @@ -172,6 +172,7 @@ var ( Verbs: []string{ "get", "list", + "watch", }, }, { @@ -180,6 +181,7 @@ var ( }, Resources: []string{ "machineremediations", + "machineremediations/status", }, Verbs: []string{ "delete", @@ -200,6 +202,7 @@ var ( "delete", "get", "list", + "watch", }, }, { @@ -213,6 +216,7 @@ var ( "get", "list", "update", + "watch", }, }, { diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index 4def3e2a..0c9e588a 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -173,7 +173,7 @@ func (r *ReconcileMachineRemediationOperator) createOrUpdateComponents(mro *mrv1 ImageRepository: mro.Spec.ImageRegistry, PullPolicy: mro.Spec.ImagePullPolicy, OperatorVersion: r.operatorVersion, - Verbosity: "2", + Verbosity: "4", } if err := r.createOrUpdateDeployment(deployData); err != nil { return err