Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Operator SDK controller failed to update Custom Resource status

I'm following this tutorial to create my first Custom Resource named PodSet and currently at step 6 of 7 to test my CR.

Here is my Operator SDK controller Go code:

package controllers

import (
    "context"
    "reflect"

    "github.com/go-logr/logr"
    "k8s.io/apimachinery/pkg/labels"
    "k8s.io/apimachinery/pkg/runtime"
    ctrl "sigs.k8s.io/controller-runtime"
    "sigs.k8s.io/controller-runtime/pkg/client"
    "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    "sigs.k8s.io/controller-runtime/pkg/reconcile"

    appv1alpha1 "github.com/redhat/podset-operator/api/v1alpha1"
    corev1 "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/api/errors"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// PodSetReconciler reconciles a PodSet object
type PodSetReconciler struct {
    client.Client
    Log    logr.Logger
    Scheme *runtime.Scheme
}

// +kubebuilder:rbac:groups=app.example.com,resources=podsets,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=app.example.com,resources=podsets/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=v1,resources=pods,verbs=get;list;watch;create;update;patch;delete

// Reconcile is the core logic of controller
func (r *PodSetReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
    _ = context.Background()
    _ = r.Log.WithValues("podset", req.NamespacedName)

    // Fetch the PodSet instance (the parent of the pods)
    instance := &appv1alpha1.PodSet{}
    err := r.Get(context.Background(), req.NamespacedName, instance)
    if err != nil {
        if errors.IsNotFound(err) {
            // Request object not found, could have been deleted after reconcile request.
            // Owned objects are automatically garbage collected. For additional cleanup logic use finalizers.
            // Return and don't requeue
            return reconcile.Result{}, nil
        }
        // Error reading the object - requeue the request
        return reconcile.Result{}, err
    }

    // List all pods owned by this PodSet instance
    podSet := instance
    podList := &corev1.PodList{}
    labelz := map[string]string{
        "app":     podSet.Name, // the metadata.name field from user's CR PodSet YAML file
        "version": "v0.1",
    }
    labelSelector := labels.SelectorFromSet(labelz)
    listOpts := &client.ListOptions{Namespace: podSet.Namespace, LabelSelector: labelSelector}
    if err = r.List(context.Background(), podList, listOpts); err != nil {
        return reconcile.Result{}, err
    }

    // Count the pods that are pending or running and add them to available array
    var available []corev1.Pod
    for _, pod := range podList.Items {
        if pod.ObjectMeta.DeletionTimestamp != nil {
            continue
        }
        if pod.Status.Phase == corev1.PodRunning || pod.Status.Phase == corev1.PodPending {
            available = append(available, pod)
        }
    }
    numAvailable := int32(len(available))
    availableNames := []string{}
    for _, pod := range available {
        availableNames = append(availableNames, pod.ObjectMeta.Name)
    }

    // Update the status if necessary
    status := appv1alpha1.PodSetStatus{
        PodNames:          availableNames,
        AvailableReplicas: numAvailable,
    }
    if !reflect.DeepEqual(podSet.Status, status) {
        podSet.Status = status
        err = r.Status().Update(context.Background(), podSet)
        if err != nil {
            r.Log.Error(err, "Failed to update PodSet status")
            return reconcile.Result{}, err
        }
    }

    // When the number of pods in the cluster is bigger that what we want, scale down
    if numAvailable > podSet.Spec.Replicas {
        r.Log.Info("Scaling down pods", "Currently available", numAvailable, "Required replicas", podSet.Spec.Replicas)
        diff := numAvailable - podSet.Spec.Replicas
        toDeletePods := available[:diff] // Syntax help: https://play.golang.org/p/SHAMCdd12sp
        for _, toDeletePod := range toDeletePods {
            err = r.Delete(context.Background(), &toDeletePod)
            if err != nil {
                r.Log.Error(err, "Failed to delete pod", "pod.name", toDeletePod.Name)
                return reconcile.Result{}, err
            }
        }
        return reconcile.Result{Requeue: true}, nil
    }

    // When the number of pods in the cluster is smaller that what we want, scale up
    if numAvailable < podSet.Spec.Replicas {
        r.Log.Info("Scaling up pods", "Currently available", numAvailable, "Required replicas", podSet.Spec.Replicas)
        // Define a new Pod object
        pod := newPodForCR(podSet)
        // Set PodSet instance as the owner of the Pod
        if err := controllerutil.SetControllerReference(podSet, pod, r.Scheme); err != nil {
            return reconcile.Result{}, err
        }
        err = r.Create(context.Background(), pod)
        if err != nil {
            r.Log.Error(err, "Failed to create pod", "pod.name", pod.Name)
            return reconcile.Result{}, err
        }
        return reconcile.Result{Requeue: true}, nil
    }

    return ctrl.Result{}, nil
}

// newPodForCR returns a busybox pod with the same name/namespace as the cr
func newPodForCR(cr *appv1alpha1.PodSet) *corev1.Pod {
    labels := map[string]string{
        "app":     cr.Name, // the metadata.name field from user's CR PodSet YAML file
        "version": "v0.1",
    }
    return &corev1.Pod{
        ObjectMeta: metav1.ObjectMeta{
            GenerateName: cr.Name + "-pod",
            Namespace:    cr.Namespace,
            Labels:       labels,
        },
        Spec: corev1.PodSpec{
            Containers: []corev1.Container{
                {
                    Name:    "busybox",
                    Image:   "busybox",
                    Command: []string{"sleep", "3600"},
                },
            },
        },
    }
}

// SetupWithManager defines how the controller will watch for resources
func (r *PodSetReconciler) SetupWithManager(mgr ctrl.Manager) error {
    return ctrl.NewControllerManagedBy(mgr).
        For(&appv1alpha1.PodSet{}).
        Owns(&corev1.Pod{}).
        Complete(r)
}

When I apply below YAML file, I saw strange behaviour of the pods. They were struggling in the first few seconds — some of them get up and running for a while and quickly get into terminating state. When I leave them untouched for few more seconds, the CR reached the desired state just fine.

apiVersion: app.example.com/v1alpha1
kind: PodSet
metadata:
  name: podset-sample
spec:
  replicas: 5

I captured the deployment scene above in this video. And here are the full logs from my local terminal running WATCH_NAMESPACE=podset-operator make run command (sorry, I have to use Pastebin because SO didn't allow me to paste the full logs here because they are too long).

So, my questions here are:

  1. What does the Failed to update PodSet status {"error": "Operation cannot be fulfilled on podsets.app.example.com \"podset-sample\": the object has been modified; please apply your changes to the latest version and try again"} actually means?
  2. Why this happened?
  3. What can I do to get rid of these errors?
like image 316
Zulhilmi Zainudin Avatar asked May 08 '26 23:05

Zulhilmi Zainudin


1 Answers

You need get the object before update, that happens because you have old version of the object when you try to update.

EDIT:

podSet := &appv1alpha1.PodSet{}
err := r.Get(context.Background(), req.NamespacedName, podSet)
if err != nil {
  return reconcile.Result{}, err
}

// Update the status if necessary
status := appv1alpha1.PodSetStatus{
    PodNames:          availableNames,
    AvailableReplicas: numAvailable,
}
if !reflect.DeepEqual(podSet.Status, status) {
   podSet.Status = status
   err = r.Status().Update(context.Background(), podSet)
   if err != nil {
      r.Log.Error(err, "Failed to update PodSet status")
      return reconcile.Result{}, err
  }
}

you have to bring the latest version of the object from kubernetes to make sure you have the latest version of it

like image 110
Alejandro Jesus Nuñez Madrazo Avatar answered May 11 '26 15:05

Alejandro Jesus Nuñez Madrazo