 

terraform helm release timeout while waiting for condition

I'm using Terraform to provision some resources in Azure, and I can't get Helm to install nginx-ingress because it times out waiting for the condition:

  • helm_release.nginx_ingress: 1 error(s) occurred:

  • helm_release.nginx_ingress: rpc error: code = Unknown desc = release nginx-ingress failed: timed out waiting for the condition

Terraform does not automatically rollback in the face of errors. Instead, your Terraform state file has been partially updated with any resources that successfully completed. Please address the error above and apply again to incrementally change your infrastructure.

main.tf

data "azurerm_public_ip" "nginx_ingress" {
    name                = "xxxx-public-ip"
    resource_group_name = "xxxx-public-ip"
}

resource "azurerm_resource_group" "xxxx_RG" {
  name     = "${var.name_prefix}"
  location = "${var.location}"
}

resource "azurerm_kubernetes_cluster" "k8s" {
    name                    = "${var.name_prefix}-aks"
    kubernetes_version      = "${var.kubernetes_version}"
    location                = "${azurerm_resource_group.xxxx_RG.location}"
    resource_group_name     = "${azurerm_resource_group.xxxx_RG.name}"
    dns_prefix              = "AKS-${var.dns_prefix}"

    agent_pool_profile {
        name                = "${var.node_pool_name}"
        count               = "${var.node_pool_size}"
        vm_size             = "${var.node_pool_vmsize}"
        os_type             = "${var.node_pool_os}"
        os_disk_size_gb     = 30
    }

    service_principal {
        client_id           = "${var.client_id}"
        client_secret       = "${var.client_secret}"
    }

    tags = {
        environment = "${var.env_tag}"
    }
}

provider "helm" {
  install_tiller = true

  kubernetes {
    host                   = "${azurerm_kubernetes_cluster.k8s.kube_config.0.host}"
    client_certificate     = "${base64decode(azurerm_kubernetes_cluster.k8s.kube_config.0.client_certificate)}"
    client_key             = "${base64decode(azurerm_kubernetes_cluster.k8s.kube_config.0.client_key)}"
    cluster_ca_certificate = "${base64decode(azurerm_kubernetes_cluster.k8s.kube_config.0.cluster_ca_certificate)}"
  }
}

# Add Kubernetes Stable Helm charts repo
resource "helm_repository" "stable" {
  name = "stable"
  url  = "https://kubernetes-charts.storage.googleapis.com"
}

# Install Nginx Ingress using Helm Chart
resource "helm_release" "nginx_ingress" {
  name       = "nginx-ingress"
  repository = "${helm_repository.stable.metadata.0.name}"
  chart      = "nginx-ingress"
  wait       = "true"

  set {
    name  = "rbac.create"
    value = "false"
  }

  set {
    name  = "controller.service.externalTrafficPolicy"
    value = "Local"
  }

  set {
    name  = "controller.service.loadBalancerIP"
    value = "${data.azurerm_public_ip.nginx_ingress.ip_address}"
  }
}

Then I deploy my application with this:

provider "kubernetes" {
    host                    = "${azurerm_kubernetes_cluster.k8s.kube_config.0.host}"
    username                = "${azurerm_kubernetes_cluster.k8s.kube_config.0.username}"
    password                = "${azurerm_kubernetes_cluster.k8s.kube_config.0.password}"
    client_certificate      = "${base64decode(azurerm_kubernetes_cluster.k8s.kube_config.0.client_certificate)}"
    client_key              = "${base64decode(azurerm_kubernetes_cluster.k8s.kube_config.0.client_key)}"
    cluster_ca_certificate  = "${base64decode(azurerm_kubernetes_cluster.k8s.kube_config.0.cluster_ca_certificate)}"
}

resource "kubernetes_deployment" "flask_api_deployment" {
    metadata {
        name = "flask-api-deployment"
    }

    spec {
        replicas = 1
        selector {
            match_labels {
                component = "api"
            }
        }

        template {
            metadata {
                labels = {
                    component = "api"
                }
            }

            spec {
                container {
                    image = "xxxx.azurecr.io/sampleflask:0.1.0"
                    name = "flask-api"
                    port {
                        container_port = 5000
                    }
                }
            }
        }
    }
}

resource "kubernetes_ingress" "flask_api_ingress_service" {
    metadata {
        name = "flask-api-ingress-service"
    }

    spec {
        backend {
            service_name = "flask-api-cluster-ip-service"
            service_port = 5000
        }
    }
}

resource "kubernetes_service" "flask_api_cluster_ip-service" {
    metadata {
        name = "flask-api-cluster-ip-service"
    }

    spec {
        selector {
            component = "api"
        }

        port {
            port = 5000
            target_port = 5000
        }
    }
}

I'm not sure what condition it's waiting for. I can increase the timeout, but that doesn't seem to help. I can also set wait = false on the helm release, but then no resources seem to get provisioned.
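For reference, a sketch of the two settings I tried on the release; the timeout value here is just an example, and the rest of the block is unchanged from main.tf above.

resource "helm_release" "nginx_ingress" {
  name       = "nginx-ingress"
  repository = "${helm_repository.stable.metadata.0.name}"
  chart      = "nginx-ingress"

  # Wait longer for the chart's resources to become ready (seconds; default is 300).
  timeout = 900

  # Or don't wait at all -- but then failures only show up later.
  wait = false

  # ... same set {} blocks as above ...
}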

EDIT: From some testing I've found there is an issue when specifying the loadBalancerIP in the helm release. If I comment that out, it completes just fine.

EDIT: From more testing I've found that the load balancer is failing to be created: controller: user supplied IP Address 52.xxx.x.xx was not found in resource group MC_xxxxxxxx

So I guess the question is: how do I specify an IP from a different resource group?

asked Jul 13 '19 by Matthew The Terrible


3 Answers

To install nginx-ingress in an AKS cluster through Helm with Terraform, here is one way to do it. You need to install Helm on the machine where you will run the Terraform script, and then configure Helm against your AKS cluster (the steps are in Configure the helm to AKS). You can verify that Helm is configured correctly by installing something into the cluster with it.

When everything is ready, you just need to set up the helm provider and use the helm_release resource. The Terraform script to install nginx-ingress looks like this:

provider "helm" {
  version = "~> 0.9"
}

resource "helm_release" "ingress" {
    name = "application1"
    chart = "stable/nginx-ingress"
    version = "1.10.2"
    namespace = "ingress-basic"

    set {
        name = "controller.replicaCount"
        value = "1"
    }

    ...

}


This only covers installing nginx-ingress through Helm with Terraform. If you want to create Kubernetes resources, you can use the kubernetes provider in Terraform.

Update:

To use a static public IP in another resource group for your ingress, you need two more steps.

  1. The service principal used by the AKS cluster must have delegated permissions to the resource group in which the public IP resides. At minimum it needs the "Network Contributor" role (a Terraform sketch of that role assignment follows below).
  2. Set the ingress service annotation to the name of the resource group in which the public IP resides.

The annotation in the YAML file would look like this:

annotations:
    service.beta.kubernetes.io/azure-load-balancer-resource-group: myResourceGroup

For more details, see Use a static IP address outside of the node resource group.
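For the first step, a minimal Terraform sketch of the role assignment. It assumes the public IP lives in the xxxx-public-ip resource group from the question and that the service principal's object ID is available as a variable; both names are placeholders.

data "azurerm_resource_group" "public_ip_rg" {
  name = "xxxx-public-ip"
}

resource "azurerm_role_assignment" "aks_network_contributor" {
  # Scope the assignment to the resource group that holds the public IP.
  scope                = "${data.azurerm_resource_group.public_ip_rg.id}"
  role_definition_name = "Network Contributor"
  # Object ID of the AKS service principal (not its client_id); assumed variable.
  principal_id         = "${var.aks_sp_object_id}"
}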

Update 1:

The corresponding code in the helm_release:

resource "helm_release" "ingress" {
    name = "application1223"
    chart = "stable/nginx-ingress"
        version = "1.10.2"
    namespace = "ingress-basic"

    set {
        name = "controller.replicaCount"
        value = "1"
    }

    set {
      name = "controller.service.annotations.\"service\\.beta\\.kubernetes\\.io/azure-load-balancer-resource-group\""
      value = "v-chaxu-xxxx"
    }

    set {
      name = "controller.service.loadBalancerIP"
      value = "13.68.175.40"
    }

}

When it deploys successfully, the ingress service shows an external IP that matches the public IP in the other resource group.

answered Nov 01 '22 by Charles Xu


I was facing the same issue (helm_release timing out). On further investigation, I found that the public IP was not being assigned to the load balancer (kubectl describe svc nginx-ingress -n ingress-basic) because of incorrect RBAC permissions.

I was using the Azure AKS managed identity feature, by which Azure automatically creates a managed identity service principal with only very limited permissions (read-only access to the managed cluster resource group that AKS creates automatically). My public IP is in another resource group, while the load balancer was in the AKS-managed resource group.

Finally, I was able to fix the issue by using the 'Service Principal' option instead of managed identity in the AKS cluster, with 'Contributor' access to the subscription for the service principal.

So if anyone is facing an issue with managed identity, try using a service principal with Contributor access to the subscription; that should fix the issue.
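A minimal Terraform sketch of that subscription-level role assignment, assuming the service principal's object ID is available as a variable (the variable name is a placeholder):

data "azurerm_subscription" "current" {}

resource "azurerm_role_assignment" "sp_subscription_contributor" {
  # Contributor on the whole subscription, as described above.
  scope                = "${data.azurerm_subscription.current.id}"
  role_definition_name = "Contributor"
  # Object ID of the AKS service principal; assumed variable.
  principal_id         = "${var.aks_sp_object_id}"
}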

answered Nov 01 '22 by srsn


It's best practice to enable RBAC in your cluster. Here is an example of how to do this with Terraform and then install Helm:

…
resource "azurerm_kubernetes_cluster" "k8s" {
…

  role_based_access_control {
    enabled = "true"
  }

}

provider "kubernetes" {
  host                   = "${azurerm_kubernetes_cluster.k8s.kube_config.0.host}"
  client_certificate     = "${base64decode(azurerm_kubernetes_cluster.k8s.kube_config.0.client_certificate)}"
  client_key             = "${base64decode(azurerm_kubernetes_cluster.k8s.kube_config.0.client_key)}"
  cluster_ca_certificate = "${base64decode(azurerm_kubernetes_cluster.k8s.kube_config.0.cluster_ca_certificate)}"
}

resource "kubernetes_service_account" "tiller_sa" {
  metadata {
    name      = "tiller"
    namespace = "kube-system"
  }
}

resource "kubernetes_cluster_role_binding" "tiller_sa_cluster_admin_rb" {
  metadata {
    name = "tiller-cluster-role"
  }
  role_ref {
    kind      = "ClusterRole"
    name      = "cluster-admin"
    api_group = "rbac.authorization.k8s.io"
  }
  subject {
    kind      = "ServiceAccount"
    name      = "${kubernetes_service_account.tiller_sa.metadata.0.name}"
    namespace = "kube-system"
    api_group = ""
  }
}

# helm provider
provider "helm" {
  debug           = true
  namespace       = "kube-system"
  service_account = "tiller"
  install_tiller  = "true"
  tiller_image    = "gcr.io/kubernetes-helm/tiller:v${var.TILLER_VER}"
  kubernetes {
    host                   = "${azurerm_kubernetes_cluster.k8s.kube_config.0.host}"
    client_certificate     = "${base64decode(azurerm_kubernetes_cluster.k8s.kube_config.0.client_certificate)}"
    client_key             = "${base64decode(azurerm_kubernetes_cluster.k8s.kube_config.0.client_key)}"
    cluster_ca_certificate = "${base64decode(azurerm_kubernetes_cluster.k8s.kube_config.0.cluster_ca_certificate)}"
  }
}

data "helm_repository" "stable" {
  name = "stable"
  url  = "https://kubernetes-charts.storage.googleapis.com"
}

resource "helm_release" "datadog" {
  name       = "datadog"
  namespace  = "datadog"
  repository = "${data.helm_repository.stable.metadata.0.name}"
  chart      = "stable/datadog"

  set {
    name  = "datadog.apiKey"
    value = "${var.datadog_apikey}"
  }

}

answered Nov 01 '22 by Alessandro Vozza