Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

EcsService did not stabilize after hours

I have written a CloudFormation JSON template from scratch, but I can't deploy the stack, and AWS gives me no information about why.

It stays stuck with the service in CREATE_IN_PROGRESS for 4 to 5 hours, then reports that the service did not stabilize and rolls back.

When I check the cluster, it is shown as "active" and everything looks fine in the AWS dashboard.

I guess the container runs into an issue, or maybe something is wrong with the health check, but I don't get any information from CloudFormation. Is there a way to get more logs about what is going on during these 4 to 5 hours?

Here is my full JSON:

{
  "AWSTemplateFormatVersion": "2010-09-09",
  "Description": "test",
  "Resources": {
    "InstanceSecurityGroupOpenWeb": {
      "Type": "AWS::EC2::SecurityGroup",
      "Properties": {
        "GroupName": "test-open-web",
        "GroupDescription": "Allow http to client host",
        "VpcId": "vpc-89a8cfef",
        "SecurityGroupIngress": [
          {
            "IpProtocol": "tcp",
            "FromPort": 80,
            "ToPort": 80,
            "CidrIp": "0.0.0.0/0"
          }
        ],
        "SecurityGroupEgress": [
          {
            "IpProtocol": "tcp",
            "FromPort": 80,
            "ToPort": 80,
            "CidrIp": "0.0.0.0/0"
          }
        ]
      }
    },

    "InstanceSecurityGroupOpenFull": {
      "Type": "AWS::EC2::SecurityGroup",
      "Properties": {
        "GroupName": "test-open-full",
        "GroupDescription": "Allow all TCP traffic in and all traffic out",
        "VpcId": "vpc-89a8cfef",
        "SecurityGroupIngress": [
          {
            "IpProtocol": "tcp",
            "FromPort": 0,
            "ToPort": 65535,
            "CidrIp": "0.0.0.0/0"
          }
        ],
        "SecurityGroupEgress": [
          {
            "IpProtocol": "-1",
            "CidrIp": "0.0.0.0/0"
          }
        ]
      }
    },

    "LoadBalancer": {
      "Type": "AWS::ElasticLoadBalancingV2::LoadBalancer",
      "DependsOn": ["InstanceSecurityGroupOpenWeb", "InstanceSecurityGroupOpenFull"],
      "Properties": {
        "Name": "testalb",
        "Scheme": "internal",
        "SecurityGroups": [
          {
            "Ref": "InstanceSecurityGroupOpenWeb"
          },
          {
            "Ref": "InstanceSecurityGroupOpenFull"
          }
        ],
        "Subnets": ["subnet-aaaaaaaa", "subnet-bbbbbbbb", "subnet-cccccccc"],
        "LoadBalancerAttributes": [
          {
            "Key": "idle_timeout.timeout_seconds",
            "Value": "50"
          }
        ]
      }
    },

    "TargetGroup": {
      "Type": "AWS::ElasticLoadBalancingV2::TargetGroup",
      "DependsOn": [
        "LoadBalancer"
      ],
      "Properties": {
        "Name": "web",
        "Port": 80,
        "TargetType": "ip",
        "Protocol": "HTTP",
        "HealthCheckIntervalSeconds": 30,
        "HealthCheckProtocol": "HTTP",
        "HealthCheckTimeoutSeconds": 10,
        "HealthyThresholdCount": 4,
        "UnhealthyThresholdCount": 3,
        "Matcher": {
          "HttpCode": "200"
        },
        "TargetGroupAttributes": [
          {
            "Key": "deregistration_delay.timeout_seconds",
            "Value": "20"
          }
        ],
        "VpcId": "vpc-aaaaaaaa"
      }
    },

    "LoadBalancerListener": {
      "Type": "AWS::ElasticLoadBalancingV2::Listener",
      "DependsOn": ["TargetGroup"],
      "Properties": {
        "LoadBalancerArn": { "Ref": "LoadBalancer" },
        "Protocol": "HTTP",
        "Port": 80,
        "DefaultActions": [
          {
            "Type": "forward",
            "TargetGroupArn": { "Ref": "TargetGroup" }
          }
        ]
      }
    },

    "EcsCluster": {
      "Type": "AWS::ECS::Cluster",
      "Properties": { "ClusterName": "test" },
      "DependsOn": ["LoadBalancerListener"]
    },

    "EcsTaskRole": {
      "Type": "AWS::IAM::Role",
      "Properties": {
        "AssumeRolePolicyDocument": {
          "Version": "2012-10-17",
          "Statement": [
            {
              "Effect": "Allow",
              "Principal": {
                "Service": [
                  "ecs-tasks.amazonaws.com"
                ]
              },
              "Action": [
                "sts:AssumeRole"
              ]
            }
          ]
        },
        "Path": "/",
        "Policies": [
          {
            "PolicyName": "ecs-task",
            "PolicyDocument": {
              "Version": "2012-10-17",
              "Statement": [
                {
                  "Effect": "Allow",
                  "Action": [
                    "ecr:GetAuthorizationToken",
                    "ecr:BatchCheckLayerAvailability",
                    "ecr:GetDownloadUrlForLayer",
                    "ecr:BatchGetImage"
                  ],
                  "Resource": "*"
                }
              ]
            }
          }
        ]
      }
    },

    "WebServerTaskDefinition": {
      "Type": "AWS::ECS::TaskDefinition",
      "DependsOn": [
        "EcsCluster",
        "EcsTaskRole"
      ],
      "Properties": {
        "ExecutionRoleArn": {
          "Fn::GetAtt": ["EcsTaskRole", "Arn"]
        },
        "RequiresCompatibilities": [
          "FARGATE"
        ],
        "NetworkMode": "awsvpc",
        "Cpu": "1024",
        "Memory": "2048",
        "ContainerDefinitions": [
          {
            "Name": "test-web",
            "Image": "xxxxxxxxxxxx.dkr.ecr.us-east-1.amazonaws.com/test-web:latest",
            "Cpu": 1024,
            "Memory": 2048,
            "PortMappings": [
              {
                "ContainerPort": 80,
                "HostPort": 80
              }
            ],
            "Essential": true
          }
        ]
      }
    },

    "EcsService": {
      "Type": "AWS::ECS::Service",
      "DependsOn": [
        "LoadBalancerListener",
        "WebServerTaskDefinition"
      ],
      "Properties": {
        "Cluster": {
          "Ref": "EcsCluster"
        },
        "LaunchType": "FARGATE",
        "DesiredCount": 1,
        "DeploymentConfiguration": {
          "MaximumPercent": 100,
          "MinimumHealthyPercent": 0
        },
        "LoadBalancers": [
          {
            "ContainerName": "test-web",
            "ContainerPort": 80,
            "TargetGroupArn": {
              "Ref": "TargetGroup"
            }
          }
        ],
        "NetworkConfiguration": {
          "AwsvpcConfiguration": {
            "AssignPublicIp": "DISABLED",
            "SecurityGroups": [
              { "Ref": "InstanceSecurityGroupOpenWeb" },
              { "Ref": "InstanceSecurityGroupOpenFull" }
            ],
            "Subnets": [
              "subnet-aaaaaaaa",
              "subnet-bbbbbbbb",
              "subnet-cccccccc"
            ]
          }
        },
        "TaskDefinition": {
          "Ref": "WebServerTaskDefinition"
        }
      }
    }

  }
}
like image 533
Ludo Avatar asked Mar 08 '18 14:03

Ludo


People also ask

Why did my ECS Task stop?

Your Amazon ECS tasks might stop due to a variety of reasons. The most common reasons are: Essential container exited. Failed Elastic Load Balancing (ELB) health checks.

Does ECS handle load balancing?

Your Amazon ECS service can optionally be configured to use Elastic Load Balancing to distribute traffic evenly across the tasks in your service. When you use tasks sets, all the tasks in the set must all be configured to use Elastic Load Balancing or to not use Elastic Load Balancing.

What is the most important metric to monitor in ECS?

The most useful statistic is Average. Unit: Percent. If you're using tasks with the EC2 launch type and have Linux container instances, the Amazon ECS container agent relies on Docker stats metrics to gather CPU and memory data for each container running on the instance.


1 Answer

Go to the ECS console home page and locate your cluster (EcsCluster).

On the cluster's dashboard page, you will see its services and the active/pending tasks.

If you drill further down inside EcsCluster:

  • On the 'Services' tab, click EcsService
  • Go to the 'Tasks' tab; the table has a 'task status' filter (Running/Stopped)
  • Click "Stopped"

You should then be able to see the reason why each task was stopped.

like image 169
Aniket Chopade Avatar answered Oct 02 '22 00:10

Aniket Chopade