From e21044d59a167875b78d47203262defdf2397171 Mon Sep 17 00:00:00 2001 From: Bryce Soghigian Date: Wed, 4 Dec 2024 16:15:56 -0800 Subject: [PATCH 1/4] test: node to node connectivity --- test/suites/networking/suite_test.go | 248 +++++++++++++++++++++++++++ 1 file changed, 248 insertions(+) create mode 100644 test/suites/networking/suite_test.go diff --git a/test/suites/networking/suite_test.go b/test/suites/networking/suite_test.go new file mode 100644 index 000000000..b832549f9 --- /dev/null +++ b/test/suites/networking/suite_test.go @@ -0,0 +1,248 @@ +package networking_test + +import ( + "context" + "encoding/json" + "fmt" + "io/ioutil" + "net/http" + "testing" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/utils/ptr" + + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/Azure/karpenter-provider-azure/pkg/apis/v1alpha2" + "github.com/Azure/karpenter-provider-azure/test/pkg/environment/azure" + corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" + //"sigs.k8s.io/karpenter/pkg/test" +) + + + +var env *azure.Environment +var nodeClass *v1alpha2.AKSNodeClass +var nodePool *corev1beta1.NodePool +var ns string +func TestNetworking(t *testing.T) { + RegisterFailHandler(Fail) + BeforeSuite(func(){ + + env = azure.NewEnvironment(t) + ns = "default" + // TODO: Migrate to karpenter test helpers + serviceAccount := createServiceAccount(ns) + clusterRole := createClusterRole() + clusterRoleBinding := createClusterRoleBinding(ns) + daemonSet := createDaemonSet(ns) + service := createService(ns) + By("Creating Goldpinger resources") + env.ExpectCreated(serviceAccount, clusterRole, clusterRoleBinding, daemonSet, service) + RunSpecs(t, "Networking") + }) +} + +var _ = Describe("Networking", func() { + Describe("GoldPinger", func(){ + It("should ensure Goldpinger pods are ready", func() { + By("Waiting for Goldpinger pods to be ready") + Eventually(func() int { + pods := &corev1.PodList{} + err := env.Client.List(context.TODO(), pods, client.MatchingLabels{"app": "goldpinger"}) + Expect(err).NotTo(HaveOccurred(), "Failed to list Goldpinger pods") + readyCount := 0 + for _, pod := range pods.Items { + for _, condition := range pod.Status.Conditions { + if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue { + readyCount++ + } + } + } + return readyCount + }, 5*time.Minute, 10*time.Second).Should(BeNumerically(">=", 10), "Not all Goldpinger pods are ready") + }) + + It("should verify node-to-node connectivity", func() { + By("Fetching node connectivity status from Goldpinger") + resp, err := http.Get("http://goldpinger.default.svc.cluster.local:8080/check_all") + Expect(err).NotTo(HaveOccurred(), "Failed to reach Goldpinger service") + defer resp.Body.Close() + + body, err := ioutil.ReadAll(resp.Body) + Expect(err).NotTo(HaveOccurred(), "Failed to read Goldpinger response body") + + var checkAllResponse CheckAllResponse + err = json.Unmarshal(body, &checkAllResponse) + Expect(err).NotTo(HaveOccurred(), "Failed to parse Goldpinger response JSON") + + for node, status := range checkAllResponse.Nodes { + Expect(status.Status).To(Equal("ok"), fmt.Sprintf("Node %s is not reachable", node)) + } + }) + }) +}) + + +// --------------------- Test Helpers ------------------------ // +type NodeStatus struct { + Status string `json:"status"` + Latency int `json:"latency"` +} + +type CheckAllResponse struct { + Nodes map[string]NodeStatus `json:"nodes"` +} + +func createServiceAccount(namespace string) *corev1.ServiceAccount { + return &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: "goldpinger-serviceaccount", + Namespace: namespace, + }, + } +} + +func createClusterRole() *rbacv1.ClusterRole { + return &rbacv1.ClusterRole{ + ObjectMeta: metav1.ObjectMeta{ + Name: "goldpinger-clusterrole", + }, + Rules: []rbacv1.PolicyRule{ + { + APIGroups: []string{""}, + Resources: []string{"pods", "nodes", "daemonsets"}, + Verbs: []string{"list", "get", "watch"}, + }, + }, + } +} + +func createClusterRoleBinding(namespace string) *rbacv1.ClusterRoleBinding { + return &rbacv1.ClusterRoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: "goldpinger-clusterrolebinding", + }, + Subjects: []rbacv1.Subject{ + { + Kind: "ServiceAccount", + Name: "goldpinger-serviceaccount", + Namespace: namespace, + }, + }, + RoleRef: rbacv1.RoleRef{ + APIGroup: "rbac.authorization.k8s.io", + Kind: "ClusterRole", + Name: "goldpinger-clusterrole", + }, + } +} + +func createDaemonSet(namespace string) *appsv1.DaemonSet { + return &appsv1.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "goldpinger-daemon", + Namespace: namespace, + Labels: map[string]string{"app": "goldpinger"}, + }, + Spec: appsv1.DaemonSetSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": "goldpinger"}, + }, + UpdateStrategy: appsv1.DaemonSetUpdateStrategy{ + Type: appsv1.RollingUpdateDaemonSetStrategyType, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"app": "goldpinger"}, + Annotations: map[string]string{ + "prometheus.io/scrape": "true", + "prometheus.io/port": "8080", + }, + }, + Spec: corev1.PodSpec{ + ServiceAccountName: "goldpinger-serviceaccount", + HostNetwork: true, + Containers: []corev1.Container{ + { + Name: "goldpinger", + Image: "docker.io/bloomberg/goldpinger:v3.0.0", + Env: []corev1.EnvVar{ + {Name: "USE_HOST_IP", Value: "true"}, + {Name: "HOST", Value: "0.0.0.0"}, + {Name: "PORT", Value: "8080"}, + }, + Ports: []corev1.ContainerPort{ + {ContainerPort: 8080, Name: "http"}, + }, + }, + }, + }, + }, + }, + } +} + +func createService(namespace string) *corev1.Service { + return &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "goldpinger", + Namespace: namespace, + Labels: map[string]string{"app": "goldpinger"}, + }, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeNodePort, + Ports: []corev1.ServicePort{ + { + Port: 8080, + TargetPort: intstr.FromInt(8080), + NodePort: 30080, + Name: "http", + }, + }, + Selector: map[string]string{"app": "goldpinger"}, + }, + } +} + +func createDeployment(namespace string) *appsv1.Deployment { + return &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "goldpinger-deploy", + Namespace: namespace, + Labels: map[string]string{"app": "goldpinger"}, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: ptr.To[int32](10), + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": "goldpinger"}, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"app": "goldpinger"}, + }, + Spec: corev1.PodSpec{ + ServiceAccountName: "goldpinger-serviceaccount", + Containers: []corev1.Container{ + { + Name: "goldpinger", + Image: "docker.io/bloomberg/goldpinger:v3.0.0", + Ports: []corev1.ContainerPort{ + {ContainerPort: 8080, Name: "http"}, + }, + }, + }, + }, + }, + }, + } +} + From 53bf13baa3b17c3e3cbec00333cda0eb2d2feab2 Mon Sep 17 00:00:00 2001 From: Bryce Soghigian Date: Wed, 4 Dec 2024 21:22:50 -0800 Subject: [PATCH 2/4] test: fix deployment and nodepools --- test/suites/networking/suite_test.go | 89 ++++++++++++++++++---------- 1 file changed, 59 insertions(+), 30 deletions(-) diff --git a/test/suites/networking/suite_test.go b/test/suites/networking/suite_test.go index b832549f9..283463158 100644 --- a/test/suites/networking/suite_test.go +++ b/test/suites/networking/suite_test.go @@ -36,25 +36,44 @@ var ns string func TestNetworking(t *testing.T) { RegisterFailHandler(Fail) BeforeSuite(func(){ - env = azure.NewEnvironment(t) ns = "default" - // TODO: Migrate to karpenter test helpers - serviceAccount := createServiceAccount(ns) - clusterRole := createClusterRole() - clusterRoleBinding := createClusterRoleBinding(ns) - daemonSet := createDaemonSet(ns) - service := createService(ns) - By("Creating Goldpinger resources") - env.ExpectCreated(serviceAccount, clusterRole, clusterRoleBinding, daemonSet, service) - RunSpecs(t, "Networking") }) + AfterSuite(func() { + By("Cleaning up Goldpinger resources") + env.ExpectDeleted( + &corev1.ServiceAccount{ObjectMeta: metav1.ObjectMeta{Name: "goldpinger-serviceaccount", Namespace: ns}}, + &rbacv1.ClusterRole{ObjectMeta: metav1.ObjectMeta{Name: "goldpinger-clusterrole"}}, + &rbacv1.ClusterRoleBinding{ObjectMeta: metav1.ObjectMeta{Name: "goldpinger-clusterrolebinding"}}, + &appsv1.DaemonSet{ObjectMeta: metav1.ObjectMeta{Name: "goldpinger-daemon", Namespace: ns}}, + &corev1.Service{ObjectMeta: metav1.ObjectMeta{Name: "goldpinger", Namespace: ns}}, + &appsv1.Deployment{ObjectMeta: metav1.ObjectMeta{Name: "goldpinger-deploy", Namespace: ns}}, + ) + }) + + RunSpecs(t, "Networking") } +var _ = BeforeEach(func() { env.BeforeEach() }) +var _ = AfterEach(func() { env.Cleanup() }) +var _ = AfterEach(func() { env.AfterEach() }) + + var _ = Describe("Networking", func() { Describe("GoldPinger", func(){ - It("should ensure Goldpinger pods are ready", func() { + It("should ensure goldpinger resources are all deployed", func() { By("Waiting for Goldpinger pods to be ready") + serviceAccount := createServiceAccount(ns) + clusterRole := createClusterRole() + clusterRoleBinding := createClusterRoleBinding(ns) + daemonSet := createDaemonSet(ns) + service := createService(ns) + deployment := createDeployment(ns) + + nodeClass := env.DefaultAKSNodeClass() + nodePool := env.DefaultNodePool(nodeClass) + By("Creating Goldpinger resources") + env.ExpectCreated(serviceAccount, clusterRole, clusterRoleBinding, daemonSet, service, deployment, nodePool) Eventually(func() int { pods := &corev1.PodList{} err := env.Client.List(context.TODO(), pods, client.MatchingLabels{"app": "goldpinger"}) @@ -69,6 +88,14 @@ var _ = Describe("Networking", func() { } return readyCount }, 5*time.Minute, 10*time.Second).Should(BeNumerically(">=", 10), "Not all Goldpinger pods are ready") + Eventually(func() string { + svc := &corev1.Service{} + err := env.Client.Get(context.TODO(), client.ObjectKey{Name: "goldpinger", Namespace: ns}, svc) + if err != nil { + return "" + } + return svc.Spec.ClusterIP + }, 2*time.Minute, 10*time.Second).ShouldNot(BeEmpty(), "Goldpinger service ClusterIP not assigned") }) It("should verify node-to-node connectivity", func() { @@ -87,8 +114,10 @@ var _ = Describe("Networking", func() { for node, status := range checkAllResponse.Nodes { Expect(status.Status).To(Equal("ok"), fmt.Sprintf("Node %s is not reachable", node)) } + time.Sleep(time.Hour * 1) }) }) + }) @@ -191,26 +220,26 @@ func createDaemonSet(namespace string) *appsv1.DaemonSet { } } + func createService(namespace string) *corev1.Service { - return &corev1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: "goldpinger", - Namespace: namespace, - Labels: map[string]string{"app": "goldpinger"}, - }, - Spec: corev1.ServiceSpec{ - Type: corev1.ServiceTypeNodePort, - Ports: []corev1.ServicePort{ - { - Port: 8080, - TargetPort: intstr.FromInt(8080), - NodePort: 30080, - Name: "http", - }, - }, - Selector: map[string]string{"app": "goldpinger"}, - }, - } + return &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "goldpinger", + Namespace: namespace, + Labels: map[string]string{"app": "goldpinger"}, + }, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeNodePort, + Ports: []corev1.ServicePort{ + { + Port: 8080, + TargetPort: intstr.FromInt(8080), + Name: "http", + }, + }, + Selector: map[string]string{"app": "goldpinger"}, + }, + } } func createDeployment(namespace string) *appsv1.Deployment { From 1ee63fb14e15b84016c567f4248c4904a6e84cc7 Mon Sep 17 00:00:00 2001 From: Bryce Soghigian Date: Thu, 5 Dec 2024 13:18:03 -0800 Subject: [PATCH 3/4] test: adding topology spread constraints to spread the goldpinger pods --- test/suites/networking/suite_test.go | 43 +++++++++++++++++++--------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/test/suites/networking/suite_test.go b/test/suites/networking/suite_test.go index 283463158..2f2e1654f 100644 --- a/test/suites/networking/suite_test.go +++ b/test/suites/networking/suite_test.go @@ -37,7 +37,7 @@ func TestNetworking(t *testing.T) { RegisterFailHandler(Fail) BeforeSuite(func(){ env = azure.NewEnvironment(t) - ns = "default" + ns = "default" }) AfterSuite(func() { By("Cleaning up Goldpinger resources") @@ -61,8 +61,12 @@ var _ = AfterEach(func() { env.AfterEach() }) var _ = Describe("Networking", func() { Describe("GoldPinger", func(){ - It("should ensure goldpinger resources are all deployed", func() { - By("Waiting for Goldpinger pods to be ready") + It("should ensure goldpinger resources are all deployed and are reachable", func() { + nodeClass := env.DefaultAKSNodeClass() + nodePool := env.DefaultNodePool(nodeClass) + env.ExpectCreated(nodeClass, nodePool) + + By("should configure all k8s resources needed") serviceAccount := createServiceAccount(ns) clusterRole := createClusterRole() clusterRoleBinding := createClusterRoleBinding(ns) @@ -70,10 +74,8 @@ var _ = Describe("Networking", func() { service := createService(ns) deployment := createDeployment(ns) - nodeClass := env.DefaultAKSNodeClass() - nodePool := env.DefaultNodePool(nodeClass) - By("Creating Goldpinger resources") - env.ExpectCreated(serviceAccount, clusterRole, clusterRoleBinding, daemonSet, service, deployment, nodePool) + env.ExpectCreated(serviceAccount, clusterRole, clusterRoleBinding, daemonSet, service, deployment) + By("should scale up the goldpinger-deploy pods for pod to pod connectivity testing and to scale up karp nodes") Eventually(func() int { pods := &corev1.PodList{} err := env.Client.List(context.TODO(), pods, client.MatchingLabels{"app": "goldpinger"}) @@ -87,7 +89,8 @@ var _ = Describe("Networking", func() { } } return readyCount - }, 5*time.Minute, 10*time.Second).Should(BeNumerically(">=", 10), "Not all Goldpinger pods are ready") + }, 15*time.Minute, 10*time.Second).Should(BeNumerically(">=", 10), "Not all Goldpinger pods are ready") + By("should ensure gold pinger service has clusterIP assigned") Eventually(func() string { svc := &corev1.Service{} err := env.Client.Get(context.TODO(), client.ObjectKey{Name: "goldpinger", Namespace: ns}, svc) @@ -96,10 +99,7 @@ var _ = Describe("Networking", func() { } return svc.Spec.ClusterIP }, 2*time.Minute, 10*time.Second).ShouldNot(BeEmpty(), "Goldpinger service ClusterIP not assigned") - }) - - It("should verify node-to-node connectivity", func() { - By("Fetching node connectivity status from Goldpinger") + By("Fetching node connectivity status from Goldpinger") resp, err := http.Get("http://goldpinger.default.svc.cluster.local:8080/check_all") Expect(err).NotTo(HaveOccurred(), "Failed to reach Goldpinger service") defer resp.Body.Close() @@ -110,10 +110,12 @@ var _ = Describe("Networking", func() { var checkAllResponse CheckAllResponse err = json.Unmarshal(body, &checkAllResponse) Expect(err).NotTo(HaveOccurred(), "Failed to parse Goldpinger response JSON") - + for node, status := range checkAllResponse.Nodes { + // This checks that all other nodes in the cluster can reach this node Expect(status.Status).To(Equal("ok"), fmt.Sprintf("Node %s is not reachable", node)) } + // TODO: Check pod stats to see if pod to pod communciation works time.Sleep(time.Hour * 1) }) }) @@ -128,7 +130,9 @@ type NodeStatus struct { } type CheckAllResponse struct { - Nodes map[string]NodeStatus `json:"nodes"` + Nodes map[string]NodeStatus `json:"nodes"` // For node-to-node connectivity + Pods map[string]map[string]NodeStatus `json:"pods"` // For pod-to-pod reachability + PacketLoss map[string]float64 `json:"packet_loss"` // For packet loss (if it occurred) } func createServiceAccount(namespace string) *corev1.ServiceAccount { @@ -259,6 +263,17 @@ func createDeployment(namespace string) *appsv1.Deployment { Labels: map[string]string{"app": "goldpinger"}, }, Spec: corev1.PodSpec{ + // We want to validate node to node communication so we need to spread the deployment between many karpenter nodes + TopologySpreadConstraints: []corev1.TopologySpreadConstraint{ + { + MaxSkew: 1, + TopologyKey: "kubernetes.io/hostname", + WhenUnsatisfiable: corev1.DoNotSchedule, + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": "goldpinger"}, + }, + }, + }, ServiceAccountName: "goldpinger-serviceaccount", Containers: []corev1.Container{ { From 2aa5c11889c461944094c75840b8cdd3d3b1cc14 Mon Sep 17 00:00:00 2001 From: Bryce Soghigian Date: Fri, 6 Dec 2024 15:21:20 -0800 Subject: [PATCH 4/4] fix: goldpinger separation --- Makefile-az.mk | 17 ++- test/suites/networking/suite_test.go | 164 ++++++++++++--------------- 2 files changed, 88 insertions(+), 93 deletions(-) diff --git a/Makefile-az.mk b/Makefile-az.mk index 6702658b0..98ff3cfa0 100755 --- a/Makefile-az.mk +++ b/Makefile-az.mk @@ -1,5 +1,5 @@ -AZURE_LOCATION ?= westus2 -COMMON_NAME ?= karpenter +AZURE_LOCATION ?= eastus +COMMON_NAME ?= karpe ifeq ($(CODESPACES),true) AZURE_RESOURCE_GROUP ?= $(CODESPACE_NAME) AZURE_ACR_NAME ?= $(subst -,,$(CODESPACE_NAME)) @@ -19,7 +19,6 @@ CUSTOM_VNET_NAME ?= $(AZURE_CLUSTER_NAME)-vnet CUSTOM_SUBNET_NAME ?= nodesubnet az-all: az-login az-create-workload-msi az-mkaks-cilium az-create-federated-cred az-perm az-perm-acr az-configure-values az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload - az-all-cniv1: az-login az-create-workload-msi az-mkaks-cniv1 az-create-federated-cred az-perm az-perm-acr az-configure-values az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload az-all-cni-overlay: az-login az-create-workload-msi az-mkaks-overlay az-create-federated-cred az-perm az-perm-acr az-configure-values az-build az-run az-run-sample ## Provision the infra (ACR,AKS); build and deploy Karpenter; deploy sample Provisioner and workload @@ -46,7 +45,12 @@ az-mkacr: az-mkrg ## Create test ACR az-acrimport: ## Imports an image to an acr registry az acr import --name $(AZURE_ACR_NAME) --source "mcr.microsoft.com/oss/kubernetes/pause:3.6" --image "pause:3.6" +az-rmpolicy: + az aks disable-addons --addons azure-policy --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) + + az-cleanenv: az-rmnodeclaims-fin ## Deletes a few common karpenter testing resources(pods, nodepools, nodeclaims, aksnodeclasses) + kubectl delete deployments -n default --all kubectl delete pods -n default --all kubectl delete nodeclaims --all kubectl delete nodepools --all @@ -252,7 +256,7 @@ az-taintnodes: az-e2etests: ## Run e2etests kubectl taint nodes CriticalAddonsOnly=true:NoSchedule --all --overwrite - TEST_SUITE=Utilization make e2etests + TEST_SUITE=Networking make e2etests kubectl taint nodes CriticalAddonsOnly=true:NoSchedule- --all az-perftest1: ## Test scaling out/in (1 VM) @@ -370,3 +374,8 @@ az-swagger-generate-clients-raw: az-swagger-generate-clients: az-swagger-generate-clients-raw hack/boilerplate.sh make tidy + +az-deploy-goldpinger: ## Deploy gold pinger for testing networking + kubectl apply -f https://gist.githubusercontent.com/paulgmiller/084bd4605f1661a329e5ab891a826ae0/raw/94a32d259e137bb300ac8af3ef71caa471463f23/goldpinger-daemon.yaml + kubectl apply -f https://gist.githubusercontent.com/paulgmiller/7bca68cd08cccb4e9bc72b0a08485edf/raw/d6a103fb79a65083f6555e4d822554ed64f510f8/goldpinger-deploy.yaml + diff --git a/test/suites/networking/suite_test.go b/test/suites/networking/suite_test.go index 2f2e1654f..aacbe50dd 100644 --- a/test/suites/networking/suite_test.go +++ b/test/suites/networking/suite_test.go @@ -16,6 +16,7 @@ import ( corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/utils/ptr" @@ -23,24 +24,23 @@ import ( "github.com/Azure/karpenter-provider-azure/pkg/apis/v1alpha2" "github.com/Azure/karpenter-provider-azure/test/pkg/environment/azure" - corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" - //"sigs.k8s.io/karpenter/pkg/test" + karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" ) - - var env *azure.Environment var nodeClass *v1alpha2.AKSNodeClass -var nodePool *corev1beta1.NodePool +var nodePool *karpv1.NodePool var ns string + func TestNetworking(t *testing.T) { RegisterFailHandler(Fail) - BeforeSuite(func(){ + BeforeSuite(func() { env = azure.NewEnvironment(t) ns = "default" }) AfterSuite(func() { By("Cleaning up Goldpinger resources") + // TODO: Move into env.Cleanup() env.ExpectDeleted( &corev1.ServiceAccount{ObjectMeta: metav1.ObjectMeta{Name: "goldpinger-serviceaccount", Namespace: ns}}, &rbacv1.ClusterRole{ObjectMeta: metav1.ObjectMeta{Name: "goldpinger-clusterrole"}}, @@ -58,70 +58,57 @@ var _ = BeforeEach(func() { env.BeforeEach() }) var _ = AfterEach(func() { env.Cleanup() }) var _ = AfterEach(func() { env.AfterEach() }) - var _ = Describe("Networking", func() { - Describe("GoldPinger", func(){ - It("should ensure goldpinger resources are all deployed and are reachable", func() { - nodeClass := env.DefaultAKSNodeClass() - nodePool := env.DefaultNodePool(nodeClass) - env.ExpectCreated(nodeClass, nodePool) + Describe("GoldPinger", func() { + It("should ensure goldpinger resources are all deployed", func() { + nodeClass := env.DefaultAKSNodeClass() + nodePool := env.DefaultNodePool(nodeClass) + env.ExpectCreated(nodeClass, nodePool) - By("should configure all k8s resources needed") - serviceAccount := createServiceAccount(ns) - clusterRole := createClusterRole() - clusterRoleBinding := createClusterRoleBinding(ns) - daemonSet := createDaemonSet(ns) - service := createService(ns) - deployment := createDeployment(ns) - - env.ExpectCreated(serviceAccount, clusterRole, clusterRoleBinding, daemonSet, service, deployment) - By("should scale up the goldpinger-deploy pods for pod to pod connectivity testing and to scale up karp nodes") - Eventually(func() int { - pods := &corev1.PodList{} - err := env.Client.List(context.TODO(), pods, client.MatchingLabels{"app": "goldpinger"}) - Expect(err).NotTo(HaveOccurred(), "Failed to list Goldpinger pods") - readyCount := 0 - for _, pod := range pods.Items { - for _, condition := range pod.Status.Conditions { - if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue { - readyCount++ - } - } - } - return readyCount - }, 15*time.Minute, 10*time.Second).Should(BeNumerically(">=", 10), "Not all Goldpinger pods are ready") - By("should ensure gold pinger service has clusterIP assigned") - Eventually(func() string { - svc := &corev1.Service{} - err := env.Client.Get(context.TODO(), client.ObjectKey{Name: "goldpinger", Namespace: ns}, svc) - if err != nil { - return "" - } - return svc.Spec.ClusterIP - }, 2*time.Minute, 10*time.Second).ShouldNot(BeEmpty(), "Goldpinger service ClusterIP not assigned") - By("Fetching node connectivity status from Goldpinger") - resp, err := http.Get("http://goldpinger.default.svc.cluster.local:8080/check_all") - Expect(err).NotTo(HaveOccurred(), "Failed to reach Goldpinger service") - defer resp.Body.Close() - - body, err := ioutil.ReadAll(resp.Body) - Expect(err).NotTo(HaveOccurred(), "Failed to read Goldpinger response body") + By("should configure all k8s resources needed") + serviceAccount := createServiceAccount(ns) + clusterRole := createClusterRole() + clusterRoleBinding := createClusterRoleBinding(ns) + daemonSet := createDaemonSet(ns) + service := createService(ns) + deployment := createDeployment(ns) + + env.ExpectCreated(serviceAccount, clusterRole, clusterRoleBinding, daemonSet, service, deployment) + By("should scale up the goldpinger-deploy pods for pod to pod connectivity testing and to scale up karp nodes") - var checkAllResponse CheckAllResponse - err = json.Unmarshal(body, &checkAllResponse) - Expect(err).NotTo(HaveOccurred(), "Failed to parse Goldpinger response JSON") + env.ExpectCreatedNodeCount("==", 10) + env.EventuallyExpectHealthyPodCountWithTimeout(time.Minute*15, labels.SelectorFromSet(deployment.Spec.Selector.MatchLabels), 10) + By("should ensure gold pinger service has clusterIP assigned") + Eventually(func() string { + svc := &corev1.Service{} + err := env.Client.Get(context.TODO(), client.ObjectKey{Name: "goldpinger", Namespace: ns}, svc) + if err != nil { + return "" + } + return svc.Spec.ClusterIP + }, 2*time.Minute, 10*time.Second).ShouldNot(BeEmpty(), "Goldpinger service ClusterIP not assigned") + By("Fetching node connectivity status from Goldpinger") + resp, err := http.Get("http://goldpinger.default.svc.cluster.local:8080/check_all") + Expect(err).NotTo(HaveOccurred(), "Failed to reach Goldpinger service") + defer resp.Body.Close() - for node, status := range checkAllResponse.Nodes { - // This checks that all other nodes in the cluster can reach this node - Expect(status.Status).To(Equal("ok"), fmt.Sprintf("Node %s is not reachable", node)) - } - // TODO: Check pod stats to see if pod to pod communciation works - time.Sleep(time.Hour * 1) - }) + body, err := ioutil.ReadAll(resp.Body) + Expect(err).NotTo(HaveOccurred(), "Failed to read Goldpinger response body") + + var checkAllResponse CheckAllResponse + err = json.Unmarshal(body, &checkAllResponse) + Expect(err).NotTo(HaveOccurred(), "Failed to parse Goldpinger response JSON") + + for node, status := range checkAllResponse.Nodes { + // This checks that all other nodes in the cluster can reach this node + Expect(status.Status).To(Equal("ok"), fmt.Sprintf("Node %s is not reachable", node)) + } + // TODO: Check pod stats to see if pod to pod communciation works + time.Sleep(time.Hour * 1) + }) }) - -}) +}) // --------------------- Test Helpers ------------------------ // type NodeStatus struct { @@ -130,9 +117,9 @@ type NodeStatus struct { } type CheckAllResponse struct { - Nodes map[string]NodeStatus `json:"nodes"` // For node-to-node connectivity - Pods map[string]map[string]NodeStatus `json:"pods"` // For pod-to-pod reachability - PacketLoss map[string]float64 `json:"packet_loss"` // For packet loss (if it occurred) + Nodes map[string]NodeStatus `json:"nodes"` // For node-to-node connectivity + Pods map[string]map[string]NodeStatus `json:"pods"` // For pod-to-pod reachability + PacketLoss map[string]float64 `json:"packet_loss"` // For packet loss (if it occurred) } func createServiceAccount(namespace string) *corev1.ServiceAccount { @@ -224,26 +211,25 @@ func createDaemonSet(namespace string) *appsv1.DaemonSet { } } - func createService(namespace string) *corev1.Service { - return &corev1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: "goldpinger", - Namespace: namespace, - Labels: map[string]string{"app": "goldpinger"}, - }, - Spec: corev1.ServiceSpec{ - Type: corev1.ServiceTypeNodePort, - Ports: []corev1.ServicePort{ - { - Port: 8080, - TargetPort: intstr.FromInt(8080), - Name: "http", - }, - }, - Selector: map[string]string{"app": "goldpinger"}, - }, - } + return &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "goldpinger", + Namespace: namespace, + Labels: map[string]string{"app": "goldpinger"}, + }, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeNodePort, + Ports: []corev1.ServicePort{ + { + Port: 8080, + TargetPort: intstr.FromInt(8080), + Name: "http", + }, + }, + Selector: map[string]string{"app": "goldpinger"}, + }, + } } func createDeployment(namespace string) *appsv1.Deployment { @@ -266,14 +252,15 @@ func createDeployment(namespace string) *appsv1.Deployment { // We want to validate node to node communication so we need to spread the deployment between many karpenter nodes TopologySpreadConstraints: []corev1.TopologySpreadConstraint{ { - MaxSkew: 1, - TopologyKey: "kubernetes.io/hostname", + MaxSkew: 1, + TopologyKey: "kubernetes.io/hostname", WhenUnsatisfiable: corev1.DoNotSchedule, LabelSelector: &metav1.LabelSelector{ MatchLabels: map[string]string{"app": "goldpinger"}, }, }, }, + // TODO: Contribute ServiceAccountName and Containers to the karpenter-core test.PodOptions ServiceAccountName: "goldpinger-serviceaccount", Containers: []corev1.Container{ { @@ -289,4 +276,3 @@ func createDeployment(namespace string) *appsv1.Deployment { }, } } -