From 45e0c39ddf18bdaee161e05e5172dfeae25cb931 Mon Sep 17 00:00:00 2001 From: Bryce Soghigian <49734722+Bryce-Soghigian@users.noreply.github.com> Date: Tue, 4 Jun 2024 13:08:30 -0700 Subject: [PATCH] feat: supporting 1.30 bootstrap via enabling out of tree credential provider (#370) * feat: supporting 1.30 bootstrap via removing kubelet flag --azure-container-registry-config for 1.30 onwards * feat: adding support for using out of tree credentials in karpenter * test: adding additional validation in our testing for the flags for out of tree provider * fix: adding back in cgroupsv2 from rebase * fix: removing flag from base flags again * fix: fixing tests to properly validate the kubelet flags * fix: ci * fix: adding CSE VAR to contract that was not cherry picked properly from other pr * fix: 1.30 ci * test(unit): fixing client version we get for bootstrap to use the same source of truth * fix: ci lint * Update pkg/providers/imagefamily/bootstrap/aksbootstrap.go Co-authored-by: Alex Leites <18728999+tallaxes@users.noreply.github.com> * chore: adding util for checking if we need to use OOT Credential * chore: moving the OOTCredential CSE var to only be populated for 1.30+ * Update hack/toolchain.sh Co-authored-by: Alex Leites <18728999+tallaxes@users.noreply.github.com> --------- Co-authored-by: Alex Leites <18728999+tallaxes@users.noreply.github.com> --- .github/workflows/ci-test.yml | 2 +- go.mod | 2 +- .../imagefamily/bootstrap/aksbootstrap.go | 20 +++++-- .../imagefamily/bootstrap/cse_cmd.sh.gtpl | 1 + pkg/providers/instancetype/suite_test.go | 52 +++++++++++++++++++ pkg/utils/utils.go | 4 ++ 6 files changed, 74 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci-test.yml b/.github/workflows/ci-test.yml index 142e206a6..c58a85c31 100644 --- a/.github/workflows/ci-test.yml +++ b/.github/workflows/ci-test.yml @@ -14,7 +14,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - k8sVersion: ["1.25.x", "1.26.x", "1.27.x", "1.28.x", "1.29.x"] + k8sVersion: ["1.25.x", "1.26.x", "1.27.x", "1.28.x", "1.29.x", "1.30.x"] env: K8S_VERSION: ${{ matrix.k8sVersion }} steps: diff --git a/go.mod b/go.mod index e8370d1df..4dbb9eed0 100644 --- a/go.mod +++ b/go.mod @@ -18,6 +18,7 @@ require ( github.com/Azure/skewer v0.0.19 github.com/Pallinder/go-randomdata v1.2.0 github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 + github.com/blang/semver/v4 v4.0.0 github.com/go-logr/logr v1.4.1 github.com/go-logr/zapr v1.3.0 github.com/go-playground/validator/v10 v10.19.0 @@ -66,7 +67,6 @@ require ( github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 // indirect github.com/avast/retry-go v3.0.0+incompatible // indirect github.com/beorn7/perks v1.0.1 // indirect - github.com/blang/semver/v4 v4.0.0 // indirect github.com/blendle/zapdriver v1.3.1 // indirect github.com/census-instrumentation/opencensus-proto v0.4.1 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect diff --git a/pkg/providers/imagefamily/bootstrap/aksbootstrap.go b/pkg/providers/imagefamily/bootstrap/aksbootstrap.go index 609a6d43b..f4e9b2260 100644 --- a/pkg/providers/imagefamily/bootstrap/aksbootstrap.go +++ b/pkg/providers/imagefamily/bootstrap/aksbootstrap.go @@ -25,6 +25,7 @@ import ( "text/template" "github.com/Azure/karpenter-provider-azure/pkg/utils" + "github.com/blang/semver/v4" "github.com/samber/lo" v1 "k8s.io/api/core/v1" "knative.dev/pkg/ptr" @@ -103,6 +104,7 @@ type NodeBootstrapVariables struct { KubernetesVersion string // ? cluster/node pool specific, derived from user input HyperkubeURL string // - should be unnecessary KubeBinaryURL string // - necessary only for non-cached versions / static-ish + CredentialProviderDownloadURL string // - necessary only for non-cached versions / static-ish CustomKubeBinaryURL string // - unnecessary KubeproxyURL string // - should be unnecessary or bug APIServerPublicKey string // - unique per cluster, actually not sure best way to extract? [should not be needed on agent nodes] @@ -238,12 +240,12 @@ var ( // source note: unique per nodepool. partially user-specified, static, and RP-generated // removed --image-pull-progress-deadline=30m (not in 1.24?) // removed --network-plugin=cni (not in 1.24?) + // removed --azure-container-registry-config (not in 1.30) kubeletFlagsBase = map[string]string{ "--address": "0.0.0.0", "--anonymous-auth": "false", "--authentication-token-webhook": "true", "--authorization-mode": "Webhook", - "--azure-container-registry-config": "/etc/kubernetes/azure.json", "--cgroups-per-qos": "true", "--client-ca-file": "/etc/kubernetes/certs/ca.crt", "--cloud-config": "/etc/kubernetes/azure.json", @@ -385,7 +387,6 @@ var ( ContainerdConfigContent: "", // kd IsKata: false, // n NeedsCgroupV2: true, // s only static for karpenter - } ) @@ -437,7 +438,6 @@ func (a AKS) applyOptions(nbv *NodeBootstrapVariables) { nbv.KubeBinaryURL = kubeBinaryURL(a.KubernetesVersion, a.Arch) nbv.VNETCNILinuxPluginsURL = fmt.Sprintf("%s/azure-cni/v1.4.32/binaries/azure-vnet-cni-linux-%s-v1.4.32.tgz", globalAKSMirror, a.Arch) nbv.CNIPluginsURL = fmt.Sprintf("%s/cni-plugins/v1.1.1/binaries/cni-plugins-linux-%s-v1.1.1.tgz", globalAKSMirror, a.Arch) - // calculated values nbv.EnsureNoDupePromiscuousBridge = nbv.NeedsContainerd && nbv.NetworkPlugin == "kubenet" && nbv.NetworkPolicy != "calico" nbv.NetworkSecurityGroup = fmt.Sprintf("aks-agentpool-%s-nsg", a.ClusterID) @@ -449,6 +449,7 @@ func (a AKS) applyOptions(nbv *NodeBootstrapVariables) { nbv.GPUDriverVersion = a.GPUDriverVersion nbv.GPUImageSHA = a.GPUImageSHA } + // merge and stringify labels kubeletLabels := lo.Assign(kubeletNodeLabelsBase, a.Labels) getAgentbakerGeneratedLabels(a.ResourceGroup, kubeletLabels) @@ -462,6 +463,15 @@ func (a AKS) applyOptions(nbv *NodeBootstrapVariables) { return fmt.Sprintf("%s=%s", k, v) }), ",") + // Assign Per K8s version kubelet flags + minorVersion := semver.MustParse(a.KubernetesVersion).Minor + if utils.UseOOTCredential(minorVersion) { + nbv.CredentialProviderDownloadURL = fmt.Sprintf("https://acs-mirror.azureedge.net/cloud-provider-azure/%s/binaries/azure-acr-credential-provider-linux-amd64-v%s.tar.gz", nbv.KubernetesVersion, nbv.KubernetesVersion) + kubeletFlagsBase["--image-credential-provider-config"] = "/var/lib/kubelet/credential-provider-config.yaml" + kubeletFlagsBase["--image-credential-provider-bin-dir"] = "/var/lib/kubelet/credential-provider" + } else { // Versions Less than 1.30 + kubeletFlagsBase["--azure-container-registry-config"] = "/etc/kubernetes/azure.json" + } // merge and stringify taints kubeletFlags := lo.Assign(kubeletFlagsBase) if len(a.Taints) > 0 { @@ -469,8 +479,8 @@ func (a AKS) applyOptions(nbv *NodeBootstrapVariables) { kubeletFlags = lo.Assign(kubeletFlags, map[string]string{"--register-with-taints": strings.Join(taintStrs, ",")}) } - machineKubeletConfig := KubeletConfigToMap(a.KubeletConfig) - kubeletFlags = lo.Assign(kubeletFlags, machineKubeletConfig) + nodeclaimKubeletConfig := KubeletConfigToMap(a.KubeletConfig) + kubeletFlags = lo.Assign(kubeletFlags, nodeclaimKubeletConfig) // striginify kubelet flags (including taints) nbv.KubeletFlags = strings.Join(lo.MapToSlice(kubeletFlags, func(k, v string) string { diff --git a/pkg/providers/imagefamily/bootstrap/cse_cmd.sh.gtpl b/pkg/providers/imagefamily/bootstrap/cse_cmd.sh.gtpl index b7cbbabe3..1964b82fc 100644 --- a/pkg/providers/imagefamily/bootstrap/cse_cmd.sh.gtpl +++ b/pkg/providers/imagefamily/bootstrap/cse_cmd.sh.gtpl @@ -24,6 +24,7 @@ KUBERNETES_VERSION={{.KubernetesVersion}} HYPERKUBE_URL={{.HyperkubeURL}} KUBE_BINARY_URL={{.KubeBinaryURL}} CUSTOM_KUBE_BINARY_URL={{.CustomKubeBinaryURL}} +CREDENTIAL_PROVIDER_DOWNLOAD_URL={{.CredentialProviderDownloadURL}} KUBEPROXY_URL={{.KubeproxyURL}} APISERVER_PUBLIC_KEY={{.APIServerPublicKey}} SUBSCRIPTION_ID={{.SubscriptionID}} diff --git a/pkg/providers/instancetype/suite_test.go b/pkg/providers/instancetype/suite_test.go index ae5ba9081..f4a41e3ae 100644 --- a/pkg/providers/instancetype/suite_test.go +++ b/pkg/providers/instancetype/suite_test.go @@ -27,6 +27,7 @@ import ( "testing" "time" + "github.com/blang/semver/v4" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" "github.com/samber/lo" @@ -1091,6 +1092,57 @@ var _ = Describe("InstanceType Provider", func() { }) }) + Context("Bootstrap", func() { + It("should gate kubelet flags that are dependent on kubelet version", func() { + ExpectApplied(ctx, env.Client, nodePool, nodeClass) + pod := coretest.UnschedulablePod() + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, coreProvisioner, pod) + ExpectScheduled(ctx, env.Client, pod) + + Expect(azureEnv.VirtualMachinesAPI.VirtualMachineCreateOrUpdateBehavior.CalledWithInput.Len()).To(Equal(1)) + vm := azureEnv.VirtualMachinesAPI.VirtualMachineCreateOrUpdateBehavior.CalledWithInput.Pop().VM + customData := *vm.Properties.OSProfile.CustomData + Expect(customData).ToNot(BeNil()) + decodedBytes, err := base64.StdEncoding.DecodeString(customData) + Expect(err).To(Succeed()) + decodedString := string(decodedBytes[:]) + Expect(decodedString).To(ContainSubstring("CREDENTIAL_PROVIDER_DOWNLOAD_URL")) + kubeletFlags := decodedString[strings.Index(decodedString, "KUBELET_FLAGS=")+len("KUBELET_FLAGS="):] + parseKubeletFlags := func(flags string) map[string]string { + flagMap := make(map[string]string) + flagList := strings.Split(flags, " --") + for _, flag := range flagList { + parts := strings.SplitN(flag, "=", 2) + if len(parts) == 2 { + flagMap["--"+parts[0]] = parts[1] + } + } + return flagMap + } + + // TODO: (bsoghigian) leverage the helpers from the azure cni pr once they get in instead for testing kubelet flags + flagMap := parseKubeletFlags(kubeletFlags) + // NOTE: env.Version may differ from the version we get for the apiserver + k8sVersion, err := azureEnv.ImageProvider.KubeServerVersion(ctx) + Expect(err).To(BeNil()) + parsed := semver.MustParse(k8sVersion) + + if utils.UseOOTCredential(parsed.Minor) { + Expect(flagMap).ToNot(HaveKey("--azure-container-registry-config")) + Expect(flagMap).To(HaveKeyWithValue("--image-credential-provider-config", "/var/lib/kubelet/credential-provider-config.yaml")) + Expect(flagMap).To(HaveKeyWithValue("--image-credential-provider-bin-dir", "/var/lib/kubelet/credential-provider")) + Expect(decodedString).To(ContainSubstring( + fmt.Sprintf("https://acs-mirror.azureedge.net/cloud-provider-azure/%s/binaries/azure-acr-credential-provider-linux-amd64-v%s.tar.gz", parsed.String(), parsed.String()), + )) + + } else { + Expect(flagMap).To(HaveKey("--azure-container-registry-config")) + Expect(flagMap).ToNot(HaveKey("--image-credential-provider-config")) + Expect(flagMap).ToNot(HaveKey("--image-credential-provider-bin-dir")) + } + }) + }) + Context("LoadBalancer", func() { resourceGroup := "test-resourceGroup" diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index bd6ea31de..edb84c7fc 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -58,3 +58,7 @@ func MkVMID(resourceGroupName string, vmName string) string { const idFormat = "/subscriptions/subscriptionID/resourceGroups/%s/providers/Microsoft.Compute/virtualMachines/%s" return fmt.Sprintf(idFormat, resourceGroupName, vmName) } + +func UseOOTCredential(minorK8sVersion uint64) bool { + return minorK8sVersion >= 30 +}