Skip to content

Commit

Permalink
Merge branch 'main' into dependabot/go_modules/github.com/Azure/azure…
Browse files Browse the repository at this point in the history
…-sdk-for-go/sdk/azidentity-1.5.1
  • Loading branch information
tallaxes authored Jan 26, 2024
2 parents 29da9c2 + 7705f24 commit b33eb69
Show file tree
Hide file tree
Showing 22 changed files with 71 additions and 37 deletions.
2 changes: 1 addition & 1 deletion .github/actions/e2e/create-acr/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ inputs:
location:
type: string
description: "the azure location to run the e2e test in"
default: "westus2"
default: "eastus"
runs:
using: "composite"
steps:
Expand Down
2 changes: 1 addition & 1 deletion .github/actions/e2e/create-cluster/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ inputs:
location:
type: string
description: "the azure location to run the e2e test in"
default: "westus2"
default: "eastus"
runs:
using: "composite"
steps:
Expand Down
2 changes: 1 addition & 1 deletion .github/actions/e2e/install-karpenter/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ inputs:
location:
type: string
description: "the azure location to run the e2e test in"
default: "westus2"
default: "eastus"
runs:
using: "composite"
steps:
Expand Down
6 changes: 4 additions & 2 deletions .github/workflows/build-publish-mcr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@ on:
type: string

permissions:
id-token: write # This is required for requesting the JWT
contents: read # This is required for actions/checkout
contents: read

env:
REGISTRY_REPO: unlisted/aks/karpenter
Expand Down Expand Up @@ -41,6 +40,9 @@ jobs:
echo "release_tag=$RELEASE_TAG" >> $GITHUB_OUTPUT
publish-images:
permissions:
contents: read
id-token: write # This is required for requesting the JWT
runs-on: ubuntu-latest
needs: prepare-variables
steps:
Expand Down
7 changes: 6 additions & 1 deletion .github/workflows/ci-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,13 @@ on:
branches: [main]
pull_request:
workflow_dispatch:
permissions:
contents: read
jobs:
ci-test:
permissions:
contents: read
statuses: write
runs-on: ubuntu-latest
strategy:
matrix:
Expand All @@ -24,7 +29,7 @@ jobs:
k8sVersion: ${{ matrix.k8sVersion }}
- run: K8S_VERSION=${{ matrix.k8sVersion }} make ci-test
- name: Send coverage
# should only send converage once https://docs.coveralls.io/parallel-builds
# should only send coverage once https://docs.coveralls.io/parallel-builds
if: matrix.k8sVersion == '1.27.x'
env:
COVERALLS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ on:
branches: [main]
pull_request:
workflow_dispatch:
permissions:
contents: read
jobs:
ci:
runs-on: ubuntu-latest
Expand Down
6 changes: 6 additions & 0 deletions .github/workflows/deflake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,15 @@ on:
schedule:
- cron: '0 12 * * *'
workflow_dispatch:

permissions:
contents: read
jobs:
deflake:
runs-on: ubuntu-latest
permissions:
contents: read
statuses: write
steps:
- name: Harden Runner
uses: step-security/harden-runner@eb238b55efaa70779f274895e782ed17c84f2895 # v2.6.1
Expand Down
14 changes: 9 additions & 5 deletions .github/workflows/e2e-matrix-trigger.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,30 @@ on:
location:
type: string
description: "the azure location to run the e2e test in"
default: "westus2"
default: "eastus"
push:
branches: [main]
workflow_run:
workflows: [ApprovalComment]
types: [completed]

permissions:
id-token: write # This is required for requesting the JWT
contents: read # This is required for actions/checkout
statuses: write # ./.github/actions/commit-status/*
contents: read

jobs:
resolve:
if: github.event_name != 'workflow_run' || github.event.workflow_run.conclusion == 'success'
uses: ./.github/workflows/resolve-args.yaml
e2e-matrix:
permissions:
contents: read
id-token: write # This is required for requesting the JWT
statuses: write # ./.github/actions/commit-status/*
needs: [resolve]
uses: ./.github/workflows/e2e-matrix.yaml
with:
git_ref: ${{ needs.resolve.outputs.GIT_REF }}
location: ${{ inputs.location || 'westus2' }}
location: ${{ inputs.location || 'eastus' }}
secrets:
E2E_CLIENT_ID: ${{ secrets.E2E_CLIENT_ID }}
E2E_TENANT_ID: ${{ secrets.E2E_TENANT_ID }}
Expand Down
10 changes: 9 additions & 1 deletion .github/workflows/e2e-matrix.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ on:
location:
type: string
description: "the azure location to run the e2e test in"
default: "westus2"
default: "eastus"
# k8s_version:
# type: string
# default: "1.27"
Expand All @@ -18,6 +18,10 @@ on:
required: true
E2E_SUBSCRIPTION_ID:
required: true

permissions:
contents: read

jobs:
initialize-generative-params:
runs-on: ubuntu-latest
Expand All @@ -40,6 +44,10 @@ jobs:
fail-fast: false
matrix:
suite: [Nonbehavioral, Utilization, GPU, Drift, Integration, NodeClaim]
permissions:
contents: read
id-token: write
statuses: write
uses: ./.github/workflows/e2e.yaml
with:
git_ref: ${{ inputs.git_ref }}
Expand Down
12 changes: 8 additions & 4 deletions .github/workflows/e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ on:
location:
type: string
description: "the azure location to run the e2e test in"
default: "westus2"
default: "eastus"
# k8s_version:
# type: string
# default: "1.27"
Expand All @@ -24,14 +24,18 @@ on:
required: true
E2E_SUBSCRIPTION_ID:
required: true

permissions:
id-token: write # This is required for requesting the JWT
contents: read # This is required for actions/checkout
statuses: write # ./.github/actions/commit-status/*
contents: read

jobs:
run-suite:
name: suite-${{ inputs.suite }}
runs-on: ubuntu-latest
permissions:
contents: read
id-token: write # This is required for requesting the JWT
statuses: write # ./.github/actions/commit-status/*
env:
AZURE_SUBSCRIPTION_ID: ${{ secrets.E2E_SUBSCRIPTION_ID }}
steps:
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/resolve-args.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ on:
outputs:
GIT_REF:
value: ${{ jobs.resolve.outputs.GIT_REF }}

permissions:
contents: read

jobs:
resolve:
runs-on: ubuntu-latest
Expand Down
4 changes: 2 additions & 2 deletions charts/karpenter/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -187,13 +187,13 @@ spec:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.affinity }}
# The template below patches the .Values.affinity to add a default label selector where not specificed
# The template below patches the .Values.affinity to add a default label selector where not specified
{{- $_ := include "karpenter.patchAffinity" $ }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.topologySpreadConstraints }}
# The template below patches the .Values.topologySpreadConstraints to add a default label selector where not specificed
# The template below patches the .Values.topologySpreadConstraints to add a default label selector where not specified
{{- $_ := include "karpenter.patchTopologySpreadConstraints" $ }}
topologySpreadConstraints:
{{- toYaml . | nindent 8 }}
Expand Down
2 changes: 1 addition & 1 deletion designs/aks-node-bootstrap.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ Karpenter also supports provider-specific configuration via `NodeTemplate` custo
Note that this represents part of the external configuration surface / API, and should be treated as such.

<!-- TODO: cover NodeTemplate details -->
<!-- TODO: add guidance on what belongs to settins vs NodeTemplate -->
<!-- TODO: add guidance on what belongs to settings vs NodeTemplate -->

### Auto-detected values

Expand Down
6 changes: 3 additions & 3 deletions designs/gpu-selection-and-bootstrap.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ The way we determine these drivers is via trial and error, and there is not a gr
For Converged drivers, which are a mix of multiple drivers, installing vanilla cuda drivers will fail with opaque errors.
nvidia-bug-report.sh may be helpful, but usually it tells you the pci card id is incompatible.
So manual trial and error, or leveraging other peoples manual trial and error, and published gpu drivers seems to be the prefered method for approaching this.
So manual trial and error, or leveraging other peoples manual trial and error, and published gpu drivers seems to be the preferred method for approaching this.
see https://github.com/Azure/azhpc-extensions/blob/daaefd78df6f27012caf30f3b54c3bd6dc437652/NvidiaGPU/resources.json for the HPC list of skus and converged drivers, and the driver matrix used by HPC
**Ownership:** Node SIG is responsible for ensuring successful and functional installation. Our goal is to share a bootstrap contract, and the obligation of providing a functional, successfully bootstrapped vhd rests with the Node SIG.
Expand All @@ -95,9 +95,9 @@ The NVIDIA device plugin for Kubernetes is designed to enable GPU support within

We will require the customer to install the nvidia device plugin daemonset to enable GPU support through karpenter.

When a node with Nvidia GPUS joins the cluster, the device plugin detects available gpus and notifies the k8s scheduler that we have a new Allocatable Resource type of `nvidia.com/gpu` along with a resource quanity that can be considered for scheduling.
When a node with Nvidia GPUS joins the cluster, the device plugin detects available gpus and notifies the k8s scheduler that we have a new Allocatable Resource type of `nvidia.com/gpu` along with a resource quantity that can be considered for scheduling.

Note the device plugin is also reponsible for the allocation of that resource and reporting that other pods can not use that resource and marking it as used by changing the allocatable capacity on the node.
Note the device plugin is also responsible for the allocation of that resource and reporting that other pods can not use that resource and marking it as used by changing the allocatable capacity on the node.

## Changes to Requirements API

Expand Down
2 changes: 1 addition & 1 deletion designs/k8s-node-image-upgrade.md
Original file line number Diff line number Diff line change
Expand Up @@ -310,5 +310,5 @@ From template:
design doc by carefully reviewing it or assigning tech leads who
are domain experts in that SIG to review and approve this doc

[^6]: Q&A style meeting notes from desgin review meeting to capture
[^6]: Q&A style meeting notes from design review meeting to capture
todos
2 changes: 1 addition & 1 deletion pkg/fake/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ type MockHandler[T any] struct {
err error
}

// Done returns true if the LRO has reached a terminal state. TrivialHanlder is always done.
// Done returns true if the LRO has reached a terminal state. TrivialHandler is always done.
func (h MockHandler[T]) Done() bool {
return true
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/providers/imagefamily/resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ func (r Resolver) Resolve(ctx context.Context, nodeClass *v1alpha2.AKSNodeClass,
kubeletConfig = &corev1beta1.KubeletConfiguration{}
}

// TODO: revist computeResources and maxPods implementation
// TODO: revisit computeResources and maxPods implementation
kubeletConfig.KubeReserved = instanceType.Overhead.KubeReserved
kubeletConfig.SystemReserved = instanceType.Overhead.SystemReserved
kubeletConfig.EvictionHard = map[string]string{
Expand Down
4 changes: 2 additions & 2 deletions pkg/providers/instancetype/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -787,9 +787,9 @@ var _ = Describe("InstanceType Provider", func() {
Expect(ok).To(BeTrue(), "Expected nvidia.com/gpu to be present in capacity")
Expect(gpuQuantity.Value()).To(Equal(int64(1)))

gpuQuanityNonGPU, ok := normalNode.Capacity["nvidia.com/gpu"]
gpuQuantityNonGPU, ok := normalNode.Capacity["nvidia.com/gpu"]
Expect(ok).To(BeTrue(), "Expected nvidia.com/gpu to be present in capacity, and be zero")
Expect(gpuQuanityNonGPU.Value()).To(Equal(int64(0)))
Expect(gpuQuantityNonGPU.Value()).To(Equal(int64(0)))
})
})

Expand Down
2 changes: 1 addition & 1 deletion pkg/providers/pricing/pricing.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ func (p *Provider) updatePricing(ctx context.Context) {
prices := map[client.Item]bool{}
err := p.fetchPricing(ctx, processPage(prices))
if err != nil {
logging.FromContext(ctx).Errorf("error featching updated pricing for region %s, %s, using existing pricing data, on-demand: %s, spot: %s", p.region, err, err.lastOnDemandUpdateTime.Format(time.RFC3339), err.lastSpotUpdateTime.Format(time.RFC3339))
logging.FromContext(ctx).Errorf("error fetching updated pricing for region %s, %s, using existing pricing data, on-demand: %s, spot: %s", p.region, err, err.lastOnDemandUpdateTime.Format(time.RFC3339), err.lastSpotUpdateTime.Format(time.RFC3339))
return
}

Expand Down
8 changes: 4 additions & 4 deletions test/pkg/debug/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ func (c *EventClient) dumpKarpenterEvents(ctx context.Context) error {
if err := c.kubeClient.List(ctx, el, client.InNamespace("karpenter")); err != nil {
return err
}
for k, v := range coallateEvents(filterTestEvents(el.Items, c.start)) {
for k, v := range collateEvents(filterTestEvents(el.Items, c.start)) {
fmt.Print(getEventInformation(k, v))
}
return nil
Expand All @@ -71,7 +71,7 @@ func (c *EventClient) dumpPodEvents(ctx context.Context) error {
events := lo.Filter(filterTestEvents(el.Items, c.start), func(e v1.Event, _ int) bool {
return e.InvolvedObject.Namespace != "kube-system"
})
for k, v := range coallateEvents(events) {
for k, v := range collateEvents(events) {
fmt.Print(getEventInformation(k, v))
}
return nil
Expand All @@ -84,7 +84,7 @@ func (c *EventClient) dumpNodeEvents(ctx context.Context) error {
}); err != nil {
return err
}
for k, v := range coallateEvents(filterTestEvents(el.Items, c.start)) {
for k, v := range collateEvents(filterTestEvents(el.Items, c.start)) {
fmt.Print(getEventInformation(k, v))
}
return nil
Expand All @@ -103,7 +103,7 @@ func filterTestEvents(events []v1.Event, startTime time.Time) []v1.Event {
})
}

func coallateEvents(events []v1.Event) map[v1.ObjectReference]*v1.EventList {
func collateEvents(events []v1.Event) map[v1.ObjectReference]*v1.EventList {
eventMap := map[v1.ObjectReference]*v1.EventList{}
for i := range events {
elem := events[i]
Expand Down
2 changes: 1 addition & 1 deletion test/pkg/environment/azure/setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func (env *Environment) Cleanup() {
}

func (env *Environment) AfterEach() {
fmt.Println("##[group]AfterEach (CONTROLLER LOGS)")
fmt.Println("##[group] E2E SUITE: LOG DUMP")
defer fmt.Println("##[endgroup]")
env.Environment.AfterEach()
// Ensure we reset settings after collecting the controller logs
Expand Down
7 changes: 3 additions & 4 deletions test/pkg/environment/common/setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"time"

"github.com/Azure/karpenter-provider-azure/pkg/apis/v1alpha2"
"github.com/Azure/karpenter-provider-azure/test/pkg/debug"
. "github.com/onsi/ginkgo/v2" //nolint:revive,stylecheck
. "github.com/onsi/gomega" //nolint:revive,stylecheck
"github.com/samber/lo"
Expand Down Expand Up @@ -64,8 +65,7 @@ var (

// nolint:gocyclo
func (env *Environment) BeforeEach() {
// TODO (charliedmcb): uncomment debugging lib, but removing references for now to avoid the need to review the entire package
// debug.BeforeEach(env.Context, env.Config, env.Client)
debug.BeforeEach(env.Context, env.Config, env.Client)
env.Context = injection.WithSettingsOrDie(env.Context, env.KubeClient, apis.Settings...)

// Expect this cluster to be clean for test runs to execute successfully
Expand Down Expand Up @@ -113,8 +113,7 @@ func (env *Environment) Cleanup() {
}

func (env *Environment) AfterEach() {
// TODO (charliedmcb): uncomment debugging lib, but removing references for now to avoid the need to review the entire package
// debug.AfterEach(env.Context)
debug.AfterEach(env.Context)
env.printControllerLogs(&v1.PodLogOptions{Container: "controller"})
}

Expand Down

0 comments on commit b33eb69

Please sign in to comment.