Skip to content

Commit

Permalink
Merge branch 'main' into bump-x-net
Browse files Browse the repository at this point in the history
Signed-off-by: dongjiang <[email protected]>
  • Loading branch information
dongjiang1989 authored Jan 15, 2024
2 parents a08a1d6 + c70f410 commit 4ba4e71
Show file tree
Hide file tree
Showing 223 changed files with 23,863 additions and 3,470 deletions.
1 change: 1 addition & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ updates:
commit-message:
prefix: "feat"
include: "scope"
open-pull-requests-limit: 0
- package-ecosystem: "github-actions"
directory: "/"
schedule:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-go@v4
- uses: actions/setup-go@v5
with:
cache: false
go-version-file: go.mod
Expand Down Expand Up @@ -52,7 +52,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-go@v4
- uses: actions/setup-go@v5
with:
cache: false
go-version-file: go.mod
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/e2e-k8s-1.22.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
with:
submodules: true
- name: Setup Go
uses: actions/setup-go@v4
uses: actions/setup-go@v5
with:
go-version: ${{ env.GO_VERSION }}
- name: Setup Kind Cluster
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/e2e-k8s-1.24.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
with:
submodules: true
- name: Setup Go
uses: actions/setup-go@v4
uses: actions/setup-go@v5
with:
go-version: ${{ env.GO_VERSION }}
- name: Setup Kind Cluster
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/e2e-k8s-latest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
with:
submodules: true
- name: Setup Go
uses: actions/setup-go@v4
uses: actions/setup-go@v5
with:
go-version: ${{ env.GO_VERSION }}
- name: Setup Kind Cluster
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/license.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
run: |
gem install license_finder
license_finder --decisions_file .license/dependency_decisions.yml
- uses: actions/setup-go@v4
- uses: actions/setup-go@v5
with:
cache: false
go-version-file: go.mod
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ jobs:
- name: Fetch all tags
run: git fetch --force --tags
- name: Set up Go
uses: actions/setup-go@v4
uses: actions/setup-go@v5
with:
cache: false
go-version-file: go.mod
Expand Down
19 changes: 11 additions & 8 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ endif
SHELL = /usr/bin/env bash -o pipefail
.SHELLFLAGS = -ec

# Timeout handed to `golangci-lint run --timeout=...` by the lint-go target.
LINT_TIMEOUT ?= 15m
# Docker sub-command used by the docker-build-* targets, e.g. 'buildx build'.
# The comment lives on its own line on purpose: an inline comment after the
# value would leave trailing whitespace inside DOCKER_BUILDER.
DOCKER_BUILDER ?= build

.PHONY: all
all: build

Expand Down Expand Up @@ -83,15 +86,15 @@ lint: lint-go lint-license ## Lint all code.

.PHONY: lint-go
lint-go: golangci-lint ## Lint Go code.
$(GOLANGCI_LINT) run -v --timeout=10m
$(GOLANGCI_LINT) run -v --timeout=$(LINT_TIMEOUT)

.PHONY: lint-license
lint-license:
@hack/update-license-header.sh

.PHONY: test
test: manifests generate fmt vet envtest libpfm ## Run tests.
@KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)" agent_mode=$(AGENT_MODE) go test $(PACKAGES) -race -covermode atomic -coverprofile cover.out
@KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)" agent_mode=$(AGENT_MODE) go test $(PACKAGES) -race -covermode atomic -coverprofile cover.out
@KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)" agent_mode=$(AGENT_MODE) go test $(PERFGROUPPACKAGE) -covermode atomic -coverprofile tmp.out && cat tmp.out | tail -n +2 >> cover.out && rm tmp.out

.PHONY: fast-test
Expand Down Expand Up @@ -129,19 +132,19 @@ docker-build: test docker-build-koordlet docker-build-koord-manager docker-build

.PHONY: docker-build-koordlet
docker-build-koordlet: ## Build docker image with the koordlet.
docker build --pull -t ${KOORDLET_IMG} -f docker/koordlet.dockerfile .
docker ${DOCKER_BUILDER} ${DOCKER_BUILD_ARGS} --pull -t ${KOORDLET_IMG} -f docker/koordlet.dockerfile .

.PHONY: docker-build-koord-manager
docker-build-koord-manager: ## Build docker image with the koord-manager.
docker build --pull -t ${KOORD_MANAGER_IMG} -f docker/koord-manager.dockerfile .
docker $(DOCKER_BUILDER) ${DOCKER_BUILD_ARGS} --pull -t ${KOORD_MANAGER_IMG} -f docker/koord-manager.dockerfile .

.PHONY: docker-build-koord-scheduler
docker-build-koord-scheduler: ## Build docker image with the scheduler.
docker build --pull -t ${KOORD_SCHEDULER_IMG} -f docker/koord-scheduler.dockerfile .
docker $(DOCKER_BUILDER) ${DOCKER_BUILD_ARGS} --pull -t ${KOORD_SCHEDULER_IMG} -f docker/koord-scheduler.dockerfile .

.PHONY: docker-build-koord-descheduler
docker-build-koord-descheduler: ## Build docker image with the descheduler.
docker build --pull -t ${KOORD_DESCHEDULER_IMG} -f docker/koord-descheduler.dockerfile .
docker $(DOCKER_BUILDER) ${DOCKER_BUILD_ARGS} --pull -t ${KOORD_DESCHEDULER_IMG} -f docker/koord-descheduler.dockerfile .

.PHONY: docker-push
docker-push: docker-push-koordlet docker-push-koord-manager docker-push-koord-scheduler docker-push-koord-descheduler
Expand Down Expand Up @@ -245,5 +248,5 @@ $(GINKGO): $(LOCALBIN)
GOBIN=$(LOCALBIN) go install github.com/onsi/ginkgo/ginkgo@$(GINKGO_VERSION)

.PHONY: libpfm
libpfm:
@hack/libpfm.sh
libpfm:
@hack/libpfm.sh
2 changes: 1 addition & 1 deletion README-zh_CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ Koordinator 社区遵照[行为守则](CODE_OF_CONDUCT.md)。我们鼓励每个
- 社区双周会(中文):
- 周二 19:30 GMT+8 (北京时间)
- [钉钉会议链接](https://meeting.dingtalk.com/j/cgTTojEI8Zy)
- [议题&记录文档](https://shimo.im/docs/m4kMLdgO1LIma9qD)
- [议题&记录文档](https://alidocs.dingtalk.com/document/edit?docKey=oJGq769vBG4WnAKe&dentryKey=paP7wO3nXFnLzMAa&type=d)
- Slack( English ): [koordinator channel](https://kubernetes.slack.com/channels/koordinator) in Kubernetes workspace
- 钉钉( Chinese ): 搜索群ID `33383887`或者扫描二维码加入

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ Active communication channels:
- Bi-weekly Community Meeting (APAC, *Chinese*):
- Tuesday 19:30 GMT+8 (Asia/Shanghai)
- [Meeting Link(DingTalk)](https://meeting.dingtalk.com/j/cgTTojEI8Zy)
- [Notes and agenda](https://shimo.im/docs/m4kMLdgO1LIma9qD)
- [Notes and agenda](https://alidocs.dingtalk.com/document/edit?docKey=oJGq769vBG4WnAKe&dentryKey=paP7wO3nXFnLzMAa&type=d)
- Slack(English): [koordinator channel](https://kubernetes.slack.com/channels/koordinator) in Kubernetes workspace
- DingTalk(Chinese): Search Group ID `33383887` or scan the following QR Code

Expand Down
10 changes: 10 additions & 0 deletions apis/config/v1alpha1/cluster_colocation_profile_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,16 @@ type ClusterColocationProfileSpec struct {
// +optional
Annotations map[string]string `json:"annotations,omitempty"`

// LabelKeysMapping describes the labels that need to be injected into Pod.Labels with the same values.
// It sets Pod.Labels[LabelKeysMapping[k]] = Pod.Labels[k] for each key k.
// +optional
LabelKeysMapping map[string]string `json:"labelKeysMapping,omitempty"`

// AnnotationKeysMapping describes the annotations that need to be injected into Pod.Annotations with the same values.
// It sets Pod.Annotations[AnnotationKeysMapping[k]] = Pod.Annotations[k] for each key k.
// +optional
AnnotationKeysMapping map[string]string `json:"annotationKeysMapping,omitempty"`

// If specified, the pod will be dispatched by specified scheduler.
// +optional
SchedulerName string `json:"schedulerName,omitempty"`
Expand Down
14 changes: 14 additions & 0 deletions apis/config/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions apis/extension/cpu_normalization.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ const (

// AnnotationCPUBasicInfo denotes the basic CPU info of the node.
AnnotationCPUBasicInfo = NodeDomainPrefix + "/cpu-basic-info"

// NormalizationRatioDiffEpsilon is the min difference between two cpu normalization ratios.
NormalizationRatioDiffEpsilon = 0.01
)

// GetCPUNormalizationRatio gets the cpu normalization ratio from the node.
Expand Down Expand Up @@ -90,6 +93,10 @@ func GetCPUNormalizationEnabled(node *corev1.Node) (*bool, error) {
return pointer.Bool(v), nil
}

// IsCPUNormalizationRatioDifferent reports whether old lies outside the closed band
// [new-NormalizationRatioDiffEpsilon, new+NormalizationRatioDiffEpsilon], i.e. whether the
// two cpu normalization ratios differ by more than the epsilon in either direction.
func IsCPUNormalizationRatioDifferent(old, new float64) bool {
	upperBound := new + NormalizationRatioDiffEpsilon
	lowerBound := new - NormalizationRatioDiffEpsilon
	if old > upperBound {
		return true
	}
	return old < lowerBound
}

// CPUBasicInfo describes the cpu basic features and status.
type CPUBasicInfo struct {
CPUModel string `json:"cpuModel,omitempty"`
Expand Down
32 changes: 32 additions & 0 deletions apis/extension/cpu_normalization_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -388,3 +388,35 @@ func TestSetCPUBasicInfo(t *testing.T) {
})
}
}

func TestCPUNormalizationRatioDifferent(t *testing.T) {
testCases := []struct {
old float64
new float64
expectedDiff bool
}{
{
old: 1.2,
new: 1.2,
expectedDiff: false,
},
{
old: 1.2,
new: 1.3,
expectedDiff: true,
},
{
old: 1.2,
new: 1.205,
expectedDiff: false,
},
{
old: 1.2,
new: 1.195,
expectedDiff: false,
},
}
for _, tc := range testCases {
assert.Equal(t, tc.expectedDiff, IsCPUNormalizationRatioDifferent(tc.old, tc.new))
}
}
125 changes: 122 additions & 3 deletions apis/extension/device_share.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ import (
const (
// AnnotationDeviceAllocated represents the device allocated by the pod
AnnotationDeviceAllocated = SchedulingDomainPrefix + "/device-allocated"
// AnnotationDeviceAllocateHint guides the scheduler in selecting and allocating specialized hardware resources
AnnotationDeviceAllocateHint = SchedulingDomainPrefix + "/device-allocate-hint"
// AnnotationDeviceJointAllocate guides the scheduler to joint-allocate devices
AnnotationDeviceJointAllocate = SchedulingDomainPrefix + "/device-joint-allocate"
)

const (
Expand All @@ -36,6 +40,7 @@ const (
ResourceRDMA corev1.ResourceName = DomainPrefix + "rdma"
ResourceFPGA corev1.ResourceName = DomainPrefix + "fpga"
ResourceGPU corev1.ResourceName = DomainPrefix + "gpu"
ResourceGPUShared corev1.ResourceName = DomainPrefix + "gpu.shared"
ResourceGPUCore corev1.ResourceName = DomainPrefix + "gpu-core"
ResourceGPUMemory corev1.ResourceName = DomainPrefix + "gpu-memory"
ResourceGPUMemoryRatio corev1.ResourceName = DomainPrefix + "gpu-memory-ratio"
Expand Down Expand Up @@ -72,11 +77,64 @@ const (
type DeviceAllocations map[schedulingv1alpha1.DeviceType][]*DeviceAllocation

type DeviceAllocation struct {
Minor int32 `json:"minor"`
Resources corev1.ResourceList `json:"resources"`
Extension json.RawMessage `json:"extension,omitempty"`
Minor int32 `json:"minor"`
Resources corev1.ResourceList `json:"resources"`
Extension *DeviceAllocationExtension `json:"extension,omitempty"`
}

// DeviceAllocationExtension is the type of the optional Extension field of DeviceAllocation.
type DeviceAllocationExtension struct {
// VirtualFunctions is serialized under the JSON key "vfs" and omitted when empty.
// NOTE(review): presumably the VFs allocated on the device — confirm against the allocator.
VirtualFunctions []VirtualFunction `json:"vfs,omitempty"`
}

// VirtualFunction describes a single virtual function by its minor number and bus ID.
// Both fields are omitted from the JSON encoding when they hold their zero value.
type VirtualFunction struct {
// Minor is the minor number of the virtual function.
Minor int `json:"minor,omitempty"`
// BusID is the bus ID of the virtual function.
// NOTE(review): presumably a PCI bus address — confirm the expected format.
BusID string `json:"busID,omitempty"`
}

// DeviceJointAllocate is the JSON payload of the AnnotationDeviceJointAllocate annotation.
// It is written by SetDeviceJointAllocate and parsed by GetDeviceJointAllocate.
type DeviceJointAllocate struct {
// DeviceTypes indicates that the specified types of devices are grouped and allocated according to topology.
DeviceTypes []schedulingv1alpha1.DeviceType `json:"deviceTypes,omitempty"`
// RequiredScope specifies the allocation scope required for the joint allocation of devices.
// It defines the granularity at which devices should be joint-allocated, e.g. in the same PCIe.
RequiredScope DeviceJointAllocateScope `json:"requiredScope,omitempty"`
}

// DeviceJointAllocateScope names the scope used by DeviceJointAllocate.RequiredScope.
type DeviceJointAllocateScope string

const (
// SamePCIeDeviceJointAllocateScope is the "SamePCIe" scope.
// NOTE(review): presumably requires the joint-allocated devices to sit under the same PCIe — confirm.
SamePCIeDeviceJointAllocateScope DeviceJointAllocateScope = "SamePCIe"
)

// DeviceAllocateHints maps a device type to the DeviceHint for that type. It is the value that
// SetDeviceAllocateHints and GetDeviceAllocateHints encode to and decode from the
// AnnotationDeviceAllocateHint annotation as JSON.
type DeviceAllocateHints map[schedulingv1alpha1.DeviceType]*DeviceHint

// DeviceHint holds the allocation hint for one device type: optional label selectors for
// devices and VFs, an allocation strategy and an exclusive policy.
type DeviceHint struct {
// Selector selects devices by label selector.
Selector *metav1.LabelSelector `json:"selector,omitempty"`
// VFSelector selects VFs by label selector.
// If VFSelector is specified, the scheduler will allocate VFs from PFs which satisfy VFSelector.
VFSelector *metav1.LabelSelector `json:"vfSelector,omitempty"`
// AllocateStrategy controls the allocation strategy
AllocateStrategy DeviceAllocateStrategy `json:"allocateStrategy,omitempty"`
// ExclusivePolicy indicates the exclusive policy.
ExclusivePolicy DeviceExclusivePolicy `json:"exclusivePolicy,omitempty"`
}

// DeviceAllocateStrategy names the allocation strategy used by DeviceHint.AllocateStrategy.
type DeviceAllocateStrategy string

const (
// ApplyForAllDeviceAllocateStrategy is the "ApplyForAll" strategy.
// NOTE(review): presumably applies for every matching device — semantics are not visible here, confirm.
ApplyForAllDeviceAllocateStrategy DeviceAllocateStrategy = "ApplyForAll"
// RequestsAsCountAllocateStrategy is the "RequestsAsCount" strategy.
// NOTE(review): presumably interprets the requested quantity as a device count — confirm.
RequestsAsCountAllocateStrategy DeviceAllocateStrategy = "RequestsAsCount"
)

// DeviceExclusivePolicy names the exclusive policy used by DeviceHint.ExclusivePolicy.
type DeviceExclusivePolicy string

const (
// DeviceLevelDeviceExclusivePolicy represents mutual exclusion in the device instance dimension
DeviceLevelDeviceExclusivePolicy DeviceExclusivePolicy = "DeviceLevel"
// PCIExpressLevelDeviceExclusivePolicy represents mutual exclusion in the PCIe dimension
PCIExpressLevelDeviceExclusivePolicy DeviceExclusivePolicy = "PCIeLevel"
)

func GetDeviceAllocations(podAnnotations map[string]string) (DeviceAllocations, error) {
deviceAllocations := DeviceAllocations{}
data, ok := podAnnotations[AnnotationDeviceAllocated]
Expand Down Expand Up @@ -105,3 +163,64 @@ func SetDeviceAllocations(obj metav1.Object, allocations DeviceAllocations) erro
obj.SetAnnotations(annotations)
return nil
}

// SetDeviceAllocateHints encodes hint as JSON and stores it on obj under the
// AnnotationDeviceAllocateHint annotation key, creating the annotation map if obj has none.
// A nil hint leaves obj untouched. The only error returned is a JSON marshalling failure.
func SetDeviceAllocateHints(obj metav1.Object, hint DeviceAllocateHints) error {
	if hint == nil {
		return nil
	}

	encoded, marshalErr := json.Marshal(hint)
	if marshalErr != nil {
		return marshalErr
	}

	objAnnotations := obj.GetAnnotations()
	if objAnnotations == nil {
		objAnnotations = make(map[string]string)
	}
	objAnnotations[AnnotationDeviceAllocateHint] = string(encoded)
	obj.SetAnnotations(objAnnotations)
	return nil
}

// GetDeviceAllocateHints decodes the JSON stored under the AnnotationDeviceAllocateHint key.
// It returns (nil, nil) when the key is absent and (nil, err) when the value is not valid JSON
// for DeviceAllocateHints.
func GetDeviceAllocateHints(annotations map[string]string) (DeviceAllocateHints, error) {
	raw, found := annotations[AnnotationDeviceAllocateHint]
	if !found {
		return nil, nil
	}

	decoded := DeviceAllocateHints{}
	if err := json.Unmarshal([]byte(raw), &decoded); err != nil {
		return nil, err
	}
	return decoded, nil
}

// SetDeviceJointAllocate encodes jointAllocate as JSON and stores it on obj under the
// AnnotationDeviceJointAllocate annotation key, creating the annotation map if obj has none.
// A nil jointAllocate leaves obj untouched. The only error returned is a JSON marshalling failure.
func SetDeviceJointAllocate(obj metav1.Object, jointAllocate *DeviceJointAllocate) error {
	if jointAllocate == nil {
		return nil
	}

	encoded, marshalErr := json.Marshal(jointAllocate)
	if marshalErr != nil {
		return marshalErr
	}

	objAnnotations := obj.GetAnnotations()
	if objAnnotations == nil {
		objAnnotations = make(map[string]string)
	}
	objAnnotations[AnnotationDeviceJointAllocate] = string(encoded)
	obj.SetAnnotations(objAnnotations)
	return nil
}

// GetDeviceJointAllocate decodes the JSON stored under the AnnotationDeviceJointAllocate key.
// It returns (nil, nil) when the key is absent and (nil, err) when the value cannot be decoded.
func GetDeviceJointAllocate(annotations map[string]string) (*DeviceJointAllocate, error) {
	if raw, found := annotations[AnnotationDeviceJointAllocate]; found {
		decoded := &DeviceJointAllocate{}
		if err := json.Unmarshal([]byte(raw), decoded); err != nil {
			return nil, err
		}
		return decoded, nil
	}
	return nil, nil
}
Loading

0 comments on commit 4ba4e71

Please sign in to comment.