From b6a7d9ef2eb0bdb01e4349e82b2f09e08616030a Mon Sep 17 00:00:00 2001 From: Frame Date: Tue, 14 Nov 2023 10:17:40 +0800 Subject: [PATCH] apis: add core sched apis (#1720) Signed-off-by: saintube --- apis/slo/v1alpha1/nodeslo_types.go | 27 +++ apis/slo/v1alpha1/pod.go | 38 ++++ apis/slo/v1alpha1/zz_generated.deepcopy.go | 35 ++++ .../bases/slo.koordinator.sh_nodeslos.yaml | 97 ++++++++- .../runtimehooks/hooks/groupidentity/rule.go | 37 ++-- .../hooks/groupidentity/rule_test.go | 186 ++++++++++++++++++ pkg/koordlet/util/cold_page.go | 1 + pkg/util/sloconfig/nodeslo_config.go | 26 +++ pkg/util/sloconfig/nodeslo_config_test.go | 1 + 9 files changed, 427 insertions(+), 21 deletions(-) diff --git a/apis/slo/v1alpha1/nodeslo_types.go b/apis/slo/v1alpha1/nodeslo_types.go index 57df8e008..e814f128e 100644 --- a/apis/slo/v1alpha1/nodeslo_types.go +++ b/apis/slo/v1alpha1/nodeslo_types.go @@ -26,9 +26,28 @@ import ( // CPUQOS enables cpu qos features. type CPUQOS struct { // group identity value for pods, default = 0 + // NOTE: It takes effect if cpuPolicy = "groupIdentity". GroupIdentity *int64 `json:"groupIdentity,omitempty" validate:"omitempty,min=-1,max=2"` + // cpu.idle value for pods, default = 0. + // `1` means using SCHED_IDLE. + // CGroup Idle (introduced since mainline Linux 5.15): https://lore.kernel.org/lkml/162971078674.25758.15464079371945307825.tip-bot2@tip-bot2/#r + // NOTE: It takes effect if cpuPolicy = "coreSched". + SchedIdle *int64 `json:"schedIdle,omitempty" validate:"omitempty,min=0,max=1"` + // whether pods of the QoS class can expel the cgroup idle pods at the SMT-level. default = false + // If set to true, pods of this QoS will use a dedicated core sched group for noise clean with the SchedIdle pods. + // NOTE: It takes effect if cpuPolicy = "coreSched". + CoreExpeller *bool `json:"coreExpeller,omitempty"` } +type CPUQOSPolicy string + +const ( + // CPUQOSPolicyGroupIdentity indicates the Group Identity is applied to ensure the CPU QoS. 
+ CPUQOSPolicyGroupIdentity CPUQOSPolicy = "groupIdentity" + // CPUQOSPolicyCoreSched indicates the Linux Core Scheduling and CGroup Idle are applied to ensure the CPU QoS. + CPUQOSPolicyCoreSched CPUQOSPolicy = "coreSched" +) + // MemoryQOS enables memory qos features. type MemoryQOS struct { // memcg qos @@ -185,7 +204,15 @@ type ResourceQOS struct { ResctrlQOS *ResctrlQOSCfg `json:"resctrlQOS,omitempty"` } +type ResourceQOSPolicies struct { + // applied policy for the CPU QoS, default = "groupIdentity" + CPUPolicy *CPUQOSPolicy `json:"cpuPolicy,omitempty"` +} + type ResourceQOSStrategy struct { + // Policies of pod QoS. + Policies *ResourceQOSPolicies `json:"policies,omitempty"` + // ResourceQOS for LSR pods. LSRClass *ResourceQOS `json:"lsrClass,omitempty"` diff --git a/apis/slo/v1alpha1/pod.go b/apis/slo/v1alpha1/pod.go index de30552f2..fe10319aa 100644 --- a/apis/slo/v1alpha1/pod.go +++ b/apis/slo/v1alpha1/pod.go @@ -20,6 +20,7 @@ import ( "encoding/json" corev1 "k8s.io/api/core/v1" + "k8s.io/utils/pointer" apiext "github.com/koordinator-sh/koordinator/apis/extension" ) @@ -64,3 +65,40 @@ func GetPodMemoryQoSConfig(pod *corev1.Pod) (*PodMemoryQOSConfig, error) { } return &cfg, nil } + +const ( + // AnnotationCoreSchedGroupID is the annotation key of the group ID of the Linux Core Scheduling. + // Value should be a valid UUID or the none value "0". + // When the value is a valid UUID, pods with that group ID and the same CoreExpeller status on the node will be + // assigned to the same core sched cookie. + // When the value is the none value "0", the pod will be reset to the default core sched cookie `0`. + // When the annotation is missing but the node-level strategy enables the core sched, the pod will be assigned an + // internal group according to the pod's UID. 
+ // + // Core Sched: https://docs.kernel.org/admin-guide/hw-vuln/core-scheduling.html + // When the Core Sched is enabled, pods with different core sched group IDs will not run on the same SMT + // core at the same time, which means they will take different core sched cookies. If a pod disables the core + // sched, it will take the default core sched cookie (0) and will also be force-idled instead of running on the same + // SMT core concurrently with the core-sched-enabled pods. In addition, the CoreExpeller configured in ResourceQOS + // also gives pods an individual cookie, separate from pods of other QoS classes, by adding a suffix to the group ID. + // So the pods of different QoS will take different cookies when their CoreExpeller statuses differ, even if their + // group IDs are the same. + AnnotationCoreSchedGroupID = apiext.DomainPrefix + "core-sched-group-id" + + // CoreSchedGroupIDNone is the none value of the core sched group ID which indicates the core sched is disabled for + // the pod. The pod will be reset to the system-default cookie `0`. + CoreSchedGroupIDNone = "0" +) + +// GetCoreSchedGroupID gets the core sched group ID from the pod annotations. +// It returns the core sched group ID and whether the pod explicitly disables the core sched. 
+func GetCoreSchedGroupID(annotations map[string]string) (string, *bool) { + if annotations == nil { + return "", nil + } + value, ok := annotations[AnnotationCoreSchedGroupID] + if !ok { + return "", nil + } + return value, pointer.Bool(value == CoreSchedGroupIDNone) +} diff --git a/apis/slo/v1alpha1/zz_generated.deepcopy.go b/apis/slo/v1alpha1/zz_generated.deepcopy.go index 6ff1c02a8..dfa0d2ab8 100644 --- a/apis/slo/v1alpha1/zz_generated.deepcopy.go +++ b/apis/slo/v1alpha1/zz_generated.deepcopy.go @@ -194,6 +194,16 @@ func (in *CPUQOS) DeepCopyInto(out *CPUQOS) { *out = new(int64) **out = **in } + if in.SchedIdle != nil { + in, out := &in.SchedIdle, &out.SchedIdle + *out = new(int64) + **out = **in + } + if in.CoreExpeller != nil { + in, out := &in.CoreExpeller, &out.CoreExpeller + *out = new(bool) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CPUQOS. @@ -912,9 +922,34 @@ func (in *ResourceQOS) DeepCopy() *ResourceQOS { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ResourceQOSPolicies) DeepCopyInto(out *ResourceQOSPolicies) { + *out = *in + if in.CPUPolicy != nil { + in, out := &in.CPUPolicy, &out.CPUPolicy + *out = new(CPUQOSPolicy) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ResourceQOSPolicies. +func (in *ResourceQOSPolicies) DeepCopy() *ResourceQOSPolicies { + if in == nil { + return nil + } + out := new(ResourceQOSPolicies) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *ResourceQOSStrategy) DeepCopyInto(out *ResourceQOSStrategy) { *out = *in + if in.Policies != nil { + in, out := &in.Policies, &out.Policies + *out = new(ResourceQOSPolicies) + (*in).DeepCopyInto(*out) + } if in.LSRClass != nil { in, out := &in.LSRClass, &out.LSRClass *out = new(ResourceQOS) diff --git a/config/crd/bases/slo.koordinator.sh_nodeslos.yaml b/config/crd/bases/slo.koordinator.sh_nodeslos.yaml index a8f36bd4f..58d134ab8 100644 --- a/config/crd/bases/slo.koordinator.sh_nodeslos.yaml +++ b/config/crd/bases/slo.koordinator.sh_nodeslos.yaml @@ -176,12 +176,26 @@ spec: cpuQOS: description: CPUQOSCfg stores node-level config of cpu qos properties: + coreExpeller: + description: 'whether pods of the QoS class can expel + the cgroup idle pods at the SMT-level. default = false + If set to true, pods of this QoS will use a dedicated + core sched group for noise clean with the SchedIdle + pods. NOTE: It takes effect if cpuPolicy = "coreSched".' + type: boolean enable: description: Enable indicates whether the cpu qos is enabled. type: boolean groupIdentity: - description: group identity value for pods, default = - 0 + description: 'group identity value for pods, default = + 0 NOTE: It takes effect if cpuPolicy = "groupIdentity".' + format: int64 + type: integer + schedIdle: + description: 'cpu.idle value for pods, default = 0. `1` + means using SCHED_IDLE. CGroup Idle (introduced since + mainline Linux 5.15): https://lore.kernel.org/lkml/162971078674.25758.15464079371945307825.tip-bot2@tip-bot2/#r + NOTE: It takes effect if cpuPolicy = "coreSched".' format: int64 type: integer type: object @@ -387,12 +401,26 @@ spec: cpuQOS: description: CPUQOSCfg stores node-level config of cpu qos properties: + coreExpeller: + description: 'whether pods of the QoS class can expel + the cgroup idle pods at the SMT-level. default = false + If set to true, pods of this QoS will use a dedicated + core sched group for noise clean with the SchedIdle + pods. 
NOTE: It takes effect if cpuPolicy = "coreSched".' + type: boolean enable: description: Enable indicates whether the cpu qos is enabled. type: boolean groupIdentity: - description: group identity value for pods, default = - 0 + description: 'group identity value for pods, default = + 0 NOTE: It takes effect if cpuPolicy = "groupIdentity".' + format: int64 + type: integer + schedIdle: + description: 'cpu.idle value for pods, default = 0. `1` + means using SCHED_IDLE. CGroup Idle (introduced since + mainline Linux 5.15): https://lore.kernel.org/lkml/162971078674.25758.15464079371945307825.tip-bot2@tip-bot2/#r + NOTE: It takes effect if cpuPolicy = "coreSched".' format: int64 type: integer type: object @@ -598,12 +626,26 @@ spec: cpuQOS: description: CPUQOSCfg stores node-level config of cpu qos properties: + coreExpeller: + description: 'whether pods of the QoS class can expel + the cgroup idle pods at the SMT-level. default = false + If set to true, pods of this QoS will use a dedicated + core sched group for noise clean with the SchedIdle + pods. NOTE: It takes effect if cpuPolicy = "coreSched".' + type: boolean enable: description: Enable indicates whether the cpu qos is enabled. type: boolean groupIdentity: - description: group identity value for pods, default = - 0 + description: 'group identity value for pods, default = + 0 NOTE: It takes effect if cpuPolicy = "groupIdentity".' + format: int64 + type: integer + schedIdle: + description: 'cpu.idle value for pods, default = 0. `1` + means using SCHED_IDLE. CGroup Idle (introduced since + mainline Linux 5.15): https://lore.kernel.org/lkml/162971078674.25758.15464079371945307825.tip-bot2@tip-bot2/#r + NOTE: It takes effect if cpuPolicy = "coreSched".' format: int64 type: integer type: object @@ -809,12 +851,26 @@ spec: cpuQOS: description: CPUQOSCfg stores node-level config of cpu qos properties: + coreExpeller: + description: 'whether pods of the QoS class can expel + the cgroup idle pods at the SMT-level. 
default = false + If set to true, pods of this QoS will use a dedicated + core sched group for noise clean with the SchedIdle + pods. NOTE: It takes effect if cpuPolicy = "coreSched".' + type: boolean enable: description: Enable indicates whether the cpu qos is enabled. type: boolean groupIdentity: - description: group identity value for pods, default = - 0 + description: 'group identity value for pods, default = + 0 NOTE: It takes effect if cpuPolicy = "groupIdentity".' + format: int64 + type: integer + schedIdle: + description: 'cpu.idle value for pods, default = 0. `1` + means using SCHED_IDLE. CGroup Idle (introduced since + mainline Linux 5.15): https://lore.kernel.org/lkml/162971078674.25758.15464079371945307825.tip-bot2@tip-bot2/#r + NOTE: It takes effect if cpuPolicy = "coreSched".' format: int64 type: integer type: object @@ -948,6 +1004,13 @@ spec: type: integer type: object type: object + policies: + description: Policies of pod QoS. + properties: + cpuPolicy: + description: applied policy for the CPU QoS, default = "groupIdentity" + type: string + type: object systemClass: description: ResourceQOS for system pods properties: @@ -1020,12 +1083,26 @@ spec: cpuQOS: description: CPUQOSCfg stores node-level config of cpu qos properties: + coreExpeller: + description: 'whether pods of the QoS class can expel + the cgroup idle pods at the SMT-level. default = false + If set to true, pods of this QoS will use a dedicated + core sched group for noise clean with the SchedIdle + pods. NOTE: It takes effect if cpuPolicy = "coreSched".' + type: boolean enable: description: Enable indicates whether the cpu qos is enabled. type: boolean groupIdentity: - description: group identity value for pods, default = - 0 + description: 'group identity value for pods, default = + 0 NOTE: It takes effect if cpuPolicy = "groupIdentity".' + format: int64 + type: integer + schedIdle: + description: 'cpu.idle value for pods, default = 0. `1` + means using SCHED_IDLE. 
CGroup Idle (introduced since + mainline Linux 5.15): https://lore.kernel.org/lkml/162971078674.25758.15464079371945307825.tip-bot2@tip-bot2/#r + NOTE: It takes effect if cpuPolicy = "coreSched".' format: int64 type: integer type: object diff --git a/pkg/koordlet/runtimehooks/hooks/groupidentity/rule.go b/pkg/koordlet/runtimehooks/hooks/groupidentity/rule.go index 9b5a9a550..8a1bdae9e 100644 --- a/pkg/koordlet/runtimehooks/hooks/groupidentity/rule.go +++ b/pkg/koordlet/runtimehooks/hooks/groupidentity/rule.go @@ -76,16 +76,31 @@ func (r *bvtRule) getHostQOSBvtValue(qosClass ext.QoSClass) int64 { func (b *bvtPlugin) parseRule(mergedNodeSLOIf interface{}) (bool, error) { mergedNodeSLO := mergedNodeSLOIf.(*slov1alpha1.NodeSLOSpec) + qosStrategy := mergedNodeSLO.ResourceQOSStrategy - // check if bvt is enabled - enable := *mergedNodeSLO.ResourceQOSStrategy.LSRClass.CPUQOS.Enable || - *mergedNodeSLO.ResourceQOSStrategy.LSClass.CPUQOS.Enable || - *mergedNodeSLO.ResourceQOSStrategy.BEClass.CPUQOS.Enable + // default policy enables + isPolicyGroupIdentity := qosStrategy.Policies == nil || qosStrategy.Policies.CPUPolicy == nil || + len(*qosStrategy.Policies.CPUPolicy) <= 0 || *qosStrategy.Policies.CPUPolicy == slov1alpha1.CPUQOSPolicyGroupIdentity + // check if bvt (group identity) is enabled + lsrEnabled := isPolicyGroupIdentity && *qosStrategy.LSRClass.CPUQOS.Enable + lsEnabled := isPolicyGroupIdentity && *qosStrategy.LSClass.CPUQOS.Enable + beEnabled := isPolicyGroupIdentity && *qosStrategy.BEClass.CPUQOS.Enable // setting pod rule by qos config - lsrValue := *mergedNodeSLO.ResourceQOSStrategy.LSRClass.CPUQOS.CPUQOS.GroupIdentity - lsValue := *mergedNodeSLO.ResourceQOSStrategy.LSClass.CPUQOS.GroupIdentity - beValue := *mergedNodeSLO.ResourceQOSStrategy.BEClass.CPUQOS.GroupIdentity + // Group Identity should be reset if the CPU QOS disables (already merged in states informer) or the CPU QoS policy + // is not "groupIdentity". 
+ lsrValue := *sloconfig.NoneCPUQOS().GroupIdentity + if lsrEnabled { + lsrValue = *qosStrategy.LSRClass.CPUQOS.GroupIdentity + } + lsValue := *sloconfig.NoneCPUQOS().GroupIdentity + if lsEnabled { + lsValue = *qosStrategy.LSClass.CPUQOS.GroupIdentity + } + beValue := *sloconfig.NoneCPUQOS().GroupIdentity + if beEnabled { + beValue = *qosStrategy.BEClass.CPUQOS.GroupIdentity + } // setting besteffort according to BE besteffortDirVal := beValue @@ -95,18 +110,18 @@ func (b *bvtPlugin) parseRule(mergedNodeSLOIf interface{}) (bool, error) { burstableDirVal := lsValue burstablePodVal := lsValue - // NOTICE guaranteed root dir must set as 0 until kernel supported + // NOTE: guaranteed root dir must set as 0 until kernel supported guaranteedDirVal := *sloconfig.NoneCPUQOS().GroupIdentity // setting guaranteed pod enabled if LS or LSR enabled guaranteedPodVal := *sloconfig.NoneCPUQOS().GroupIdentity - if *mergedNodeSLO.ResourceQOSStrategy.LSRClass.CPUQOS.Enable { + if lsrEnabled { guaranteedPodVal = lsrValue - } else if *mergedNodeSLO.ResourceQOSStrategy.LSClass.CPUQOS.Enable { + } else if lsEnabled { guaranteedPodVal = lsValue } newRule := &bvtRule{ - enable: enable, + enable: lsrEnabled || lsEnabled || beEnabled, podQOSParams: map[ext.QoSClass]int64{ ext.QoSLSE: lsrValue, ext.QoSLSR: lsrValue, diff --git a/pkg/koordlet/runtimehooks/hooks/groupidentity/rule_test.go b/pkg/koordlet/runtimehooks/hooks/groupidentity/rule_test.go index d4cb54cb5..4bd00fded 100644 --- a/pkg/koordlet/runtimehooks/hooks/groupidentity/rule_test.go +++ b/pkg/koordlet/runtimehooks/hooks/groupidentity/rule_test.go @@ -99,6 +99,8 @@ func Test_bvtRule_getPodBvtValue(t *testing.T) { } func Test_bvtPlugin_parseRule(t *testing.T) { + policyGroupIdentity := slov1alpha1.CPUQOSPolicyGroupIdentity + policyCoreSched := slov1alpha1.CPUQOSPolicyCoreSched type args struct { rule *bvtRule mergedNodeSLO *slov1alpha1.NodeSLOSpec @@ -115,6 +117,9 @@ func Test_bvtPlugin_parseRule(t *testing.T) { args: args{ 
mergedNodeSLO: &slov1alpha1.NodeSLOSpec{ ResourceQOSStrategy: &slov1alpha1.ResourceQOSStrategy{ + Policies: &slov1alpha1.ResourceQOSPolicies{ + CPUPolicy: &policyGroupIdentity, + }, LSRClass: &slov1alpha1.ResourceQOS{ CPUQOS: &slov1alpha1.CPUQOSCfg{ Enable: pointer.Bool(true), @@ -169,6 +174,9 @@ func Test_bvtPlugin_parseRule(t *testing.T) { args: args{ mergedNodeSLO: &slov1alpha1.NodeSLOSpec{ ResourceQOSStrategy: &slov1alpha1.ResourceQOSStrategy{ + Policies: &slov1alpha1.ResourceQOSPolicies{ + CPUPolicy: &policyGroupIdentity, + }, LSRClass: &slov1alpha1.ResourceQOS{ CPUQOS: &slov1alpha1.CPUQOSCfg{ Enable: pointer.Bool(false), @@ -223,6 +231,9 @@ func Test_bvtPlugin_parseRule(t *testing.T) { args: args{ mergedNodeSLO: &slov1alpha1.NodeSLOSpec{ ResourceQOSStrategy: &slov1alpha1.ResourceQOSStrategy{ + Policies: &slov1alpha1.ResourceQOSPolicies{ + CPUPolicy: &policyGroupIdentity, + }, LSRClass: &slov1alpha1.ResourceQOS{ CPUQOS: &slov1alpha1.CPUQOSCfg{ Enable: pointer.Bool(false), @@ -277,6 +288,9 @@ func Test_bvtPlugin_parseRule(t *testing.T) { args: args{ mergedNodeSLO: &slov1alpha1.NodeSLOSpec{ ResourceQOSStrategy: &slov1alpha1.ResourceQOSStrategy{ + Policies: &slov1alpha1.ResourceQOSPolicies{ + CPUPolicy: &policyGroupIdentity, + }, LSRClass: &slov1alpha1.ResourceQOS{ CPUQOS: &slov1alpha1.CPUQOSCfg{ Enable: pointer.Bool(false), @@ -350,6 +364,9 @@ func Test_bvtPlugin_parseRule(t *testing.T) { }, mergedNodeSLO: &slov1alpha1.NodeSLOSpec{ ResourceQOSStrategy: &slov1alpha1.ResourceQOSStrategy{ + Policies: &slov1alpha1.ResourceQOSPolicies{ + CPUPolicy: &policyGroupIdentity, + }, LSRClass: &slov1alpha1.ResourceQOS{ CPUQOS: &slov1alpha1.CPUQOSCfg{ Enable: pointer.Bool(true), @@ -399,6 +416,175 @@ func Test_bvtPlugin_parseRule(t *testing.T) { want: false, wantErr: false, }, + { + name: "only enable cpu qos for BE", + args: args{ + mergedNodeSLO: &slov1alpha1.NodeSLOSpec{ + ResourceQOSStrategy: &slov1alpha1.ResourceQOSStrategy{ + Policies: &slov1alpha1.ResourceQOSPolicies{ + 
CPUPolicy: &policyGroupIdentity, + }, + LSRClass: &slov1alpha1.ResourceQOS{ + CPUQOS: &slov1alpha1.CPUQOSCfg{ + Enable: pointer.Bool(false), + CPUQOS: slov1alpha1.CPUQOS{ + GroupIdentity: pointer.Int64(2), + }, + }, + }, + LSClass: &slov1alpha1.ResourceQOS{ + CPUQOS: &slov1alpha1.CPUQOSCfg{ + Enable: pointer.Bool(false), + CPUQOS: slov1alpha1.CPUQOS{ + GroupIdentity: pointer.Int64(2), + }, + }, + }, + BEClass: &slov1alpha1.ResourceQOS{ + CPUQOS: &slov1alpha1.CPUQOSCfg{ + Enable: pointer.Bool(true), + CPUQOS: slov1alpha1.CPUQOS{ + GroupIdentity: pointer.Int64(-1), + }, + }, + }, + }, + }, + }, + wantRule: bvtRule{ + enable: true, + podQOSParams: map[ext.QoSClass]int64{ + ext.QoSLSE: 0, + ext.QoSLSR: 0, + ext.QoSLS: 0, + ext.QoSBE: -1, + }, + kubeQOSDirParams: map[corev1.PodQOSClass]int64{ + corev1.PodQOSGuaranteed: 0, + corev1.PodQOSBurstable: 0, + corev1.PodQOSBestEffort: -1, + }, + kubeQOSPodParams: map[corev1.PodQOSClass]int64{ + corev1.PodQOSGuaranteed: 0, + corev1.PodQOSBurstable: 0, + corev1.PodQOSBestEffort: -1, + }, + }, + want: true, + wantErr: false, + }, + { + name: "parse default cpu qos policy", + args: args{ + mergedNodeSLO: &slov1alpha1.NodeSLOSpec{ + ResourceQOSStrategy: &slov1alpha1.ResourceQOSStrategy{ + Policies: &slov1alpha1.ResourceQOSPolicies{}, + LSRClass: &slov1alpha1.ResourceQOS{ + CPUQOS: &slov1alpha1.CPUQOSCfg{ + Enable: pointer.Bool(true), + CPUQOS: slov1alpha1.CPUQOS{ + GroupIdentity: pointer.Int64(2), + }, + }, + }, + LSClass: &slov1alpha1.ResourceQOS{ + CPUQOS: &slov1alpha1.CPUQOSCfg{ + Enable: pointer.Bool(true), + CPUQOS: slov1alpha1.CPUQOS{ + GroupIdentity: pointer.Int64(2), + }, + }, + }, + BEClass: &slov1alpha1.ResourceQOS{ + CPUQOS: &slov1alpha1.CPUQOSCfg{ + Enable: pointer.Bool(true), + CPUQOS: slov1alpha1.CPUQOS{ + GroupIdentity: pointer.Int64(-1), + }, + }, + }, + }, + }, + }, + wantRule: bvtRule{ + enable: true, + podQOSParams: map[ext.QoSClass]int64{ + ext.QoSLSE: 2, + ext.QoSLSR: 2, + ext.QoSLS: 2, + ext.QoSBE: -1, + }, + 
kubeQOSDirParams: map[corev1.PodQOSClass]int64{ + corev1.PodQOSGuaranteed: 0, + corev1.PodQOSBurstable: 2, + corev1.PodQOSBestEffort: -1, + }, + kubeQOSPodParams: map[corev1.PodQOSClass]int64{ + corev1.PodQOSGuaranteed: 2, + corev1.PodQOSBurstable: 2, + corev1.PodQOSBestEffort: -1, + }, + }, + want: true, + wantErr: false, + }, + { + name: "parse different cpu qos policy", + args: args{ + mergedNodeSLO: &slov1alpha1.NodeSLOSpec{ + ResourceQOSStrategy: &slov1alpha1.ResourceQOSStrategy{ + Policies: &slov1alpha1.ResourceQOSPolicies{ + CPUPolicy: &policyCoreSched, + }, + LSRClass: &slov1alpha1.ResourceQOS{ + CPUQOS: &slov1alpha1.CPUQOSCfg{ + Enable: pointer.Bool(true), + CPUQOS: slov1alpha1.CPUQOS{ + GroupIdentity: pointer.Int64(2), + }, + }, + }, + LSClass: &slov1alpha1.ResourceQOS{ + CPUQOS: &slov1alpha1.CPUQOSCfg{ + Enable: pointer.Bool(true), + CPUQOS: slov1alpha1.CPUQOS{ + GroupIdentity: pointer.Int64(2), + }, + }, + }, + BEClass: &slov1alpha1.ResourceQOS{ + CPUQOS: &slov1alpha1.CPUQOSCfg{ + Enable: pointer.Bool(true), + CPUQOS: slov1alpha1.CPUQOS{ + GroupIdentity: pointer.Int64(-1), + }, + }, + }, + }, + }, + }, + wantRule: bvtRule{ + enable: false, + podQOSParams: map[ext.QoSClass]int64{ + ext.QoSLSE: 0, + ext.QoSLSR: 0, + ext.QoSLS: 0, + ext.QoSBE: 0, + }, + kubeQOSDirParams: map[corev1.PodQOSClass]int64{ + corev1.PodQOSGuaranteed: 0, + corev1.PodQOSBurstable: 0, + corev1.PodQOSBestEffort: 0, + }, + kubeQOSPodParams: map[corev1.PodQOSClass]int64{ + corev1.PodQOSGuaranteed: 0, + corev1.PodQOSBurstable: 0, + corev1.PodQOSBestEffort: 0, + }, + }, + want: true, + wantErr: false, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { diff --git a/pkg/koordlet/util/cold_page.go b/pkg/koordlet/util/cold_page.go index a184fa45d..3f44ab1c3 100644 --- a/pkg/koordlet/util/cold_page.go +++ b/pkg/koordlet/util/cold_page.go @@ -13,6 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. */ + package util import ( diff --git a/pkg/util/sloconfig/nodeslo_config.go b/pkg/util/sloconfig/nodeslo_config.go index 31f33b6f1..de746152b 100644 --- a/pkg/util/sloconfig/nodeslo_config.go +++ b/pkg/util/sloconfig/nodeslo_config.go @@ -64,18 +64,26 @@ func DefaultCPUQOS(qos apiext.QoSClass) *slov1alpha1.CPUQOS { case apiext.QoSLSR: cpuQOS = &slov1alpha1.CPUQOS{ GroupIdentity: pointer.Int64(2), + SchedIdle: pointer.Int64(0), + CoreExpeller: pointer.Bool(true), } case apiext.QoSLS: cpuQOS = &slov1alpha1.CPUQOS{ GroupIdentity: pointer.Int64(2), + SchedIdle: pointer.Int64(0), + CoreExpeller: pointer.Bool(true), } case apiext.QoSBE: cpuQOS = &slov1alpha1.CPUQOS{ GroupIdentity: pointer.Int64(-1), + SchedIdle: pointer.Int64(1), + CoreExpeller: pointer.Bool(false), } case apiext.QoSSystem: cpuQOS = &slov1alpha1.CPUQOS{ GroupIdentity: pointer.Int64(0), + SchedIdle: pointer.Int64(0), + CoreExpeller: pointer.Bool(false), } default: klog.Infof("cpu qos has no auto config for qos %s", qos) @@ -184,8 +192,16 @@ func DefaultMemoryQOS(qos apiext.QoSClass) *slov1alpha1.MemoryQOS { return memoryQOS } +func DefaultResourceQOSPolicies() *slov1alpha1.ResourceQOSPolicies { + defaultCPUPolicy := slov1alpha1.CPUQOSPolicyGroupIdentity + return &slov1alpha1.ResourceQOSPolicies{ + CPUPolicy: &defaultCPUPolicy, + } +} + func DefaultResourceQOSStrategy() *slov1alpha1.ResourceQOSStrategy { return &slov1alpha1.ResourceQOSStrategy{ + Policies: DefaultResourceQOSPolicies(), LSRClass: &slov1alpha1.ResourceQOS{ CPUQOS: &slov1alpha1.CPUQOSCfg{ Enable: pointer.Bool(false), @@ -265,6 +281,8 @@ func NoneResourceQOS(qos apiext.QoSClass) *slov1alpha1.ResourceQOS { func NoneCPUQOS() *slov1alpha1.CPUQOS { return &slov1alpha1.CPUQOS{ GroupIdentity: pointer.Int64(0), + SchedIdle: pointer.Int64(0), + CoreExpeller: pointer.Bool(false), } } @@ -291,9 +309,17 @@ func NoneMemoryQOS() *slov1alpha1.MemoryQOS { } } 
+func NoneResourceQOSPolicies() *slov1alpha1.ResourceQOSPolicies { + noneCPUPolicy := slov1alpha1.CPUQOSPolicyGroupIdentity + return &slov1alpha1.ResourceQOSPolicies{ + CPUPolicy: &noneCPUPolicy, + } +} + // NoneResourceQOSStrategy indicates the qos strategy with all qos func NoneResourceQOSStrategy() *slov1alpha1.ResourceQOSStrategy { return &slov1alpha1.ResourceQOSStrategy{ + Policies: NoneResourceQOSPolicies(), LSRClass: NoneResourceQOS(apiext.QoSLSR), LSClass: NoneResourceQOS(apiext.QoSLS), BEClass: NoneResourceQOS(apiext.QoSBE), diff --git a/pkg/util/sloconfig/nodeslo_config_test.go b/pkg/util/sloconfig/nodeslo_config_test.go index eee909665..cfad6e730 100644 --- a/pkg/util/sloconfig/nodeslo_config_test.go +++ b/pkg/util/sloconfig/nodeslo_config_test.go @@ -39,6 +39,7 @@ func Test_DefaultNodeSLOSpecConfig(t *testing.T) { func Test_NoneResourceQOSStrategy(t *testing.T) { expect := &slov1alpha1.ResourceQOSStrategy{ + Policies: NoneResourceQOSPolicies(), LSRClass: NoneResourceQOS(apiext.QoSLSR), LSClass: NoneResourceQOS(apiext.QoSLS), BEClass: NoneResourceQOS(apiext.QoSBE),