diff --git a/pkg/descheduler/apis/config/types_loadaware.go b/pkg/descheduler/apis/config/types_loadaware.go index 8cda4c487..a55dab067 100644 --- a/pkg/descheduler/apis/config/types_loadaware.go +++ b/pkg/descheduler/apis/config/types_loadaware.go @@ -40,6 +40,11 @@ type LowNodeLoadArgs struct { // By default, NumberOfNodes is set to zero. NumberOfNodes int32 + // NodeMetricExpirationSeconds indicates the NodeMetric expiration in seconds. + // When NodeMetrics expired, the node is considered abnormal, and should not be considered by deschedule plugin. + // Default is 180 seconds. + NodeMetricExpirationSeconds *int64 + // Naming this one differently since namespaces are still // considered while considering resoures used by pods // but then filtered out before eviction diff --git a/pkg/descheduler/apis/config/v1alpha2/defaults.go b/pkg/descheduler/apis/config/v1alpha2/defaults.go index 321ce2af9..526cb1d9c 100644 --- a/pkg/descheduler/apis/config/v1alpha2/defaults.go +++ b/pkg/descheduler/apis/config/v1alpha2/defaults.go @@ -34,7 +34,8 @@ import ( ) const ( - defaultMigrationControllerMaxConcurrentReconciles = 1 + defaultMigrationControllerMaxConcurrentReconciles = 1 + defaultNodeMetricExpirationSeconds int64 = 180 defaultMaxMigratingPerNode = 2 defaultMigrationJobMode = sev1alpha1.PodMigrationJobModeReservationFirst @@ -263,6 +264,10 @@ func SetDefaults_LowNodeLoadArgs(obj *LowNodeLoadArgs) { obj.AnomalyCondition.ConsecutiveAbnormalities = defaultLoadAnomalyCondition.ConsecutiveAbnormalities } + if obj.NodeMetricExpirationSeconds == nil { + obj.NodeMetricExpirationSeconds = pointer.Int64(defaultNodeMetricExpirationSeconds) + } + defaultResourceWeights := map[corev1.ResourceName]int64{ corev1.ResourceCPU: 1, corev1.ResourceMemory: 1, diff --git a/pkg/descheduler/apis/config/v1alpha2/defaults_test.go b/pkg/descheduler/apis/config/v1alpha2/defaults_test.go index f4fc9b8bf..459827604 100644 --- a/pkg/descheduler/apis/config/v1alpha2/defaults_test.go +++ 
b/pkg/descheduler/apis/config/v1alpha2/defaults_test.go @@ -38,8 +38,9 @@ func TestSetDefaults_LowNodeLoadArgs(t *testing.T) { NodeFit: pointer.Bool(false), }, expected: &LowNodeLoadArgs{ - NodeFit: pointer.Bool(false), - AnomalyCondition: defaultLoadAnomalyCondition, + NodeFit: pointer.Bool(false), + NodeMetricExpirationSeconds: pointer.Int64(defaultNodeMetricExpirationSeconds), + AnomalyCondition: defaultLoadAnomalyCondition, ResourceWeights: map[corev1.ResourceName]int64{ corev1.ResourceCPU: 1, corev1.ResourceMemory: 1, @@ -56,7 +57,8 @@ func TestSetDefaults_LowNodeLoadArgs(t *testing.T) { }, }, expected: &LowNodeLoadArgs{ - NodeFit: pointer.Bool(true), + NodeFit: pointer.Bool(true), + NodeMetricExpirationSeconds: pointer.Int64(defaultNodeMetricExpirationSeconds), AnomalyCondition: &LoadAnomalyCondition{ Timeout: &metav1.Duration{Duration: 10 * time.Second}, ConsecutiveAbnormalities: defaultLoadAnomalyCondition.ConsecutiveAbnormalities, @@ -82,8 +84,9 @@ func TestSetDefaults_LowNodeLoadArgs(t *testing.T) { }, }, expected: &LowNodeLoadArgs{ - NodeFit: pointer.Bool(true), - AnomalyCondition: defaultLoadAnomalyCondition, + NodeFit: pointer.Bool(true), + NodeMetricExpirationSeconds: pointer.Int64(defaultNodeMetricExpirationSeconds), + AnomalyCondition: defaultLoadAnomalyCondition, LowThresholds: ResourceThresholds{ corev1.ResourceCPU: 30, corev1.ResourceMemory: 30, diff --git a/pkg/descheduler/apis/config/v1alpha2/types_loadaware.go b/pkg/descheduler/apis/config/v1alpha2/types_loadaware.go index 9ef5f0347..6c8a05ad5 100644 --- a/pkg/descheduler/apis/config/v1alpha2/types_loadaware.go +++ b/pkg/descheduler/apis/config/v1alpha2/types_loadaware.go @@ -39,6 +39,11 @@ type LowNodeLoadArgs struct { // By default, NumberOfNodes is set to zero. NumberOfNodes *int32 `json:"numberOfNodes,omitempty"` + // NodeMetricExpirationSeconds indicates the NodeMetric expiration in seconds. 
+ // When NodeMetrics expired, the node is considered abnormal, and should not be considered by deschedule plugin. + // Default is 180 seconds. + NodeMetricExpirationSeconds *int64 `json:"nodeMetricExpirationSeconds,omitempty"` + // Naming this one differently since namespaces are still // considered while considering resoures used by pods // but then filtered out before eviction diff --git a/pkg/descheduler/apis/config/v1alpha2/zz_generated.conversion.go b/pkg/descheduler/apis/config/v1alpha2/zz_generated.conversion.go index 39b99ce55..7b8297d07 100644 --- a/pkg/descheduler/apis/config/v1alpha2/zz_generated.conversion.go +++ b/pkg/descheduler/apis/config/v1alpha2/zz_generated.conversion.go @@ -367,6 +367,7 @@ func autoConvert_v1alpha2_LowNodeLoadArgs_To_config_LowNodeLoadArgs(in *LowNodeL if err := v1.Convert_Pointer_int32_To_int32(&in.NumberOfNodes, &out.NumberOfNodes, s); err != nil { return err } + out.NodeMetricExpirationSeconds = (*int64)(unsafe.Pointer(in.NodeMetricExpirationSeconds)) out.EvictableNamespaces = (*config.Namespaces)(unsafe.Pointer(in.EvictableNamespaces)) out.NodeSelector = (*v1.LabelSelector)(unsafe.Pointer(in.NodeSelector)) out.PodSelectors = *(*[]config.LowNodeLoadPodSelector)(unsafe.Pointer(&in.PodSelectors)) @@ -412,6 +413,7 @@ func autoConvert_config_LowNodeLoadArgs_To_v1alpha2_LowNodeLoadArgs(in *config.L if err := v1.Convert_int32_To_Pointer_int32(&in.NumberOfNodes, &out.NumberOfNodes, s); err != nil { return err } + out.NodeMetricExpirationSeconds = (*int64)(unsafe.Pointer(in.NodeMetricExpirationSeconds)) out.EvictableNamespaces = (*Namespaces)(unsafe.Pointer(in.EvictableNamespaces)) out.NodeSelector = (*v1.LabelSelector)(unsafe.Pointer(in.NodeSelector)) out.PodSelectors = *(*[]LowNodeLoadPodSelector)(unsafe.Pointer(&in.PodSelectors)) diff --git a/pkg/descheduler/apis/config/v1alpha2/zz_generated.deepcopy.go b/pkg/descheduler/apis/config/v1alpha2/zz_generated.deepcopy.go index c0e97cff6..58ce772be 100644 --- 
a/pkg/descheduler/apis/config/v1alpha2/zz_generated.deepcopy.go +++ b/pkg/descheduler/apis/config/v1alpha2/zz_generated.deepcopy.go @@ -179,6 +179,11 @@ func (in *LowNodeLoadArgs) DeepCopyInto(out *LowNodeLoadArgs) { *out = new(int32) **out = **in } + if in.NodeMetricExpirationSeconds != nil { + in, out := &in.NodeMetricExpirationSeconds, &out.NodeMetricExpirationSeconds + *out = new(int64) + **out = **in + } if in.EvictableNamespaces != nil { in, out := &in.EvictableNamespaces, &out.EvictableNamespaces *out = new(Namespaces) diff --git a/pkg/descheduler/apis/config/validation/validation_loadaware.go b/pkg/descheduler/apis/config/validation/validation_loadaware.go index 2e2dd7969..dd4af65a8 100644 --- a/pkg/descheduler/apis/config/validation/validation_loadaware.go +++ b/pkg/descheduler/apis/config/validation/validation_loadaware.go @@ -30,6 +30,10 @@ func ValidateLowLoadUtilizationArgs(path *field.Path, args *deschedulerconfig.Lo allErrs = append(allErrs, field.Invalid(path.Child("numberOfNodes"), args.NumberOfNodes, "must be greater than or equal to 0")) } + if args.NodeMetricExpirationSeconds != nil && *args.NodeMetricExpirationSeconds <= 0 { + allErrs = append(allErrs, field.Invalid(path.Child("nodeMetricExpirationSeconds"), *args.NodeMetricExpirationSeconds, "nodeMetricExpirationSeconds should be a positive value")) + } + if args.EvictableNamespaces != nil && len(args.EvictableNamespaces.Include) > 0 && len(args.EvictableNamespaces.Exclude) > 0 { allErrs = append(allErrs, field.Invalid(path.Child("evictableNamespaces"), args.EvictableNamespaces, "only one of Include/Exclude namespaces can be set")) } diff --git a/pkg/descheduler/apis/config/zz_generated.deepcopy.go b/pkg/descheduler/apis/config/zz_generated.deepcopy.go index 8d3c76530..ba2cc7bc7 100644 --- a/pkg/descheduler/apis/config/zz_generated.deepcopy.go +++ b/pkg/descheduler/apis/config/zz_generated.deepcopy.go @@ -165,6 +165,11 @@ func (in *LoadAnomalyCondition) DeepCopy() *LoadAnomalyCondition { func (in
*LowNodeLoadArgs) DeepCopyInto(out *LowNodeLoadArgs) { *out = *in out.TypeMeta = in.TypeMeta + if in.NodeMetricExpirationSeconds != nil { + in, out := &in.NodeMetricExpirationSeconds, &out.NodeMetricExpirationSeconds + *out = new(int64) + **out = **in + } if in.EvictableNamespaces != nil { in, out := &in.EvictableNamespaces, &out.EvictableNamespaces *out = new(Namespaces) diff --git a/pkg/descheduler/framework/plugins/loadaware/low_node_load.go b/pkg/descheduler/framework/plugins/loadaware/low_node_load.go index 1ad36d79b..b2bda6a57 100644 --- a/pkg/descheduler/framework/plugins/loadaware/low_node_load.go +++ b/pkg/descheduler/framework/plugins/loadaware/low_node_load.go @@ -164,7 +164,7 @@ func (pl *LowNodeLoad) processOneNodePool(ctx context.Context, nodePool *desched lowThresholds, highThresholds := newThresholds(nodePool.UseDeviationThresholds, nodePool.LowThresholds, nodePool.HighThresholds) resourceNames := getResourceNames(lowThresholds) - nodeUsages := getNodeUsage(nodes, resourceNames, pl.nodeMetricLister, pl.handle.GetPodsAssignedToNodeFunc()) + nodeUsages := getNodeUsage(nodes, resourceNames, pl.nodeMetricLister, pl.handle.GetPodsAssignedToNodeFunc(), pl.args.NodeMetricExpirationSeconds) nodeThresholds := getNodeThresholds(nodeUsages, lowThresholds, highThresholds, resourceNames, nodePool.UseDeviationThresholds) lowNodes, sourceNodes := classifyNodes(nodeUsages, nodeThresholds, lowThresholdFilter, highThresholdFilter) diff --git a/pkg/descheduler/framework/plugins/loadaware/utilization_util.go b/pkg/descheduler/framework/plugins/loadaware/utilization_util.go index 57a2bae5f..c72f6cdc5 100644 --- a/pkg/descheduler/framework/plugins/loadaware/utilization_util.go +++ b/pkg/descheduler/framework/plugins/loadaware/utilization_util.go @@ -20,9 +20,11 @@ import ( "context" "fmt" "sort" + "time" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/klog/v2" @@ 
-127,7 +129,7 @@ func resourceThreshold(nodeCapacity corev1.ResourceList, resourceName corev1.Res return resource.NewQuantity(resourceCapacityFraction(resourceCapacityQuantity.Value()), resourceCapacityQuantity.Format) } -func getNodeUsage(nodes []*corev1.Node, resourceNames []corev1.ResourceName, nodeMetricLister slolisters.NodeMetricLister, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc) map[string]*NodeUsage { +func getNodeUsage(nodes []*corev1.Node, resourceNames []corev1.ResourceName, nodeMetricLister slolisters.NodeMetricLister, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc, nodeMetricExpirationSeconds *int64) map[string]*NodeUsage { nodeUsages := map[string]*NodeUsage{} for _, v := range nodes { pods, err := podutil.ListPodsOnANode(v.Name, getPodsAssignedToNode, nil) @@ -141,8 +143,10 @@ func getNodeUsage(nodes []*corev1.Node, resourceNames []corev1.ResourceName, nod klog.ErrorS(err, "Failed to get NodeMetric", "node", klog.KObj(v)) continue } - // TODO(joseph): We should check if NodeMetric is expired. - if nodeMetric.Status.NodeMetric == nil { + // We should check if NodeMetric is expired. 
+ if nodeMetric.Status.NodeMetric == nil || nodeMetricExpirationSeconds != nil && + isNodeMetricExpired(nodeMetric.Status.UpdateTime, *nodeMetricExpirationSeconds) { + klog.InfoS("NodeMetric is unavailable or expired, skipping the node", "node", klog.KObj(v)) continue } @@ -415,6 +419,12 @@ func isNodeUnderutilized(usage, thresholds map[corev1.ResourceName]*resource.Qua return true } +func isNodeMetricExpired(lastUpdateTime *metav1.Time, nodeMetricExpirationSeconds int64) bool { + return lastUpdateTime == nil || + nodeMetricExpirationSeconds > 0 && + time.Since(lastUpdateTime.Time) >= time.Duration(nodeMetricExpirationSeconds)*time.Second +} + func getResourceNames(thresholds ResourceThresholds) []corev1.ResourceName { names := make([]corev1.ResourceName, 0, len(thresholds)) for resourceName := range thresholds {