Skip to content

Commit

Permalink
scheduler: support allocating from reservation when no resource match…
Browse files Browse the repository at this point in the history
…ed (#2279)

Signed-off-by: saintube <[email protected]>
Co-authored-by: shenxin <[email protected]>
  • Loading branch information
saintube and shenxin authored Nov 26, 2024
1 parent 0f574d2 commit ceff9cd
Show file tree
Hide file tree
Showing 4 changed files with 175 additions and 14 deletions.
5 changes: 3 additions & 2 deletions pkg/scheduler/frameworkext/topologymanager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ func (m *topologyManager) Admit(ctx context.Context, cycleState *framework.Cycle
if !ok || extensionPointBeingExecuted == schedulingphase.PostFilter {
var admit bool
bestHint, admit = m.calculateAffinity(ctx, cycleState, policy, pod, nodeName, exclusivePolicy, allNUMANodeStatus)
klog.V(5).Infof("Best TopologyHint for (pod: %v): %v on node: %v", klog.KObj(pod), bestHint, nodeName)
klog.V(4).Infof("Best TopologyHint for (pod: %v): %+v on node %s, policy %s, exclusivePolicy %s, admit %v",
klog.KObj(pod), bestHint, nodeName, policy, exclusivePolicy, admit)
if !admit {
return framework.NewStatus(framework.Unschedulable, "node(s) NUMA Topology affinity error")
}
Expand Down Expand Up @@ -107,7 +108,7 @@ func (m *topologyManager) accumulateProvidersHints(ctx context.Context, cycleSta
// Get the TopologyHints for a Pod from a provider.
hints, _ := provider.GetPodTopologyHints(ctx, cycleState, pod, nodeName)
providersHints = append(providersHints, hints)
klog.V(5).Infof("TopologyHints for pod '%v': %v on node: %v", klog.KObj(pod), hints, nodeName)
klog.V(4).Infof("TopologyHints for pod '%v' by provider %T: %v on node: %v", klog.KObj(pod), provider, hints, nodeName)
}
return providersHints
}
Expand Down
11 changes: 4 additions & 7 deletions pkg/scheduler/plugins/reservation/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -324,9 +324,6 @@ func (pl *Plugin) PreFilterExtensions() framework.PreFilterExtensions {

func (pl *Plugin) AddPod(ctx context.Context, cycleState *framework.CycleState, podToSchedule *corev1.Pod, podInfoToAdd *framework.PodInfo, nodeInfo *framework.NodeInfo) *framework.Status {
podRequests := resourceapi.PodRequests(podInfoToAdd.Pod, resourceapi.PodResourcesOptions{})
if quotav1.IsZero(podRequests) { // TODO: support zero request pod in reservation
return nil
}

podRequests[corev1.ResourcePods] = *resource.NewQuantity(1, resource.DecimalSI) // count pods resources
state := getStateData(cycleState)
Expand Down Expand Up @@ -356,9 +353,6 @@ func (pl *Plugin) AddPod(ctx context.Context, cycleState *framework.CycleState,

func (pl *Plugin) RemovePod(ctx context.Context, cycleState *framework.CycleState, podToSchedule *corev1.Pod, podInfoToRemove *framework.PodInfo, nodeInfo *framework.NodeInfo) *framework.Status {
podRequests := resourceapi.PodRequests(podInfoToRemove.Pod, resourceapi.PodResourcesOptions{})
if quotav1.IsZero(podRequests) { // TODO: support zero request pod in reservation
return nil
}

podRequests[corev1.ResourcePods] = *resource.NewQuantity(1, resource.DecimalSI) // count pods resources
state := getStateData(cycleState)
Expand Down Expand Up @@ -481,7 +475,10 @@ func (pl *Plugin) filterWithReservations(ctx context.Context, cycleState *framew
var allInsufficientResourceReasonsByReservation []string
for _, rInfo := range matchedReservations {
resourceNames := quotav1.Intersection(rInfo.ResourceNames, podRequestsResourceNames)
if len(resourceNames) == 0 {
// NOTE: The reservation may not consider the irrelevant pods that have no matched resource names since it makes
// no sense in most cases but introduces a performance overhead. However, we allow pods to allocate reserved
// resources to accomplish their reservation affinities.
if len(resourceNames) == 0 && !state.hasAffinity {
continue
}
// When the pod specifies a reservation name, we record the admission reasons.
Expand Down
172 changes: 168 additions & 4 deletions pkg/scheduler/plugins/reservation/plugin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -860,6 +860,8 @@ func Test_filterWithReservations(t *testing.T) {
corev1.ResourceCPU: resource.MustParse("32"),
corev1.ResourceMemory: resource.MustParse("32Gi"),
corev1.ResourcePods: resource.MustParse("100"),
apiext.BatchCPU: resource.MustParse("7500"),
apiext.BatchMemory: resource.MustParse("10Gi"),
},
},
}
Expand Down Expand Up @@ -1008,6 +1010,57 @@ func Test_filterWithReservations(t *testing.T) {
},
wantStatus: nil,
},
{
name: "filter restricted reservation with nodeInfo and matched requests are zero",
stateData: &stateData{
schedulingStateData: schedulingStateData{
hasAffinity: true,
podRequests: corev1.ResourceList{
apiext.BatchCPU: resource.MustParse("6000"),
apiext.BatchMemory: resource.MustParse("8Gi"),
},
nodeReservationStates: map[string]*nodeReservationState{
node.Name: {
podRequested: &framework.Resource{
MilliCPU: 30 * 1000,
Memory: 24 * 1024 * 1024 * 1024,
ScalarResources: map[corev1.ResourceName]int64{
apiext.BatchCPU: 1500,
apiext.BatchMemory: 2 * 1024 * 1024 * 1024,
},
},
rAllocated: &framework.Resource{
MilliCPU: 6000,
},
matchedOrIgnored: []*frameworkext.ReservationInfo{
frameworkext.NewReservationInfo(&schedulingv1alpha1.Reservation{
ObjectMeta: metav1.ObjectMeta{
Name: "test-r",
},
Spec: schedulingv1alpha1.ReservationSpec{
AllocatePolicy: schedulingv1alpha1.ReservationAllocatePolicyRestricted,
Template: &corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
Containers: []corev1.Container{
{
Resources: corev1.ResourceRequirements{
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("6"),
},
},
},
},
},
},
},
}),
},
},
},
},
},
wantStatus: nil,
},
{
name: "failed to filter restricted reservation with nodeInfo",
stateData: &stateData{
Expand Down Expand Up @@ -1081,6 +1134,109 @@ func Test_filterWithReservations(t *testing.T) {
},
wantStatus: framework.NewStatus(framework.Unschedulable, "Reservation(s) Too many pods"),
},
{
name: "failed to filter restricted reservation since unmatched resources are insufficient",
stateData: &stateData{
schedulingStateData: schedulingStateData{
hasAffinity: true,
podRequests: corev1.ResourceList{
apiext.BatchCPU: resource.MustParse("8000"),
apiext.BatchMemory: resource.MustParse("8Gi"),
},
nodeReservationStates: map[string]*nodeReservationState{
node.Name: {
podRequested: &framework.Resource{
MilliCPU: 30 * 1000,
Memory: 24 * 1024 * 1024 * 1024,
ScalarResources: map[corev1.ResourceName]int64{
apiext.BatchCPU: 1500,
apiext.BatchMemory: 2 * 1024 * 1024 * 1024,
},
},
rAllocated: &framework.Resource{
MilliCPU: 2000,
},
matchedOrIgnored: []*frameworkext.ReservationInfo{
frameworkext.NewReservationInfo(&schedulingv1alpha1.Reservation{
ObjectMeta: metav1.ObjectMeta{
Name: "test-r",
},
Spec: schedulingv1alpha1.ReservationSpec{
AllocatePolicy: schedulingv1alpha1.ReservationAllocatePolicyRestricted,
Template: &corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
Containers: []corev1.Container{
{
Resources: corev1.ResourceRequirements{
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("6"),
},
},
},
},
},
},
},
}),
},
},
},
},
},
wantStatus: framework.NewStatus(framework.Unschedulable,
"Insufficient kubernetes.io/batch-cpu by node"),
},
{
name: "filter restricted reservation and ignore matched requests are zero without affinity",
stateData: &stateData{
schedulingStateData: schedulingStateData{
hasAffinity: false,
podRequests: corev1.ResourceList{
apiext.BatchCPU: resource.MustParse("8000"),
apiext.BatchMemory: resource.MustParse("8Gi"),
},
nodeReservationStates: map[string]*nodeReservationState{
node.Name: {
podRequested: &framework.Resource{
MilliCPU: 30 * 1000,
Memory: 24 * 1024 * 1024 * 1024,
ScalarResources: map[corev1.ResourceName]int64{
apiext.BatchCPU: 1500,
apiext.BatchMemory: 2 * 1024 * 1024 * 1024,
},
},
rAllocated: &framework.Resource{
MilliCPU: 6000,
},
matchedOrIgnored: []*frameworkext.ReservationInfo{
frameworkext.NewReservationInfo(&schedulingv1alpha1.Reservation{
ObjectMeta: metav1.ObjectMeta{
Name: "test-r",
},
Spec: schedulingv1alpha1.ReservationSpec{
AllocatePolicy: schedulingv1alpha1.ReservationAllocatePolicyRestricted,
Template: &corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
Containers: []corev1.Container{
{
Resources: corev1.ResourceRequirements{
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("6"),
},
},
},
},
},
},
},
}),
},
},
},
},
},
wantStatus: nil,
},
{
name: "filter default reservations with preemption",
stateData: &stateData{
Expand Down Expand Up @@ -1789,7 +1945,11 @@ func TestPreFilterExtensionAddPod(t *testing.T) {
preemptibleInRRs: map[string]map[types.UID]corev1.ResourceList{},
},
},
wantPreemptible: map[string]corev1.ResourceList{},
wantPreemptible: map[string]corev1.ResourceList{
"test-node": {
corev1.ResourcePods: *resource.NewQuantity(-1, resource.DecimalSI),
},
},
wantPreemptibleInRRs: map[string]map[types.UID]corev1.ResourceList{},
},
{
Expand Down Expand Up @@ -1958,9 +2118,13 @@ func TestPreFilterExtensionRemovePod(t *testing.T) {
wantPreemptibleInRRs map[string]map[types.UID]corev1.ResourceList
}{
{
name: "with BestEffort Pod",
pod: &corev1.Pod{},
wantPreemptible: map[string]corev1.ResourceList{},
name: "with BestEffort Pod",
pod: &corev1.Pod{},
wantPreemptible: map[string]corev1.ResourceList{
"test-node": {
corev1.ResourcePods: *resource.NewQuantity(1, resource.DecimalSI),
},
},
wantPreemptibleInRRs: map[string]map[types.UID]corev1.ResourceList{},
},
{
Expand Down
1 change: 0 additions & 1 deletion pkg/scheduler/plugins/reservation/scoring.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,6 @@ func findMostPreferredReservationByOrder(rOnNode []*frameworkext.ReservationInfo
}

func scoreReservation(pod *corev1.Pod, rInfo *frameworkext.ReservationInfo, allocated corev1.ResourceList) int64 {
// TODO(joseph): we should support zero-request pods
requested := resourceapi.PodRequests(pod, resourceapi.PodResourcesOptions{})
requested = quotav1.Add(requested, allocated)
resources := quotav1.RemoveZeros(rInfo.Allocatable)
Expand Down

0 comments on commit ceff9cd

Please sign in to comment.