Skip to content

Commit

Permalink
fix gpu request counts and use generic label
Browse files (browse the repository at this point in the history)
  • Loading branch information
Fengping Hu committed Jan 29, 2024
1 parent 68d2125 commit 6d4f201
Showing 1 changed file with 5 additions and 4 deletions.
9 changes: 5 additions & 4 deletions binderhub/resources.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -67,7 +67,7 @@ def get_gpu_availability(self, product=None, memory=None):
elif memory:
nodes = api.list_node(label_selector='gpu=true,nvidia.com/gpu.memory=%s' %memory)
else:
- nodes = api.list_node(label_selector='gpu=true')
+ nodes = api.list_node(label_selector='nvidia.com/gpu.product')
for node in nodes.items:
product = node.metadata.labels['nvidia.com/gpu.product']
memory = int(node.metadata.labels['nvidia.com/gpu.memory'])
Expand All @@ -80,9 +80,10 @@ def get_gpu_availability(self, product=None, memory=None):
gpu['total_requests'] = 0
pods = api.list_pod_for_all_namespaces(field_selector='spec.nodeName=%s' %node.metadata.name).items
for pod in pods:
- requests = pod.spec.containers[0].resources.requests
- if requests:
- gpu['total_requests'] += int(requests.get('nvidia.com/gpu', 0))
+ for container in pod.spec.containers:
+ requests = container.resources.requests
+ if requests:
+ gpu['total_requests'] += int(requests.get('nvidia.com/gpu', 0))
gpu['available'] = max(gpu['count'] - gpu['total_requests'], 0)
return sorted(gpus.values(), key=lambda gpu : gpu['memory'])

Expand Down

0 comments on commit 6d4f201

Please sign in to comment.