-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgpu_mem.cc
217 lines (178 loc) · 6.98 KB
/
gpu_mem.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
#ifndef DTEST_GPU_MEM_H
#define DTEST_GPU_MEM_H
#include <cuda.h>
#include <cuda_runtime.h>
#include <nvml.h>
#include <iostream>
namespace Dtest {
//! Return the total memory being used by the GPU
inline size_t getGPUMemUsageCUDA()
{
size_t free, total;
cudaMemGetInfo(&free, &total);
return total - free;
}
//! Return the total memory available on the GPU
inline size_t getGPUMemAvailableCUDA()
{
size_t free, total;
cudaMemGetInfo(&free, &total);
return free;
}
//! Return the total memory available on the GPU
inline size_t getGPUMemTotalCUDA()
{
size_t free, total;
cudaMemGetInfo(&free, &total);
return total;
}
inline unsigned long long getGPUMemAvailable()
{
nvmlReturn_t result;
result = nvmlInit();
if (result != NVML_SUCCESS)
{
std::cerr << "Failed to initialize NVML: " << nvmlErrorString(result) << std::endl;
return 0;
}
nvmlDevice_t device;
result = nvmlDeviceGetHandleByIndex(0, &device);
if (result != NVML_SUCCESS)
{
std::cerr << "Failed to get NVML device: " << nvmlErrorString(result) << std::endl;
return 0;
}
nvmlMemory_t memory;
result = nvmlDeviceGetMemoryInfo(device, &memory);
if (result != NVML_SUCCESS)
{
std::cerr << "Failed to get NVML memory: " << nvmlErrorString(result) << std::endl;
return 0;
}
return memory.free;
}
inline unsigned long long getGPUMemTotal()
{
nvmlReturn_t result;
result = nvmlInit();
if (result != NVML_SUCCESS)
{
std::cerr << "Failed to initialize NVML: " << nvmlErrorString(result) << std::endl;
return 0;
}
nvmlDevice_t device;
result = nvmlDeviceGetHandleByIndex(0, &device);
if (result != NVML_SUCCESS)
{
std::cerr << "Failed to get NVML device: " << nvmlErrorString(result) << std::endl;
return 0;
}
nvmlMemory_t memory;
result = nvmlDeviceGetMemoryInfo(device, &memory);
if (result != NVML_SUCCESS)
{
std::cerr << "Failed to get NVML memory: " << nvmlErrorString(result) << std::endl;
return 0;
}
return memory.total;
}
//! Scan an NVML process-detail list for the given process id and return the
//! GPU memory that process is using; returns 0 when the pid is not listed.
inline unsigned long long getMemFromPIDandUtil(const unsigned int& pid, nvmlProcessDetailList_t& util)
{
    const unsigned int entryCount = util.numProcArrayEntries;
    for (unsigned int idx = 0; idx < entryCount; ++idx)
    {
        const auto& entry = util.procArray[idx];
        if (entry.pid == pid)
        {
            return entry.usedGpuMemory;
        }
    }
    // Not found — callers treat 0 as "absent", so stay silent here.
    // std::cerr << "Failed to find process id " << pid <<std::endl;
    return 0;
}
//! Query the running-process detail list for `device` and return the GPU
//! memory used by `pid` (0 if the pid is absent or the query fails).
//! The caller owns processesUtilInfo.procArray; on an undersized first query
//! this function deletes and reallocates that array, so the caller must not
//! hold a stale pointer to it. `version` and `mode` must be set by the caller.
inline unsigned long long getAndCheckRunningProcesses(const unsigned int& pid,
nvmlDevice_t& device,
nvmlProcessDetailList_t& processesUtilInfo)
{
nvmlReturn_t result;
// get the processes info
result = nvmlDeviceGetRunningProcessDetailList(device, &processesUtilInfo);
if (result == NVML_SUCCESS)
{
//check the struct for requested PID
return getMemFromPIDandUtil(pid, processesUtilInfo);
}
else if (result == NVML_ERROR_INSUFFICIENT_SIZE)
{
//try again with additional slots for info if we hadn't allocated enough before
// NOTE: this relies on NVML having written the required entry count back
// into processesUtilInfo.numProcArrayEntries on the failed call — that
// updated count sizes the reallocation below.
delete[] processesUtilInfo.procArray;
processesUtilInfo.procArray = new nvmlProcessDetail_v1_t[processesUtilInfo.numProcArrayEntries];
result = nvmlDeviceGetRunningProcessDetailList(device, &processesUtilInfo);
if (result != NVML_SUCCESS)
{
// A second failure (e.g. the process list grew again) is not retried.
std::cerr << "Failed to get NVML process info: " << nvmlErrorString(result) << std::endl;
return 0;
}
return getMemFromPIDandUtil(pid, processesUtilInfo);
}
else
{
std::cerr << "Failed to get NVML process info: " << nvmlErrorString(result) << std::endl;
return 0;
}
}
/**
* get GPU memory used.
*
* @param pid optional id of the process of interest. id of 0 gives total memory used
*/
inline unsigned long long getGPUMemUsage(unsigned int pid = 0)
{
// although less efficient, we just do total-free from other functions.
// Less efficient since both other functions initialize nvml, but
// since it's super fast (~2.6 us), the cleanliness is more advantageous
// than then increased time
if (pid == 0)
return getGPUMemTotal() - getGPUMemAvailable();
// == Get the GPU memory used by a specific PID
nvmlReturn_t result;
//initialie nvml
result = nvmlInit();
if (result != NVML_SUCCESS)
{
std::cerr << "Failed to initialize NVML: " << nvmlErrorString(result) << std::endl;
return 0;
}
// get the device, assuming we just want the first device on this host
// if we have machines with multiple gpus, this needs to be updated
nvmlDevice_t device;
result = nvmlDeviceGetHandleByIndex(0, &device);
if (result != NVML_SUCCESS)
{
std::cerr << "Failed to get NVML device: " << nvmlErrorString(result) << std::endl;
return 0;
}
nvmlProcessDetailList_t processesUtilInfo;
processesUtilInfo.numProcArrayEntries = 100; //max number of info structs we can get the first time around
processesUtilInfo.procArray
= new nvmlProcessDetail_v1_t[processesUtilInfo.numProcArrayEntries]; //allocate space for the info structs
processesUtilInfo.version = nvmlProcessDetailList_v1;
processesUtilInfo.mode = 1; //sue graphics mode (Compute/Graphics/MPSCompute) to compute mode
//first checking graphics processes
unsigned long long mem = getAndCheckRunningProcesses(pid, device, processesUtilInfo);
if (mem > 0)
{
delete[] processesUtilInfo.procArray;
return mem;
}
//doing a second check through compute processes if not in compute list
//we need to reset and reallocate to ensure the previous check didn't clobber these, particularly numProcArrayEntries
processesUtilInfo.numProcArrayEntries = 100; //max number of info structs we can get the first time around
delete[] processesUtilInfo.procArray;
processesUtilInfo.procArray
= new nvmlProcessDetail_v1_t[processesUtilInfo.numProcArrayEntries]; //allocate space for the info structs
processesUtilInfo.mode = 0; //switch to compute mode
mem = getAndCheckRunningProcesses(pid, device, processesUtilInfo);
delete[] processesUtilInfo.procArray;
return mem;
}
} // namespace Dtest
#endif // define DTEST_GPU_MEM_H