This repository has been archived by the owner on Mar 28, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathgpu_multicore.cu
202 lines (169 loc) · 6.04 KB
/
gpu_multicore.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
/*
* Copyright (c) 2019 Trail of Bits, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "gpu.h"
#include "gpu_common.h"
#include "multicore.h"
#include "timing.h"
#include <inttypes.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "output.h"
#include "workpool.h"
// Tag passed as the first argument to log_output()/log_error() in this file.
// Only defined here when the compiler has not already provided it.
#ifndef __FILE_NAME__
#define __FILE_NAME__ "GPU_MULTICORE"
#endif
// Thread entry points, defined further down in this file.
static void *gpu_wait_thread(void *arg);
static void *cpu_wait_thread(void *arg);
// Number of worker threads; sizes the three parallel tables below.
#define THREAD_COUNT 2
// Per-thread argument/result records, zero-initialized at load time and
// reset again at the start of every gpu_multicore_method() call.
static thread_args_t gpu_args = {0};
static thread_args_t cpu_args = {0};
// Signature expected by pthread_create() for a thread start routine.
typedef void* (*thread_func_t)(void*);
// Parallel tables indexed by worker: slot 0 is the GPU, slot 1 is the CPU.
static thread_args_t *worker_threads[THREAD_COUNT] = {&gpu_args, &cpu_args};
static thread_func_t thread_funcs[THREAD_COUNT] = {gpu_wait_thread, cpu_wait_thread};
static const char *thread_names[THREAD_COUNT] = {"GPU", "CPU"};
// Mutex and condition variable the workers use to tell
// gpu_multicore_method() that one of them has finished. Both are
// initialized inside gpu_multicore_method().
static pthread_mutex_t cond_guard;
static pthread_cond_t thread_is_done;
/*
 * Report whether the combined GPU + multicore method can be used.
 *
 * Returns true only when both multicore_check() and gpu_check() succeed.
 * gpu_check() is not called at all if multicore_check() fails.
 */
#ifdef __cplusplus
extern "C"
#endif
bool gpu_multicore_check(void) {
  if (!multicore_check()) {
    return false;
  }
  return gpu_check();
}
/*
 * Thread entry point that runs the GPU method and then notifies the waiter.
 *
 * arg - pointer to the thread_args_t for this worker; must not be NULL
 *       (the process exits if it is).
 *
 * On completion the operation count is stored in ->nops, ->done is set to
 * true and thread_is_done is signalled. gpu_method() is handed ->found as
 * its output flag.
 *
 * Always returns NULL.
 */
static void *gpu_wait_thread(void *arg) {
  // make this thread cancellable, and cancellable *immediately*
  pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
  pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);

  if (NULL == arg) {
    log_error(__FILE_NAME__, "GPU Thread given a bad argument\n");
    exit(-1);
  }

  thread_args_t *ti = (thread_args_t *)(arg);

  // just call the method runner directly with limited arguments
  // TODO(artem): Control logging :)
  ti->nops = gpu_method(ti->secret, &(ti->found), ti->start, ti->stop);

  // The work is finished. Stop accepting cancellation so this thread can
  // never be cancelled while it is holding cond_guard.
  int previous_state;
  pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &previous_state);

  // Publish completion under the mutex: the waiter checks ->done while
  // holding cond_guard, so the flag and the signal must be seen together.
  pthread_mutex_lock(&cond_guard);
  ti->done = true;
  pthread_cond_signal(&thread_is_done);
  pthread_mutex_unlock(&cond_guard);

  return NULL;
}
/*
 * Thread entry point that runs the multicore CPU method and then notifies
 * the waiter.
 *
 * arg - pointer to the thread_args_t for this worker; must not be NULL
 *       (the process exits if it is).
 *
 * On completion the operation count is stored in ->nops, ->done is set to
 * true and thread_is_done is signalled. multicore_method() is handed
 * ->found as its output flag.
 *
 * Always returns NULL.
 */
static void *cpu_wait_thread(void *arg) {
  // make this thread cancellable, and cancellable *immediately*
  pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
  pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);

  if (NULL == arg) {
    log_error(__FILE_NAME__, "CPU Thread given a bad argument\n");
    exit(-1);
  }

  thread_args_t *ti = (thread_args_t *)(arg);

  // just call the method runner directly with limited arguments
  ti->nops = multicore_method(ti->secret, &(ti->found), ti->start, ti->stop);

  // The work is finished. Stop accepting cancellation so this thread can
  // never be cancelled while it is holding cond_guard.
  int previous_state;
  pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &previous_state);

  // Publish completion under the mutex: the waiter checks ->done while
  // holding cond_guard, so the flag and the signal must be seen together.
  pthread_mutex_lock(&cond_guard);
  ti->done = true;
  pthread_cond_signal(&thread_is_done);
  pthread_mutex_unlock(&cond_guard);

  return NULL;
}
#ifdef __cplusplus
extern "C"
#endif
uint64_t gpu_multicore_method(uint64_t secret, bool *found, uint64_t h_start,
uint64_t h_end) {
*found = false;
// initialize notification mutex
pthread_mutex_init(&cond_guard, NULL);
pthread_cond_init(&thread_is_done, NULL);
for(int i = 0; i < THREAD_COUNT; i++) {
memset(worker_threads[i], 0, sizeof(thread_args_t));
worker_threads[i]->done = false;
worker_threads[i]->found = false;
// these values are ignored since its fetched from a workpool
worker_threads[i]->start = h_start;
worker_threads[i]->stop = h_end;
worker_threads[i]->secret = secret;
}
int gpu_workers = gpu_get_workers();
int cpu_workers = multicore_get_ncpus();
int total_workers = cpu_workers + gpu_workers;
log_output(__FILE_NAME__, "Total workers = [%d] [CPU: %d][GPU: %d]\n",
total_workers, cpu_workers, gpu_workers);
// Set up work items each thread will pick
if (!workpool_is_set()) {
workpool_set(h_start, h_end, total_workers);
}
log_output(__FILE_NAME__, "Creating worker threads\n");
for (int i = 0; i < 2; i++) {
pthread_attr_t attr;
int ret = pthread_attr_init(&attr);
int rv = pthread_create(&(worker_threads[i]->tid), &attr, thread_funcs[i],
worker_threads[i]);
if(ret < 0) {
perror("Could not create thread");
exit(-1);
}
log_output(__FILE_NAME__, ".\n");
}
log_output(__FILE_NAME__, "Waiting on a method to finish...\n");
// merge multicore wait loop with gpu wait loop
bool threads_done = false;
while (false == threads_done) {
pthread_cond_wait(&thread_is_done, &cond_guard);
log_output(__FILE_NAME__, "A thread finished!\n");
int done_count = 0;
for (int i = 0; i < THREAD_COUNT; i++) {
// count how many threads are done
if (true == worker_threads[i]->done) {
done_count += 1;
}
// only output the found message once
if (true == worker_threads[i]->found && false == *found) {
log_output(__FILE_NAME__, "[%s] found the secret!\n",
thread_names[i]);
*found = true;
log_output(__FILE_NAME__, "Waiting for other threads to finish to get "
"accurate performance stats\n");
}
// stop loop when all threads end
if (THREAD_COUNT == done_count) {
threads_done = true;
}
}
}
log_output(__FILE_NAME__, "All threads done!\n");
pthread_mutex_unlock(&cond_guard);
// just to be sure, in case we ever bail out early
for (int i = 0; i < THREAD_COUNT; i++) {
void *p;
pthread_join(worker_threads[i]->tid, &p);
}
// calculate a combined nops from both methods
uint64_t total_nops = 0;
for(int i = 0; i < THREAD_COUNT; i++) {
total_nops += worker_threads[i]->nops;
}
log_output(__FILE_NAME__, "There were [%016" PRIu64 "] operations performed\n",
total_nops);
// get percentage performed by each method
for (int i = 0; i < THREAD_COUNT; i++) {
uint64_t my_nops = worker_threads[i]->nops;
log_output(__FILE_NAME__, "\t[%s] did [%016" PRIu64 "] Operations, or [%02.02lf] percent\n",
thread_names[i], my_nops,
(double)my_nops * 100.0 / (double)total_nops);
}
return total_nops;
}