forked from codeplaysoftware/syclacademy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsource.cpp
77 lines (66 loc) · 2.22 KB
/
source.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
/*
SYCL Academy (c)
SYCL Academy is licensed under a Creative Commons
Attribution-ShareAlike 4.0 International License.
You should have received a copy of the license along with this
work. If not, see <http://creativecommons.org/licenses/by-sa/4.0/>.
* SYCL Quick Reference
* ~~~~~~~~~~~~~~~~~~~~
*
* // Get all available devices
* auto devs = sycl::device::get_devices();
*
* // Construct a queue with a device
* auto q = sycl::queue{my_device};
*
* // Declare a buffer pointing to ptr
* auto buf = sycl::buffer{ptr, sycl::range{n}};
*
* // Submit work to the queue
* q.submit([&](sycl::handler &cgh) {
* // COMMAND GROUP
* });
*
* // Within the command group you can
* // 1. Declare an accessor to a buffer
* auto read_write_acc = sycl::accessor{buf, cgh};
* auto read_acc = sycl::accessor{buf, cgh, sycl::read_only};
* auto write_acc = sycl::accessor{buf, cgh, sycl::write_only};
* auto no_init_acc = sycl::accessor{buf, cgh, sycl::no_init};
* // 2. Enqueue a single task:
* cgh.single_task<class mykernel>([=]() {
* // Do something
* });
* // 3. Enqueue a parallel for:
* cgh.parallel_for<class mykernel>(sycl::range{n}, [=](sycl::id<1> i) {
* // Do something
* });
*
*/
#define CATCH_CONFIG_MAIN
#include <catch2/catch.hpp>
/*
 * Load-balancing exercise scaffold.
 *
 * Fills two input arrays with the values 0..dataSize-1, adds them on the host
 * in two contiguous partitions whose sizes are derived from `ratio`, and then
 * checks every element of the result. The two partition loops are the work
 * the exercise asks to be moved onto two distinct SYCL devices.
 */
TEST_CASE("load_balancing", "load_balancing_source") {
  constexpr size_t dataSize = 1024;
  // Fraction of the elements handled by the first partition.
  constexpr float ratio = 0.5f;
  // Explicit cast: the floating-point product is truncated toward zero.
  constexpr size_t dataSizeFirst = static_cast<size_t>(ratio * dataSize);
  // Guard the subtraction below: a ratio above 1 would wrap the unsigned
  // difference and make the second loop run past the end of the arrays.
  static_assert(dataSizeFirst <= dataSize,
                "ratio must not assign more than dataSize elements");
  constexpr size_t dataSizeSecond = dataSize - dataSizeFirst;

  float a[dataSize];
  float b[dataSize];
  float r[dataSize];
  // Indices are size_t throughout so they match the unsigned bounds.
  for (size_t i = 0; i < dataSize; ++i) {
    a[i] = static_cast<float>(i);
    b[i] = static_cast<float>(i);
    r[i] = 0.0f;
  }

  // Task: split the total work across two distinct SYCL devices.
  // The work might be split as in the two loops below.

  // Vector add for the first part: elements [0, dataSizeFirst).
  for (size_t i = 0; i < dataSizeFirst; ++i) {
    r[i] = a[i] + b[i];
  }

  // Vector add for the second part: elements [dataSizeFirst, dataSize).
  for (size_t i = 0; i < dataSizeSecond; ++i) {
    const size_t idx = dataSizeFirst + i;
    r[idx] = a[idx] + b[idx];
  }

  // Exact float comparison is intentional: every value involved is a small
  // integer (at most 2 * (dataSize - 1)), which float represents exactly.
  for (size_t i = 0; i < dataSize; ++i) {
    REQUIRE(r[i] == static_cast<float>(i) * 2.0f);
  }
}