-
Notifications
You must be signed in to change notification settings - Fork 25
/
Copy pathLSHReservoirSampler_segsort.cpp
145 lines (123 loc) · 6.7 KB
/
LSHReservoirSampler_segsort.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#include "LSHReservoirSampler.h"
/*
* Function: segmentedSortKV
* --------------------
* Segmented sorting of KV pairs, based on the value of the key. Sorted order is bitonic across segments.
*
* Enqueues on command_queue_gpu, in this order: the preprocess kernel, the initial sort kernel, the
* stage_n / stage_0 kernels for every high stage, and the postprocess kernel. The call then waits in
* clFinish before returning.
*
* key_in (in/out): input keys, as cl_mem (on which the sort's value is based), length required to be a power of two
*
* val_in (in/out): input values, as cl_mem, length required to be a power of two
*
* segmentSize: length of each segment to be sorted, required to be a power of two. Must be at least 8:
*              the work-group size is min(512, segmentSize / 8), and a value of 0 would make the
*              num_stages computation divide by zero.
*
* numSegments: number of segments to be sorted
*
* valMax: forwarded unchanged as argument 2 of the preprocess and postprocess kernels.
*         NOTE(review): its exact meaning is defined by those kernels, which are not visible here.
*
* returns: nothing; every OpenCL status code is handed to clCheckError
*/
void LSHReservoirSampler::segmentedSortKV(cl_mem* key_in, cl_mem* val_in, int segmentSize, int numSegments, unsigned int valMax) {
    int stage, high_stage, num_stages;
    size_t localSize, globalSize;

    localSize = std::min(512, segmentSize / 8);
    /* Widen before multiplying so large segmentSize * numSegments products cannot overflow int. */
    globalSize = static_cast<size_t>(segmentSize) * static_cast<size_t>(numSegments) / 8;
    num_stages = static_cast<int>(2 * (segmentSize / 8) / localSize);

    /* Size in bytes of each local scratch buffer handed to the sort kernels. */
    const size_t scratchBytes = 8 * localSize * sizeof(unsigned int);

    /* Preprocess. Statuses are OR-ed together so a failure on any argument is reported. */
    _err = clSetKernelArg(kernel_bsort_preprocess, 0, sizeof(cl_mem), key_in);
    _err |= clSetKernelArg(kernel_bsort_preprocess, 1, sizeof(cl_mem), val_in);
    _err |= clSetKernelArg(kernel_bsort_preprocess, 2, sizeof(unsigned int), &valMax);
    clCheckError(_err, "kernel_bsort_preprocess set argument failed!");
    _err = clEnqueueNDRangeKernel(command_queue_gpu, kernel_bsort_preprocess, 1, NULL, &globalSize,
        &localSize, 0, NULL, NULL);
    clCheckError(_err, "kernel_bsort_preprocess failed!");

    /* Create kernel argument: 0 = keys, 1 = local scratch, 2 = values, 3 = local scratch. */
    _err = clSetKernelArg(kernel_bsort_init_manning_kv, 0, sizeof(cl_mem), key_in);
    _err |= clSetKernelArg(kernel_bsort_stage_0_manning_kv, 0, sizeof(cl_mem), key_in);
    _err |= clSetKernelArg(kernel_bsort_stage_n_manning_kv, 0, sizeof(cl_mem), key_in);
    _err |= clSetKernelArg(kernel_bsort_init_manning_kv, 1, scratchBytes, NULL);
    _err |= clSetKernelArg(kernel_bsort_stage_0_manning_kv, 1, scratchBytes, NULL);
    _err |= clSetKernelArg(kernel_bsort_stage_n_manning_kv, 1, scratchBytes, NULL);
    _err |= clSetKernelArg(kernel_bsort_init_manning_kv, 2, sizeof(cl_mem), val_in);
    _err |= clSetKernelArg(kernel_bsort_stage_0_manning_kv, 2, sizeof(cl_mem), val_in);
    _err |= clSetKernelArg(kernel_bsort_stage_n_manning_kv, 2, sizeof(cl_mem), val_in);
    _err |= clSetKernelArg(kernel_bsort_init_manning_kv, 3, scratchBytes, NULL);
    _err |= clSetKernelArg(kernel_bsort_stage_0_manning_kv, 3, scratchBytes, NULL);
    _err |= clSetKernelArg(kernel_bsort_stage_n_manning_kv, 3, scratchBytes, NULL);
    clCheckError(_err, "kernel_bsort_kv set argument failed!");

    /* Enqueue initial sorting kernel */
    _err = clEnqueueNDRangeKernel(command_queue_gpu, kernel_bsort_init_manning_kv, 1, NULL, &globalSize,
        &localSize, 0, NULL, NULL);
    clCheckError(_err, "kernel_bsort_init_manning_kv failed!");

    /* Execute further stages: high_stage doubles from 2 while it stays below num_stages. */
    for (high_stage = 2; high_stage < num_stages; high_stage <<= 1) {
        _err = clSetKernelArg(kernel_bsort_stage_0_manning_kv, 4, sizeof(int), &high_stage);
        _err |= clSetKernelArg(kernel_bsort_stage_n_manning_kv, 5, sizeof(int), &high_stage);
        clCheckError(_err, "kernel_bsort_kv set argument failed!");

        /* stage_n runs once for each stage = high_stage, high_stage / 2, ..., 2. */
        for (stage = high_stage; stage > 1; stage >>= 1) {
            _err = clSetKernelArg(kernel_bsort_stage_n_manning_kv, 4, sizeof(int), &stage);
            clCheckError(_err, "kernel_bsort_kv set argument failed!");
            _err = clEnqueueNDRangeKernel(command_queue_gpu, kernel_bsort_stage_n_manning_kv, 1, NULL,
                &globalSize, &localSize, 0, NULL, NULL);
            clCheckError(_err, "kernel_bsort_stage_n_manning_kv failed!");
        }

        /* One stage_0 launch closes each high stage. */
        _err = clEnqueueNDRangeKernel(command_queue_gpu, kernel_bsort_stage_0_manning_kv, 1, NULL,
            &globalSize, &localSize, 0, NULL, NULL);
        clCheckError(_err, "kernel_bsort_stage_0_manning_kv failed!");
    }

    /* Postprocess. Statuses are OR-ed together so a failure on any argument is reported. */
    _err = clSetKernelArg(kernel_bsort_postprocess, 0, sizeof(cl_mem), key_in);
    _err |= clSetKernelArg(kernel_bsort_postprocess, 1, sizeof(cl_mem), val_in);
    _err |= clSetKernelArg(kernel_bsort_postprocess, 2, sizeof(unsigned int), &valMax);
    clCheckError(_err, "kernel_bsort_postprocess set argument failed!");
    _err = clEnqueueNDRangeKernel(command_queue_gpu, kernel_bsort_postprocess, 1, NULL, &globalSize,
        &localSize, 0, NULL, NULL);
    clCheckError(_err, "kernel_bsort_postprocess failed!");

    /* Wait for the queue; execution-time failures can surface here, so check the status too. */
    _err = clFinish(command_queue_gpu);
    clCheckError(_err, "clFinish failed!");
}
/*
* Function: segmentedSort
* --------------------
* Segmented sorting. Sorted order is bitonic across segments.
*
* Enqueues on command_queue_gpu the initial sort kernel followed by the stage_n / stage_0 kernels
* for every high stage. The call then waits in clFinish before returning.
*
* in (in/out): input values, as cl_mem (on which the sort's value is based), length required to be a power of two
*
* segmentSize: length of each segment to be sorted, required to be a power of two. Must be at least 8:
*              the work-group size is min(1024, segmentSize / 8), and a value of 0 would make the
*              num_stages computation divide by zero.
*
* numSegments: number of segments to be sorted
*
* returns: nothing; every OpenCL status code is handed to clCheckError
*/
void LSHReservoirSampler::segmentedSort(cl_mem* in, int segmentSize, int numSegments) {
    int stage, high_stage, num_stages;
    size_t localSize, globalSize;

    localSize = std::min(1024, segmentSize / 8);
    /* Widen before multiplying so large segmentSize * numSegments products cannot overflow int. */
    globalSize = static_cast<size_t>(segmentSize) * static_cast<size_t>(numSegments) / 8;
    num_stages = static_cast<int>(2 * (segmentSize / 8) / localSize);

    /* Size in bytes of the local scratch buffer handed to the sort kernels. */
    const size_t scratchBytes = 8 * localSize * sizeof(unsigned int);

    /* Create kernel argument: 0 = data buffer, 1 = local scratch. */
    _err = clSetKernelArg(kernel_bsort_init_manning, 0, sizeof(cl_mem), in);
    _err |= clSetKernelArg(kernel_bsort_stage_0_manning, 0, sizeof(cl_mem), in);
    _err |= clSetKernelArg(kernel_bsort_stage_n_manning, 0, sizeof(cl_mem), in);
    _err |= clSetKernelArg(kernel_bsort_init_manning, 1, scratchBytes, NULL);
    _err |= clSetKernelArg(kernel_bsort_stage_0_manning, 1, scratchBytes, NULL);
    _err |= clSetKernelArg(kernel_bsort_stage_n_manning, 1, scratchBytes, NULL);
    clCheckError(_err, "kernel_bsort set argument failed!");

    /* Enqueue initial sorting kernel */
    _err = clEnqueueNDRangeKernel(command_queue_gpu, kernel_bsort_init_manning, 1, NULL, &globalSize,
        &localSize, 0, NULL, NULL);
    clCheckError(_err, "kernel_bsort_init_manning failed!");

    /* Execute further stages: high_stage doubles from 2 while it stays below num_stages. */
    for (high_stage = 2; high_stage < num_stages; high_stage <<= 1) {
        _err = clSetKernelArg(kernel_bsort_stage_0_manning, 2, sizeof(int), &high_stage);
        _err |= clSetKernelArg(kernel_bsort_stage_n_manning, 3, sizeof(int), &high_stage);
        clCheckError(_err, "kernel_bsort set argument failed!");

        /* stage_n runs once for each stage = high_stage, high_stage / 2, ..., 2. */
        for (stage = high_stage; stage > 1; stage >>= 1) {
            _err = clSetKernelArg(kernel_bsort_stage_n_manning, 2, sizeof(int), &stage);
            clCheckError(_err, "kernel_bsort set argument failed!");
            _err = clEnqueueNDRangeKernel(command_queue_gpu, kernel_bsort_stage_n_manning, 1, NULL,
                &globalSize, &localSize, 0, NULL, NULL);
            clCheckError(_err, "kernel_bsort_stage_n_manning failed!");
        }

        /* One stage_0 launch closes each high stage. */
        _err = clEnqueueNDRangeKernel(command_queue_gpu, kernel_bsort_stage_0_manning, 1, NULL,
            &globalSize, &localSize, 0, NULL, NULL);
        clCheckError(_err, "kernel_bsort_stage_0_manning failed!");
    }

    /* Wait for the queue; execution-time failures can surface here, so check the status too. */
    _err = clFinish(command_queue_gpu);
    clCheckError(_err, "clFinish failed!");
}