Skip to content

Commit c9727f5

Browse files
committed
add a tester for relaxing allocation limits
1 parent e831db1 commit c9727f5

File tree

4 files changed

+274
-0
lines changed

4 files changed

+274
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Copyright (c) 2025 Ben Ashbaugh
2+
#
3+
# SPDX-License-Identifier: MIT
4+
5+
add_opencl_sample(
6+
TEST
7+
NUMBER 16
8+
TARGET relaxedallocations
9+
VERSION 200
10+
SOURCES main.cpp
11+
LIBS OpenCLExt)
+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Relaxed Allocation Limits
2+
3+
## Sample Purpose
4+
5+
TODO
6+
7+
## Key APIs and Concepts
8+
9+
```
10+
CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL
11+
-cl-intel-greater-than-4GB-buffer-required
12+
```
13+
14+
## Command Line Options
15+
16+
| Option | Default Value | Description |
17+
|:--|:-:|:--|
18+
| `-d <index>` | 0 | Specify the index of the OpenCL device in the platform to execute on the sample on.
19+
| `-p <index>` | 0 | Specify the index of the OpenCL platform to execute the sample on.
20+
| `-s <number>` | 2 | Size to allocate in GB.
21+
| `--svm` | N/A | Test USM allocations.
22+
| `--usm` | N/A | Test SVM allocations.
+240
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
/*
2+
// Copyright (c) 2025 Ben Ashbaugh
3+
//
4+
// SPDX-License-Identifier: MIT
5+
*/
6+
7+
#include <cinttypes>
8+
#include <popl/popl.hpp>
9+
10+
#include <CL/opencl.hpp>
11+
12+
#include "util.hpp"
13+
14+
#if !defined(CL_INTEL_RELAX_ALLOCATION_LIMITS_EXTENSION_NAME)
15+
#define CL_INTEL_RELAX_ALLOCATION_LIMITS_EXTENSION_NAME \
16+
"cl_intel_relax_allocation_limits"
17+
#endif
18+
#if !defined(CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL)
19+
#define CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL ( 1 << 23 )
20+
#endif
21+
22+
static const char kernelString[] = R"CLC(
23+
kernel void touch(global uint* buf)
24+
{
25+
size_t id = get_global_id(0);
26+
for (size_t i = 0; i < 1024; i++) {
27+
buf[id * 1024 + i] += 2;
28+
}
29+
}
30+
)CLC";
31+
32+
int main(
33+
int argc,
34+
char** argv )
35+
{
36+
int platformIndex = 0;
37+
int deviceIndex = 0;
38+
39+
size_t sz = 2;
40+
bool relaxAllocationLimits = false;
41+
bool useSVM = false;
42+
bool useUSM = false;
43+
44+
{
45+
popl::OptionParser op("Supported Options");
46+
op.add<popl::Value<int>>("p", "platform", "Platform Index", platformIndex, &platformIndex);
47+
op.add<popl::Value<int>>("d", "device", "Device Index", deviceIndex, &deviceIndex);
48+
op.add<popl::Value<size_t>>("s", "size", "Allocation Size (GB)", sz, &sz);
49+
op.add<popl::Switch>("r", "relax", "Relax Allocation Limits", &relaxAllocationLimits);
50+
op.add<popl::Switch>("", "svm", "Use Coarse-grain SVM Allocations", &useSVM);
51+
op.add<popl::Switch>("", "usm", "Use Device USM Allocations", &useUSM);
52+
53+
bool printUsage = false;
54+
try {
55+
op.parse(argc, argv);
56+
} catch (std::exception& e) {
57+
fprintf(stderr, "Error: %s\n\n", e.what());
58+
printUsage = true;
59+
}
60+
61+
if (printUsage || !op.unknown_options().empty() || !op.non_option_args().empty()) {
62+
fprintf(stderr,
63+
"Usage: relaxedallocations [options]\n"
64+
"%s", op.help().c_str());
65+
return -1;
66+
}
67+
}
68+
69+
std::vector<cl::Platform> platforms;
70+
cl::Platform::get(&platforms);
71+
72+
printf("Running on platform: %s\n",
73+
platforms[platformIndex].getInfo<CL_PLATFORM_NAME>().c_str() );
74+
75+
std::vector<cl::Device> devices;
76+
platforms[platformIndex].getDevices(CL_DEVICE_TYPE_ALL, &devices);
77+
78+
printf("Running on device: %s\n",
79+
devices[deviceIndex].getInfo<CL_DEVICE_NAME>().c_str() );
80+
81+
bool has_cl_intel_relax_allocation_limits =
82+
checkDeviceForExtension(devices[deviceIndex], CL_INTEL_RELAX_ALLOCATION_LIMITS_EXTENSION_NAME);
83+
if (has_cl_intel_relax_allocation_limits) {
84+
printf("Device supports " CL_INTEL_RELAX_ALLOCATION_LIMITS_EXTENSION_NAME ".\n");
85+
} else {
86+
printf("Device does not support " CL_INTEL_RELAX_ALLOCATION_LIMITS_EXTENSION_NAME ".\n");
87+
}
88+
89+
cl::Context context{devices[deviceIndex]};
90+
cl::CommandQueue commandQueue{context, devices[deviceIndex]};
91+
92+
printf("For this device:\n");
93+
printf("\tCL_DEVICE_GLOBAL_MEM_SIZE is %f GB\n",
94+
devices[deviceIndex].getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>() / (1024.0f * 1024.0f * 1024.0f));
95+
printf("\tCL_DEVICE_MAX_MEM_ALLOC_SIZE is %f GB\n",
96+
devices[deviceIndex].getInfo<CL_DEVICE_MAX_MEM_ALLOC_SIZE>() / (1024.0f * 1024.0f * 1024.0f));
97+
98+
size_t allocSize = (size_t)sz * 1024 * 1024 * 1024;
99+
size_t gwx = allocSize / 1024 / sizeof(cl_uint);
100+
101+
printf("Testing allocation size %zu GB (%zu 32-bit values).\n", sz, allocSize / sizeof(cl_uint));
102+
if (relaxAllocationLimits) {
103+
printf("Testing with relaxed allocation limits.\n");
104+
} else if (allocSize > devices[deviceIndex].getInfo<CL_DEVICE_MAX_MEM_ALLOC_SIZE>()) {
105+
printf("Allocation may fail, allocation size exceeds CL_DEVICE_MAX_MEM_ALLOC_SIZE!\n");
106+
}
107+
108+
std::vector<cl_uint> h_buf(allocSize / sizeof(cl_uint));
109+
for (size_t i = 0; i < h_buf.size(); i++) {
110+
h_buf[i] = static_cast<cl_uint>(i);
111+
}
112+
113+
// initialization
114+
115+
cl::Program program{ context, kernelString };
116+
program.build(relaxAllocationLimits ? "-cl-intel-greater-than-4GB-buffer-required" : "");
117+
cl::Kernel kernel = cl::Kernel{ program, "touch" };
118+
119+
cl_uint* dptr = nullptr;
120+
cl::Buffer mem;
121+
122+
if (useSVM) {
123+
const cl_mem_flags flags =
124+
relaxAllocationLimits ? CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL : 0;
125+
dptr = (cl_uint *)clSVMAlloc(
126+
context(),
127+
flags,
128+
allocSize, 0);
129+
if (dptr == nullptr) {
130+
printf("SVM allocation failed!\n");
131+
} else {
132+
commandQueue.enqueueMemcpySVM(dptr, h_buf.data(), CL_TRUE, allocSize);
133+
kernel.setArg(0, dptr);
134+
}
135+
} else if (useUSM) {
136+
const cl_mem_properties_intel props[] = {
137+
CL_MEM_FLAGS,
138+
CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL,
139+
0
140+
};
141+
dptr = (cl_uint*)clDeviceMemAllocINTEL(
142+
context(),
143+
devices[deviceIndex](),
144+
relaxAllocationLimits ? props : nullptr,
145+
allocSize,
146+
0,
147+
nullptr);
148+
if (dptr == nullptr) {
149+
printf("USM allocation failed!\n");
150+
} else {
151+
clEnqueueMemcpyINTEL(
152+
commandQueue(),
153+
CL_TRUE,
154+
dptr,
155+
h_buf.data(),
156+
allocSize,
157+
0,
158+
nullptr,
159+
nullptr);
160+
clSetKernelArgMemPointerINTEL(
161+
kernel(),
162+
0,
163+
dptr);
164+
}
165+
} else {
166+
const cl_mem_flags flags =
167+
relaxAllocationLimits ? CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL : 0;
168+
mem = cl::Buffer{
169+
context,
170+
flags,
171+
allocSize};
172+
if (mem() == nullptr) {
173+
printf("Buffer allocation failed!\n");
174+
} else {
175+
commandQueue.enqueueWriteBuffer(
176+
mem,
177+
CL_TRUE,
178+
0,
179+
allocSize,
180+
h_buf.data());
181+
kernel.setArg(0, mem);
182+
}
183+
}
184+
185+
// execution
186+
187+
commandQueue.enqueueNDRangeKernel(
188+
kernel,
189+
cl::NullRange,
190+
cl::NDRange{gwx});
191+
192+
// validation
193+
194+
if (useSVM) {
195+
commandQueue.enqueueMemcpySVM(
196+
h_buf.data(),
197+
dptr,
198+
CL_TRUE,
199+
allocSize);
200+
clSVMFree(context(), dptr);
201+
dptr = nullptr;
202+
} else if (useUSM) {
203+
clEnqueueMemcpyINTEL(
204+
commandQueue(),
205+
CL_TRUE,
206+
h_buf.data(),
207+
dptr,
208+
allocSize,
209+
0,
210+
nullptr,
211+
nullptr);
212+
clMemFreeINTEL(context(), dptr);
213+
dptr = nullptr;
214+
} else {
215+
commandQueue.enqueueReadBuffer(
216+
mem,
217+
CL_TRUE,
218+
0,
219+
allocSize,
220+
h_buf.data());
221+
}
222+
223+
cl_uint mismatches = 0;
224+
for (size_t i = 0; i < h_buf.size(); i++) {
225+
cl_uint want = static_cast<cl_uint>(i + 2);
226+
if (h_buf[i] != want) {
227+
if (mismatches < 16) {
228+
printf("Error at index %zu: expected %u, got %u!\n", i, want, h_buf[i]);
229+
}
230+
mismatches++;
231+
}
232+
}
233+
if (mismatches) {
234+
printf("Error: Found %u mismatches / %zu values!!!\n", mismatches, h_buf.size());
235+
} else {
236+
printf("Success.\n");
237+
}
238+
239+
return 0;
240+
}

samples/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -91,4 +91,5 @@ if(BUILD_EXTENSION_SAMPLES)
9191
add_subdirectory( 13_mutablecommandbuffers )
9292
add_subdirectory( 14_ooqcommandbuffers )
9393
add_subdirectory( 15_mutablecommandbufferasserts )
94+
add_subdirectory( 17_relaxedallocations )
9495
endif()

0 commit comments

Comments
 (0)