Skip to content

Commit

Permalink
Fixes from review
Browse files Browse the repository at this point in the history
  • Loading branch information
Beanavil committed Dec 12, 2023
1 parent 33535f9 commit 0babaf4
Show file tree
Hide file tree
Showing 2 changed files with 157 additions and 58 deletions.
147 changes: 104 additions & 43 deletions samples/core/multi-device/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,13 @@ void host_convolution(const cl_float* in, cl_float* out, const cl_float* mask,
}
}

// Reports whether `version_fragment` occurs in the version part of an OpenCL
// version string such as the one returned for CL_DEVICE_VERSION.
//
// Per the OpenCL specification that string has the layout
// "OpenCL <major.minor> <vendor-specific information>". The vendor-specific
// tail may itself contain digits and dots (e.g. a driver version such as
// "12.1.1"), so a match is only accepted when it lies entirely within the
// leading "OpenCL <major.minor>" part. Strings that do not follow this layout
// are searched in full.
//
// dev_version:      NUL-terminated version string to inspect.
// version_fragment: NUL-terminated text to look for (e.g. "1.1" or "3.").
//
// Returns non-zero when the fragment is found, zero otherwise (including when
// either argument is NULL).
cl_int opencl_version_contains(const char* dev_version,
                               const char* version_fragment)
{
    static const char prefix[] = "OpenCL ";
    const size_t prefix_len = sizeof(prefix) - 1;
    const char* match = NULL;
    const char* head_end = NULL;

    if (dev_version == NULL || version_fragment == NULL)
    {
        return 0;
    }

    // If the first occurrence does not fit inside the version part, no later
    // occurrence can, so looking at the first match is sufficient.
    match = strstr(dev_version, version_fragment);
    if (match == NULL)
    {
        return 0;
    }

    // Unknown layout: keep the plain substring semantics.
    if (strncmp(dev_version, prefix, prefix_len) != 0)
    {
        return 1;
    }

    // The version part ends at the first space after the "OpenCL " prefix;
    // without such a space there is no vendor-specific tail to ignore.
    head_end = strchr(dev_version + prefix_len, ' ');
    if (head_end == NULL)
    {
        return 1;
    }

    return (match + strlen(version_fragment) <= head_end);
}

int main(int argc, char* argv[])
{
cl_int error = CL_SUCCESS;
Expand Down Expand Up @@ -199,7 +206,13 @@ int main(int argc, char* argv[])
OCLERROR_PAR(dev = cl_util_get_device(dev_opts.triplet.plat_index,
dev_opts.triplet.dev_index,
dev_opts.triplet.dev_type, &error),
error, dev);
error, end);

// Query OpenCL version supported by device.
char dev_version[64];
OCLERROR_RET(clGetDeviceInfo(dev, CL_DEVICE_VERSION, sizeof(dev_version),
&dev_version, NULL),
error, end);

if (!diag_opts.quiet)
{
Expand All @@ -212,48 +225,88 @@ int main(int argc, char* argv[])
fflush(stdout);
}

#if CL_HPP_TARGET_OPENCL_VERSION < 120
fprintf(stderr,
"Error: OpenCL subdevices not supported before version 1.2 ");
exit(EXIT_FAILURE);
#endif
if (opencl_version_contains(dev_version, "1.0")
|| opencl_version_contains(dev_version, "1.1"))
{
fprintf(stdout,
"This sample requires device partitioning, which is an OpenCL "
"1.2 feature, but the device chosen only supports OpenCL %s. "
"Please try with a different OpenCL device instead.\n",
dev_version);
exit(EXIT_SUCCESS);
}

// Check if device supports fission.
cl_device_partition_property* dev_props = NULL;
size_t props_size = 0;
OCLERROR_RET(clGetDeviceInfo(dev, CL_DEVICE_PARTITION_PROPERTIES, 0, NULL,
&props_size),
error, end);
if (props_size == 0)
{
fprintf(stdout,
"This sample requires device fission, which is a "
"feature available from OpenCL 1.2 on, but the "
"device chosen does not seem to support it. Please "
"try with a different OpenCL device instead.\n");
exit(EXIT_SUCCESS);
}

// Check if the "partition equally" type is supported.
MEM_CHECK(dev_props = (cl_device_partition_property*)malloc(sizeof(char)
* props_size),
error, end);
OCLERROR_RET(clGetDeviceInfo(dev, CL_DEVICE_PARTITION_PROPERTIES,
sizeof(char) * props_size, dev_props, NULL),
error, props);
size_t prop = 0;
for (; prop < props_size; ++prop)
{
if (dev_props[prop] == CL_DEVICE_PARTITION_EQUALLY)
{
break;
}
}
if (prop == props_size)
{
fprintf(stdout,
"This sample requires partition equally, which is a "
"partition scheme available from OpenCL 1.2 on, but "
"the device chosen does not seem to support it. "
"Please try with a different OpenCL device instead.\n");
exit(EXIT_SUCCESS);
}

// Create subdevices, each with half of the compute units available.
// Create sub-devices, each with half of the compute units available.
cl_uint max_compute_units = 0;
cl_uint subdev_created = 0;
const cl_uint subdev_count = 2;
OCLERROR_RET(clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS,
sizeof(cl_uint), &max_compute_units, NULL),
error, dev);
error, props);
cl_device_partition_property subdevices_properties[] = {
(cl_device_partition_property)CL_DEVICE_PARTITION_EQUALLY,
(cl_device_partition_property)(max_compute_units / 2), 0
(cl_device_partition_property)(max_compute_units / subdev_count), 0
};

// Initialize subdevices array with one device and then reallocate for
// MacOS and Windows not to complain about NULL subdevices array.
cl_uint subdev_count = 1;
cl_device_id* subdevices =
(cl_device_id*)malloc(subdev_count * sizeof(cl_device_id));

OCLERROR_RET(clCreateSubDevices(dev, subdevices_properties,
max_compute_units, subdevices,
&subdev_count),
error, dev);
OCLERROR_RET(clCreateSubDevices(dev, subdevices_properties, subdev_count,
subdevices, &subdev_created),
error, props);

if (subdev_count < 2)
if (subdev_created < subdev_count)
{
fprintf(stderr, "Error: OpenCL cannot create subdevices");
fprintf(stderr,
"Error: OpenCL cannot create the number of sub-devices "
"requested\n");
exit(EXIT_FAILURE);
}

subdevices =
(cl_device_id*)realloc(subdevices, subdev_count * sizeof(cl_device_id));
OCLERROR_RET(clCreateSubDevices(dev, subdevices_properties, subdev_count,
subdevices, NULL),
error, subdevs);

OCLERROR_PAR(context = clCreateContext(NULL, subdev_count, subdevices, NULL,
NULL, &error),
error, subdevs);
error, subdev1);

// Read kernel file.
const char* kernel_location = "./convolution.cl";
Expand All @@ -280,11 +333,14 @@ int main(int argc, char* argv[])
// it's only necessary to add the -cl-std option for 2.0 and 3.0 OpenCL
// versions.
char compiler_options[1023] = "";
#if CL_HPP_TARGET_OPENCL_VERSION >= 300
strcat(compiler_options, "-cl-std=CL3.0 ");
#elif CL_HPP_TARGET_OPENCL_VERSION >= 200
strcat(compiler_options, "-cl-std=CL2.0 ");
#endif
if (opencl_version_contains(dev_version, "3."))
{
strcat(compiler_options, "-cl-std=CL3.0 ");
}
else if (opencl_version_contains(dev_version, "2."))
{
strcat(compiler_options, "-cl-std=CL2.0 ");
}

OCLERROR_RET(
clBuildProgram(program, 2, subdevices, compiler_options, NULL, NULL),
Expand Down Expand Up @@ -356,7 +412,7 @@ int main(int argc, char* argv[])
mask_dim * mask_dim, -1000, 1000);

// Create device buffers, from which we will create the subbuffers for the
// subdevices.
// sub-devices.
const size_t grid_midpoint = y_dim / 2;
const size_t pad_grid_midpoint = pad_y_dim / 2;

Expand Down Expand Up @@ -391,7 +447,7 @@ int main(int argc, char* argv[])
fflush(stdout);
}

// Set up subdevices for kernel execution.
// Set up sub-devices for kernel execution.
const size_t half_input_bytes =
sizeof(cl_float) * pad_x_dim * (pad_grid_midpoint + 1);
const size_t input_offset =
Expand All @@ -414,7 +470,7 @@ int main(int argc, char* argv[])
error, bufmask);

// Initialize queues for command execution on each device.
#if CL_HPP_TARGET_OPENCL_VERSION >= 200
#if defined(CL_VERSION_2_0) || defined(CL_VERSION_3_0)
cl_command_queue_properties props[] = { CL_QUEUE_PROPERTIES,
CL_QUEUE_PROFILING_ENABLE, 0 };
OCLERROR_PAR(sub_queues[subdevice] = clCreateCommandQueueWithProperties(
Expand Down Expand Up @@ -507,7 +563,8 @@ int main(int argc, char* argv[])
}

GET_CURRENT_TIMER(host_start)
host_convolution(h_input_grid, h_output_grid, h_mask, (cl_uint)x_dim, (cl_uint)y_dim);
host_convolution(h_input_grid, h_output_grid, h_mask, (cl_uint)x_dim,
(cl_uint)y_dim);
GET_CURRENT_TIMER(host_end)
size_t host_time;
TIMER_DIFFERENCE(host_time, host_start, host_end)
Expand Down Expand Up @@ -588,31 +645,31 @@ int main(int argc, char* argv[])
event1:
OCLERROR_RET(clReleaseEvent(events[0]), end_error, subbufout);
subbufout:
if (subdevice == 1)
if (subdevice >= 1)
{
OCLERROR_RET(clReleaseMemObject(sub_output_grids[1]), end_error,
subbufout0);
}
subbufout0:
OCLERROR_PAR(clReleaseMemObject(sub_output_grids[0]), end_error, subbufin);
subbufin:
if (subdevice == 1)
if (subdevice >= 1)
{
OCLERROR_RET(clReleaseMemObject(sub_input_grids[1]), end_error,
subbufin0);
}
subbufin0:
OCLERROR_RET(clReleaseMemObject(sub_input_grids[0]), end_error, subqueue);
subqueue:
if (subdevice == 1)
if (subdevice >= 1)
{
OCLERROR_RET(clReleaseCommandQueue(sub_queues[1]), end_error,
subqueue0);
}
subqueue0:
OCLERROR_RET(clReleaseCommandQueue(sub_queues[1]), end_error, conv);
OCLERROR_RET(clReleaseCommandQueue(sub_queues[0]), end_error, conv);
conv:
if (subdevice == 1)
if (subdevice >= 1)
{
OCLERROR_RET(clReleaseKernel(convolutions[1]), end_error, conv0);
}
Expand All @@ -631,15 +688,19 @@ int main(int argc, char* argv[])
hinput:
free(h_input_grid);
prg:
OCLERROR_RET(clReleaseProgram(program), end_error, subdevs);
OCLERROR_RET(clReleaseProgram(program), end_error, ker);
ker:
free(kernel);
contx:
OCLERROR_RET(clReleaseContext(context), end_error, end);
OCLERROR_RET(clReleaseContext(context), end_error, subdev1);
subdev1:
OCLERROR_RET(clReleaseDevice(subdevices[1]), end_error, subdev0);
subdev0:
OCLERROR_RET(clReleaseDevice(subdevices[0]), end_error, subdevs);
subdevs:
free(subdevices);
dev:
OCLERROR_RET(clReleaseDevice(dev), end_error, end);
props:
free(dev_props);
end:
if (error) cl_util_print_error(error);
return error;
Expand Down
68 changes: 53 additions & 15 deletions samples/core/multi-device/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,12 @@ void host_convolution(const std::vector<cl_float> in,
}
}

// Reports whether `version_fragment` occurs in the version part of an OpenCL
// version string such as the one returned for CL_DEVICE_VERSION.
//
// Per the OpenCL specification that string has the layout
// "OpenCL <major.minor> <vendor-specific information>". The vendor-specific
// tail may itself contain digits and dots (e.g. a driver version such as
// "12.1.1"), so a match is only accepted when it lies entirely within the
// leading "OpenCL <major.minor>" part. Strings that do not follow this layout
// are searched in full.
//
// Returns true when the fragment is found, false otherwise.
bool opencl_version_contains(const cl::string& dev_version,
                             const cl::string& version_fragment)
{
    const cl::string prefix{ "OpenCL " };

    // If the first occurrence does not fit inside the version part, no later
    // occurrence can, so looking at the first match is sufficient.
    const auto match_pos = dev_version.find(version_fragment);
    if (match_pos == cl::string::npos)
    {
        return false;
    }

    // Unknown layout: keep the plain substring semantics.
    if (dev_version.compare(0, prefix.size(), prefix) != 0)
    {
        return true;
    }

    // The version part ends at the first space after the "OpenCL " prefix;
    // without such a space there is no vendor-specific tail to ignore.
    const auto head_end = dev_version.find(' ', prefix.size());
    if (head_end == cl::string::npos)
    {
        return true;
    }

    return match_pos + version_fragment.size() <= head_end;
}

int main(int argc, char* argv[])
{
try
Expand All @@ -118,6 +124,9 @@ int main(int argc, char* argv[])
dev.getInfo<CL_DEVICE_PLATFORM>()
}; // https://github.com/KhronosGroup/OpenCL-CLHPP/issues/150

// Query OpenCL version supported by device.
const std::string dev_version = dev.getInfo<CL_DEVICE_VERSION>();

if (!diag_opts.quiet)
{
std::cout << "Selected device: " << dev.getInfo<CL_DEVICE_NAME>()
Expand All @@ -133,12 +142,44 @@ int main(int argc, char* argv[])
std::cout.flush();
}

#if CL_HPP_TARGET_OPENCL_VERSION < 120
std::cerr
<< "Error: OpenCL subdevices not supported before version 1.2 "
<< std::endl;
exit(EXIT_FAILURE);
#endif
if (opencl_version_contains(dev_version, "1.0")
|| opencl_version_contains(dev_version, "1.1"))
{
std::cout
<< "This sample requires device partitioning, which is an "
"OpenCL 1.2 feature, but the device chosen only "
"supports OpenCL "
<< dev_version
<< ". Please try with a different OpenCL device instead."
<< std::endl;
exit(EXIT_SUCCESS);
}

// Check if device supports fission.
std::vector<cl_device_partition_property> dev_props =
dev.getInfo<CL_DEVICE_PARTITION_PROPERTIES>();
if (dev_props.size() == 0)
{
std::cout << "This sample requires device fission, which is a "
"feature available from OpenCL 1.2 on, but the "
"device chosen does not seem to support it. Please "
"try with a different OpenCL device instead."
<< std::endl;
exit(EXIT_SUCCESS);
}

// Check if the "partition equally" type is supported.
if (std::find(dev_props.begin(), dev_props.end(),
CL_DEVICE_PARTITION_EQUALLY)
== dev_props.end())
{
std::cout << "This sample requires partition equally, which is a "
"partition scheme available from OpenCL 1.2 on, but "
"the device chosen does not seem to support it. "
"Please try with a different OpenCL device instead."
<< std::endl;
exit(EXIT_SUCCESS);
}

// Create subdevices, each with half of the compute units available.
cl_uint max_compute_units = dev.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
Expand Down Expand Up @@ -189,10 +230,10 @@ int main(int argc, char* argv[])
std::string compiler_opt_str =
"-cl-std=CL" + std::to_string(i) + ".0 "; // -cl-std=CLi.0

compiler_options += cl::string{ cl::util::opencl_c_version_contains(
dev, version_str)
? compiler_opt_str
: "" };
compiler_options +=
cl::string{ opencl_version_contains(dev_version, version_str)
? compiler_opt_str
: "" };
}
program.build(subdevices, compiler_options.c_str());

Expand Down Expand Up @@ -316,10 +357,7 @@ int main(int argc, char* argv[])
std::cout.flush();
}

auto convolution =
cl::KernelFunctor<cl::Buffer, cl::Buffer, cl::Buffer, cl_uint2>(
program, "convolution_3x3")
.getKernel();
auto convolution = cl::Kernel(program, "convolution_3x3");

cl::CommandQueue queue(context, subdevice,
cl::QueueProperties::Profiling);
Expand Down Expand Up @@ -362,7 +400,7 @@ int main(int argc, char* argv[])
std::cout.flush();
}

convolutions.push_back(convolution.clone());
convolutions.push_back(convolution);
sub_queues.push_back(queue);
sub_input_grids.push_back(sub_input_grid);
sub_output_grids.push_back(sub_output_grid);
Expand Down

0 comments on commit 0babaf4

Please sign in to comment.