Skip to content

Commit

Permalink
Added local workgroup size calculation.
Browse files Browse the repository at this point in the history
  • Loading branch information
Darrell A. Ross committed Jan 20, 2017
1 parent 077ce08 commit a615075
Show file tree
Hide file tree
Showing 7 changed files with 101 additions and 27 deletions.
64 changes: 43 additions & 21 deletions hwk1/hwk1/OCLArgs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#include "enums.h"
#include <vector>
#include <iostream>
#include <algorithm>
#include "ProblemGroups.h"

// Macros for OpenCL versions
#define OPENCL_VERSION_1_2 1.2f
Expand Down Expand Up @@ -347,33 +349,53 @@ int ocl_args_d_t::GetPlatformAndDeviceVersion(cl_platform_id platformId)
// Execute the Kernel
// @param[in] globalWorkSize size_t array of passed in constants to use
// @param[in] workSizeCount size of the globalWorkSize array
// @param[in] localWorkSize optional size_t array (workSizeCount entries) of local
//            work-group sizes; NULL lets the OpenCL runtime pick one. Ignored while
//            FIND_OPTIMAL_LOCAL_WORKGROUP_SIZE is set, because the search supplies
//            its own candidate sizes.
// @return CL_SUCCESS, or the first failing OpenCL status (which has been logged)
cl_uint ocl_args_d_t::ExecuteKernel(size_t *globalWorkSize, cl_uint workSizeCount, size_t* localWorkSize)
{
    const bool searching = FIND_OPTIMAL_LOCAL_WORKGROUP_SIZE;
    const size_t largestCandidate = 256;

    // One entry per work dimension: clEnqueueNDRangeKernel reads workSizeCount
    // elements from the local-size array, so a pointer to a single size_t is
    // only valid for 1-D launches. The candidate goes in dimension 0 and the
    // remaining dimensions stay at 1.
    std::vector<size_t> candidateLocalSize(workSizeCount, 1);
    ResultsList resultsList;

    // When searching, try 2, 4, ..., 256. Otherwise start at the upper bound so
    // the body runs exactly once with the caller-supplied local size.
    for (size_t candidate = searching ? 2 : largestCandidate; candidate <= largestCandidate; candidate *= 2)
    {
        const size_t* localSize = localWorkSize;
        if (searching && !candidateLocalSize.empty())
        {
            candidateLocalSize[0] = candidate;
            localSize = &candidateLocalSize[0];
        }

        // execute kernel
        // Argument order is (..., work_dim, global_work_offset, global_work_size,
        // local_work_size, ...): the offset stays NULL and the local size goes
        // in the slot after the global size.
        cl_int err = clEnqueueNDRangeKernel(this->commandQueue, this->kernel, workSizeCount, NULL, globalWorkSize, localSize, 0, NULL, &prof_event);
        if (CL_SUCCESS != err)
        {
            LogError("Error: Failed to run kernel, return %s\n", TranslateOpenCLError(err));
            return err;
        }

        // Wait until the queued kernel is completed by the device
        err = clFinish(this->commandQueue);
        if (CL_SUCCESS != err)
        {
            LogError("Error: clFinish return %s\n", TranslateOpenCLError(err));
            return err;
        }

        // Update internal OpenCL Profiler
        err = UpdateProfiler();
        if (CL_SUCCESS != err)
        {
            LogError("Error: clWaitForEvents return %s\n", TranslateOpenCLError(err));
            return err;
        }

        if (searching)
        {
            // Allocate only once the run has succeeded and the record is about
            // to be handed to resultsList, so no error path can leak it.
            ResultsStruct* result = new ResultsStruct();
            result->Annotation = "Finding Optimal Local Work Item Size";
            result->OpenCLRunTime = RunTimeMS();
            result->HasOpenCLRunTime = true;
            result->WorkGroupSize = candidate;
            resultsList.push_back(result);
        }
    }

    if (searching)
    {
        // PrintWorkGroupResultsToFile writes to the global RESULTS_FILE, so
        // redirect it for the duration of the call and then restore it.
        const std::string oldFile = RESULTS_FILE;
        RESULTS_FILE = "best_time.txt";
        PrintWorkGroupResultsToFile(resultsList);
        RESULTS_FILE = oldFile;
    }

    return CL_SUCCESS;
}

Expand Down
2 changes: 1 addition & 1 deletion hwk1/hwk1/OCLArgs.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ struct ocl_args_d_t
int SetupOpenCL(cl_device_type deviceType);
int CreateAndBuildProgram(const std::string& filename);
int GetPlatformAndDeviceVersion(cl_platform_id platformId);
cl_uint ExecuteKernel(size_t *globalWorkSize, cl_uint workSizeCount);
cl_uint ExecuteKernel(size_t *globalWorkSize, cl_uint workSizeCount, size_t* localWorkSize = NULL);
};

cl_uint SetKernelArgument(cl_kernel* kernel, cl_mem* mem, unsigned int argNum);
Expand Down
47 changes: 44 additions & 3 deletions hwk1/hwk1/ProblemGroups.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,21 @@ int GLOBAL_ARRAY_HEIGHT = 1024;
bool SKIP_VERIFICATION = false;
bool PRINT_TO_FILE = false;
std::string RESULTS_FILE = "results.txt";
bool FIND_OPTIMAL_LOCAL_WORKGROUP_SIZE = false;

// Builds an empty result record: both run times are zero, neither "has time"
// flag is set, and the work-group size is zero.
ResultsStruct::ResultsStruct()
    : WindowsRunTime(0.0),
      OpenCLRunTime(0.0),
      HasWindowsRunTime(false),
      HasOpenCLRunTime(false),
      WorkGroupSize(0)
{
    // Annotation is not listed above, so it is default-constructed.
}

// Ordering predicate on OpenCL run time: true when A's OpenCLRunTime is
// strictly smaller than B's.
bool resultTimeOCL(ResultsStruct* A, ResultsStruct* B)
{
    return B->OpenCLRunTime > A->OpenCLRunTime;
}
// Ensure memory is cleared
ResultsList::~ResultsList()
{
Expand All @@ -32,6 +38,33 @@ ResultsList::~ResultsList()
}
}

void PrintWorkGroupResultsToFile(const ResultsList& results)
{
std::ofstream outfile;
outfile.open(RESULTS_FILE, std::ios_base::app);
if (results.empty())
{
outfile << "No results";
return;
}

double bestOpenCLTime = 100000;
size_t bestOpenCLWorkSize = 1;
int num = 0;
outfile << results.front()->Annotation << std::endl;
outfile << "Run Number & WorkGroupSize & OpenCLTime\\\\hline\\hline" << std::endl;
for (ResultsList::const_iterator i = results.begin(), e = results.end(); i != e; ++i, ++num)
{
if ((*i)->OpenCLRunTime < bestOpenCLTime)
{
bestOpenCLTime = (*i)->OpenCLRunTime;
bestOpenCLWorkSize = (*i)->WorkGroupSize;
}
outfile << num + 1 << " & " << (*i)->WorkGroupSize << " & " << (*i)->OpenCLRunTime << "\\\\" << std::endl;
}
outfile << "Best Time: " << bestOpenCLTime << "; Work Group Size: " << bestOpenCLWorkSize << std::endl;
}

void PrintToFile(const ResultsList& results)
{
std::ofstream outfile;
Expand All @@ -41,18 +74,17 @@ void PrintToFile(const ResultsList& results)
outfile << "No results";
return;
}


double totalWindowsTimes = 0.0;
double totalOpenCLTimes = 0.0;
int num = 0;
outfile << results.front()->Annotation << std::endl;
outfile << "Run#, WindowsTime, OpenCLTime" << std::endl;
outfile << "Run#, WorkGroupSize, WindowsTime, OpenCLTime" << std::endl;
for (ResultsList::const_iterator i = results.begin(), e = results.end(); i != e; ++i, ++num)
{
totalWindowsTimes += (*i)->WindowsRunTime;
totalOpenCLTimes += (*i)->OpenCLRunTime;
outfile << num+1 << "," << (*i)->WindowsRunTime << "," << (*i)->OpenCLRunTime << std::endl;
outfile << num+1 << "," << (*i)->WorkGroupSize << "," << (*i)->WindowsRunTime << "," << (*i)->OpenCLRunTime << std::endl;
}
const double WindowsAvg = totalWindowsTimes / (double)num;
const double OpenCLAvg = totalOpenCLTimes / (double)num;
Expand Down Expand Up @@ -189,6 +221,7 @@ ProblemGroup* GroupManagerInputControlFactory()
InputControl->problems_[++num] = new Problem(&ComparisonThreshold, "Set minimum difference for verifications.");
InputControl->problems_[++num] = new Problem(&PrintResultsToFile, "Set times to print to file (defaults to 0).");
InputControl->problems_[++num] = new Problem(&SetResultsFile, "Set the file path for saving results.");
InputControl->problems_[++num] = new Problem(&SetFindOptimalWorkGroupSize, "Enable local work group size search functionality.");
return InputControl;
}

Expand Down Expand Up @@ -242,3 +275,11 @@ int SetResultsFile(ResultsStruct* results)
RESULTS_FILE = s;
return 0;
}
// Prompts on stdout and reads an unsigned integer from stdin. The local
// work-group size search flag ends up enabled only if the value is exactly 1.
// @param[in] results unused
// @return always 0
int SetFindOptimalWorkGroupSize(ResultsStruct* results)
{
    std::cout << "Enter 1 to find optimal local work group size (currently " << FIND_OPTIMAL_LOCAL_WORKGROUP_SIZE << "): ";

    // Start from the current setting expressed as 0/1.
    unsigned int choice = FIND_OPTIMAL_LOCAL_WORKGROUP_SIZE ? 1u : 0u;
    std::cin >> choice;

    FIND_OPTIMAL_LOCAL_WORKGROUP_SIZE = (1u == choice);
    return 0;
}
9 changes: 9 additions & 0 deletions hwk1/hwk1/ProblemGroups.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ extern int GLOBAL_ARRAY_HEIGHT;
extern bool SKIP_VERIFICATION;
extern bool PRINT_TO_FILE;
extern std::string RESULTS_FILE;
extern bool FIND_OPTIMAL_LOCAL_WORKGROUP_SIZE;

struct ResultsStruct
{
Expand All @@ -15,14 +16,21 @@ struct ResultsStruct
bool HasWindowsRunTime;
bool HasOpenCLRunTime;
std::string Annotation;
size_t WorkGroupSize;
};

// For use when sorting results lists
bool resultTimeOCL(ResultsStruct* A, ResultsStruct* B);

// A std::vector of ResultsStruct pointers with a user-declared destructor.
// NOTE(review): the destructor presumably frees the pointed-to results (its
// definition carries the comment "Ensure memory is cleared") — confirm before
// copying a ResultsList, since a copy would hold the same pointers.
class ResultsList : public std::vector<ResultsStruct*>
{
public:
~ResultsList();


};

void PrintWorkGroupResultsToFile(const ResultsList& results);
void PrintToFile(const ResultsList& results);
void PrintResults(const ResultsList& results);

Expand Down Expand Up @@ -83,3 +91,4 @@ int RunCount(ResultsStruct* results);
int ComparisonThreshold(ResultsStruct* results);
int PrintResultsToFile(ResultsStruct* results);
int SetResultsFile(ResultsStruct* results);
int SetFindOptimalWorkGroupSize(ResultsStruct* results);
2 changes: 1 addition & 1 deletion hwk1/hwk1/homework2.cl
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ __kernel void CrossProduct_Manual(__global float4* pA, __global float4* pB, __gl
{
int idx = get_global_id(0);
pC[idx].x = (pA[idx].y*pB[idx].z - pA[idx].z*pB[idx].y);
pC[idx].y = -1.0*(pA[idx].x*pB[idx].z - pA[idx].z*pB[idx].x);
pC[idx].y = -1.0f*(pA[idx].x*pB[idx].z - pA[idx].z*pB[idx].x);
pC[idx].z = (pA[idx].x*pB[idx].y - pA[idx].y*pB[idx].x);
// ignore w
}
Expand Down
3 changes: 2 additions & 1 deletion hwk1/hwk1/homework2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
#include "arithmetic.h"
#include "enums.h"
#include <iostream>

#include <vector>
#include <algorithm>

/////////// HOMEWORK 2
namespace
Expand Down
1 change: 1 addition & 0 deletions hwk1/hwk1/homework2.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,4 @@ int exCL_NativeSquareRoot(ResultsStruct* results);
int exCL_SquareRoot(ResultsStruct* results);
int exSequential_SquareRoot(ResultsStruct* results);

int FindOptimalWorkGroupSize(ResultsStruct* results);

0 comments on commit a615075

Please sign in to comment.