Skip to content

Commit

Permalink
ENH: Bandwidth-based benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
allemangD committed Sep 17, 2024
1 parent 72aa7d2 commit c8be98b
Show file tree
Hide file tree
Showing 7 changed files with 227 additions and 536 deletions.
131 changes: 66 additions & 65 deletions examples/SigmaBenchmark.cxx → examples/BandwidthBenchmark.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include "itkHalideDiscreteGaussianImageFilter.h"
#include "itkDiscreteGaussianImageFilter.h"
#include "itkRecursiveGaussianImageFilter.h"
#include "itkHalideGPUDiscreteGaussianImageFilter.h"
#include "itkGPUDiscreteGaussianImageFilter.h"
#include "itkAdditiveGaussianNoiseImageFilter.h"
Expand All @@ -39,25 +40,40 @@ using HalideBlur = itk::HalideDiscreteGaussianImageFilter<ImageType, ImageType>;
using GPUBlur = itk::GPUDiscreteGaussianImageFilter<GPUImageType, GPUImageType>;
using HalideGPUBlur = itk::HalideGPUDiscreteGaussianImageFilter<ImageType, ImageType>;

using ms = std::chrono::duration<double, std::milli>;
using ns = std::chrono::duration<double, std::nano>;

ms
ns
run_itk_cpu(ImageType * image, float sigma)
{
using FilterType = itk::DiscreteGaussianImageFilter<ImageType, ImageType>;
FilterType::Pointer filter = FilterType::New();
filter->SetInput(image);
filter->SetVariance(sigma * sigma);
filter->SetSigma(sigma);
filter->SetMaximumKernelWidth(48);

std::chrono::high_resolution_clock::time_point start = std::chrono::high_resolution_clock::now();
filter->Update();
std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now();

return std::chrono::duration_cast<ms>(end - start);
return std::chrono::duration_cast<ns>(end - start);
}

ms
/**
 * Time a single run of itk::RecursiveGaussianImageFilter over `image`.
 *
 * Only the call to Update() sits between the two clock reads; creating and
 * configuring the filter is not part of the measurement.
 *
 * NOTE(review): RecursiveGaussianImageFilter is believed to smooth along a
 * single direction only (direction 0 unless SetDirection is called) — confirm
 * against the ITK docs. If so, this measures one 1-D pass, whereas the
 * DiscreteGaussian variants blur every dimension.
 *
 * \param image  input image handed to the filter.
 * \param sigma  value forwarded to SetSigma.
 * \return elapsed time of Update(), expressed as `ns`.
 */
ns
run_itk_rec(ImageType * image, float sigma)
{
  using RecursiveBlur = itk::RecursiveGaussianImageFilter<ImageType, ImageType>;
  using Clock = std::chrono::high_resolution_clock;

  RecursiveBlur::Pointer blur = RecursiveBlur::New();
  blur->SetInput(image);
  blur->SetSigma(sigma);

  const Clock::time_point began = Clock::now();
  blur->Update();
  const Clock::time_point finished = Clock::now();

  const auto elapsed = finished - began;
  return std::chrono::duration_cast<ns>(elapsed);
}

ns
run_itk_gpu(ImageType * image, float sigma)
{
using CastType = itk::CastImageFilter<ImageType, GPUImageType>;
Expand All @@ -67,7 +83,7 @@ run_itk_gpu(ImageType * image, float sigma)
using FilterType = itk::GPUDiscreteGaussianImageFilter<GPUImageType, GPUImageType>;
FilterType::Pointer filter = FilterType::New();
filter->SetInput(cast->GetOutput());
filter->SetVariance(sigma * sigma);
filter->SetSigma(sigma);
filter->SetMaximumKernelWidth(48);

std::chrono::high_resolution_clock::time_point start = std::chrono::high_resolution_clock::now();
Expand All @@ -76,11 +92,11 @@ run_itk_gpu(ImageType * image, float sigma)
filter->GetOutput()->UpdateBuffers();
std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now();

return std::chrono::duration_cast<ms>(end - start);
return std::chrono::duration_cast<ns>(end - start);
}

ms
run_halide_cpu(ImageType * image, float sigma)
ns
run_hal_cpu(ImageType * image, float sigma)
{
using FilterType = itk::HalideDiscreteGaussianImageFilter<ImageType, ImageType>;
FilterType::Pointer filter = FilterType::New();
Expand All @@ -92,11 +108,11 @@ run_halide_cpu(ImageType * image, float sigma)
filter->Update();
std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now();

return std::chrono::duration_cast<ms>(end - start);
return std::chrono::duration_cast<ns>(end - start);
}

ms
run_halide_gpu(ImageType * image, float sigma)
ns
run_hal_gpu(ImageType * image, float sigma)
{
using FilterType = itk::HalideGPUDiscreteGaussianImageFilter<ImageType, ImageType>;
FilterType::Pointer filter = FilterType::New();
Expand All @@ -108,7 +124,7 @@ run_halide_gpu(ImageType * image, float sigma)
filter->Update();
std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now();

return std::chrono::duration_cast<ms>(end - start);
return std::chrono::duration_cast<ns>(end - start);
}

ImageType::Pointer
Expand Down Expand Up @@ -146,6 +162,22 @@ make_image(float extent, size_t resolution)
return noise->GetOutput();
}

/**
 * Ask a throw-away CPUBlur filter which kernel radius it would use for
 * `sigma` on `image`, without running the filter.
 *
 * The probe is configured with the same maximum kernel width (48) that the
 * timed ITK filters in this file set, so the reported radius reflects that cap.
 *
 * \param image  image set as the probe's input.
 * \param sigma  value forwarded to SetSigma.
 * \return element 0 of the filter's kernel-radius array (presumably the
 *         radius along the first image axis — verify against the ITK API).
 */
size_t
get_kernel_radius(ImageType * image, float sigma)
{
  CPUBlur::Pointer probe = CPUBlur::New();
  probe->SetInput(image);
  probe->SetSigma(sigma);
  probe->SetMaximumKernelWidth(48);

  const auto radii = probe->GetKernelRadius();
  return radii[0];
}

/**
 * Throughput achieved when `count` units are processed in `time`.
 *
 * The result is `count` divided by the duration's tick count in nanoseconds.
 * When `count` is a number of bytes, that is bytes per nanosecond, which is
 * numerically the same as gigabytes (1e9 bytes) per second.
 *
 * A zero-length duration is not guarded against.
 *
 * \param time   elapsed time of the measured operation.
 * \param count  amount of work done in that time.
 * \return `count` per nanosecond, as a double.
 */
double
bandwidth(ns time, size_t count)
{
  const double elapsed_ns = time.count();
  const double amount = static_cast<double>(count);
  return amount / elapsed_ns;
}

int
main(int argc, char * argv[])
{
Expand All @@ -157,77 +189,46 @@ main(int argc, char * argv[])

std::string out_path(argv[1]);
std::ofstream csv(out_path);
size_t samples = 10;

float extent = 300.0;
size_t resolution = 1;
size_t image_width = 500;
size_t byte_count = image_width * image_width * image_width * sizeof(ImageType::PixelType);

ImageType::Pointer image = make_image(extent, resolution);
ImageType::Pointer image = make_image(image_width, 1.0);

// warm-up device context
{
// warm-up device context
run_itk_cpu(image, 1);
run_itk_rec(image, 1);
run_itk_gpu(image, 1);
run_halide_cpu(image, 1);
run_halide_gpu(image, 1);
run_hal_cpu(image, 1);
run_hal_cpu(image, 1);
}

size_t samples = 5;
csv << "sigma,radius,itk_cpu,itk_rec,itk_gpu,hal_cpu,hal_gpu" << std::endl;

csv << "sigma,itk_cpu,itk_gpu,itk_halide_cpu,itk_halide_gpu" << std::endl;
for (float sigma = 1; sigma < 20; sigma += 1.5)
{
size_t radius = get_kernel_radius(image, sigma);

const auto proc = [&](float sigma) {
std::cout << "sigma " << sigma << " " << std::flush;
std::cout << "sigma " << sigma << " (radius " << radius << ")" << std::flush;

for (size_t sample = 0; sample < samples; sample++)
for (int k = 0; k < samples; ++k)
{
std::cout << "." << std::flush;

csv << sigma << ",";

if (sigma <= 5) // ITK CPU is prohibitively slow past this point
{
csv << run_itk_cpu(image, sigma).count() << ",";
}
else
{
csv << "nan,";
}

// if (extent * res < 800) // ITK GPU memory allocation failure past this point
// {
csv << run_itk_gpu(image, sigma).count() << ",";
// }
// else
// {
// csv << "nan,";
// }

if (sigma < 19) // Halide CPU is prohibitively slow past this point
{
csv << run_halide_cpu(image, sigma).count() << ",";
}
else
{
csv << "nan,";
}

csv << run_halide_gpu(image, sigma).count() << ",";

csv << sigma << "," << radius << ",";
csv << bandwidth(run_itk_cpu(image, sigma), byte_count) << ",";
csv << bandwidth(run_itk_rec(image, sigma), byte_count) << ",";
csv << bandwidth(run_itk_gpu(image, sigma), byte_count) << ",";
csv << bandwidth(run_hal_cpu(image, sigma), byte_count) << ",";
csv << bandwidth(run_hal_gpu(image, sigma), byte_count);
csv << std::endl;
}

std::cout << std::endl;
};

for (int i = 1; i <= 8; i += 1)
{
proc(static_cast<float>(i));
}
for (int i = 10; i <= 25; i += 3)
{
// beyond 25, kernel is too large.
proc(static_cast<float>(i));
}


return EXIT_SUCCESS;
}
7 changes: 2 additions & 5 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,5 @@ else()
endif()
include(${ITK_USE_FILE})

add_executable(ResolutionBenchmark ResolutionBenchmark.cxx)
target_link_libraries(ResolutionBenchmark ${ITK_LIBRARIES})

add_executable(SigmaBenchmark SigmaBenchmark.cxx)
target_link_libraries(SigmaBenchmark ${ITK_LIBRARIES})
add_executable(BandwidthBenchmark BandwidthBenchmark.cxx)
target_link_libraries(BandwidthBenchmark ${ITK_LIBRARIES})
Loading

0 comments on commit c8be98b

Please sign in to comment.