From cdf415f0d2cc8e84337975dc49b03ead377b8d5e Mon Sep 17 00:00:00 2001 From: Abhigyan Acherjee Date: Tue, 12 Dec 2023 17:34:34 +0530 Subject: [PATCH] Benchmarking for Codegen and CodegenNoGrad() --- root/CMakeLists.txt | 4 ++-- .../roofit/RooFitUnBinnedBenchmarks.cxx | 12 +++++++++++ root/roofit/roofit/benchRooFitBackends.cxx | 20 ++++++++++++++++--- 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/root/CMakeLists.txt b/root/CMakeLists.txt index a25eb8f3..27aa635c 100644 --- a/root/CMakeLists.txt +++ b/root/CMakeLists.txt @@ -1,10 +1,10 @@ add_subdirectory(interpreter) -add_subdirectory(io) +#add_subdirectory(io) add_subdirectory(hist) add_subdirectory(math) add_subdirectory(pyroot) if (roofit) add_subdirectory(roofit) endif() -add_subdirectory(tree) +#add_subdirectory(tree) add_subdirectory(tmva) diff --git a/root/roofit/roofit/RooFitUnBinnedBenchmarks.cxx b/root/roofit/roofit/RooFitUnBinnedBenchmarks.cxx index 58f8ecf5..698b976a 100644 --- a/root/roofit/roofit/RooFitUnBinnedBenchmarks.cxx +++ b/root/roofit/roofit/RooFitUnBinnedBenchmarks.cxx @@ -239,6 +239,10 @@ const auto unit = benchmark::kMillisecond; auto Legacy = static_cast(RooFit::EvalBackend::Value::Legacy); auto Cpu = static_cast(RooFit::EvalBackend::Value::Cpu); auto Cuda = static_cast(RooFit::EvalBackend::Value::Cuda); +// Also benchmark the codegen backends: Codegen, and CodegenNoGrad (codegen without clad): +auto Codegen = static_cast(RooFit::EvalBackend::Value::Codegen); +auto CodegenNoGrad = static_cast(RooFit::EvalBackend::Value::CodegenNoGrad); + #define ARGS UseRealTime()->Unit(unit) @@ -246,6 +250,9 @@ BENCHMARK(BDecayWithMixing)->Name("BDecayWithMixing_FitLegacy")->Args({nEvents, BENCHMARK(BDecayWithMixing)->Name("BDecayWithMixing_FitLegacyNumCPU2")->Args({nEvents, Legacy, 2})->ARGS; BENCHMARK(BDecayWithMixing)->Name("BDecayWithMixing_FitLegacyNumCPU4")->Args({nEvents, Legacy, 4})->ARGS; BENCHMARK(BDecayWithMixing)->Name("BDecayWithMixing_FitCPU")->Args({nEvents, Cpu, 1})->ARGS; +//testing for codegen and 
codegennograd +BENCHMARK(BDecayWithMixing)->Name("BDecayWithMixing_FitCodegen")->Args({nEvents, Codegen, 1})->ARGS; +BENCHMARK(BDecayWithMixing)->Name("BDecayWithMixing_FitCodegenNoGrad")->Args({nEvents, CodegenNoGrad, 1})->ARGS; #ifdef DO_BENCH_ROOFIT_CUDA BENCHMARK(BDecayWithMixing)->Name("BDecayWithMixing_FitCUDA")->Args({nEvents, Cuda, 1})->ARGS; #endif @@ -254,6 +261,9 @@ BENCHMARK(BDecayGaussResolution)->Name("BDecayGaussResolution_FitLegacy")->Args( BENCHMARK(BDecayGaussResolution)->Name("BDecayGaussResolution_FitLegacyNumCPU2")->Args({nEvents, Legacy, 2})->ARGS; BENCHMARK(BDecayGaussResolution)->Name("BDecayGaussResolution_FitLegacyNumCPU4")->Args({nEvents, Legacy, 4})->ARGS; BENCHMARK(BDecayGaussResolution)->Name("BDecayGaussResolution_FitCPU")->Args({nEvents, Cpu, 1})->ARGS; +//testing for codegen and codegennograd +BENCHMARK(BDecayGaussResolution)->Name("BDecayGaussResolution_FitCodegen")->Args({nEvents, Codegen, 1})->ARGS; +BENCHMARK(BDecayGaussResolution)->Name("BDecayGaussResolution_FitCodegenNoGrad")->Args({nEvents, CodegenNoGrad, 1})->ARGS; #ifdef DO_BENCH_ROOFIT_CUDA BENCHMARK(BDecayGaussResolution)->Name("BDecayGaussResolution_FitCUDA")->Args({nEvents, Cuda, 1})->ARGS; #endif @@ -262,6 +272,8 @@ BENCHMARK(BDecayDoubleGauss)->Name("BDecayDoubleGauss_FitLegacy")->Args({nEvents BENCHMARK(BDecayDoubleGauss)->Name("BDecayDoubleGauss_FitLegacyNumCPU2")->Args({nEvents, Legacy, 2})->ARGS; BENCHMARK(BDecayDoubleGauss)->Name("BDecayDoubleGauss_FitLegacyNumCPU4")->Args({nEvents, Legacy, 4})->ARGS; BENCHMARK(BDecayDoubleGauss)->Name("BDecayDoubleGauss_FitCPU")->Args({nEvents, Cpu, 1})->ARGS; +BENCHMARK(BDecayDoubleGauss)->Name("BDecayDoubleGauss_FitCodegen")->Args({nEvents, Codegen, 1})->ARGS; +BENCHMARK(BDecayDoubleGauss)->Name("BDecayDoubleGauss_FitCodegenNoGrad")->Args({nEvents, CodegenNoGrad, 1})->ARGS; #ifdef DO_BENCH_ROOFIT_CUDA BENCHMARK(BDecayDoubleGauss)->Name("BDecayDoubleGauss_FitCUDA")->Args({nEvents, Cuda, 1})->ARGS; #endif diff --git 
a/root/roofit/roofit/benchRooFitBackends.cxx b/root/roofit/roofit/benchRooFitBackends.cxx index 4513baee..afe8c169 100644 --- a/root/roofit/roofit/benchRooFitBackends.cxx +++ b/root/roofit/roofit/benchRooFitBackends.cxx @@ -22,11 +22,11 @@ #include int printLevel = 0; -size_t nEvents = 100000; +size_t nEvents = 10000; const auto minimizerName = "Minuit2"; constexpr bool verbose = false; -enum RunConfig_t { runScalar, runCpu, fitScalar, fitCpu, fitCuda }; +enum RunConfig_t { runScalar, runCpu, fitScalar, fitCpu, fitCuda};//, fitCodegen, fitCodegenNoGrad }; void runFitBenchmark(benchmark::State &state, RooAbsPdf &pdf, RooAbsData &data) { @@ -45,7 +45,11 @@ void runFitBenchmark(benchmark::State &state, RooAbsPdf &pdf, RooAbsData &data) pdf.fitTo(data, EvalBackend::Cpu(), Minimizer(minimizerName), PrintLevel(printLevel - 1), PrintEvalErrors(-1)); } else if (runConfig == fitCuda) { pdf.fitTo(data, EvalBackend::Cuda(), Minimizer(minimizerName), PrintLevel(printLevel - 1), PrintEvalErrors(-1)); - } + }// else if (runConfig == fitCodegen) { + // pdf.fitTo(data, EvalBackend::Codegen(), Minimizer(minimizerName), PrintLevel(printLevel - 1), PrintEvalErrors(-1)); + // } else if (runConfig == fitCodegenNoGrad) { + // pdf.fitTo(data, EvalBackend::CodegenNoGrad(), Minimizer(minimizerName), PrintLevel(printLevel - 1), PrintEvalErrors(-1)); + // } state.PauseTiming(); params.assign(paramsInitial); state.ResumeTiming(); @@ -332,18 +336,26 @@ auto const unit = benchmark::kMillisecond; BENCHMARK(benchFitGauss)->Unit(unit)->Name("Gaus_FitLegacy")->Args({fitScalar}); BENCHMARK(benchFitGauss)->Unit(unit)->Name("Gaus_FitCPU")->Args({fitCpu}); CUDA_ONLY(BENCHMARK(benchFitGauss)->Unit(unit)->Name("Gaus_FitCUDA")->Args({fitCuda})); +BENCHMARK(benchFitGauss)->Unit(unit)->Name("Gaus_FitCodegen")->Args({fitCodegen}); +BENCHMARK(benchFitGauss)->Unit(unit)->Name("Gaus_FitCodegen_No_Grad")->Args({fitCodegenNoGrad}); 
BENCHMARK(benchFitGaussXSigma)->Unit(unit)->Name("GausXS_FitLegacy")->Args({fitScalar}); BENCHMARK(benchFitGaussXSigma)->Unit(unit)->Name("GausXS_FitCPU")->Args({fitCpu}); CUDA_ONLY(BENCHMARK(benchFitGaussXSigma)->Unit(unit)->Name("GausXS_FitCUDA")->Args({fitCuda})); +BENCHMARK(benchFitGaussXSigma)->Unit(unit)->Name("GausXS_FitCodegen")->Args({fitCodegen}); +BENCHMARK(benchFitGaussXSigma)->Unit(unit)->Name("GausXS_FitCodegen_No_Grad")->Args({fitCodegenNoGrad}); BENCHMARK(benchFit)->Unit(unit)->Name("AddPdf_FitLegacy")->Args({fitScalar}); BENCHMARK(benchFit)->Unit(unit)->Name("AddPdf_FitCPU")->Args({fitCpu}); CUDA_ONLY(BENCHMARK(benchFit)->Unit(unit)->Name("AddPdf_FitCUDA")->Args({fitCuda})); +BENCHMARK(benchFit)->Unit(unit)->Name("AddPdf_FitCodegen")->Args({fitCodegen}); +BENCHMARK(benchFit)->Unit(unit)->Name("AddPdf_FitCodegen_No_Grad")->Args({fitCodegenNoGrad}); BENCHMARK(benchProdPdf)->Unit(unit)->Name("ProdPdf_FitLegacy")->Unit(benchmark::kMillisecond)->Args({fitScalar}); BENCHMARK(benchProdPdf)->Unit(unit)->Name("ProdPdf_FitCPU")->Unit(benchmark::kMillisecond)->Args({fitCpu}); CUDA_ONLY(BENCHMARK(benchProdPdf)->Unit(unit)->Name("ProdPdf_FitCUDA")->Unit(benchmark::kMillisecond)->Args({fitCuda})); +BENCHMARK(benchProdPdf)->Unit(unit)->Name("ProdPdf_FitCodegen")->Unit(benchmark::kMillisecond)->Args({fitCodegen}); +BENCHMARK(benchProdPdf)->Unit(unit)->Name("ProdPdf_FitCodegen_No_Grad")->Unit(benchmark::kMillisecond)->Args({fitCodegenNoGrad}); // Watch out with the result from these benchmarks: if there are evaluation // errors during the minimization, the time differences will mostly come from @@ -351,6 +363,8 @@ CUDA_ONLY(BENCHMARK(benchProdPdf)->Unit(unit)->Name("ProdPdf_FitCUDA")->Unit(ben BENCHMARK(benchModel)->Unit(unit)->Name("FitModel_FitLegacy")->Unit(benchmark::kMillisecond)->Args({fitScalar}); BENCHMARK(benchModel)->Unit(unit)->Name("FitModel_FitCPU")->Unit(benchmark::kMillisecond)->Args({fitCpu}); 
CUDA_ONLY(BENCHMARK(benchModel)->Unit(unit)->Name("FitModel_FitCUDA")->Unit(benchmark::kMillisecond)->Args({fitCuda})); +BENCHMARK(benchModel)->Unit(unit)->Name("FitModel_FitCodegen")->Unit(benchmark::kMillisecond)->Args({fitCodegen}); +BENCHMARK(benchModel)->Unit(unit)->Name("FitModel_FitCodegen_No_Grad")->Unit(benchmark::kMillisecond)->Args({fitCodegenNoGrad}); int main(int argc, char **argv) {