diff --git a/tests/benchmark/parser/CMakeLists.txt b/tests/benchmark/parser/CMakeLists.txt deleted file mode 100644 index 47d1219321ff..000000000000 --- a/tests/benchmark/parser/CMakeLists.txt +++ /dev/null @@ -1,18 +0,0 @@ -cmake_minimum_required(VERSION 3.0) - -project(benchmark) - -OPTION(USE_PRECISE_TEXT_PARSER "Use precise (and faster) double parser for text input file" OFF) - -if(USE_PRECISE_TEXT_PARSER) - ADD_DEFINITIONS(-DUSE_PRECISE_TEXT_PARSER) -endif(USE_PRECISE_TEXT_PARSER) - -set(CMAKE_CXX_STANDARD 11) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -O3") - -include_directories(${CMAKE_SOURCE_DIR}/../../../include) -include_directories(${CMAKE_SOURCE_DIR}/../../../src) -#link_directories(${CMAKE_SOURCE_DIR}/lib) - -add_executable(parser parser.cpp) diff --git a/tests/benchmark/parser/README.md b/tests/benchmark/parser/README.md deleted file mode 100644 index c5c3f1edfdf4..000000000000 --- a/tests/benchmark/parser/README.md +++ /dev/null @@ -1,38 +0,0 @@ -This is a simple benchmark comparing performance of `Common::Atof` -and `Common::AtofPrecise` when used in `CSVParser`. - -Just run `./run_parser_benchmark.sh` in this directory. - -The test script generates 20000 rows, 2000 columns csv, 840MB file size. - -For this test, `Common::Atof` is much faster than `Common::AtofPrecise`. - -Benchmark run output on Intel Xeon 2640 v3: - -``` -========== Benchmark run Atof parser ========== real 0m2.027s user 0m1.822s -real 0m2.027s -user 0m1.822s -sys 0m0.204s - -real 0m2.186s -user 0m1.998s -sys 0m0.188s - -real 0m2.202s -user 0m2.010s -sys 0m0.192s - -========== Benchmark run AtofPrecise parser ========== -real 0m6.556s -user 0m6.324s -sys 0m0.232s - -real 0m6.648s -user 0m6.496s -sys 0m0.152s - -real 0m6.912s -user 0m6.748s -sys 0m0.164s -``` \ No newline at end of file diff --git a/tests/benchmark/parser/gen_csv.py b/tests/benchmark/parser/gen_csv.py deleted file mode 100644 index 5c2c9b5d2e23..000000000000 --- a/tests/benchmark/parser/gen_csv.py +++ /dev/null @@ -1,17 +0,0 @@ -import sys - -import numpy as np - - -def gen_csv(fname, nrow, ncol): - nrow = int(nrow) - ncol = int(ncol) - - arr = np.random.random(nrow * ncol) * 5 - arr = arr.reshape((nrow, ncol)) - np.savetxt(fname, arr, fmt='%.19f', delimiter=',') - - -if __name__ == '__main__': - import argh - argh.dispatch_command(gen_csv) \ No newline at end of file diff --git a/tests/benchmark/parser/parser.cpp b/tests/benchmark/parser/parser.cpp deleted file mode 100644 index 6eb244ef707c..000000000000 --- a/tests/benchmark/parser/parser.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// This is a very simple benchmark for comparing performance of Atof and AtofPrecise. - -#include -#include -#include -#include - -#include - -namespace LightGBM { - -void ParseCSV(const std::string& fpath, int ncol) { - CSVParser parser(-1, ncol); - - std::ifstream infile(fpath); - if (! infile) { - std::cerr << "fail to open " << fpath; - std::exit(1); - } - - std::string line; - double label; - std::vector> oneline_features; - while (getline(infile, line)) { - parser.ParseOneLine(line.c_str(), &oneline_features, &label); -// printf("%f\n", oneline_features[0].second); - oneline_features.clear(); - } -} - -} // namespace LightGBM - -int main(int argc, const char* argv[]) { - if (argc != 3) { - printf("usage: parser \n"); - exit(1); - } - - const char* fpath = argv[1]; - long ncol = strtol(argv[2], nullptr, 10); - if (errno != 0) { - fprintf(stderr, "fail to parse ncol\n"); - exit(1); - } - - LightGBM::ParseCSV(fpath, ncol); - - return 0; -} \ No newline at end of file diff --git a/tests/benchmark/parser/run_parser_benchmark.sh b/tests/benchmark/parser/run_parser_benchmark.sh deleted file mode 100755 index 2d4d380fac71..000000000000 --- a/tests/benchmark/parser/run_parser_benchmark.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - -set -e - -nrow=20000 -ncol=2000 - -build() { - d=$1 - flag=$2 - echo "building $d $flag" - test -d $d || (mkdir -p $d && cd $d && cmake $flag ..) - pushd $d - make - popd -} - -gen_data() { - if [[ ! -f test.csv ]]; then - echo "generating csv ..." - python gen_csv.py test.csv $nrow $ncol - fi -} - -parser_benchmark() { - echo "========== Benchmark run Atof parser ==========" - for i in {1..3}; do -# /usr/bin/time ./build/parser test.csv $ncol - time ./build/parser test.csv $ncol - done - - echo - echo "========== Benchmark run AtofPrecise parser ==========" - for i in {1..3}; do -# /usr/bin/time ./build-precise/parser test.csv $ncol - time ./build-precise/parser test.csv $ncol - done -} - -build build "" -build build-precise "-DUSE_PRECISE_TEXT_PARSER=on" -gen_data -parser_benchmark