From ca391c1f8cbe282b3011f97232ae1ebdd90bd719 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Fri, 23 Aug 2024 12:29:54 -0700 Subject: [PATCH] Fix datagen example --- .../common/cudf_tpch_datagen/datagen.cpp | 206 +++++++++--------- 1 file changed, 101 insertions(+), 105 deletions(-) diff --git a/cpp/benchmarks/common/cudf_tpch_datagen/datagen.cpp b/cpp/benchmarks/common/cudf_tpch_datagen/datagen.cpp index 2d3233d6fbf..4686313a8dc 100644 --- a/cpp/benchmarks/common/cudf_tpch_datagen/datagen.cpp +++ b/cpp/benchmarks/common/cudf_tpch_datagen/datagen.cpp @@ -19,120 +19,116 @@ #include #include -// namespace { -// const std::vector ORDERS = {"o_orderkey", -// "o_custkey", -// "o_orderdate", -// "o_orderpriority", -// "o_clerk", -// "o_shippriority", -// "o_comment", -// "o_totalprice", -// "o_orderstatus"}; -// const std::vector LINEITEM = {"l_orderkey", -// "l_partkey", -// "l_suppkey", -// "l_linenumber", -// "l_quantity", -// "l_discount", -// "l_tax", -// "l_shipdate", -// "l_commitdate", -// "l_receiptdate", -// "l_returnflag", -// "l_linestatus", -// "l_shipinstruct", -// "l_shipmode", -// "l_comment", -// "l_extendedprice"}; -// const std::vector PART = {"p_partkey", -// "p_name", -// "p_mfgr", -// "p_brand", -// "p_type", -// "p_size", -// "p_container", -// "p_retailprice", -// "p_comment"}; -// const std::vector PARTSUPP = { -// "ps_partkey", "ps_suppkey", "ps_availqty", "ps_supplycost", "ps_comment"}; -// const std::vector SUPPLIER = { -// "s_suppkey", "s_name", "s_address", "s_nationkey", "s_phone", "s_acctbal", "s_comment"}; -// const std::vector CUSTOMER = {"c_custkey", -// "c_name", -// "c_address", -// "c_nationkey", -// "c_phone", -// "c_acctbal", -// "c_mktsegment", -// "c_comment"}; -// const std::vector NATION = {"n_nationkey", "n_name", "n_regionkey", "n_comment"}; -// const std::vector REGION = {"r_regionkey", "r_name", "r_comment"}; +namespace { +const std::vector ORDERS = {"o_orderkey", + "o_custkey", + "o_orderdate", + "o_orderpriority", + "o_clerk", + "o_shippriority", + "o_comment", + "o_totalprice", + "o_orderstatus"}; +const std::vector LINEITEM = {"l_orderkey", + "l_partkey", + "l_suppkey", + "l_linenumber", + "l_quantity", + "l_discount", + "l_tax", + "l_shipdate", + "l_commitdate", + "l_receiptdate", + "l_returnflag", + "l_linestatus", + "l_shipinstruct", + "l_shipmode", + "l_comment", + "l_extendedprice"}; +const std::vector PART = {"p_partkey", + "p_name", + "p_mfgr", + "p_brand", + "p_type", + "p_size", + "p_container", + "p_retailprice", + "p_comment"}; +const std::vector PARTSUPP = { + "ps_partkey", "ps_suppkey", "ps_availqty", "ps_supplycost", "ps_comment"}; +const std::vector SUPPLIER = { + "s_suppkey", "s_name", "s_address", "s_nationkey", "s_phone", "s_acctbal", "s_comment"}; +const std::vector CUSTOMER = {"c_custkey", + "c_name", + "c_address", + "c_nationkey", + "c_phone", + "c_acctbal", + "c_mktsegment", + "c_comment"}; +const std::vector NATION = {"n_nationkey", "n_name", "n_regionkey", "n_comment"}; +const std::vector REGION = {"r_regionkey", "r_name", "r_comment"}; -// } // namespace +} // namespace -// /** -// * @brief Write a `cudf::table` to a parquet file -// * -// * @param table The cudf::table to write -// * @param path The path to write the parquet file to -// * @param col_names The names of the columns in the table -// */ -// void write_parquet(std::unique_ptr table, -// std::string const& path, -// std::vector const& col_names) -// { -// CUDF_FUNC_RANGE(); -// cudf::io::table_metadata metadata; -// std::vector col_name_infos; -// for (auto& col_name : col_names) { -// col_name_infos.push_back(cudf::io::column_name_info(col_name)); -// } -// metadata.schema_info = col_name_infos; -// auto const table_input_metadata = cudf::io::table_input_metadata{metadata}; -// auto builder = cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(path)); -// builder.metadata(table_input_metadata); -// auto const options = builder.build(); -// cudf::io::parquet_chunked_writer(options).write(table->view()); -// } - -void use_arr(cudf::host_span s) { std::cout << s[1]; } +/** + * @brief Write a `cudf::table` to a parquet file + * + * @param table The cudf::table to write + * @param path The path to write the parquet file to + * @param col_names The names of the columns in the table + */ +void write_parquet(std::unique_ptr table, + std::string const& path, + std::vector const& col_names) +{ + CUDF_FUNC_RANGE(); + cudf::io::table_metadata metadata; + std::vector col_name_infos; + for (auto& col_name : col_names) { + col_name_infos.push_back(cudf::io::column_name_info(col_name)); + } + metadata.schema_info = col_name_infos; + auto const table_input_metadata = cudf::io::table_input_metadata{metadata}; + auto builder = cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(path)); + builder.metadata(table_input_metadata); + auto const options = builder.build(); + cudf::io::parquet_chunked_writer(options).write(table->view()); +} int main(int argc, char** argv) { - constexpr std::array a{"hello", "world", "nvidia"}; - use_arr(cudf::host_span(a.data(), a.size())); -} -// if (argc < 2) { -// std::cerr << "Usage: " << argv[0] << " [scale_factor]" << std::endl; -// return 1; -// } + if (argc < 2) { + std::cerr << "Usage: " << argv[0] << " [scale_factor]" << std::endl; + return 1; + } -// double scale_factor = std::atof(argv[1]); -// std::cout << "Generating scale factor: " << scale_factor << std::endl; + double scale_factor = std::atof(argv[1]); + std::cout << "Generating scale factor: " << scale_factor << std::endl; -// auto [orders, lineitem, part] = cudf::datagen::generate_orders_lineitem_part( -// scale_factor, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); -// write_parquet(std::move(orders), "orders.parquet", ORDERS); -// write_parquet(std::move(lineitem), "lineitem.parquet", LINEITEM); -// write_parquet(std::move(part), "part.parquet", PART); + auto [orders, lineitem, part] = cudf::datagen::generate_orders_lineitem_part( + scale_factor, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + write_parquet(std::move(orders), "orders.parquet", ORDERS); + write_parquet(std::move(lineitem), "lineitem.parquet", LINEITEM); + write_parquet(std::move(part), "part.parquet", PART); -// auto partsupp = cudf::datagen::generate_partsupp( -// scale_factor, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); -// write_parquet(std::move(partsupp), "partsupp.parquet", PARTSUPP); + auto partsupp = cudf::datagen::generate_partsupp( + scale_factor, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + write_parquet(std::move(partsupp), "partsupp.parquet", PARTSUPP); -// auto supplier = cudf::datagen::generate_supplier( -// scale_factor, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); -// write_parquet(std::move(supplier), "supplier.parquet", SUPPLIER); + auto supplier = cudf::datagen::generate_supplier( + scale_factor, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + write_parquet(std::move(supplier), "supplier.parquet", SUPPLIER); -// auto customer = cudf::datagen::generate_customer( -// scale_factor, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); -// write_parquet(std::move(customer), "customer.parquet", CUSTOMER); + auto customer = cudf::datagen::generate_customer( + scale_factor, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + write_parquet(std::move(customer), "customer.parquet", CUSTOMER); -// auto nation = cudf::datagen::generate_nation(cudf::get_default_stream(), -// rmm::mr::get_current_device_resource()); -// write_parquet(std::move(nation), "nation.parquet", NATION); + auto nation = cudf::datagen::generate_nation(cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); + write_parquet(std::move(nation), "nation.parquet", NATION); -// auto region = cudf::datagen::generate_region(cudf::get_default_stream(), -// rmm::mr::get_current_device_resource()); -// write_parquet(std::move(region), "region.parquet", REGION); + auto region = cudf::datagen::generate_region(cudf::get_default_stream(), + rmm::mr::get_current_device_resource()); + write_parquet(std::move(region), "region.parquet", REGION); +}