From d5fa07a168a6631cc62688a21c2e09c7e7edd52c Mon Sep 17 00:00:00 2001 From: anandaravuri Date: Fri, 15 Sep 2023 11:36:40 -0700 Subject: [PATCH 01/11] Feature: Add CXL host exerciser cache application Description: Host Exerciser (HE) is responsible for generating traffic to create scenarios like Cache Hit/Miss in Device or Host Caches with the intention of exercising the path from AFU to the Host via CXL IP at full bandwidth. Command line options: Options: -h,--help Print this help message and exit -p,--pci-address TEXT [:]:. -l,--log-level TEXT:{trace,debug,info,warning,error,critical,off} [warning] stdout logging level -t,--timeout UINT [60000] test timeout (msec) --test UINT:value in {fpgardcachehit->0,fpgardcachemiss->2,fpgawrcachehit->1,fpgawrcachemiss->3,hostrdcachehit->4,hostrdcachemiss->6,hostwrcachehit->5,hostwrcachemiss->7} OR {0,2,1,3,4,6,5,7} [fpgardcachehit] host exerciser cache test {fpgardcachehit, fpgawrcachehit, all} --continuousmode BOOLEAN [false] test rollover or test termination --contmodetime UINT [1] Continuous mode time in seconds --target UINT:value in {fpga->1,host->0} OR {1,0} [host] host exerciser run on host or fpga --stride UINT [0] Enable stride mode --linerepcount UINT:INT in [1 - 256] [10] Line repeat count --testall BOOLEAN [false] Run all tests Subcommands: cache run simple cxl he cache test Signed-off-by: anandaravuri --- opae.spec.fedora | 1 + packaging/opae/deb/opae-extra-tools.install | 1 + samples/CMakeLists.txt | 1 + samples/cxl_host_exerciser/CMakeLists.txt | 47 + .../cxl_host_exerciser/cxl_host_exerciser.cpp | 48 + .../cxl_host_exerciser/cxl_host_exerciser.h | 504 ++++++++ .../cxl_host_exerciser_cache.h | 62 + .../cxl_host_exerciser_cmd.h | 1013 +++++++++++++++++ samples/cxl_host_exerciser/dfl-he-cache.h | 133 +++ samples/cxl_host_exerciser/he_cache_test.h | 829 ++++++++++++++ 10 files changed, 2639 insertions(+) create mode 100644 samples/cxl_host_exerciser/CMakeLists.txt create mode 100644 
samples/cxl_host_exerciser/cxl_host_exerciser.cpp create mode 100644 samples/cxl_host_exerciser/cxl_host_exerciser.h create mode 100644 samples/cxl_host_exerciser/cxl_host_exerciser_cache.h create mode 100644 samples/cxl_host_exerciser/cxl_host_exerciser_cmd.h create mode 100644 samples/cxl_host_exerciser/dfl-he-cache.h create mode 100644 samples/cxl_host_exerciser/he_cache_test.h diff --git a/opae.spec.fedora b/opae.spec.fedora index 893bcec6416a..dcdbe10e7fa2 100644 --- a/opae.spec.fedora +++ b/opae.spec.fedora @@ -355,6 +355,7 @@ done %{_bindir}/mem_tg %{_bindir}/ofs.uio %{_bindir}/cxl_mem_tg +%{_bindir}/cxl_host_exerciser %{python3_sitearch}/opae.diag* %{python3_sitearch}/opae/diag* diff --git a/packaging/opae/deb/opae-extra-tools.install b/packaging/opae/deb/opae-extra-tools.install index a85827b3f0de..a363035c3704 100644 --- a/packaging/opae/deb/opae-extra-tools.install +++ b/packaging/opae/deb/opae-extra-tools.install @@ -18,6 +18,7 @@ usr/bin/fpga_dma_N3000_test usr/bin/fpga_dma_test usr/bin/host_exerciser usr/bin/cxl_mem_tg +usr/bin/cxl_host_exerciser usr/bin/bist usr/bin/hps usr/bin/hssi diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index d9bce3ec663b..e03073ad863a 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -70,3 +70,4 @@ opae_add_subdirectory(host_exerciser) opae_add_subdirectory(n5010-test) opae_add_subdirectory(n5010-ctl) opae_add_subdirectory(clx_mem_tg) +opae_add_subdirectory(cxl_host_exerciser) \ No newline at end of file diff --git a/samples/cxl_host_exerciser/CMakeLists.txt b/samples/cxl_host_exerciser/CMakeLists.txt new file mode 100644 index 000000000000..adcdf4580a62 --- /dev/null +++ b/samples/cxl_host_exerciser/CMakeLists.txt @@ -0,0 +1,47 @@ +## Copyright(c) 2023, Intel Corporation +## +## Redistribution and use in source and binary forms, with or without +## modification, are permitted provided that the following conditions are met: +## +## * Redistributions of source code must retain the above 
copyright notice, +## this list of conditions and the following disclaimer. +## * Redistributions in binary form must reproduce the above copyright notice, +## this list of conditions and the following disclaimer in the documentation +## and/or other materials provided with the distribution. +## * Neither the name of Intel Corporation nor the names of its contributors +## may be used to endorse or promote products derived from this software +## without specific prior written permission. +## +## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +## AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +## IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +## ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +## LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +## CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +## SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +## INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +## CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +## ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +## POSSIBILITY OF SUCH DAMAGE. 
+ +if (OPAE_WITH_CLI11 AND OPAE_WITH_SPDLOG) + opae_add_executable(TARGET cxl_host_exerciser + SOURCE cxl_host_exerciser.cpp + LIBS + opae-c + opae-cxx-core + ${spdlog_LIBRARIES} + ${json-c_LIBRARIES} + ${uuid_LIBRARIES} + numa + COMPONENT samplebin + ) + target_include_directories(cxl_host_exerciser + PRIVATE + ${OPAE_INCLUDE_PATHS} + ${CMAKE_CURRENT_SOURCE_DIR} + ${CLI11_INCLUDE_DIRS} + ${spdlog_INCLUDE_DIRS}) + + +endif(OPAE_WITH_CLI11 AND OPAE_WITH_SPDLOG) diff --git a/samples/cxl_host_exerciser/cxl_host_exerciser.cpp b/samples/cxl_host_exerciser/cxl_host_exerciser.cpp new file mode 100644 index 000000000000..3d5eb10f1604 --- /dev/null +++ b/samples/cxl_host_exerciser/cxl_host_exerciser.cpp @@ -0,0 +1,48 @@ +// Copyright(c) 2023, Intel Corporation +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Intel Corporation nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+#include
+#include
+#include
+
+#include "cxl_host_exerciser.h"
+#include "cxl_host_exerciser_cache.h"
+
+// SIGINT handler; defined in cxl_host_exerciser_cmd.h, which is pulled in
+// through cxl_host_exerciser_cache.h.
+void he_sig_handler(int);
+
+// Program entry point: registers the test command, installs the SIGINT
+// handler and delegates to the application object.
+int main(int argc, char *argv[]) {
+  host_exerciser::host_exerciser app;
+  app.register_command();
+
+  // host exerciser signal handler. Both structs are zeroed first, so
+  // sa_flags and sa_mask start out cleared.
+  struct sigaction act_old, act_new;
+  memset(&act_old, 0, sizeof(act_old));
+  memset(&act_new, 0, sizeof(act_new));
+
+  act_new.sa_handler = he_sig_handler;
+  // The previous disposition is captured in act_old but never restored, and
+  // the sigaction() return value is not checked (best effort).
+  sigaction(SIGINT, &act_new, &act_old);
+
+  // The application's result becomes the process exit status.
+  return app.main(argc, argv);
+}
diff --git a/samples/cxl_host_exerciser/cxl_host_exerciser.h b/samples/cxl_host_exerciser/cxl_host_exerciser.h
new file mode 100644
index 000000000000..293293a30255
--- /dev/null
+++ b/samples/cxl_host_exerciser/cxl_host_exerciser.h
@@ -0,0 +1,504 @@
+// Copyright(c) 2023, Intel Corporation
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution. 
+// * Neither the name of Intel Corporation nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+#pragma once
+#include
+#include
+#include
+
+#include "he_cache_test.h"
+
+namespace host_exerciser {
+using opae::fpga::types::event;
+using opae::fpga::types::shared_buffer;
+using opae::fpga::types::token;
+
+static const uint64_t HELPBK_TEST_TIMEOUT = 30000;
+static const uint64_t HELPBK_TEST_SLEEP_INVL = 100;
+static const uint64_t CL = 64;
+static const uint64_t KB = 1024;
+static const uint64_t MB = KB * 1024;
+static const uint64_t LOG2_CL = 6; // 2^6 == 64, i.e. log2 of CL
+
+static const uint64_t BUFFER_SIZE_2MB = 2 * 1024 * 1024;
+
+// Number of 64-byte lines in 32 KB.
+static const uint64_t FPGA_32KB_CACHE_LINES = (32 * 1024) / 64;
+
+// Number of 64-byte lines in 2 MB.
+static const uint64_t FPGA_2MB_CACHE_LINES = (2 * 1024 * 1024) / 64;
+
+// Host exerciser CSR offsets; every register is 8 bytes apart.
+enum {
+  HE_DFH = 0x0000,
+  HE_ID_L = 0x0008,
+  HE_ID_H = 0x0010,
+  HE_DFH_RSVD0 = 0x0018,
+  HE_DFH_RSVD1 = 0x0020,
+  HE_SCRATCHPAD0 = 0x028,
+  HE_DSM_BASE = 0x030,
+  HE_CTL = 0x038,
+  HE_INFO = 0x040,
+
+  HE_WR_NUM_LINES = 0x048,
+  HE_WR_BYTE_ENABLE = 0x050,
+  HE_WR_CONFIG = 0x058,
+  HE_WR_ADDR_TABLE_CTRL = 0x060,
+  HE_WR_ADDR_TABLE_DATA = 0x068,
+
+  HE_RD_NUM_LINES = 0x070,
+  HE_RD_CONFIG = 0x078,
+  HE_RD_ADDR_TABLE_CTRL = 0x080,
+  HE_RD_ADDR_TABLE_DATA = 0x088,
+  HE_ERROR_STATUS = 0x090,
+
+  HE_ERROR_EXP_DATA = 0x098,
+  HE_ERROR_ACT_DATA0 = 0x0A0,
+  HE_ERROR_ACT_DATA1 = 0x0A8,
+  HE_ERROR_ACT_DATA2 = 0x0B0,
+  HE_ERROR_ACT_DATA3 = 0x0B8,
+  HE_ERROR_ACT_DATA4 = 0x0C0,
+  HE_ERROR_ACT_DATA5 = 0x0C8,
+  HE_ERROR_ACT_DATA6 = 0x0D0,
+  HE_ERROR_ACT_DATA7 = 0x0D8,
+
+};
+
+// Test mode selector (read, write, or both); keys of the he_modes map.
+typedef enum {
+  HOST_EXEMODE_READ = 0x0,
+  HOST_EXEMODE_WRITE = 0x1,
+  HOST_EXEMODE_ALL = 0x2,
+} host_exe_mode;
+
+// Read traffic opcodes (values for the 4-bit opcode field of HE_RD_CONFIG)
+typedef enum {
+  RD_LINE_I = 0x0,
+  RD_LINE_S = 0x1,
+  RD_LINE_EM = 0x2,
+} he_rd_opcode;
+
+// Write traffic opcodes (values for the 4-bit opcode field of HE_WR_CONFIG)
+typedef enum {
+  WR_LINE_I = 0x0,
+  WR_LINE_M = 0x1,
+  WR_PUSH_I = 0x2,
+  WR_BARRIER_FRNCE = 0x3, // NOTE(review): presumably "FENCE" - confirm
+  WR_FLUSH_CL = 0x4,
+  WR_FLUSH_CL_HCOH = 0x5,
+  WR_FLUSH_CL_DCOH = 0x6,
+} he_wr_opcode;
+
+// DFH Header: raw 64-bit register value overlaid with its bit-fields
+union he_dfh {
+  enum { offset = HE_DFH };
+  uint64_t value;
+  
struct {
+    // All fields share the uint64_t storage unit so the 12+4+24+1+19+4 bits
+    // pack contiguously into exactly 64 bits. With mixed uint16_t/uint8_t/
+    // uint32_t types a bit-field may not straddle the boundary of its
+    // declared type, which inserts padding and pushes the later fields
+    // outside `value`.
+    uint64_t CcipVersionNumber : 12;
+    uint64_t AfuMajVersion : 4;
+    uint64_t NextDfhOffset : 24;
+    uint64_t EOL : 1;
+    uint64_t Reserved : 19;
+    uint64_t FeatureType : 4;
+  };
+};
+
+// DSM base address register
+union he_dsm_base {
+  enum { offset = HE_DSM_BASE };
+  uint64_t value;
+  struct {
+    uint64_t DsmBase : 64;
+  };
+};
+
+// CSR CTL: test control bits (reset, start, forced completion)
+union he_ctl {
+  enum { offset = HE_CTL };
+  uint64_t value;
+  struct {
+    uint64_t ResetL : 1;
+    uint64_t Start : 1;
+    uint64_t ForcedTestCmpl : 1;
+    uint64_t bios_support : 1;
+    uint64_t Reserved : 60;
+  };
+};
+
+// CSR INFO: 4-bit size codes of the write and read address tables
+union he_info {
+  enum { offset = HE_INFO };
+  uint64_t value;
+  struct {
+    uint64_t write_addr_table_size : 4;
+    uint64_t read_addr_table_size : 4;
+    uint64_t Reserved : 56;
+  };
+};
+
+// HE_WR_NUM_LINES: 16-bit write line count
+union he_wr_num_lines {
+  enum { offset = HE_WR_NUM_LINES };
+  uint64_t value;
+  struct {
+    uint64_t write_num_lines : 16;
+    uint64_t reserved : 48;
+  };
+};
+
+// HE_WR_BYTE_ENABLE
+union he_wr_byte_enable {
+  enum { offset = HE_WR_BYTE_ENABLE };
+  uint64_t value;
+  struct {
+    uint64_t write_byte_enable : 64;
+  };
+};
+
+// HE_WR_CONFIG: write traffic configuration (fields total 64 bits)
+union he_wr_config {
+  enum { offset = HE_WR_CONFIG };
+  uint64_t value;
+  struct {
+    uint64_t write_traffic_enable : 1;
+    uint64_t continuous_mode_enable : 1;
+    uint64_t waitfor_completion : 1;
+    uint64_t preread_sync_enable : 1;
+    uint64_t postread_sync_enable : 1;
+    uint64_t daata_pattern : 2;
+    uint64_t cl_evict_enable : 1;
+    uint64_t opcode : 4;            // a he_wr_opcode value
+    uint64_t line_repeat_count : 8; // holds 0..255 only
+    uint64_t reserved : 44;
+  };
+};
+
+// HE_WR_ADDR_TABLE_CTRL
+union he_wr_addr_table_ctrl {
+  enum { offset = HE_WR_ADDR_TABLE_CTRL };
+  uint64_t value;
+  struct {
+    uint64_t enable_address_table : 1;
+    uint64_t enable_address_stride : 1;
+    uint64_t stride : 2;
+    uint64_t reserved : 60;
+  };
+};
+
+// HE_WR_ADDR_TABLE_DATA
+union he_wr_addr_table_data {
+  enum { offset = HE_WR_ADDR_TABLE_DATA };
+  uint64_t value;
+  struct {
+    uint64_t address_table_value : 64;
+  };
+};
+
+// HE_RD_NUM_LINES 
+union he_rd_num_lines {
+  enum { offset = HE_RD_NUM_LINES };
+  uint64_t value;
+  struct {
+    uint64_t read_num_lines : 16;
+    uint64_t reserved : 48;
+  };
+};
+
+// HE_RD_CONFIG: read traffic configuration (fields total 64 bits)
+union he_rd_config {
+  enum { offset = HE_RD_CONFIG };
+  uint64_t value;
+  struct {
+    uint64_t read_traffic_enable : 1;
+    uint64_t continuous_mode_Enable : 1;
+    uint64_t waitfor_completion : 1;
+    uint64_t prewrite_sync_enable : 1;
+    uint64_t postwrite_sync_enable : 1;
+    uint64_t daata_pattern : 2;
+    uint64_t cl_evict_enable : 1;
+    uint64_t opcode : 4;            // a he_rd_opcode value
+    uint64_t line_repeat_count : 8; // holds 0..255 only
+    uint64_t reserved : 44;
+  };
+};
+
+// HE_RD_ADDR_TABLE_CTRL
+union he_rd_addr_table_ctrl {
+  enum { offset = HE_RD_ADDR_TABLE_CTRL };
+  uint64_t value;
+  struct {
+    uint64_t enable_address_table : 1;
+    uint64_t enable_address_stride : 1;
+    uint64_t stride : 2;
+    uint64_t reserved : 60;
+  };
+};
+
+// HE_RD_ADDR_TABLE_DATA
+union he_rd_addr_table_data {
+  enum { offset = HE_RD_ADDR_TABLE_DATA };
+  uint64_t value;
+  struct {
+    uint64_t address_table_value : 64;
+  };
+};
+
+// HE_ERROR_STATUS: data error flag and the index it was reported for
+union he_err_status {
+  enum { offset = HE_ERROR_STATUS };
+  uint64_t value;
+  struct {
+    uint64_t data_error : 1;
+    uint64_t rsvd1 : 15;
+    uint64_t err_index : 16;
+    uint64_t rsvd2 : 32;
+  };
+};
+
+// HE DSM status record (completion flag, error vector and counters).
+// NOTE(review): presumably written by the AFU into the DSM buffer - confirm.
+struct he_cache_dsm_status {
+  uint32_t test_completed : 1;
+  uint32_t dsm_number : 15;
+  uint32_t res1 : 16;
+  uint32_t err_vector : 32;
+  uint64_t num_ticks : 64;
+  uint32_t num_reads : 32;
+  uint32_t num_writes : 32;
+  uint32_t penalty_start : 32;
+  uint32_t penalty_end : 32;
+  uint32_t actual_data : 32;
+  uint32_t expected_data : 32;
+  uint32_t res5[2];
+};
+
+// Name -> host_exe_mode lookup.
+const std::map he_modes = {
+    {"read", HOST_EXEMODE_READ},
+    {"write", HOST_EXEMODE_WRITE},
+    {"all", HOST_EXEMODE_ALL},
+};
+
+// Cache test selector; the values are what --test maps its names to.
+typedef enum {
+  HE_FPGA_RD_CACHE_HIT = 0x0,
+  HE_FPGA_WR_CACHE_HIT = 0x1,
+
+  HE_FPGA_RD_CACHE_MISS = 0x2,
+  HE_FPGA_WR_CACHE_MISS = 0x3,
+
+  HE_HOST_RD_CACHE_HIT = 0x4,
+  
HE_HOST_WR_CACHE_HIT = 0x5,
+
+  HE_HOST_RD_CACHE_MISS = 0x6,
+  HE_HOST_WR_CACHE_MISS = 0x7,
+
+} he_test_mode;
+
+// Selects the target the exerciser runs on (host or FPGA)
+typedef enum {
+  HE_TARGET_HOST = 0x0,
+  HE_TARGET_FPGA = 0x1,
+} he_target;
+
+// --test option names -> he_test_mode values
+const std::map he_test_modes = {
+    {"fpgardcachehit", HE_FPGA_RD_CACHE_HIT},
+    {"fpgawrcachehit", HE_FPGA_WR_CACHE_HIT},
+    {"fpgardcachemiss", HE_FPGA_RD_CACHE_MISS},
+    {"fpgawrcachemiss", HE_FPGA_WR_CACHE_MISS},
+
+    {"hostrdcachehit", HE_HOST_RD_CACHE_HIT},
+    {"hostwrcachehit", HE_HOST_WR_CACHE_HIT},
+    {"hostrdcachemiss", HE_HOST_RD_CACHE_MISS},
+    {"hostwrcachemiss", HE_HOST_WR_CACHE_MISS},
+};
+
+// --target option names -> he_target values
+const std::map he_targets = {
+    {"host", HE_TARGET_HOST},
+    {"fpga", HE_TARGET_FPGA},
+};
+
+///////////////////////
+// Bias Support
+// NOTE(review): the enumerators spell "BIOS"/"BIOA"; presumably host bias /
+// device bias were meant. Names are kept because callers may use them.
+typedef enum {
+  HOST_BIOS = 0x0,
+  DEVIC_BIOA = 0x1,
+} he_ctl_bios_support;
+
+// Address table size codes; each code is log2 of the entry count
+// (see addrtable_size below)
+typedef enum {
+  HE_ADDRTABLE_SIZE4096 = 0xC,
+  HE_ADDRTABLE_SIZE2048 = 0xB,
+  HE_ADDRTABLE_SIZE1024 = 0xA,
+  HE_ADDRTABLE_SIZE512 = 0x9,
+  HE_ADDRTABLE_SIZE256 = 0x8,
+  HE_ADDRTABLE_SIZE128 = 0x7,
+  HE_ADDRTABLE_SIZE64 = 0x6,
+  HE_ADDRTABLE_SIZE32 = 0x5,
+  HE_ADDRTABLE_SIZE16 = 0x4,
+  HE_ADDRTABLE_SIZE8 = 0x3,
+  HE_ADDRTABLE_SIZE4 = 0x2,
+  HE_ADDRTABLE_SIZE2 = 0x1,
+
+} he_addrtable_size;
+
+// Traffic stage selector: enable the traffic stage or skip it
+typedef enum {
+  HE_ENABLE_TRAFFIC_STAGE = 0x0,
+  HE_SIP_SEQ_STAGE = 0x1,
+} he_traffic_enable;
+
+// Option names -> he_traffic_enable values
+const std::map traffic_enable = {
+    {"enable", HE_ENABLE_TRAFFIC_STAGE},
+    {"skip", HE_SIP_SEQ_STAGE},
+
+};
+
+// Size code -> number of address table entries.
+// `static` gives this non-const object internal linkage; without it every
+// translation unit including this header would define the same external
+// symbol (the const maps above already have internal linkage).
+static std::map addrtable_size = {
+    {HE_ADDRTABLE_SIZE4096, 4096}, {HE_ADDRTABLE_SIZE2048, 2048},
+    {HE_ADDRTABLE_SIZE1024, 1024}, {HE_ADDRTABLE_SIZE512, 512},
+    {HE_ADDRTABLE_SIZE256, 256},   {HE_ADDRTABLE_SIZE128, 128},
+    {HE_ADDRTABLE_SIZE64, 64},     {HE_ADDRTABLE_SIZE32, 32},
+    {HE_ADDRTABLE_SIZE16, 16},     {HE_ADDRTABLE_SIZE8, 8},
+    {HE_ADDRTABLE_SIZE4, 4},       {HE_ADDRTABLE_SIZE2, 2},
+
+};
+
+// Data integrity check switch
+typedef enum {
+  HE_DISABLE_DATA_INTEGRITY_CHECK = 0x0,
+  HE_ENABLE_DATA_INTEGRITY_CHECK = 0x1,
+} 
he_data_integrity_check;
+
+// Strict weak ordering for std::string keys: shorter strings sort first,
+// strings of equal length are compared lexicographically.
+struct MapKeyComparator {
+  bool operator()(const std::string &a, const std::string &b) const {
+    if (a.length() != b.length())
+      return (a.length() < b.length());
+    else
+      return (a < b);
+  }
+};
+
+using test_afu = opae::afu_test::afu;
+using test_command = opae::afu_test::command;
+
+// Application object for the CXL host exerciser: registers the
+// exerciser-specific command line options on top of test_afu and drives the
+// selected test command from run().
+class host_exerciser : public test_afu {
+public:
+  // Sets member defaults and registers the options on app_.
+  host_exerciser()
+      : test_afu("host_exerciser", nullptr, "warning"), count_(1),
+        he_continuousmode_(false), he_test_all_(0), he_contmodetime_(0),
+        he_clock_mhz_(0),he_linerep_count_(10), he_stide_(0), he_target_(0), he_test_(0) {
+
+
+    // test
+    // NOTE(review): the help text mentions "all", but he_test_modes has no
+    // "all" key and does list six further modes - confirm the wording.
+    app_.add_option(
+            "--test", he_test_,
+            "host exerciser cache test {fpgardcachehit, fpgawrcachehit, all}")
+        ->transform(CLI::CheckedTransformer(he_test_modes))
+        ->default_val("fpgardcachehit");
+
+    // Configures test rollover or test termination
+    app_.add_option("--continuousmode", he_continuousmode_,
+                    "test rollover or test termination")
+        ->default_val("false");
+
+    // Continuous mode time
+    app_.add_option("--contmodetime", he_contmodetime_,
+                    "Continuous mode time in seconds")
+        ->default_val("1");
+
+    app_.add_option("--target", he_target_,
+                    "host exerciser run on host or fpga")
+        ->transform(CLI::CheckedTransformer(he_targets))
+        ->default_val("host");
+
+
+    app_.add_option("--stride", he_stide_, "Enable stride mode")
+        ->default_val("0");
+
+    // NOTE(review): the accepted range includes 256, but the value is later
+    // stored in the 8-bit line_repeat_count CSR field, where 256 becomes 0.
+    // Confirm whether hardware treats 0 as 256 or the limit should be 255.
+    app_.add_option("--linerepcount", he_linerep_count_, "Line repeat count")
+        ->transform(CLI::Range(1, 256))
+        ->default_val("10");
+
+    // Test all
+    app_.add_option("--testall", he_test_all_, "Run all tests")
+        ->default_val("false");
+  }
+
+  // Runs `test` up to count_ times through test_afu::run(), stopping at the
+  // first non-zero result, and returns the last result code.
+  virtual int run(CLI::App *app, test_command::ptr_t test) override {
+    int res = exit_codes::not_run;
+
+    logger_->set_pattern(" %v");
+    // Info prints details of an individual run. Turn it on if doing only one
+    // test and the user hasn't changed level from the default. 
+    if ((log_level_.compare("warning") == 0) && !he_test_all_)
+      logger_->set_level(spdlog::level::info);
+
+    logger_->info("starting test run, count of {0:d}", count_);
+    uint32_t count = 0;
+    try {
+      while (count < count_) {
+        logger_->debug("starting iteration: {0:d}", count + 1);
+
+        res = test_afu::run(app, test);
+        count++;
+        logger_->debug("end iteration: {0:d}", count);
+        if (res)
+          break; // stop at the first failing iteration
+      }
+    } catch (const std::exception &ex) { // the handler never modifies ex
+      logger_->error(ex.what());
+      res = exit_codes::exception;
+    }
+
+    auto pass = res == exit_codes::success ? "PASS" : "FAIL";
+    logger_->info("Test {}({}): {}", test->name(), count, pass);
+    spdlog::drop_all();
+    return res;
+  }
+
+public:
+  uint32_t count_;            // number of iterations run() performs
+  bool he_continuousmode_;    // --continuousmode
+  bool he_test_all_;          // --testall
+  uint32_t he_contmodetime_;  // --contmodetime, seconds
+  uint32_t he_clock_mhz_;
+  uint32_t he_linerep_count_; // --linerepcount
+  uint32_t he_stide_;         // --stride
+  uint32_t he_target_;        // --target (he_target value)
+  uint32_t he_test_;          // --test (he_test_mode value)
+  std::map limits_;           // CSR space size per base, see get_offset()
+
+  // Returns the offset of the i-th 64-bit register after `base`.
+  // Throws std::out_of_range when a limit is registered for `base` and the
+  // 8-byte register at that offset would not fit inside it.
+  uint32_t get_offset(uint32_t base, uint32_t i) const {
+    auto limit = limits_.find(base);
+    auto offset = base + sizeof(uint64_t) * i;
+    // Add on the left instead of subtracting on the right: the old form
+    // `limit->second - sizeof(uint64_t)` wrapped around for limits smaller
+    // than 8 and then accepted every offset.
+    if (limit != limits_.end() && offset + sizeof(uint64_t) > limit->second) {
+      throw std::out_of_range("offset out range in csr space");
+    }
+    return static_cast<uint32_t>(offset);
+  }
+
+  // True when `option_str` appeared on the command line at least once.
+  bool option_passed(const std::string &option_str) {
+    return app_.count(option_str) != 0;
+  }
+};
+} // end of namespace host_exerciser
diff --git a/samples/cxl_host_exerciser/cxl_host_exerciser_cache.h b/samples/cxl_host_exerciser/cxl_host_exerciser_cache.h
new file mode 100644
index 000000000000..892584ef36b1
--- /dev/null
+++ b/samples/cxl_host_exerciser/cxl_host_exerciser_cache.h
@@ -0,0 +1,62 @@
+// Copyright(c) 2023, Intel Corporation
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Intel Corporation nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+#pragma once
+#include "cxl_host_exerciser.h"
+#include "cxl_host_exerciser_cmd.h"
+#include "he_cache_test.h"
+
+// AFU id returned by host_exerciser_cache::afu_id(). The pointer itself is
+// const, which gives the object internal linkage: a mutable pointer defined
+// in a header is an external symbol repeated in every including translation
+// unit.
+const char *const HE_CACHE_AFU_ID = "0118E06B-1FA3-49B9-8159-9b5C2EBD4b23";
+
+// Feature id and GUID halves reported by the "cache" command. The two GUID
+// halves carry the same hex digits as HE_CACHE_AFU_ID.
+// NOTE(review): the MEM_TG_ prefix looks carried over from the mem_tg sample.
+#define MEM_TG_FEATURE_ID 0x25
+#define MEM_TG_FEATURE_GUIDL 0x81599b5c2ebd4b23
+#define MEM_TG_FEATURE_GUIDH 0x0118e06b1fa349b9
+
+using test_afu = opae::afu_test::afu;
+using opae::fpga::types::shared_buffer;
+
+namespace host_exerciser {
+
+// The "cache" sub-command: supplies the name, description and identifiers
+// and inherits everything else from host_exerciser_cmd.
+class host_exerciser_cache : public host_exerciser_cmd {
+public:
+  host_exerciser_cache() {}
+
+  virtual ~host_exerciser_cache() {}
+  virtual const char *name() const override { return "cache"; }
+
+  virtual const char *description() const override {
+    return "run simple cxl he cache test";
+  }
+
+  virtual const char *afu_id() const override { return HE_CACHE_AFU_ID; }
+
+  virtual uint64_t featureid() const override { return MEM_TG_FEATURE_ID; }
+
+  virtual uint64_t guidl() const override { return MEM_TG_FEATURE_GUIDL; }
+
+  virtual uint64_t guidh() const override { return MEM_TG_FEATURE_GUIDH; }
+};
+
+} // end of namespace host_exerciser
diff --git a/samples/cxl_host_exerciser/cxl_host_exerciser_cmd.h b/samples/cxl_host_exerciser/cxl_host_exerciser_cmd.h
new file mode 100644
index 000000000000..c9a099694788
--- /dev/null
+++ b/samples/cxl_host_exerciser/cxl_host_exerciser_cmd.h
@@ -0,0 +1,1013 @@
+// Copyright(c) 2023, Intel Corporation
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution. 
+// * Neither the name of Intel Corporation nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+#pragma once
+
+#include "cxl_host_exerciser.h"
+#include "he_cache_test.h"
+#include
+#include
+#include
+#include <unistd.h>
+
+using test_afu = opae::afu_test::afu;
+using opae::fpga::types::shared_buffer;
+using opae::fpga::types::token;
+namespace fpga = opae::fpga::types;
+
+#define UNUSED_PARAM(x) ((void)x)
+
+// HE exit global flags, set from the signal handler below.
+// NOTE(review): volatile does not synchronize between threads; if
+// g_stop_thread is polled from a worker thread, confirm whether an atomic
+// type is needed.
+volatile bool g_he_exit = false;
+volatile static bool g_stop_thread = false;
+
+// host exerciser signal handler: raises both exit flags and prints a notice.
+// The notice goes out through write(2) because printf() is not
+// async-signal-safe and may deadlock or corrupt stdio state when the signal
+// interrupts another stdio call.
+void he_sig_handler(int) {
+  g_he_exit = true;
+  g_stop_thread = true;
+  static const char msg[] = "HE signal handler exit app \n";
+  ssize_t rc = write(STDOUT_FILENO, msg, sizeof(msg) - 1);
+  UNUSED_PARAM(rc); // best effort: nothing useful to do on failure here
+}
+
+namespace host_exerciser {
+
+std::mutex he_cache_read_mutex;
+std::mutex he_cache_write_mutex;
+
+class host_exerciser_cmd;
+
+// Forward declaration.
+void he_cache_thread(uint8_t *buf_ptr, uint64_t len);
+
+// Base class of the host exerciser test commands; holds the FPGA cache
+// hit/miss test routines.
+class host_exerciser_cmd : public test_command {
+public:
+  host_exerciser_cmd() : host_exe_(NULL), numa_node_(0) {}
+  virtual ~host_exerciser_cmd() {}
+
+  // FPGA read cache hit test: one read pass to bring the lines into the FPGA
+  // cache, then a second identical read pass over the same lines.
+  // Returns 0 on success, -1 on allocation failure or timeout.
+  int he_run_fpga_rd_cache_hit_test() {
+    cout << "********** FPGA Read cache hit test start**********" << endl;
+    /*
+    STEPS
+    1) Allocate DSM, Read buffer // flush
+    2) set cache lines 32kb/64
+    3) set loop count
+    4) Set RdShared (CXL) config
+    5) Run test ( AFU copies cache from host memory to FPGA cache)
+    6) Set RdShared (CXL) config
+    7) Run test ( AFU read cache from FPGA cache)
+    */
+
+    // HE_INFO
+    // Set Read number Lines
+    he_info_.value = host_exe_->read64(HE_INFO);
+    cout << "Read address table size:" << he_info_.read_addr_table_size << endl;
+
+    cout << "Numa node:" << numa_node_ << endl;
+    host_exe_->write64(HE_RD_NUM_LINES, FPGA_32KB_CACHE_LINES - 1);
+    cout << "Read number Lines:" << FPGA_32KB_CACHE_LINES - 1 << endl;
+    cout << "Line Repeat Count:" << host_exe_->he_linerep_count_ << endl;
+
+    // set RD_CONFIG RdShared (CXL)
+    he_rd_cfg_.value = 0;
+    he_rd_cfg_.line_repeat_count = host_exe_->he_linerep_count_;
+    he_rd_cfg_.read_traffic_enable = 1;
+    he_rd_cfg_.opcode = RD_LINE_S;
+    host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value);
+
+    // set RD_ADDR_TABLE_CTRL
+    
rd_table_ctl_.value = 0;
+    rd_table_ctl_.enable_address_stride = 1;
+    host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value);
+
+    // Allocate DSM buffer
+    if (!host_exe_->allocate_dsm()) {
+      cerr << "allocate dsm failed" << endl;
+      return -1;
+    }
+
+    // Allocate Read buffer; release the DSM buffer again on failure
+    if (!host_exe_->allocate_cache_read(BUFFER_SIZE_2MB, numa_node_)) {
+      cerr << "allocate cache read failed" << endl;
+      host_exe_->free_dsm();
+      return -1;
+    }
+
+    // Start test: pulse the Start bit (write 1, then 0)
+    he_ctl_.Start = 1;
+    host_exe_->write64(HE_CTL, he_ctl_.value);
+    he_ctl_.Start = 0;
+    host_exe_->write64(HE_CTL, he_ctl_.value);
+
+    // wait for completion; on timeout report counters and errors, then free
+    // both buffers
+    if (!he_wait_test_completion()) {
+      cerr << "timeout error" << endl;
+      he_perf_counters();
+      host_exerciser_errors();
+      host_exe_->free_cache_read();
+      host_exe_->free_dsm();
+      return -1;
+    }
+
+    he_perf_counters();
+
+    cout << "********** AFU Copied host cache to FPGA Cache successfully "
+            "********** "
+         << endl;
+
+    // Second pass: same read configuration over the same lines.
+    // set RD_CONFIG RdShared (CXL)
+    he_rd_cfg_.value = 0;
+    he_rd_cfg_.line_repeat_count = host_exe_->he_linerep_count_;
+    he_rd_cfg_.read_traffic_enable = 1;
+    he_rd_cfg_.opcode = RD_LINE_S;
+    host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value);
+
+    // set RD_ADDR_TABLE_CTRL
+    rd_table_ctl_.value = 0;
+    rd_table_ctl_.enable_address_stride = 1;
+    host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value);
+
+    // Start test
+    he_ctl_.Start = 1;
+    host_exe_->write64(HE_CTL, he_ctl_.value);
+    he_ctl_.Start = 0;
+    host_exe_->write64(HE_CTL, he_ctl_.value);
+
+    // wait for completion
+    if (!he_wait_test_completion()) {
+      cerr << "timeout error" << endl;
+      he_perf_counters();
+      host_exerciser_errors();
+      host_exe_->free_cache_read();
+      host_exe_->free_dsm();
+      return -1;
+    }
+
+    he_perf_counters();
+    // NOTE(review): the buffers are released in the opposite order from the
+    // error paths above (DSM first here) - confirm the order is irrelevant.
+    host_exe_->free_dsm();
+    host_exe_->free_cache_read();
+
+    cout
+        << "********** AFU reads cache from FPGA Cache successfully ********** "
+        << endl;
+
+    cout << "********** FPGA Read cache hit test end**********" << endl;
+    return 0;
+  }
+
+  int 
he_run_fpga_wr_cache_hit_test() {
+    // FPGA write cache hit test: a read pass brings the lines into the FPGA
+    // cache, then a write pass targets the same lines.
+    // Returns 0 on success, -1 on allocation failure or timeout.
+    cout << "********** FPGA Write cache hit test start**********" << endl;
+
+    /*
+    STEPS
+    1) Allocate DSM, Read buffer, Write buffer // flush
+    2) set cache lines 32kb/64
+    3) set loop count
+    4) Set RdShared (CXL) config
+    5) Run test ( AFU copies cache from host memory to FPGA cache)
+    6) Set WrLine_M/WrPart_M (CXL) config
+    7) Run test ( AFU writes to FPGA cache)
+    */
+
+    // HE_INFO
+    // Set Read/Write number Lines
+    he_info_.value = host_exe_->read64(HE_INFO);
+    cout << "Read address table size:" << he_info_.read_addr_table_size << endl;
+    cout << "Write address table size:" << he_info_.write_addr_table_size
+         << endl;
+
+    host_exe_->write64(HE_RD_NUM_LINES, FPGA_32KB_CACHE_LINES - 1);
+    host_exe_->write64(HE_WR_NUM_LINES, FPGA_32KB_CACHE_LINES - 1);
+    cout << "Read/write number Lines:" << FPGA_32KB_CACHE_LINES - 1 << endl;
+    cout << "Line Repeat Count:" << host_exe_->he_linerep_count_ << endl;
+
+    // set RD_CONFIG RdShared (CXL)
+    he_rd_cfg_.value = 0;
+    he_rd_cfg_.line_repeat_count = host_exe_->he_linerep_count_;
+    he_rd_cfg_.read_traffic_enable = 1;
+    he_rd_cfg_.opcode = RD_LINE_S;
+    host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value);
+
+    // set RD_ADDR_TABLE_CTRL
+    rd_table_ctl_.value = 0;
+    rd_table_ctl_.enable_address_stride = 1;
+    host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value);
+
+    // Allocate DSM buffer
+    if (!host_exe_->allocate_dsm()) {
+      cerr << "alloc dsm failed" << endl;
+      return -1;
+    }
+
+    // Allocate Read, Write buffer; the message now names the call that
+    // failed (it used to say "allocate cache read failed")
+    if (!host_exe_->allocate_cache_read_write(BUFFER_SIZE_2MB, numa_node_)) {
+      cerr << "allocate cache read write failed" << endl;
+      host_exe_->free_dsm();
+      return -1;
+    }
+
+    // Start test: pulse the Start bit (write 1, then 0)
+    he_ctl_.Start = 1;
+    host_exe_->write64(HE_CTL, he_ctl_.value);
+    he_ctl_.Start = 0;
+    host_exe_->write64(HE_CTL, he_ctl_.value);
+
+    // wait for completion
+    if (!he_wait_test_completion()) {
+      cerr << "timeout error" << endl;
+      he_perf_counters();
+      host_exerciser_errors();
+      
host_exe_->free_cache_read_write();
+      host_exe_->free_dsm();
+      return -1;
+    }
+
+    he_perf_counters();
+
+    cout << "********** AFU Copied host cache to FPGA Cache successfully "
+            "********** "
+         << endl;
+
+    // Second pass: write traffic over the lines read in the first pass.
+    // NOTE(review): HE_RD_CONFIG is not rewritten here, so
+    // read_traffic_enable is still set from the first pass - confirm that
+    // read traffic during the write pass is intended.
+    // set W_CONFIG
+    he_wr_cfg_.value = 0;
+    he_wr_cfg_.line_repeat_count = host_exe_->he_linerep_count_;
+    he_wr_cfg_.write_traffic_enable = 1;
+    he_wr_cfg_.opcode = WR_LINE_M;
+    host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value);
+
+    // Set WR_ADDR_TABLE_CTRL
+    wr_table_ctl_.value = 0;
+    wr_table_ctl_.enable_address_stride = 1;
+    host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value);
+
+    // Start test: pulse the Start bit (write 1, then 0)
+    he_ctl_.Start = 1;
+    host_exe_->write64(HE_CTL, he_ctl_.value);
+    he_ctl_.Start = 0;
+    host_exe_->write64(HE_CTL, he_ctl_.value);
+
+    // wait for completion
+    if (!he_wait_test_completion()) {
+      cerr << "timeout error" << endl;
+      he_perf_counters();
+      host_exerciser_errors();
+      host_exe_->free_cache_read_write();
+      host_exe_->free_dsm();
+      return -1;
+    }
+
+    he_perf_counters();
+    cout << "********** AFU Write to FPGA Cache successfully ********** "
+         << endl;
+
+    host_exe_->free_cache_read_write();
+    host_exe_->free_dsm();
+
+    cout << "********** FPGA Write cache hit test end**********" << endl;
+
+    return 0;
+  }
+
+  // FPGA read cache miss test: a single read pass over 2MB/64 lines, which
+  // is more than the 32KB/64 lines used by the cache hit tests.
+  // Returns 0 on success, -1 on allocation failure or timeout.
+  int he_run_fpga_rd_cache_miss_test() {
+    cout << "********** FPGA Read cache miss test start**********" << endl;
+    /*
+    STEPS
+    1) Allocate DSM, Read buffer, Write buffer
+    2) Write number of lines more than 32 kb 2mb/64
+    3) Set RdShared (CXL) config
+    4) Run test (Buffer is not present in FPGA - FPGA read Cache miss )
+
+    // 2) Set RdShared (CXL) config
+    //3) Run test ( AFU copies cache from host memory to FPGA cache)
+    //4) Set write Evict (CXL) config
+    //5) Run test ( AFU Invalidate to FPGA cache)
+    3) Set RdShared (CXL) config
+    4) Run test (Buffer is not present in FPGA - FPGA read Cache miss )
+    */
+
+    // 2MB / 64
+
+    // HE_INFO
+    // Set Read number Lines
+    he_info_.value = host_exe_->read64(HE_INFO);
+    cout << "Read address table 
size:" << he_info_.read_addr_table_size << endl; + + host_exe_->write64(HE_RD_NUM_LINES, FPGA_2MB_CACHE_LINES - 1); + cout << "Read number Lines:" << FPGA_2MB_CACHE_LINES - 1 << endl; + cout << "Line Repeat Count:" << host_exe_->he_linerep_count_ << endl; + + // set RD_CONFIG RdShared (CXL) + he_rd_cfg_.value = 0; + he_rd_cfg_.line_repeat_count = host_exe_->he_linerep_count_; + he_rd_cfg_.read_traffic_enable = 1; + he_rd_cfg_.opcode = RD_LINE_S; + host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); + + // set RD_ADDR_TABLE_CTRL + rd_table_ctl_.value = 0; + rd_table_ctl_.enable_address_stride = 1; + host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); + + // Allocate DSM buffer + if (!host_exe_->allocate_dsm()) { + cerr << "alloc dsm failed" << endl; + return -1; + } + + // Allocate Read, Write buffer + if (!host_exe_->allocate_cache_read_write(BUFFER_SIZE_2MB, numa_node_)) { + cerr << "allocate cache read write failed" << endl; + host_exe_->free_dsm(); + return -1; + } + + // start test + he_ctl_.Start = 1; + host_exe_->write64(HE_CTL, he_ctl_.value); + he_ctl_.Start = 0; + host_exe_->write64(HE_CTL, he_ctl_.value); + + // wait for completion + if (!he_wait_test_completion()) { + cerr << "timeout error" << endl; + he_perf_counters(); + host_exerciser_errors(); + host_exe_->free_cache_read_write(); + host_exe_->free_dsm(); + return -1; + } + + he_perf_counters(); + host_exe_->free_cache_read_write(); + host_exe_->free_dsm(); + + cout << "********** AFU Read FPGA Cache Miss successfully ********** " + << endl; + + cout << "********** FPGA Read cache miss test end**********" << endl; + return 0; + } + + int he_run_fpga_wr_cache_miss_test() { + cout << "********** FPGA write cache miss test start**********" << endl; + + /* + STEPS + 1) Allocate DSM, Read buffer, Write buffer + 2) Write number of lines more then 32 kb 2mb/64 + 3) Set WR ItoMWr (CXL) config + 4) Run test ( Buffer is not present in FPGA - FPGA write Cache miss ) + + //2) Set RdShared (CXL) 
config + //3) Run test ( AFU copies cache from host to HDM + //4) Set write Evict (CXL) config + //5) Run test ( AFU Invalidate to FPGA cache) + 6) Set WR ItoMWr (CXL) config + 7) Run test ( Buffer is not present in FPGA - FPGA write Cache miss ) + */ + + // HE_INFO + // Set Read number Lines + he_info_.value = host_exe_->read64(HE_INFO); + cout << "Read address table size:" << he_info_.read_addr_table_size << endl; + cout << "Write address table size:" << he_info_.write_addr_table_size + << endl; + + host_exe_->write64(HE_WR_NUM_LINES, FPGA_2MB_CACHE_LINES - 1); + cout << "Read/write number Lines:" << FPGA_2MB_CACHE_LINES - 1 << endl; + cout << "Line Repeat Count:" << host_exe_->he_linerep_count_ << endl; + + // set W_CONFIG + he_wr_cfg_.value = 0; + he_wr_cfg_.line_repeat_count = host_exe_->he_linerep_count_; + he_wr_cfg_.write_traffic_enable = 1; + he_wr_cfg_.opcode = WR_LINE_M; + host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); + + // Set WR_ADDR_TABLE_CTRL + wr_table_ctl_.value = 0; + wr_table_ctl_.enable_address_stride = 1; + host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); + + // Allocate DSM buffer + if (!host_exe_->allocate_dsm()) { + cerr << "alloc dsm failed" << endl; + return -1; + } + + // Allocate Read, Write buffer + if (!host_exe_->allocate_cache_read_write(BUFFER_SIZE_2MB, numa_node_)) { + cerr << "allocate cache read failed" << endl; + host_exe_->free_dsm(); + return -1; + } + + // start test + he_ctl_.Start = 1; + host_exe_->write64(HE_CTL, he_ctl_.value); + he_ctl_.Start = 0; + host_exe_->write64(HE_CTL, he_ctl_.value); + + // wait for completion + if (!he_wait_test_completion()) { + cerr << "timeout error" << endl; + he_perf_counters(); + host_exerciser_errors(); + host_exe_->free_cache_read_write(); + host_exe_->free_dsm(); + return -1; + } + + he_perf_counters(); + host_exe_->free_cache_read_write(); + host_exe_->free_dsm(); + + cout << "********** AFU Write FPGA Cache Miss successfully ********** " + << endl; + + cout << 
"********** FPGA Write cache miss test end**********" << endl; + return 0; + } + + int he_run_host_rd_cache_hit_test() { + cout << "********** 1 Host LLC Read cache hit test start**********" << endl; + + /* + STEPS + 1) Allocate DSM, Read buffer + 2) create thread read buffer + 3) Set RdLine_I (CXL) config + 4) Run test ( AFU reads from host cache to FPGA cache) + */ + + // HE_INFO + // Set Read number Lines + he_info_.value = host_exe_->read64(HE_INFO); + cout << "Read address table size:" << he_info_.read_addr_table_size << endl; + cout << "Write address table size:" << he_info_.write_addr_table_size + << endl; + + host_exe_->write64(HE_RD_NUM_LINES, FPGA_32KB_CACHE_LINES - 1); + cout << "Read number Lines:" << FPGA_32KB_CACHE_LINES - 1 << endl; + cout << "Line Repeat Count:" << host_exe_->he_linerep_count_ << endl; + + // set RD_CONFIG RdShared (CXL) + he_rd_cfg_.value = 0; + he_rd_cfg_.line_repeat_count = host_exe_->he_linerep_count_; + he_rd_cfg_.read_traffic_enable = 1; + he_rd_cfg_.opcode = RD_LINE_I; + host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); + + // set RD_ADDR_TABLE_CTRL + rd_table_ctl_.value = 0; + rd_table_ctl_.enable_address_stride = 1; + host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); + + // Allocate DSM buffer + if (!host_exe_->allocate_dsm()) { + cerr << "alloc dsm failed" << endl; + return -1; + } + + // Allocate Read buffer + if (!host_exe_->allocate_cache_read(BUFFER_SIZE_2MB, numa_node_)) { + cerr << "allocate cache read failed" << endl; + host_exe_->free_dsm(); + return -1; + } + + cout << " create thread - moves read buffer to host cache " << endl; + std::thread t1(he_cache_thread, host_exe_->get_read(), BUFFER_SIZE_2MB); + sleep(1); + + // start + he_ctl_.Start = 1; + host_exe_->write64(HE_CTL, he_ctl_.value); + he_ctl_.Start = 0; + host_exe_->write64(HE_CTL, he_ctl_.value); + + // wait for completion + if (!he_wait_test_completion()) { + cerr << "timeout error" << endl; + he_perf_counters(); + 
host_exerciser_errors(); + + g_stop_thread = true; + t1.join(); + sleep(1); + host_exe_->free_cache_read(); + host_exe_->free_dsm(); + return -1; + } + + he_perf_counters(); + + g_stop_thread = true; + t1.join(); + + he_perf_counters(); + sleep(1); + host_exe_->free_cache_read(); + host_exe_->free_dsm(); + + cout << "********** AFU Copied host cache to FPGA Cache successfully " + "********** " + << endl; + + cout << "********** Host LLC cache hit test end**********" << endl; + return 0; + } + + int he_run_host_wr_cache_hit_test() { + cout << "********** Host LLC Write cache hit test start**********" << endl; + + /* + STEPS + 1) Allocate DSM, Write buffer + 2) create thread read buffer + 3) Set ItoMWr (CXL) config + 4) Run test ( AFU write to host cache) + */ + + // HE_INFO + // Set Read number Lines + he_info_.value = host_exe_->read64(HE_INFO); + cout << "Read address table size:" << he_info_.read_addr_table_size << endl; + cout << "Write address table size:" << he_info_.write_addr_table_size + << endl; + + host_exe_->write64(HE_WR_NUM_LINES, FPGA_32KB_CACHE_LINES - 1); + cout << "Write number Lines:" << FPGA_32KB_CACHE_LINES - 1 << endl; + cout << "Line Repeat Count:" << host_exe_->he_linerep_count_ << endl; + + // set RD_CONFIG + he_wr_cfg_.value = 0; + he_wr_cfg_.line_repeat_count = host_exe_->he_linerep_count_; + he_wr_cfg_.write_traffic_enable = 1; + he_wr_cfg_.opcode = WR_LINE_I; + host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); + + // set RD_ADDR_TABLE_CTRL + wr_table_ctl_.value = 0; + wr_table_ctl_.enable_address_stride = 1; + host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); + + // Allocate DSM buffer + if (!host_exe_->allocate_dsm()) { + cerr << "alloc dsm failed" << endl; + return -1; + } + + // Allocate Read buffer + if (!host_exe_->allocate_cache_write(BUFFER_SIZE_2MB, numa_node_)) { + cerr << "allocate cache read failed" << endl; + host_exe_->free_dsm(); + return -1; + } + + cout << " create thread - moves read buffer to host cache 
" << endl; + std::thread t1(he_cache_thread, host_exe_->get_write(), BUFFER_SIZE_2MB); + sleep(1); + + // start + he_ctl_.Start = 1; + host_exe_->write64(HE_CTL, he_ctl_.value); + he_ctl_.Start = 0; + host_exe_->write64(HE_CTL, he_ctl_.value); + + // wait for completion + if (!he_wait_test_completion()) { + cerr << "timeout error" << endl; + + he_perf_counters(); + host_exerciser_errors(); + g_stop_thread = true; + t1.join(); + sleep(1); + host_exe_->free_cache_write(); + host_exe_->free_dsm(); + return -1; + } + + g_stop_thread = true; + t1.join(); + he_perf_counters(); + cout << "********** AFU write host cache successfully ********** " << endl; + + sleep(1); + host_exe_->free_cache_write(); + host_exe_->free_dsm(); + + cout << "********** Host LLC cache hit Write test end**********" << endl; + return 0; + } + + int he_run_host_rd_cache_miss_test() { + cout << "********** Host LLC Read cache miss test start**********" << endl; + + /* + STEPS + 1) Allocate DSM, Read buffer + 2) flush host read buffer cachde + 3) Set RdLine_I (CXL) config + 4) Run test ( AFU reads from host cache to FPGA cache) + */ + + // HE_INFO + // Set Read number Lines + he_info_.value = host_exe_->read64(HE_INFO); + cout << "Read address table size:" << he_info_.read_addr_table_size << endl; + cout << "Write address table size:" << he_info_.write_addr_table_size + << endl; + + host_exe_->write64(HE_RD_NUM_LINES, FPGA_32KB_CACHE_LINES - 1); + cout << "Read/write number Lines:" << FPGA_32KB_CACHE_LINES - 1 << endl; + cout << "Line Repeat Count:" << host_exe_->he_linerep_count_ << endl; + + // set RD_CONFIG + he_rd_cfg_.value = 0; + he_rd_cfg_.line_repeat_count = host_exe_->he_linerep_count_; + he_rd_cfg_.read_traffic_enable = 1; + he_rd_cfg_.opcode = RD_LINE_I; + host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); + + // set RD_ADDR_TABLE_CTR + rd_table_ctl_.value = 0; + rd_table_ctl_.enable_address_stride = 1; + host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); + + // Allocate 
DSM buffer + if (!host_exe_->allocate_dsm()) { + cerr << "alloc dsm failed" << endl; + return -1; + } + + // Allocate Read buffer + if (!host_exe_->allocate_cache_read(BUFFER_SIZE_2MB, numa_node_)) { + cerr << "allocate cache read failed" << endl; + host_exe_->free_dsm(); + return -1; + } + + // flush host cache + // int status = cacheflush((host_exe_->get_read(), BUFFER_SIZE_2MB, BCACHE); + + // start + he_ctl_.Start = 1; + host_exe_->write64(HE_CTL, he_ctl_.value); + he_ctl_.Start = 0; + host_exe_->write64(HE_CTL, he_ctl_.value); + + // wait for completion + if (!he_wait_test_completion()) { + cerr << "timeout error" << endl; + he_perf_counters(); + host_exerciser_errors(); + host_exe_->free_cache_read(); + host_exe_->free_dsm(); + return -1; + } + + he_perf_counters(); + + host_exe_->free_cache_read(); + host_exe_->free_dsm(); + + cout << "********** Ran Host LLC Read cache miss successfully ********** " + << endl; + + cout << "********** Host LLC Read cache miss test end**********" << endl; + return 0; + } + + int he_run_host_wr_cache_miss_test() { + cout << "********** Host LLC Write cache miss test start**********" << endl; + + /* + STEPS + 1) Allocate DSM, write buffer + 2) flush host write buffer cachde + 3) Set RdLine_I (CXL) config + 4) Run test ( AFU reads from host cache to FPGA cache) + */ + + // HE_INFO + // Set write number Lines + he_info_.value = host_exe_->read64(HE_INFO); + cout << "Read address table size:" << he_info_.read_addr_table_size << endl; + cout << "Write address table size:" << he_info_.write_addr_table_size + << endl; + + host_exe_->write64(HE_WR_NUM_LINES, FPGA_32KB_CACHE_LINES - 1); + cout << "Write number Lines:" << FPGA_32KB_CACHE_LINES - 1 << endl; + cout << "Line Repeat Count:" << host_exe_->he_linerep_count_ << endl; + + // set RD_CONFIG + he_wr_cfg_.value = 0; + he_wr_cfg_.line_repeat_count = host_exe_->he_linerep_count_; + he_wr_cfg_.write_traffic_enable = 1; + he_wr_cfg_.opcode = WR_PUSH_I; + 
host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); + + // set RD_ADDR_TABLE_CTR + wr_table_ctl_.value = 0; + wr_table_ctl_.enable_address_stride = 1; + host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, rd_table_ctl_.value); + + // Allocate DSM buffer + if (!host_exe_->allocate_dsm()) { + cerr << "alloc dsm failed" << endl; + return -1; + } + + // Allocate Read buffer + if (!host_exe_->allocate_cache_write(BUFFER_SIZE_2MB, numa_node_)) { + cerr << "allocate cache read failed" << endl; + host_exe_->free_dsm(); + return -1; + } + + // start + he_ctl_.Start = 1; + host_exe_->write64(HE_CTL, he_ctl_.value); + he_ctl_.Start = 0; + host_exe_->write64(HE_CTL, he_ctl_.value); + + // wait for completion + if (!he_wait_test_completion()) { + cerr << "timeout error" << endl; + he_perf_counters(); + host_exerciser_errors(); + host_exe_->free_cache_write(); + host_exe_->free_dsm(); + return -1; + } + + he_perf_counters(); + + host_exe_->free_cache_write(); + host_exe_->free_dsm(); + + cout << "********** Ran Host LLC Write cache miss successfully ********** " + << endl; + + cout << "********** Host LLC Write cache miss test end**********" << endl; + return 0; + } + + void he_perf_counters() { + volatile he_cache_dsm_status *dsm_status = NULL; + + dsm_status = reinterpret_cast( + (uint8_t *)(host_exe_->get_dsm())); + if (!dsm_status) + return; + + std::cout << "\n********* DSM Status CSR Start *********" << std::endl; + + std::cout << "test completed :" << dsm_status->test_completed << std::endl; + std::cout << "dsm number:" << dsm_status->dsm_number << std::endl; + std::cout << "error vector:" << dsm_status->err_vector << std::endl; + std::cout << "num ticks:" << dsm_status->num_ticks << std::endl; + std::cout << "num reads:" << dsm_status->num_reads << std::endl; + std::cout << "num writes:" << dsm_status->num_writes << std::endl; + std::cout << "penalty start:" << dsm_status->penalty_start << std::endl; + std::cout << "penalty end:" << dsm_status->penalty_end << std::endl; + 
std::cout << "actual data:" << dsm_status->actual_data << std::endl; + std::cout << "expected data:" << dsm_status->expected_data << std::endl; + + std::cout << "********* DSM Status CSR end *********" << std::endl; + } + + void host_exerciser_errors() { + he_err_status err_status; + uint64_t err = 0; + if (host_exe_ == NULL) + return; + + err_status.value = host_exe_->read64(HE_ERROR_STATUS); + if (err_status.data_error == 1) { + cout << "Data Integrity Check error occured" << endl; + } + + if (err_status.err_index > 0) { + cout << "Error occurred at cache line address:" << err_status.err_index + << endl; + } + + err = host_exe_->read64(HE_ERROR_EXP_DATA); + cout << "Error Expected Data:" << err << endl; + + err = host_exe_->read64(HE_ERROR_ACT_DATA0); + cout << "Error Expected Data0:" << err << endl; + + err = host_exe_->read64(HE_ERROR_ACT_DATA1); + cout << "Error Expected Data1:" << err << endl; + + err = host_exe_->read64(HE_ERROR_ACT_DATA2); + cout << "Error Expected Data2:" << err << endl; + + err = host_exe_->read64(HE_ERROR_ACT_DATA3); + cout << "Error Expected Data3:" << err << endl; + + err = host_exe_->read64(HE_ERROR_ACT_DATA4); + cout << "Error Expected Data4:" << err << endl; + + err = host_exe_->read64(HE_ERROR_ACT_DATA5); + cout << "Error Expected Data5:" << err << endl; + + err = host_exe_->read64(HE_ERROR_ACT_DATA6); + cout << "Error Expected Data6:" << err << endl; + + err = host_exe_->read64(HE_ERROR_ACT_DATA7); + cout << "Error Expected Data7:" << err << endl; + } + + int parse_input_options() { + + if (!host_exe_) + return -1; + + return 0; + } + + bool he_wait_test_completion() { + /* Wait for test completion */ + uint32_t timeout = HELPBK_TEST_TIMEOUT; + + volatile uint8_t *status_ptr = host_exe_->get_dsm(); + while (0 == ((*status_ptr) & 0x1)) { + usleep(HELPBK_TEST_SLEEP_INVL); + if (--timeout == 0) { + cout << "HE LPBK TIME OUT" << std::endl; + + return false; + } + } + return true; + } + + bool verify_numa_node() { + + if 
(numa_available() < 0) { + printf("System does not support NUMA API!\n"); + return false; + } + + printf("SUpported NUMA API!\n"); + + int n = numa_max_node(); + printf("There are %d nodes on your system\n", n + 1); + + int cup_num = sched_getcpu(); + printf("cup_num:%d\n", cup_num); + + int node = numa_node_of_cpu(cup_num); + printf("node:%d\n", node); + + if (host_exe_->he_target_ == HE_TARGET_HOST) { + numa_node_ = node; + printf("HE_TARGET_HOST numa_node_:%d\n", numa_node_); + + } else { + // find fpga numa node numebr + numa_node_ = 2; + printf("HE_TARGET_FPGA numa_node_:%d\n", numa_node_); + } + + int num_task = numa_num_task_nodes(); + printf("num_task:%d\n", num_task); + + return true; + } + + virtual int run(test_afu *afu, CLI::App *app) { + (void)app; + int ret = 0; + + host_exe_ = dynamic_cast(afu); + + if (!verify_numa_node()) { + numa_node_ = 0; + cout << "numa nodes are available set numa node to 0" << endl; + }; + + // reset HE cache + he_ctl_.value = 0; + he_ctl_.ResetL = 0; + host_exe_->write64(HE_CTL, he_ctl_.value); + + he_ctl_.value = 0; + he_ctl_.ResetL = 1; + host_exe_->write64(HE_CTL, he_ctl_.value); + + if (host_exe_->he_test_ == HE_FPGA_RD_CACHE_HIT) { + ret = he_run_fpga_rd_cache_hit_test(); + return ret; + } + + if (host_exe_->he_test_ == HE_FPGA_WR_CACHE_HIT) { + ret = he_run_fpga_wr_cache_hit_test(); + return ret; + } + + if (host_exe_->he_test_ == HE_FPGA_RD_CACHE_MISS) { + ret = he_run_fpga_rd_cache_miss_test(); + return ret; + } + + if (host_exe_->he_test_ == HE_FPGA_WR_CACHE_MISS) { + ret = he_run_fpga_wr_cache_miss_test(); + return ret; + } + + if (host_exe_->he_test_ == HE_HOST_RD_CACHE_HIT) { + ret = he_run_host_rd_cache_hit_test(); + return ret; + } + + if (host_exe_->he_test_ == HE_HOST_WR_CACHE_HIT) { + ret = he_run_host_wr_cache_hit_test(); + return ret; + } + + if (host_exe_->he_test_ == HE_HOST_RD_CACHE_MISS) { + ret = he_run_host_rd_cache_miss_test(); + return ret; + } + + if (host_exe_->he_test_ == HE_HOST_WR_CACHE_MISS) 
{ + ret = he_run_host_wr_cache_miss_test(); + return ret; + } + + return 0; + } + +protected: + host_exerciser *host_exe_; + token::ptr_t token_; + + he_ctl he_ctl_; + he_info he_info_; + he_rd_config he_rd_cfg_; + he_wr_config he_wr_cfg_; + + he_rd_addr_table_ctrl rd_table_ctl_; + he_wr_addr_table_ctrl wr_table_ctl_; + uint8_t *dsm_buf_; + uint8_t *rd_buf_; + + uint32_t numa_node_; +}; + +void he_cache_thread(uint8_t *buf_ptr, uint64_t len) { + cout << "he_cache_thread enter" << endl; + if (buf_ptr == NULL || len == 0) { + return; + } + uint64_t value; + UNUSED_PARAM(value); + uint64_t cache_lines = len / 64; + uint64_t i = 0; + cout << "he_cache_thread cache_lines:" << cache_lines << endl; + + while (true) { + + if (g_stop_thread == true) { + cout << "he_cache_thread g_stop_thread " << endl; + return; + } + // cout << "he_cache_thread:i "<= cache_lines) { + i = 0; + } + } + + cout << "he_cache_thread end" << endl; + return; +} + +} // end of namespace host_exerciser diff --git a/samples/cxl_host_exerciser/dfl-he-cache.h b/samples/cxl_host_exerciser/dfl-he-cache.h new file mode 100644 index 000000000000..b95df7414bdb --- /dev/null +++ b/samples/cxl_host_exerciser/dfl-he-cache.h @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Header File for host exerciser cache DFL User API + * + * Copyright (C) 2023 Intel Corporation, Inc. + * + * Authors: + * Tim Whisonant + * Ananda Ravuri + * Russell H. Weight + */ + +#ifndef _UAPI_LINUX_HE_CACHE_DFL_H +#define _UAPI_LINUX_HE_CACHE_DFL_H + +#include +#include + +#define DFL_HE_CACHE_API_VERSION 0 + +/* + * The IOCTL interface for DFL based HE CACHE is designed for extensibility by + * embedding the structure length (argsz) and flags into structures passed + * between kernel and userspace. This design referenced the VFIO IOCTL + * interface (include/uapi/linux/vfio.h). 
+ */ + +#define DFL_HE_CACHE_MAGIC 0xB6 + +#define DFL_HE_CACHE_BASE 0 + +/** + * DFL_FPGA_GET_API_VERSION - _IO(DFL_FPGA_MAGIC, DFL_FPGA_BASE + 0) + * + * Report the version of the driver API. + * Return: Driver API Version. + */ + +#define DFL_HE_CACHE_GET_API_VERSION \ + _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 0) + +/** + * DFL_FPGA_CHECK_EXTENSION - _IO(DFL_FPGA_MAGIC, DFL_FPGA_BASE + 1) + * + * Check whether an extension is supported. + * Return: 0 if not supported, otherwise the extension is supported. + */ + +#define DFL_HE_CACHE_CHECK_EXTENSION \ + _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 1) + +#define DFL_HE_CACHE_GET_REGION_INFO \ + _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 2) + +/** + * FPGA_PORT_GET_REGION_INFO - _IOWR(FPGA_MAGIC, PORT_BASE + 2, + * struct dfl_he_cache_region_info) + * + * Retrieve information about a device memory region. + * Caller provides struct dfl_fpga_port_region_info with index value set. + * Driver returns the region info in other fields. + * Return: 0 on success, -errno on failure. 
+ */ +struct dfl_he_cache_region_info { + /* Input */ + __u32 argsz; /* Structure length */ + /* Output */ + __u32 flags; /* Access permission */ +#define DFL_HE_CACHE_REGION_READ (1 << 0) /* Region is readable */ +#define DFL_HE_CACHE_REGION_WRITE (1 << 1) /* Region is writable */ +#define DFL_HE_CACHE_REGION_MMAP (1 << 2) /* Can be mmaped to userspace */ + __u64 size; /* Region size (bytes) */ + __u64 offset; /* Region offset from start of device fd */ +}; + +#define DFL_HE_CACHE_SET_DSM_INFO _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 3) + +struct dfl_he_cache_dsm_info { + /* Input */ + __u32 argsz; /* Structure length */ + __u64 user_addr; /* Process virtual address */ + __u64 length; /* Length of mapping (bytes)*/ +}; + +#define DFL_HE_CACHE_CLEAR_DSM_INFO \ + _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 4) + +#define DFL_HE_CACHE_ALLOC_ADDR_TABLE \ + _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 5) +#define DFL_HE_CACHE_FREE_ADDR_TABLE \ + _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 6) +#define DFL_HE_CACHE_APPEND_ADDR_TABLE \ + _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 7) + +#define DFL_HE_CACHE_NUM_LINES_MIN 1 +#define DFL_HE_CACHE_NUM_LINES_MAX 0xffff + +struct dfl_he_cache_addr_table { + /* Input */ + __u32 argsz; /* Structure length */ + __u32 flags; /* Address Table ID */ +#define DFL_HE_CACHE_READ_ADDR_TABLE (1 << 0) +#define DFL_HE_CACHE_WRITE_ADDR_TABLE (1 << 1) + __u32 cache_lines; /* Buffer size/offset in cache lines */ +}; + +#define DFL_HE_CACHE_NUMA_DMA_MAP \ + _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 12) +#define DFL_HE_CACHE_NUMA_DMA_UNMAP \ + _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 13) + +#define DFL_ARRAY_MAX_SIZE 0x10 + +struct dfl_he_cache_dma_map { + /* Input */ + __u32 argsz; /* Structure length */ + __u32 flags; /* flags */ + __u64 user_addr; /* Process virtual address */ + __u64 length; /* Length of mapping (bytes)*/ + __u32 numa_node; /* Node 0,1 2 */ + __u64 csr_array[DFL_ARRAY_MAX_SIZE]; /* CSR */ +}; + +struct 
dfl_he_cache_dma_unmap { + /* Input */ + __u32 argsz; /* Structure length */ + __u32 flags; /* flags */ + __u64 user_addr; /* Process virtual address */ + __u64 length; /* Length of mapping (bytes)*/ + __u64 csr_array[DFL_ARRAY_MAX_SIZE]; /* CSR */ +}; + +#endif /* _UAPI_LINUX_HE_CACHE_DFL_H */ diff --git a/samples/cxl_host_exerciser/he_cache_test.h b/samples/cxl_host_exerciser/he_cache_test.h new file mode 100644 index 000000000000..1e3b4d503c60 --- /dev/null +++ b/samples/cxl_host_exerciser/he_cache_test.h @@ -0,0 +1,829 @@ +// Copyright(c) 2023, Intel Corporation +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Intel Corporation nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dfl-he-cache.h" + +using namespace std; + +const char *sbdf_pattern = + "(([0-9a-fA-F]{4}):)?([0-9a-fA-F]{2}):([0-9a-fA-F]{2})\\.([0-9])"; + +enum { MATCHES_SIZE = 6 }; +#define FEATURE_DEV \ + "/sys/bus/pci/devices/%s/" \ + "fpga_region/region*/dfl-fme*/dfl_dev*/feature_id" + +#define MAX_SIZE 256 + +#define PROTECTION (PROT_READ | PROT_WRITE) + +#ifndef MAP_HUGETLB +#define MAP_HUGETLB 0x40000 +#endif +#ifndef MAP_HUGE_SHIFT +#define MAP_HUGE_SHIFT 26 +#endif + +#define MAP_2M_HUGEPAGE (0x15 << MAP_HUGE_SHIFT) /* 2 ^ 0x15 = 2M */ +#define MAP_1G_HUGEPAGE (0x1e << MAP_HUGE_SHIFT) /* 2 ^ 0x1e = 1G */ + +#ifdef __ia64__ +#define ADDR ((void *)(0x8000000000000000UL)) +#define FLAGS_4K (MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED) +#define FLAGS_2M (FLAGS_4K | MAP_2M_HUGEPAGE | MAP_HUGETLB) +#define FLAGS_1G (FLAGS_4K | MAP_1G_HUGEPAGE | MAP_HUGETLB) +#else +#define ADDR ((void *)(0x0UL)) +#define FLAGS_4K (MAP_PRIVATE | MAP_ANONYMOUS) +#define FLAGS_2M (FLAGS_4K | MAP_2M_HUGEPAGE | MAP_HUGETLB) +#define FLAGS_1G (FLAGS_4K | MAP_1G_HUGEPAGE | MAP_HUGETLB) +#endif + +#define KiB(x) ((x)*1024) +#define MiB(x) ((x)*1024 * 1024) +#define GiB(x) ((x)*1024 * 1024 * 1024) + +#define DFL_HE_CACHE_DSM_BASE 0x030 +#define DFL_HE_CACHE_WR_ADDR_TABLE_DATA 0x068 
+#define DFL_HE_CACHE_RD_ADDR_TABLE_DATA 0x088 + +void *alloc_2mb_hugepage(void) { + void *addr; + + addr = mmap(ADDR, MiB(2), PROTECTION, FLAGS_2M, 0, 0); + if (addr == MAP_FAILED) { + printf("alloc_2mb_hugepage() failed: %s\n", strerror(errno)); + addr = NULL; + } + + return addr; +} +void free_memory(void *addr, uint64_t len) { munmap(addr, len); } + +void *alloc_32kb_hugepage(void) { + void *addr; + + addr = mmap(ADDR, KiB(32), PROTECTION, FLAGS_4K, 0, 0); + if (addr == MAP_FAILED) { + printf("alloc_1kb_hugepage() failed: %s\n", strerror(errno)); + addr = NULL; + } + + return addr; +} + +void *alloc_4kb_hugepage(void) { + void *addr; + + addr = mmap(ADDR, KiB(4), PROTECTION, FLAGS_4K, 0, 0); + if (addr == MAP_FAILED) { + printf("alloc_1kb_hugepage() failed: %s\n", strerror(errno)); + addr = NULL; + } + + return addr; +} + +bool sysfs_read_u64(const char *path, uint64_t *value) { + ifstream fs; + fs.open(path, ios::in); + + std::string s; + if (fs.is_open()) { + std::string line; + std::getline(fs, line); + *value = std::stoul(line, 0, 16); + fs.close(); + return true; + } + return false; +} + +namespace opae { +namespace afu_test { + +namespace fpga = fpga::types; + +template +inline bool parse_match_int(const char *s, regmatch_t m, T &v, int radix = 10) { + if (m.rm_so == -1 || m.rm_eo == -1) + return false; + errno = 0; + v = std::strtoul(s + m.rm_so, NULL, radix); + return errno == 0; +} + +union pcie_address { + struct { + uint32_t function : 3; + uint32_t device : 5; + uint32_t bus : 8; + uint32_t domain : 16; + } fields; + uint32_t value; + + static pcie_address parse(const char *s) { + auto deleter = [&](regex_t *r) { + regfree(r); + delete r; + }; + std::unique_ptr re(new regex_t, deleter); + regmatch_t matches[MATCHES_SIZE]; + + int reg_res = regcomp(re.get(), sbdf_pattern, REG_EXTENDED | REG_ICASE); + if (reg_res) + throw std::runtime_error("could not compile regex"); + + reg_res = regexec(re.get(), s, MATCHES_SIZE, matches, 0); + if (reg_res) + throw 
std::runtime_error("pcie address not valid format"); + + uint16_t domain, bus, device, function; + if (!parse_match_int(s, matches[2], domain, 16)) + domain = 0; + if (!parse_match_int(s, matches[3], bus, 16)) + throw std::runtime_error("error parsing pcie address"); + if (!parse_match_int(s, matches[4], device, 16)) + throw std::runtime_error("error parsing pcie address"); + if (!parse_match_int(s, matches[5], function)) + throw std::runtime_error("error parsing; pcie address"); + pcie_address a; + a.fields.domain = domain; + a.fields.bus = bus; + a.fields.device = device; + a.fields.function = function; + return a; + } +}; + +class afu; // forward declaration + +class command { +public: + typedef std::shared_ptr ptr_t; + command() : running_(true) {} + virtual ~command() {} + virtual const char *name() const = 0; + virtual const char *description() const = 0; + virtual int run(afu *afu, CLI::App *app) = 0; + virtual void add_options(CLI::App *app) { (void)app; } + virtual const char *afu_id() const { return nullptr; } + + virtual uint64_t featureid() const = 0; + virtual uint64_t guidl() const = 0; + virtual uint64_t guidh() const = 0; + + bool running() const { return running_; } + void stop() { running_ = false; } + +private: + std::atomic running_; +}; + +#if SPDLOG_VERSION >= 10900 +// spdlog version 1.9.0 defines SPDLOG_LEVEL_NAMES as an array of string_view_t. +// Convert to vector of std::string to be used in CLI::IsMember(). 
+inline std::vector spdlog_levels() { + std::vector levels_view = SPDLOG_LEVEL_NAMES; + std::vector levels_str(levels_view.size()); + std::transform(levels_view.begin(), levels_view.end(), levels_str.begin(), + [](spdlog::string_view_t sv) { + return std::string(sv.data(), sv.size()); + }); + return levels_str; +} +#else +inline std::vector spdlog_levels() { return SPDLOG_LEVEL_NAMES; } +#endif // SPDLOG_VERSION + +class afu { +public: + typedef int (*command_fn)(afu *afu, CLI::App *app); + enum exit_codes { + success = 0, + not_run, + not_found, + no_access, + exception, + error + }; + + afu(const char *name, const char *afu_id = nullptr, + const char *log_level = nullptr) + : name_(name), afu_id_(afu_id ? afu_id : ""), app_(name_), pci_addr_(""), + log_level_(log_level ? log_level : "info"), timeout_msec_(60000), + current_command_(nullptr) { + if (!afu_id_.empty()) + app_.add_option("-g,--guid", afu_id_, "GUID")->default_str(afu_id_); + app_.add_option("-p,--pci-address", pci_addr_, + "[:]:."); + app_.add_option("-l,--log-level", log_level_, "stdout logging level") + ->default_str(log_level_) + ->check(CLI::IsMember(spdlog_levels())); + app_.add_option("-t,--timeout", timeout_msec_, "test timeout (msec)") + ->default_str(std::to_string(timeout_msec_)); + } + virtual ~afu() { + if (logger_) + spdlog::drop(logger_->name()); + } + + CLI::App &cli() { return app_; } + + int find_dev_feature() { + glob_t pglob; + char feature_path[MAX_SIZE] = {0}; + int gres = 0; + uint64_t value = 0; + size_t i = 0; + + if (!pci_addr_.empty()) { + if (snprintf(feature_path, sizeof(feature_path), FEATURE_DEV, + pci_addr_.c_str()) < 0) { + cerr << "snprintf buffer overflow" << endl; + return 1; + } + } else { + if (snprintf(feature_path, sizeof(feature_path), FEATURE_DEV, "*:*:*.*") < + 0) { + cerr << "snprintf buffer overflow" << endl; + return 2; + } + } + + gres = glob(feature_path, GLOB_NOSORT, NULL, &pglob); + if (gres) { + cerr << "Failed pattern match" << feature_path << ":" << 
strerror(errno) + << endl; + globfree(&pglob); + return 3; + } + + for (i = 0; i < pglob.gl_pathc; i++) { + bool retval = sysfs_read_u64(pglob.gl_pathv[i], &value); + if (!retval) { + cerr << "Failed to read sysfs value" << endl; + continue; + } + + if (current_command()->featureid() == value) { + string str(pglob.gl_pathv[i]); + string substr_dev(str.substr(0, str.rfind("/"))); + globfree(&pglob); + + substr_dev.append("/he-cache/he-cache*"); + gres = glob(substr_dev.c_str(), GLOB_NOSORT, NULL, &pglob); + if (gres) { + cerr << "Failed pattern match" << substr_dev.c_str() << ":" + << strerror(errno) << endl; + globfree(&pglob); + return 4; + } + string str1(pglob.gl_pathv[0]); + globfree(&pglob); + dev_path_.append("/dev"); + dev_path_.append(str1.substr(str1.rfind("/"), 13)); + + return 0; + } + } + + return 5; + } + + void unmap_mmio() { + if (mmio_base_) { + if (munmap(mmio_base_, rinfo_.size) == -1) + cerr << "Failed to unmap MMIO:" << strerror(errno) << endl; + } + } + + bool map_mmio() { + void *user_v; + user_v = mmap(NULL, rinfo_.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, + rinfo_.offset); + if (user_v == MAP_FAILED) { + cerr << "Failed to map MMIO:" << strerror(errno) << endl; + return false; + } + mmio_base_ = (uint8_t *)user_v; + + return true; + } + + int open_handle() { + + int res = 0; + cout << "dev_path_:" << dev_path_ << endl; + + fd_ = open(dev_path_.c_str(), O_RDWR); + if (fd_ < 0) { + cerr << "open() failed:" << strerror(errno) << endl; + return 1; + } + + memset(&rinfo_, 0, sizeof(rinfo_)); + rinfo_.argsz = sizeof(rinfo_); + res = ioctl(fd_, DFL_HE_CACHE_GET_REGION_INFO, &rinfo_); + if (res) { + cerr << "ioctl() DFL_HE_CACHE_GET_REGION_INFO failed:" << strerror(errno) + << endl; + close(fd_); + return 2; + } + + printf("MMIO region flags: 0x%x size: %llu offset: %llu\n", rinfo_.flags, + rinfo_.size, rinfo_.offset); + + if (!map_mmio()) { + cerr << "mmap failed:" << strerror(errno) << endl; + close(fd_); + return 3; + } + + volatile uint64_t 
*u64 = (volatile uint64_t *)mmio_base_; + printf("DFH : 0x%016" PRIx64 "\n", *u64); + printf("DFH + 8 : 0x%016" PRIx64 "\n", *(u64 + 1)); + printf("DFH + 16: 0x%016" PRIx64 "\n", *(u64 + 2)); + printf("DFH + 24: 0x%016" PRIx64 "\n", *(u64 + 3)); + + return exit_codes::not_run; + } + + int main(int argc, char *argv[]) { + if (!commands_.empty()) + app_.require_subcommand(); + CLI11_PARSE(app_, argc, argv); + + command::ptr_t test(nullptr); + CLI::App *app = nullptr; + for (auto kv : commands_) { + if (*kv.first) { + app = kv.first; + test = kv.second; + break; + } + } + if (!test) { + std::cerr << "no command specified\n"; + return exit_codes::not_run; + } + + auto console_sink = std::make_shared(); + logger_ = std::make_shared(test->name(), console_sink); + spdlog::register_logger(logger_); + logger_->set_level(spdlog::level::from_str(log_level_)); + current_command_ = test; + if (find_dev_feature() != 0) { + cerr << "fails to find feature" << endl; + return exit_codes::exception; + }; + + int res = open_handle(); + if (res != exit_codes::not_run) { + return res; + } + + return run(app, test); + } + + virtual int run(CLI::App *app, command::ptr_t test) { + int res = exit_codes::not_run; + current_command_ = test; + + try { + std::future f = std::async(std::launch::async, [this, test, app]() { + return test->run(this, app); + }); + auto status = f.wait_for(std::chrono::milliseconds(timeout_msec_)); + if (status == std::future_status::timeout) { + std::cerr << "Error: test timed out" << std::endl; + current_command_->stop(); + throw std::runtime_error("timeout"); + } + res = f.get(); + } catch (std::exception &ex) { + res = exit_codes::exception; + } + + current_command_.reset(); + return res; + } + + template CLI::App *register_command() { + command::ptr_t cmd(new T()); + auto sub = app_.add_subcommand(cmd->name(), cmd->description()); + cmd->add_options(sub); + commands_[sub] = cmd; + return sub; + } + + uint64_t read64(uint32_t offset) { + uint64_t value = 
*((uint64_t *)(mmio_base_ + offset)); + return value; + } + + void write64(uint32_t offset, uint64_t value) { + *((uint64_t *)(mmio_base_ + offset)) = value; + return; + } + + uint32_t read32(uint32_t offset) { + uint32_t value = *((uint64_t *)(mmio_base_ + offset)); + return value; + } + + void write32(uint32_t offset, uint32_t value) { + *((uint32_t *)(mmio_base_ + offset)) = value; + return; + } + + command::ptr_t current_command() const { return current_command_; } + + bool open_device() { + + // std::cerr << "open\n" << dev_str; + fd_ = open(dev_path_.c_str(), O_RDWR); + if (fd_ < 0) { + printf("open() failed: %s\n", strerror(errno)); + return false; + } + + return true; + } + + bool close_device() { + if (fd_ > 0) + close(fd_); + return true; + } + + bool allocate_dsm(size_t len = KiB(4), uint32_t node = 0) { + int res = 0; + void *ptr = NULL; + struct dfl_he_cache_dma_map dma_map; + // cout << "allocate_dsm\n"; + + memset(&dma_map, 0, sizeof(dma_map)); + + ptr = alloc_4kb_hugepage(); + if (!ptr) { + cerr << "failed to allocate 4k huge page:" << strerror(errno) << endl; + return false; + } + + dma_map.argsz = sizeof(dma_map); + dma_map.user_addr = (__u64)ptr; + dma_map.length = len; + dma_map.numa_node = node; + dma_map.csr_array[0] = DFL_HE_CACHE_DSM_BASE; // 0x030 + + volatile uint64_t *u64 = + (volatile uint64_t *)(mmio_base_ + DFL_HE_CACHE_DSM_BASE); + + res = ioctl(fd_, DFL_HE_CACHE_NUMA_DMA_MAP, &dma_map); + if (res) { + cerr << "ioctl DFL_HE_CACHE_NUMA_NODE_DSM_INFO failed" << strerror(errno) + << endl; + goto out_free; + } + printf("DSM_BASE: 0x%016" PRIx64 "\n", *u64); + + dsm_buffer_ = (uint8_t *)ptr; + dsm_buf_len_ = len; + return true; + + out_free: + free_memory(ptr, len); + return false; + } + + bool free_dsm() { + struct dfl_he_cache_dma_unmap dma_unmap; + int res = 0; + + // cout << "free_dsm\n" << endl; + memset(&dma_unmap, 0, sizeof(dma_unmap)); + + dma_unmap.argsz = sizeof(dma_unmap); + dma_unmap.user_addr = (__u64)dsm_buffer_; + 
dma_unmap.length = dsm_buf_len_; + dma_unmap.csr_array[0] = DFL_HE_CACHE_DSM_BASE; // 0x030 + + volatile uint64_t *u64 = + (volatile uint64_t *)(mmio_base_ + DFL_HE_CACHE_DSM_BASE); + + res = ioctl(fd_, DFL_HE_CACHE_NUMA_DMA_UNMAP, &dma_unmap); + if (res) { + cerr << "ioctl DFL_HE_CACHE_NUMA_DMA_UNMAP failed" << strerror(errno) + << endl; + } + printf("DSM_BASE: 0x%016" PRIx64 "\n", *u64); + free_memory(dsm_buffer_, dsm_buf_len_); + + return true; + } + + bool allocate_cache_read(size_t len = MiB(2), uint32_t numa_node = 0) { + + int res = 0; + void *ptr = NULL; + struct dfl_he_cache_dma_map dma_map; + + // cout << "allocate_cache_read\n"; + + memset(&dma_map, 0, sizeof(dma_map)); + + ptr = alloc_2mb_hugepage(); + if (!ptr) { + cerr << "failed to allocate huge pages\n" << endl; + return false; + } + + cout << "numa_node: " << numa_node << endl; + + dma_map.argsz = sizeof(dma_map); + dma_map.user_addr = (__u64)ptr; + dma_map.length = len; + dma_map.numa_node = numa_node; + dma_map.csr_array[0] = DFL_HE_CACHE_RD_ADDR_TABLE_DATA; // 0x88 + + volatile uint64_t *u64 = + (volatile uint64_t *)(mmio_base_ + DFL_HE_CACHE_RD_ADDR_TABLE_DATA); + + res = ioctl(fd_, DFL_HE_CACHE_NUMA_DMA_MAP, &dma_map); + if (res) { + cerr << "ioctl DFL_HE_CACHE_NUMA_DMA_MAP failed" << strerror(errno) + << endl; + goto out_free; + } + printf("DFL_HE_CACHE_RD_ADDR_TABLE_DATA: 0x%016" PRIx64 "\n", *u64); + + rd_buffer_ = (uint8_t *)ptr; + rd_buf_len_ = len; + return true; + + out_free: + free_memory(ptr, len); + return false; + } + + bool free_cache_read() { + struct dfl_he_cache_dma_unmap dma_unmap; + int res = 0; + + memset(&dma_unmap, 0, sizeof(dma_unmap)); + + dma_unmap.argsz = sizeof(dma_unmap); + dma_unmap.user_addr = (__u64)rd_buffer_; + dma_unmap.length = rd_buf_len_; + dma_unmap.csr_array[0] = DFL_HE_CACHE_RD_ADDR_TABLE_DATA; // 0x88 + + volatile uint64_t *u64 = + (volatile uint64_t *)(mmio_base_ + DFL_HE_CACHE_RD_ADDR_TABLE_DATA); + res = ioctl(fd_, DFL_HE_CACHE_NUMA_DMA_UNMAP, 
&dma_unmap); + if (res) { + cerr << "ioctl DFL_HE_CACHE_NUMA_DMA_UNMAP failed" << strerror(errno) + << endl; + } + + printf("DFL_HE_CACHE_RD_ADDR_TABLE_DATA: 0x%016" PRIx64 "\n", *u64); + free_memory(rd_buffer_, rd_buf_len_); + + return true; + } + + bool allocate_cache_write(size_t len = MiB(2), uint32_t numa_node = 0) { + int res; + void *ptr; + struct dfl_he_cache_dma_map dma_map; + + // std::cout << "allocate_cache_write" << endl; + + memset(&dma_map, 0, sizeof(dma_map)); + + ptr = alloc_2mb_hugepage(); + if (!ptr) { + cerr << "failed to allocate huge pages\n" << endl; + return false; + } + + dma_map.argsz = sizeof(dma_map); + dma_map.user_addr = (__u64)ptr; + dma_map.length = len; + dma_map.numa_node = numa_node; + dma_map.csr_array[0] = DFL_HE_CACHE_WR_ADDR_TABLE_DATA; // 0x68; + + volatile uint64_t *u64 = + (volatile uint64_t *)(mmio_base_ + DFL_HE_CACHE_WR_ADDR_TABLE_DATA); + + res = ioctl(fd_, DFL_HE_CACHE_NUMA_DMA_MAP, &dma_map); + if (res) { + cerr << "ioctl DFL_HE_CACHE_NUMA_DMA_MAP failed" << strerror(errno) + << endl; + goto out_free; + } + printf("\nDFL_HE_CACHE_WR_ADDR_TABLE_DATA: 0x%016" PRIx64 "\n", *u64); + + wr_buffer_ = (uint8_t *)ptr; + + return true; + + out_free: + free_memory(ptr, len); + return false; + } + + bool free_cache_write() { + struct dfl_he_cache_dma_unmap dma_unmap; + int res; + + // cout << "free_cache_write" << endl; + memset(&dma_unmap, 0, sizeof(dma_unmap)); + + dma_unmap.argsz = sizeof(dma_unmap); + dma_unmap.user_addr = (__u64)wr_buffer_; + dma_unmap.length = wr_buf_len_; + dma_unmap.csr_array[0] = DFL_HE_CACHE_WR_ADDR_TABLE_DATA; // 0x68; + + volatile uint64_t *u64 = + (volatile uint64_t *)(mmio_base_ + DFL_HE_CACHE_WR_ADDR_TABLE_DATA); + res = ioctl(fd_, DFL_HE_CACHE_NUMA_DMA_UNMAP, &dma_unmap); + if (res) { + cerr << "ioctl DFL_HE_CACHE_NUMA_DMA_UNMAP failed" << strerror(errno) + << endl; + } + + printf("\nDFL_HE_CACHE_WR_ADDR_TABLE_DATA: 0x%016" PRIx64 "\n", *u64); + free_memory(wr_buffer_, wr_buf_len_); + + return 
true; + } + + bool allocate_cache_read_write(size_t len = MiB(2), uint32_t numa_node = 0) { + + int res = 0; + void *ptr = NULL; + struct dfl_he_cache_dma_map dma_map; + + // cout<< "allocate_cache_read_write"; + + memset(&dma_map, 0, sizeof(dma_map)); + ptr = alloc_2mb_hugepage(); + if (!ptr) { + cerr << "failed to allocate huge pages\n" << endl; + return false; + } + + dma_map.argsz = sizeof(dma_map); + dma_map.user_addr = (__u64)ptr; + dma_map.length = len; + dma_map.numa_node = numa_node; + dma_map.csr_array[0] = DFL_HE_CACHE_RD_ADDR_TABLE_DATA; // 0x88; + dma_map.csr_array[1] = DFL_HE_CACHE_WR_ADDR_TABLE_DATA; // 0x68; + + volatile uint64_t *u64_wr = + (volatile uint64_t *)(mmio_base_ + DFL_HE_CACHE_WR_ADDR_TABLE_DATA); + volatile uint64_t *u64_rd = + (volatile uint64_t *)(mmio_base_ + DFL_HE_CACHE_RD_ADDR_TABLE_DATA); + + res = ioctl(fd_, DFL_HE_CACHE_NUMA_DMA_MAP, &dma_map); + if (res) { + cerr << "ioctl DFL_HE_CACHE_NUMA_DMA_MAP failed" << strerror(errno) + << endl; + goto out_free; + } + + printf("\nDFL_HE_CACHE_WR_ADDR_TABLE_DATA: 0x%016" PRIx64 "\n", *u64_wr); + printf("\nDFL_HE_CACHE_RD_ADDR_TABLE_DATAs: 0x%016" PRIx64 "\n", *u64_rd); + + rd_wr_buffer_ = (uint8_t *)ptr; + rd_wr_buf_len_ = len; + + return true; + + out_free: + free_memory(ptr, len); + return false; + } + + bool free_cache_read_write() { + struct dfl_he_cache_dma_unmap dma_unmap; + int res; + + // cout << "free_cache_read_write\n" << endl; + + memset(&dma_unmap, 0, sizeof(dma_unmap)); + + dma_unmap.argsz = sizeof(dma_unmap); + dma_unmap.user_addr = (__u64)rd_wr_buffer_; + dma_unmap.length = rd_wr_buf_len_; + dma_unmap.csr_array[0] = DFL_HE_CACHE_RD_ADDR_TABLE_DATA; // 0x88; + dma_unmap.csr_array[1] = DFL_HE_CACHE_WR_ADDR_TABLE_DATA; // 0x68; + + volatile uint64_t *u64_wr = + (volatile uint64_t *)(mmio_base_ + DFL_HE_CACHE_WR_ADDR_TABLE_DATA); + volatile uint64_t *u64_rd = + (volatile uint64_t *)(mmio_base_ + DFL_HE_CACHE_RD_ADDR_TABLE_DATA); + + res = ioctl(fd_, 
DFL_HE_CACHE_NUMA_DMA_UNMAP, &dma_unmap); + if (res) { + cerr << "ioctl DFL_HE_CACHE_NUMA_DMA_UNMAP failed" << strerror(errno) + << endl; + } + + printf("\nDFL_HE_CACHE_WR_ADDR_TABLE_DATA: 0x%016" PRIx64 "\n", *u64_wr); + printf("\nDFL_HE_CACHE_RD_ADDR_TABLE_DATAs: 0x%016" PRIx64 "\n", *u64_rd); + + free_memory(rd_wr_buffer_, rd_wr_buf_len_); + rd_wr_buffer_ = NULL; + return true; + } + + uint8_t *get_dsm() const { return dsm_buffer_; } + + uint8_t *get_read() const { return rd_buffer_; } + + uint8_t *get_write() const { return wr_buffer_; } + + uint8_t *get_read_write() const { return rd_wr_buffer_; } + +protected: + std::string name_; + std::string afu_id_; + CLI::App app_; + std::string pci_addr_; + std::string log_level_; + uint32_t timeout_msec_; + + int fd_; + uint8_t *mmio_base_; + uint64_t mmio_len_; + + uint8_t *dsm_buffer_; + uint64_t dsm_buf_len_; + + uint8_t *rd_buffer_; + uint64_t rd_buf_len_; + + uint8_t *wr_buffer_; + uint64_t wr_buf_len_; + + uint8_t *rd_wr_buffer_; + uint64_t rd_wr_buf_len_; + + struct dfl_he_cache_region_info rinfo_; + + std::string dev_path_; + + command::ptr_t current_command_; + std::map commands_; + +public: + std::shared_ptr logger_; +}; + +} // end of namespace afu_test +} // end of namespace opae From 5b92bb079795f3738d9d608de4619eb83aefb647 Mon Sep 17 00:00:00 2001 From: anandaravuri Date: Wed, 20 Sep 2023 10:03:20 -0700 Subject: [PATCH 02/11] fix: code rview comments and bugs fixed Signed-off-by: anandaravuri --- samples/cxl_host_exerciser/CMakeLists.txt | 4 +- ...ost_exerciser_cmd.h => cxl_he_cache_cmd.h} | 285 +++++++++++------- ...rciser_cache.h => cxl_he_cache_lpbk_cmd.h} | 51 +++- samples/cxl_host_exerciser/cxl_he_cmd.h | 206 +++++++++++++ .../cxl_host_exerciser/cxl_host_exerciser.cpp | 7 +- .../cxl_host_exerciser/cxl_host_exerciser.h | 135 ++------- samples/cxl_host_exerciser/dfl-he-cache.h | 83 +++-- samples/cxl_host_exerciser/he_cache_test.h | 1 - 8 files changed, 481 insertions(+), 291 deletions(-) rename 
samples/cxl_host_exerciser/{cxl_host_exerciser_cmd.h => cxl_he_cache_cmd.h} (79%) rename samples/cxl_host_exerciser/{cxl_host_exerciser_cache.h => cxl_he_cache_lpbk_cmd.h} (67%) create mode 100644 samples/cxl_host_exerciser/cxl_he_cmd.h diff --git a/samples/cxl_host_exerciser/CMakeLists.txt b/samples/cxl_host_exerciser/CMakeLists.txt index adcdf4580a62..ed8c2f5534dc 100644 --- a/samples/cxl_host_exerciser/CMakeLists.txt +++ b/samples/cxl_host_exerciser/CMakeLists.txt @@ -33,7 +33,7 @@ if (OPAE_WITH_CLI11 AND OPAE_WITH_SPDLOG) ${spdlog_LIBRARIES} ${json-c_LIBRARIES} ${uuid_LIBRARIES} - numa + ${numa_LIBRARIES} COMPONENT samplebin ) target_include_directories(cxl_host_exerciser @@ -42,6 +42,4 @@ if (OPAE_WITH_CLI11 AND OPAE_WITH_SPDLOG) ${CMAKE_CURRENT_SOURCE_DIR} ${CLI11_INCLUDE_DIRS} ${spdlog_INCLUDE_DIRS}) - - endif(OPAE_WITH_CLI11 AND OPAE_WITH_SPDLOG) diff --git a/samples/cxl_host_exerciser/cxl_host_exerciser_cmd.h b/samples/cxl_host_exerciser/cxl_he_cache_cmd.h similarity index 79% rename from samples/cxl_host_exerciser/cxl_host_exerciser_cmd.h rename to samples/cxl_host_exerciser/cxl_he_cache_cmd.h index c9a099694788..81460d81bd6d 100644 --- a/samples/cxl_host_exerciser/cxl_host_exerciser_cmd.h +++ b/samples/cxl_host_exerciser/cxl_he_cache_cmd.h @@ -25,16 +25,9 @@ // POSSIBILITY OF SUCH DAMAGE. 
#pragma once +#include "cxl_he_cmd.h" #include "cxl_host_exerciser.h" #include "he_cache_test.h" -#include -#include -#include - -using test_afu = opae::afu_test::afu; -using opae::fpga::types::shared_buffer; -using opae::fpga::types::token; -namespace fpga = opae::fpga::types; #define UNUSED_PARAM(x) ((void)x) @@ -51,17 +44,65 @@ void he_sig_handler(int) { namespace host_exerciser { -std::mutex he_cache_read_mutex; -std::mutex he_cache_write_mutex; - -class host_exerciser_cmd; - void he_cache_thread(uint8_t *buf_ptr, uint64_t len); -class host_exerciser_cmd : public test_command { +class he_cache_cmd : public he_cmd { public: - host_exerciser_cmd() : host_exe_(NULL), numa_node_(0) {} - virtual ~host_exerciser_cmd() {} + he_cache_cmd() + : he_continuousmode_(false), he_contmodetime_(0), he_linerep_count_(0), + he_stide_(0), he_test_(0), he_test_all_(false) {} + + virtual ~he_cache_cmd() {} + + virtual const char *name() const override { return "cache"; } + + virtual const char *description() const override { + return "run simple cxl he cache test"; + } + + virtual const char *afu_id() const override { return HE_CACHE_AFU_ID; } + + virtual uint64_t featureid() const override { return MEM_TG_FEATURE_ID; } + + virtual uint64_t guidl() const override { return MEM_TG_FEATURE_GUIDL; } + + virtual uint64_t guidh() const override { return MEM_TG_FEATURE_GUIDH; } + + virtual void add_options(CLI::App *app) override { + app->add_option( + "--test", he_test_, + "host exerciser cache test {fpgardcachehit, fpgawrcachehit, all}") + ->transform(CLI::CheckedTransformer(he_test_modes)) + ->default_val("fpgardcachehit"); + + // Continuous mode + app->add_option("--continuousmode", he_continuousmode_, + "test rollover or test termination") + ->default_val("false"); + + // Continuous mode time + app->add_option("--contmodetime", he_contmodetime_, + "Continuous mode time in seconds") + ->default_val("1"); + + // target host or fpga + app->add_option("--target", he_target_, + "host 
exerciser run on host or fpga") + ->transform(CLI::CheckedTransformer(he_targets)) + ->default_val("host"); + + app->add_option("--stride", he_stide_, "Enable stride mode") + ->default_val("0"); + + // Line repeat count + app->add_option("--linerepcount", he_linerep_count_, "Line repeat count") + ->transform(CLI::Range(1, 256)) + ->default_val("10"); + + // Test all + app->add_option("--testall", he_test_all_, "Run all tests") + ->default_val("false"); + } int he_run_fpga_rd_cache_hit_test() { cout << "********** FPGA Read cache hit test start**********" << endl; @@ -69,11 +110,12 @@ class host_exerciser_cmd : public test_command { STEPS 1) Allocate DSM, Read buffer // flush 2) set cache lines 32kb/64 - 3) set loop count + 3) set line repeat count 4) Set RdShared (CXL) config 5) Run test ( AFU copies cache from host memory to FPGA cache) - 6) Set RdShared (CXL) config - 5) Run test ( AFU read cache from FPGA cache) + 6) set line repeat count + 7) Set RdShared (CXL) config + 8) Run test ( AFU read cache from FPGA cache) */ // HE_INFO @@ -82,13 +124,13 @@ class host_exerciser_cmd : public test_command { cout << "Read address table size:" << he_info_.read_addr_table_size << endl; cout << "Numa node:" << numa_node_ << endl; - host_exe_->write64(HE_RD_NUM_LINES, FPGA_32KB_CACHE_LINES - 1); - cout << "Read number Lines:" << FPGA_32KB_CACHE_LINES - 1 << endl; - cout << "Line Repeat Count:" << host_exe_->he_linerep_count_ << endl; + host_exe_->write64(HE_RD_NUM_LINES, FPGA_32KB_CACHE_LINES); + cout << "Read number Lines:" << FPGA_32KB_CACHE_LINES << endl; + cout << "Line Repeat Count:" << he_linerep_count_ << endl; // set RD_CONFIG RdShared (CXL) he_rd_cfg_.value = 0; - he_rd_cfg_.line_repeat_count = host_exe_->he_linerep_count_; + he_rd_cfg_.line_repeat_count = 1; he_rd_cfg_.read_traffic_enable = 1; he_rd_cfg_.opcode = RD_LINE_S; host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); @@ -135,7 +177,7 @@ class host_exerciser_cmd : public test_command { // set RD_CONFIG 
RdShared (CXL) he_rd_cfg_.value = 0; - he_rd_cfg_.line_repeat_count = host_exe_->he_linerep_count_; + he_rd_cfg_.line_repeat_count = he_linerep_count_; he_rd_cfg_.read_traffic_enable = 1; he_rd_cfg_.opcode = RD_LINE_S; host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); @@ -180,11 +222,12 @@ class host_exerciser_cmd : public test_command { STEPS 1) Allocate DSM, Read buffer, Write buffer // flush 2) set cache lines 32kb/64 - 3) set loop count + 3) set line repeat count 4) Set RdShared (CXL) config 5) Run test ( AFU copies cache from host memory to FPGA cache) - 6) Set WrLine_M/WrPart_M (CXL) config - 5) Run test ( AFU writes to FPGA cache) + 6) set line repeat count + 7) Set WrLine_M/WrPart_M (CXL) config + 8) Run test ( AFU writes to FPGA cache) */ // HE_INFO @@ -194,14 +237,13 @@ class host_exerciser_cmd : public test_command { cout << "Write address table size:" << he_info_.write_addr_table_size << endl; - host_exe_->write64(HE_RD_NUM_LINES, FPGA_32KB_CACHE_LINES - 1); - host_exe_->write64(HE_WR_NUM_LINES, FPGA_32KB_CACHE_LINES - 1); - cout << "Read/write number Lines:" << FPGA_32KB_CACHE_LINES - 1 << endl; - cout << "Line Repeat Count:" << host_exe_->he_linerep_count_ << endl; + host_exe_->write64(HE_RD_NUM_LINES, FPGA_32KB_CACHE_LINES); + cout << "Read/write number Lines:" << FPGA_32KB_CACHE_LINES << endl; + cout << "Line Repeat Count:" << he_linerep_count_ << endl; // set RD_CONFIG RdShared (CXL) he_rd_cfg_.value = 0; - he_rd_cfg_.line_repeat_count = host_exe_->he_linerep_count_; + he_rd_cfg_.line_repeat_count = 1; he_rd_cfg_.read_traffic_enable = 1; he_rd_cfg_.opcode = RD_LINE_S; host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); @@ -248,7 +290,7 @@ class host_exerciser_cmd : public test_command { // set W_CONFIG he_wr_cfg_.value = 0; - he_wr_cfg_.line_repeat_count = host_exe_->he_linerep_count_; + he_wr_cfg_.line_repeat_count = he_linerep_count_; he_wr_cfg_.write_traffic_enable = 1; he_wr_cfg_.opcode = WR_LINE_M; host_exe_->write64(HE_WR_CONFIG, 
he_wr_cfg_.value); @@ -258,6 +300,7 @@ class host_exerciser_cmd : public test_command { wr_table_ctl_.enable_address_stride = 1; host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); + host_exe_->write64(HE_WR_NUM_LINES, FPGA_32KB_CACHE_LINES - 1); // Start test he_ctl_.Start = 1; host_exe_->write64(HE_CTL, he_ctl_.value); @@ -291,20 +334,11 @@ class host_exerciser_cmd : public test_command { /* STEPS 1) Allocate DSM, Read buffer, Write buffer - 2) Write number of lines more then 32 kb 2mb/64 - 3) Set RdShared (CXL) config - 4) Run test (Buffer is not present in FPGA - FPGA read Cache miss ) - - // 2) Set RdShared (CXL) config - //3) Run test ( AFU copies cache from host memory to FPGA cache) - //4) Set write Evict (CXL) config - //5) Run test ( AFU Invalidate to FPGA cache) + 2) Write number of lines more then 32kb 2mb/64 3) Set RdShared (CXL) config 4) Run test (Buffer is not present in FPGA - FPGA read Cache miss ) */ - // 2MB / 64 - // HE_INFO // Set Read number Lines he_info_.value = host_exe_->read64(HE_INFO); @@ -312,11 +346,11 @@ class host_exerciser_cmd : public test_command { host_exe_->write64(HE_RD_NUM_LINES, FPGA_2MB_CACHE_LINES - 1); cout << "Read number Lines:" << FPGA_2MB_CACHE_LINES - 1 << endl; - cout << "Line Repeat Count:" << host_exe_->he_linerep_count_ << endl; + cout << "Line Repeat Count:" << he_linerep_count_ << endl; // set RD_CONFIG RdShared (CXL) he_rd_cfg_.value = 0; - he_rd_cfg_.line_repeat_count = host_exe_->he_linerep_count_; + he_rd_cfg_.line_repeat_count = he_linerep_count_; he_rd_cfg_.read_traffic_enable = 1; he_rd_cfg_.opcode = RD_LINE_S; host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); @@ -375,13 +409,6 @@ class host_exerciser_cmd : public test_command { 2) Write number of lines more then 32 kb 2mb/64 3) Set WR ItoMWr (CXL) config 4) Run test ( Buffer is not present in FPGA - FPGA write Cache miss ) - - //2) Set RdShared (CXL) config - //3) Run test ( AFU copies cache from host to HDM - //4) Set write Evict (CXL) 
config - //5) Run test ( AFU Invalidate to FPGA cache) - 6) Set WR ItoMWr (CXL) config - 7) Run test ( Buffer is not present in FPGA - FPGA write Cache miss ) */ // HE_INFO @@ -391,13 +418,13 @@ class host_exerciser_cmd : public test_command { cout << "Write address table size:" << he_info_.write_addr_table_size << endl; - host_exe_->write64(HE_WR_NUM_LINES, FPGA_2MB_CACHE_LINES - 1); - cout << "Read/write number Lines:" << FPGA_2MB_CACHE_LINES - 1 << endl; - cout << "Line Repeat Count:" << host_exe_->he_linerep_count_ << endl; + host_exe_->write64(HE_WR_NUM_LINES, FPGA_2MB_CACHE_LINES); + cout << "Read/write number Lines:" << FPGA_2MB_CACHE_LINES << endl; + cout << "Line Repeat Count:" << he_linerep_count_ << endl; // set W_CONFIG he_wr_cfg_.value = 0; - he_wr_cfg_.line_repeat_count = host_exe_->he_linerep_count_; + he_wr_cfg_.line_repeat_count = he_linerep_count_; he_wr_cfg_.write_traffic_enable = 1; he_wr_cfg_.opcode = WR_LINE_M; host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); @@ -467,11 +494,11 @@ class host_exerciser_cmd : public test_command { host_exe_->write64(HE_RD_NUM_LINES, FPGA_32KB_CACHE_LINES - 1); cout << "Read number Lines:" << FPGA_32KB_CACHE_LINES - 1 << endl; - cout << "Line Repeat Count:" << host_exe_->he_linerep_count_ << endl; + cout << "Line Repeat Count:" << he_linerep_count_ << endl; // set RD_CONFIG RdShared (CXL) he_rd_cfg_.value = 0; - he_rd_cfg_.line_repeat_count = host_exe_->he_linerep_count_; + he_rd_cfg_.line_repeat_count = he_linerep_count_; he_rd_cfg_.read_traffic_enable = 1; he_rd_cfg_.opcode = RD_LINE_I; host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); @@ -556,11 +583,11 @@ class host_exerciser_cmd : public test_command { host_exe_->write64(HE_WR_NUM_LINES, FPGA_32KB_CACHE_LINES - 1); cout << "Write number Lines:" << FPGA_32KB_CACHE_LINES - 1 << endl; - cout << "Line Repeat Count:" << host_exe_->he_linerep_count_ << endl; + cout << "Line Repeat Count:" << he_linerep_count_ << endl; // set RD_CONFIG he_wr_cfg_.value = 0; - 
he_wr_cfg_.line_repeat_count = host_exe_->he_linerep_count_; + he_wr_cfg_.line_repeat_count = he_linerep_count_; he_wr_cfg_.write_traffic_enable = 1; he_wr_cfg_.opcode = WR_LINE_I; host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); @@ -640,11 +667,11 @@ class host_exerciser_cmd : public test_command { host_exe_->write64(HE_RD_NUM_LINES, FPGA_32KB_CACHE_LINES - 1); cout << "Read/write number Lines:" << FPGA_32KB_CACHE_LINES - 1 << endl; - cout << "Line Repeat Count:" << host_exe_->he_linerep_count_ << endl; + cout << "Line Repeat Count:" << he_linerep_count_ << endl; // set RD_CONFIG he_rd_cfg_.value = 0; - he_rd_cfg_.line_repeat_count = host_exe_->he_linerep_count_; + he_rd_cfg_.line_repeat_count = he_linerep_count_; he_rd_cfg_.read_traffic_enable = 1; he_rd_cfg_.opcode = RD_LINE_I; host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); @@ -718,11 +745,11 @@ class host_exerciser_cmd : public test_command { host_exe_->write64(HE_WR_NUM_LINES, FPGA_32KB_CACHE_LINES - 1); cout << "Write number Lines:" << FPGA_32KB_CACHE_LINES - 1 << endl; - cout << "Line Repeat Count:" << host_exe_->he_linerep_count_ << endl; + cout << "Line Repeat Count:" << he_linerep_count_ << endl; // set RD_CONFIG he_wr_cfg_.value = 0; - he_wr_cfg_.line_repeat_count = host_exe_->he_linerep_count_; + he_wr_cfg_.line_repeat_count = he_linerep_count_; he_wr_cfg_.write_traffic_enable = 1; he_wr_cfg_.opcode = WR_PUSH_I; host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); @@ -773,6 +800,11 @@ class host_exerciser_cmd : public test_command { return 0; } + // Convert number of transactions to bandwidth (GB/s) + double he_num_xfers_to_bw(uint64_t num_lines, uint64_t num_ticks) { + return (double)(num_lines * 64) / ((1000.0 / he_clock_mhz_ * num_ticks)); + } + void he_perf_counters() { volatile he_cache_dsm_status *dsm_status = NULL; @@ -781,18 +813,26 @@ class host_exerciser_cmd : public test_command { if (!dsm_status) return; - std::cout << "\n********* DSM Status CSR Start *********" << std::endl; - - 
std::cout << "test completed :" << dsm_status->test_completed << std::endl; - std::cout << "dsm number:" << dsm_status->dsm_number << std::endl; - std::cout << "error vector:" << dsm_status->err_vector << std::endl; - std::cout << "num ticks:" << dsm_status->num_ticks << std::endl; - std::cout << "num reads:" << dsm_status->num_reads << std::endl; - std::cout << "num writes:" << dsm_status->num_writes << std::endl; - std::cout << "penalty start:" << dsm_status->penalty_start << std::endl; - std::cout << "penalty end:" << dsm_status->penalty_end << std::endl; - std::cout << "actual data:" << dsm_status->actual_data << std::endl; - std::cout << "expected data:" << dsm_status->expected_data << std::endl; + cout << "\n********* DSM Status CSR Start *********" << std::endl; + + cout << "test completed :" << dsm_status->test_completed << endl; + cout << "dsm number:" << dsm_status->dsm_number << endl; + cout << "error vector:" << dsm_status->err_vector << endl; + cout << "num ticks:" << dsm_status->num_ticks << endl; + cout << "num reads:" << dsm_status->num_reads << endl; + cout << "num writes:" << dsm_status->num_writes << endl; + cout << "penalty start:" << dsm_status->penalty_start << endl; + cout << "penalty end:" << dsm_status->penalty_end << endl; + cout << "actual data:" << dsm_status->actual_data << endl; + cout << "expected data:" << dsm_status->expected_data << endl; + + // print bandwidth + if (dsm_status->num_ticks > 0) { + double perf_data = + he_num_xfers_to_bw(dsm_status->num_reads + dsm_status->num_writes, + dsm_status->num_ticks); + host_exe_->logger_->info("Bandwidth: {0:0.3f} GB/s", perf_data); + } std::cout << "********* DSM Status CSR end *********" << std::endl; } @@ -871,9 +911,6 @@ class host_exerciser_cmd : public test_command { printf("System does not support NUMA API!\n"); return false; } - - printf("SUpported NUMA API!\n"); - int n = numa_max_node(); printf("There are %d nodes on your system\n", n + 1); @@ -883,7 +920,7 @@ class 
host_exerciser_cmd : public test_command { int node = numa_node_of_cpu(cup_num); printf("node:%d\n", node); - if (host_exe_->he_target_ == HE_TARGET_HOST) { + if (he_target_ == HE_TARGET_HOST) { numa_node_ = node; printf("HE_TARGET_HOST numa_node_:%d\n", numa_node_); @@ -893,9 +930,6 @@ class host_exerciser_cmd : public test_command { printf("HE_TARGET_FPGA numa_node_:%d\n", numa_node_); } - int num_task = numa_num_task_nodes(); - printf("num_task:%d\n", num_task); - return true; } @@ -919,42 +953,82 @@ class host_exerciser_cmd : public test_command { he_ctl_.ResetL = 1; host_exe_->write64(HE_CTL, he_ctl_.value); - if (host_exe_->he_test_ == HE_FPGA_RD_CACHE_HIT) { + if (he_test_all_ == true) { + int retvalue = 0; + ret = he_run_fpga_rd_cache_hit_test(); + if (ret != 0) { + retvalue = ret; + } + ret = he_run_fpga_wr_cache_hit_test(); + if (ret != 0) { + retvalue = ret; + } + + ret = he_run_fpga_rd_cache_miss_test(); + if (ret != 0) { + retvalue = ret; + } + ret = he_run_fpga_wr_cache_miss_test(); + if (ret != 0) { + retvalue = ret; + } + ret = he_run_host_rd_cache_hit_test(); + if (ret != 0) { + retvalue = ret; + } + ret = he_run_host_wr_cache_hit_test(); + if (ret != 0) { + retvalue = ret; + } + + ret = he_run_host_rd_cache_miss_test(); + if (ret != 0) { + retvalue = ret; + } + ret = he_run_host_wr_cache_miss_test(); + if (ret != 0) { + retvalue = ret; + } + + return retvalue; + } + + if (he_test_ == HE_FPGA_RD_CACHE_HIT) { ret = he_run_fpga_rd_cache_hit_test(); return ret; } - if (host_exe_->he_test_ == HE_FPGA_WR_CACHE_HIT) { + if (he_test_ == HE_FPGA_WR_CACHE_HIT) { ret = he_run_fpga_wr_cache_hit_test(); return ret; } - if (host_exe_->he_test_ == HE_FPGA_RD_CACHE_MISS) { + if (he_test_ == HE_FPGA_RD_CACHE_MISS) { ret = he_run_fpga_rd_cache_miss_test(); return ret; } - if (host_exe_->he_test_ == HE_FPGA_WR_CACHE_MISS) { + if (he_test_ == HE_FPGA_WR_CACHE_MISS) { ret = he_run_fpga_wr_cache_miss_test(); return ret; } - if (host_exe_->he_test_ == 
HE_HOST_RD_CACHE_HIT) { + if (he_test_ == HE_HOST_RD_CACHE_HIT) { ret = he_run_host_rd_cache_hit_test(); return ret; } - if (host_exe_->he_test_ == HE_HOST_WR_CACHE_HIT) { + if (he_test_ == HE_HOST_WR_CACHE_HIT) { ret = he_run_host_wr_cache_hit_test(); return ret; } - if (host_exe_->he_test_ == HE_HOST_RD_CACHE_MISS) { + if (he_test_ == HE_HOST_RD_CACHE_MISS) { ret = he_run_host_rd_cache_miss_test(); return ret; } - if (host_exe_->he_test_ == HE_HOST_WR_CACHE_MISS) { + if (he_test_ == HE_HOST_WR_CACHE_MISS) { ret = he_run_host_wr_cache_miss_test(); return ret; } @@ -963,40 +1037,30 @@ class host_exerciser_cmd : public test_command { } protected: - host_exerciser *host_exe_; - token::ptr_t token_; - - he_ctl he_ctl_; - he_info he_info_; - he_rd_config he_rd_cfg_; - he_wr_config he_wr_cfg_; - - he_rd_addr_table_ctrl rd_table_ctl_; - he_wr_addr_table_ctrl wr_table_ctl_; - uint8_t *dsm_buf_; - uint8_t *rd_buf_; - - uint32_t numa_node_; + bool he_continuousmode_; + uint32_t he_contmodetime_; + uint32_t he_linerep_count_; + uint32_t he_stide_; + uint32_t he_target_; + uint32_t he_test_; + bool he_test_all_; }; void he_cache_thread(uint8_t *buf_ptr, uint64_t len) { - cout << "he_cache_thread enter" << endl; if (buf_ptr == NULL || len == 0) { return; } uint64_t value; UNUSED_PARAM(value); - uint64_t cache_lines = len / 64; + uint64_t cache_lines = len / CL; uint64_t i = 0; - cout << "he_cache_thread cache_lines:" << cache_lines << endl; while (true) { if (g_stop_thread == true) { - cout << "he_cache_thread g_stop_thread " << endl; + // cout << "he_cache_thread g_stop_thread " << endl; return; } - // cout << "he_cache_thread:i "<add_option("--target", he_target_, + "host exerciser run on host or fpga") + ->transform(CLI::CheckedTransformer(he_targets)) + ->default_val("host"); + } + virtual int run(test_afu *afu, CLI::App *app) { + (void)app; + // int ret = 0; + cout << "HE LPBK run" << endl; + host_exe_ = dynamic_cast(afu); + + if (!verify_numa_node()) { + numa_node_ = 0; 
+ cout << "numa nodes are available set numa node to 0" << endl; + }; + + // reset HE cache + he_ctl_.value = 0; + he_ctl_.ResetL = 0; + host_exe_->write64(HE_CTL, he_ctl_.value); + + he_ctl_.value = 0; + he_ctl_.ResetL = 1; + host_exe_->write64(HE_CTL, he_ctl_.value); + return 0; + } +}; } // end of namespace host_exerciser diff --git a/samples/cxl_host_exerciser/cxl_he_cmd.h b/samples/cxl_host_exerciser/cxl_he_cmd.h new file mode 100644 index 000000000000..048937fd2dcf --- /dev/null +++ b/samples/cxl_host_exerciser/cxl_he_cmd.h @@ -0,0 +1,206 @@ +// Copyright(c) 2023, Intel Corporation +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Intel Corporation nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +#pragma once +#include +#include +#include + +#include "cxl_he_cmd.h" +#include "cxl_host_exerciser.h" +#include "he_cache_test.h" + +namespace host_exerciser { + +class he_cmd : public test_command { +public: + he_cmd() : host_exe_(NULL), he_clock_mhz_(400), numa_node_(0), he_target_(0) { + + he_ctl_.value = 0; + he_info_.value = 0; + he_rd_cfg_.value = 0; + he_wr_cfg_.value = 0; + rd_table_ctl_.value = 0; + wr_table_ctl_.value = 0; + } + + virtual ~he_cmd() {} + + // Convert number of transactions to bandwidth (GB/s) + double he_num_xfers_to_bw(uint64_t num_lines, uint64_t num_ticks) { + return (double)(num_lines * 64) / ((1000.0 / he_clock_mhz_ * num_ticks)); + } + + void he_perf_counters() { + volatile he_cache_dsm_status *dsm_status = NULL; + + dsm_status = reinterpret_cast( + (uint8_t *)(host_exe_->get_dsm())); + if (!dsm_status) + return; + + std::cout << "\n********* DSM Status CSR Start *********" << std::endl; + + std::cout << "test completed :" << dsm_status->test_completed << std::endl; + std::cout << "dsm number:" << dsm_status->dsm_number << std::endl; + std::cout << "error vector:" << dsm_status->err_vector << std::endl; + std::cout << "num ticks:" << dsm_status->num_ticks << std::endl; + std::cout << "num reads:" << dsm_status->num_reads << std::endl; + std::cout << "num writes:" << dsm_status->num_writes << std::endl; + std::cout << "penalty start:" << dsm_status->penalty_start << std::endl; + std::cout << "penalty 
end:" << dsm_status->penalty_end << std::endl; + std::cout << "actual data:" << dsm_status->actual_data << std::endl; + std::cout << "expected data:" << dsm_status->expected_data << std::endl; + + // print bandwidth + if (dsm_status->num_ticks > 0) { + double perf_data = + he_num_xfers_to_bw(dsm_status->num_reads + dsm_status->num_writes, + dsm_status->num_ticks); + host_exe_->logger_->info("Bandwidth: {0:0.3f} GB/s", perf_data); + } + + std::cout << "********* DSM Status CSR end *********" << std::endl; + } + + void host_exerciser_errors() { + he_err_status err_status; + uint64_t err = 0; + if (host_exe_ == NULL) + return; + + err_status.value = host_exe_->read64(HE_ERROR_STATUS); + if (err_status.data_error == 1) { + cout << "Data Integrity Check error occured" << endl; + } + + if (err_status.err_index > 0) { + cout << "Error occurred at cache line address:" << err_status.err_index + << endl; + } + + err = host_exe_->read64(HE_ERROR_EXP_DATA); + cout << "Error Expected Data:" << err << endl; + + err = host_exe_->read64(HE_ERROR_ACT_DATA0); + cout << "Error Expected Data0:" << err << endl; + + err = host_exe_->read64(HE_ERROR_ACT_DATA1); + cout << "Error Expected Data1:" << err << endl; + + err = host_exe_->read64(HE_ERROR_ACT_DATA2); + cout << "Error Expected Data2:" << err << endl; + + err = host_exe_->read64(HE_ERROR_ACT_DATA3); + cout << "Error Expected Data3:" << err << endl; + + err = host_exe_->read64(HE_ERROR_ACT_DATA4); + cout << "Error Expected Data4:" << err << endl; + + err = host_exe_->read64(HE_ERROR_ACT_DATA5); + cout << "Error Expected Data5:" << err << endl; + + err = host_exe_->read64(HE_ERROR_ACT_DATA6); + cout << "Error Expected Data6:" << err << endl; + + err = host_exe_->read64(HE_ERROR_ACT_DATA7); + cout << "Error Expected Data7:" << err << endl; + } + + int parse_input_options() { + + if (!host_exe_) + return -1; + + return 0; + } + + bool he_wait_test_completion() { + /* Wait for test completion */ + uint32_t timeout = HELPBK_TEST_TIMEOUT; 
+ + volatile uint8_t *status_ptr = host_exe_->get_dsm(); + while (0 == ((*status_ptr) & 0x1)) { + usleep(HELPBK_TEST_SLEEP_INVL); + if (--timeout == 0) { + cout << "HE LPBK TIME OUT" << std::endl; + + return false; + } + } + return true; + } + + bool verify_numa_node() { + + if (numa_available() < 0) { + printf("System does not support NUMA API!\n"); + return false; + } + + printf("SUpported NUMA API!\n"); + + int n = numa_max_node(); + printf("There are %d nodes on your system\n", n + 1); + + int cup_num = sched_getcpu(); + printf("cup_num:%d\n", cup_num); + + int node = numa_node_of_cpu(cup_num); + printf("node:%d\n", node); + + if (he_target_ == HE_TARGET_HOST) { + numa_node_ = node; + printf("HE_TARGET_HOST numa_node_:%d\n", numa_node_); + + } else { + // find fpga numa node numebr + numa_node_ = 2; + printf("HE_TARGET_FPGA numa_node_:%d\n", numa_node_); + } + + int num_config_cpu = numa_num_configured_cpus(); + printf("num_config_cpu:%d\n", num_config_cpu); + + int num_task_nodes = numa_num_task_nodes(); + printf("num_task_nodes:%d\n", num_task_nodes); + + return true; + } + +protected: + host_exerciser *host_exe_; + uint32_t he_clock_mhz_; + uint32_t numa_node_; + uint32_t he_target_; + + he_ctl he_ctl_; + he_info he_info_; + he_rd_config he_rd_cfg_; + he_wr_config he_wr_cfg_; + he_rd_addr_table_ctrl rd_table_ctl_; + he_wr_addr_table_ctrl wr_table_ctl_; +}; +} // end of namespace host_exerciser diff --git a/samples/cxl_host_exerciser/cxl_host_exerciser.cpp b/samples/cxl_host_exerciser/cxl_host_exerciser.cpp index 3d5eb10f1604..0f31d9155dce 100644 --- a/samples/cxl_host_exerciser/cxl_host_exerciser.cpp +++ b/samples/cxl_host_exerciser/cxl_host_exerciser.cpp @@ -27,14 +27,17 @@ #include #include +#include "cxl_he_cache_cmd.h" +#include "cxl_he_cache_lpbk_cmd.h" #include "cxl_host_exerciser.h" -#include "cxl_host_exerciser_cache.h" void he_sig_handler(int); int main(int argc, char *argv[]) { + host_exerciser::host_exerciser app; - app.register_command(); + 
app.register_command(); + app.register_command(); // host exerciser signal handler struct sigaction act_old, act_new; diff --git a/samples/cxl_host_exerciser/cxl_host_exerciser.h b/samples/cxl_host_exerciser/cxl_host_exerciser.h index 293293a30255..adae83320674 100644 --- a/samples/cxl_host_exerciser/cxl_host_exerciser.h +++ b/samples/cxl_host_exerciser/cxl_host_exerciser.h @@ -24,28 +24,23 @@ // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. #pragma once -#include -#include -#include #include "he_cache_test.h" +#define MEM_TG_FEATURE_ID 0x25 +#define MEM_TG_FEATURE_GUIDL 0x81599b5c2ebd4b23 +#define MEM_TG_FEATURE_GUIDH 0x0118e06b1fa349b9 +const char *HE_CACHE_AFU_ID = "0118E06B-1FA3-49B9-8159-9b5C2EBD4b23"; + namespace host_exerciser { -using opae::fpga::types::event; -using opae::fpga::types::shared_buffer; -using opae::fpga::types::token; static const uint64_t HELPBK_TEST_TIMEOUT = 30000; static const uint64_t HELPBK_TEST_SLEEP_INVL = 100; static const uint64_t CL = 64; static const uint64_t KB = 1024; static const uint64_t MB = KB * 1024; -static const uint64_t LOG2_CL = 6; - static const uint64_t BUFFER_SIZE_2MB = 2 * 1024 * 1024; - static const uint64_t FPGA_32KB_CACHE_LINES = (32 * 1024) / 64; - static const uint64_t FPGA_2MB_CACHE_LINES = (2 * 1024 * 1024) / 64; // Host execiser CSR Offset @@ -59,19 +54,16 @@ enum { HE_DSM_BASE = 0x030, HE_CTL = 0x038, HE_INFO = 0x040, - HE_WR_NUM_LINES = 0x048, HE_WR_BYTE_ENABLE = 0x050, HE_WR_CONFIG = 0x058, HE_WR_ADDR_TABLE_CTRL = 0x060, HE_WR_ADDR_TABLE_DATA = 0x068, - HE_RD_NUM_LINES = 0x070, HE_RD_CONFIG = 0x078, HE_RD_ADDR_TABLE_CTRL = 0x080, HE_RD_ADDR_TABLE_DATA = 0x088, HE_ERROR_STATUS = 0x090, - HE_ERROR_EXP_DATA = 0x098, HE_ERROR_ACT_DATA0 = 0x0A0, HE_ERROR_ACT_DATA1 = 0x0A8, @@ -81,23 +73,16 @@ enum { HE_ERROR_ACT_DATA5 = 0x0C8, HE_ERROR_ACT_DATA6 = 0x0D0, HE_ERROR_ACT_DATA7 = 0x0D8, - }; -// configures test mode -typedef enum { - HOST_EXEMODE_READ = 
0x0, - HOST_EXEMODE_WRITE = 0x1, - HOST_EXEMODE_ALL = 0x2, -} host_exe_mode; - -// Write Traffic Opcode +// Read Traffic Opcode typedef enum { RD_LINE_I = 0x0, RD_LINE_S = 0x1, RD_LINE_EM = 0x2, } he_rd_opcode; +// Write Traffic Opcode typedef enum { WR_LINE_I = 0x0, WR_LINE_M = 0x1, @@ -113,16 +98,16 @@ union he_dfh { enum { offset = HE_DFH }; uint64_t value; struct { - uint16_t CcipVersionNumber : 12; - uint8_t AfuMajVersion : 4; - uint32_t NextDfhOffset : 24; - uint8_t EOL : 1; - uint32_t Reserved : 19; - uint8_t FeatureType : 4; + uint64_t CcipVersionNumber : 12; + uint64_t AfuMajVersion : 4; + uint64_t NextDfhOffset : 24; + uint64_t EOL : 1; + uint64_t Reserved : 19; + uint64_t FeatureType : 4; }; }; -// DSM BASEL +// DSM BASE union he_dsm_base { enum { offset = HE_DSM_BASE }; uint64_t value; @@ -139,7 +124,7 @@ union he_ctl { uint64_t ResetL : 1; uint64_t Start : 1; uint64_t ForcedTestCmpl : 1; - uint64_t bios_support : 1; + uint64_t bias_support : 1; uint64_t Reserved : 60; }; }; @@ -184,7 +169,7 @@ union he_wr_config { uint64_t waitfor_completion : 1; uint64_t preread_sync_enable : 1; uint64_t postread_sync_enable : 1; - uint64_t daata_pattern : 2; + uint64_t data_pattern : 2; uint64_t cl_evict_enable : 1; uint64_t opcode : 4; uint64_t line_repeat_count : 8; @@ -233,7 +218,7 @@ union he_rd_config { uint64_t waitfor_completion : 1; uint64_t prewrite_sync_enable : 1; uint64_t postwrite_sync_enable : 1; - uint64_t daata_pattern : 2; + uint64_t data_pattern : 2; uint64_t cl_evict_enable : 1; uint64_t opcode : 4; uint64_t line_repeat_count : 8; @@ -262,7 +247,7 @@ union he_rd_addr_table_data { }; }; -// HE_RD_ADDR_TABLE_DATA +// ERROR_STATUS union he_err_status { enum { offset = HE_ERROR_STATUS }; uint64_t value; @@ -290,12 +275,6 @@ struct he_cache_dsm_status { uint32_t res5[2]; }; -const std::map he_modes = { - {"read", HOST_EXEMODE_READ}, - {"write", HOST_EXEMODE_WRITE}, - {"all", HOST_EXEMODE_ALL}, -}; - // configures test mode typedef enum { 
HE_FPGA_RD_CACHE_HIT = 0x0, @@ -323,7 +302,6 @@ const std::map he_test_modes = { {"fpgawrcachehit", HE_FPGA_WR_CACHE_HIT}, {"fpgardcachemiss", HE_FPGA_RD_CACHE_MISS}, {"fpgawrcachemiss", HE_FPGA_WR_CACHE_MISS}, - {"hostrdcachehit", HE_HOST_RD_CACHE_HIT}, {"hostwrcachehit", HE_HOST_WR_CACHE_HIT}, {"hostrdcachemiss", HE_HOST_RD_CACHE_MISS}, @@ -381,66 +359,13 @@ std::map addrtable_size = { }; -// he test type -typedef enum { - HE_DISABLE_DATA_INTEGRITY_CHECK = 0x0, - HE_ENABLE_DATA_INTEGRITY_CHECK = 0x1, -} he_data_integrity_check; - -struct MapKeyComparator { - bool operator()(const std::string &a, const std::string &b) const { - if (a.length() != b.length()) - return (a.length() < b.length()); - else - return (a < b); - } -}; - using test_afu = opae::afu_test::afu; using test_command = opae::afu_test::command; class host_exerciser : public test_afu { public: host_exerciser() - : test_afu("host_exerciser", nullptr, "warning"), count_(1), - he_continuousmode_(false), he_test_all_(0), he_contmodetime_(0), - he_clock_mhz_(0),he_linerep_count_(10), he_stide_(0), he_target_(0), he_test_(0) { - - - // test - app_.add_option( - "--test", he_test_, - "host exerciser cache test {fpgardcachehit, fpgawrcachehit, all}") - ->transform(CLI::CheckedTransformer(he_test_modes)) - ->default_val("fpgardcachehit"); - - // Configures test rollover or test termination - app_.add_option("--continuousmode", he_continuousmode_, - "test rollover or test termination") - ->default_val("false"); - - // Continuous mode time - app_.add_option("--contmodetime", he_contmodetime_, - "Continuous mode time in seconds") - ->default_val("1"); - - app_.add_option("--target", he_target_, - "host exerciser run on host or fpga") - ->transform(CLI::CheckedTransformer(he_targets)) - ->default_val("host"); - - - app_.add_option("--stride", he_stide_, "Enable stride mode") - ->default_val("0"); - - app_.add_option("--linerepcount", he_linerep_count_, "Line repeat count") - ->transform(CLI::Range(1, 256)) - 
->default_val("10"); - - // Test all - app_.add_option("--testall", he_test_all_, "Run all tests") - ->default_val("false"); - } + : test_afu("host_exerciser", nullptr, "warning"), count_(1) {} virtual int run(CLI::App *app, test_command::ptr_t test) override { int res = exit_codes::not_run; @@ -448,7 +373,7 @@ class host_exerciser : public test_afu { logger_->set_pattern(" %v"); // Info prints details of an individual run. Turn it on if doing only one // test and the user hasn't changed level from the default. - if ((log_level_.compare("warning") == 0) && !he_test_all_) + if ((log_level_.compare("warning") == 0)) logger_->set_level(spdlog::level::info); logger_->info("starting test run, count of {0:d}", count_); @@ -476,24 +401,6 @@ class host_exerciser : public test_afu { public: uint32_t count_; - bool he_continuousmode_; - bool he_test_all_; - uint32_t he_contmodetime_; - uint32_t he_clock_mhz_; - uint32_t he_linerep_count_; - uint32_t he_stide_; - uint32_t he_target_; - uint32_t he_test_; - std::map limits_; - - uint32_t get_offset(uint32_t base, uint32_t i) const { - auto limit = limits_.find(base); - auto offset = base + sizeof(uint64_t) * i; - if (limit != limits_.end() && offset > limit->second - sizeof(uint64_t)) { - throw std::out_of_range("offset out range in csr space"); - } - return offset; - } bool option_passed(std::string option_str) { if (app_.count(option_str) == 0) @@ -501,4 +408,4 @@ class host_exerciser : public test_afu { return true; } }; -} // end of namespace cxl_host_exerciser +} // namespace host_exerciser diff --git a/samples/cxl_host_exerciser/dfl-he-cache.h b/samples/cxl_host_exerciser/dfl-he-cache.h index b95df7414bdb..d6036c832dbf 100644 --- a/samples/cxl_host_exerciser/dfl-he-cache.h +++ b/samples/cxl_host_exerciser/dfl-he-cache.h @@ -30,7 +30,7 @@ #define DFL_HE_CACHE_BASE 0 /** - * DFL_FPGA_GET_API_VERSION - _IO(DFL_FPGA_MAGIC, DFL_FPGA_BASE + 0) + * DFL_HE_CACHE_GET_API_VERSION - _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 0) * 
* Report the version of the driver API. * Return: Driver API Version. @@ -40,7 +40,7 @@ _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 0) /** - * DFL_FPGA_CHECK_EXTENSION - _IO(DFL_FPGA_MAGIC, DFL_FPGA_BASE + 1) + * DFL_HE_CACHE_CHECK_EXTENSION - _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 1) * * Check whether an extension is supported. * Return: 0 if not supported, otherwise the extension is supported. @@ -49,18 +49,19 @@ #define DFL_HE_CACHE_CHECK_EXTENSION \ _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 1) -#define DFL_HE_CACHE_GET_REGION_INFO \ - _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 2) - /** - * FPGA_PORT_GET_REGION_INFO - _IOWR(FPGA_MAGIC, PORT_BASE + 2, - * struct dfl_he_cache_region_info) + * DFL_HE_CACHE_GET_REGION_INFO - _IOWR(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + + * 2, struct dfl_he_cache_region_info) * * Retrieve information about a device memory region. - * Caller provides struct dfl_fpga_port_region_info with index value set. + * Caller provides struct dfl_he_cache_region_info with flags. * Driver returns the region info in other fields. * Return: 0 on success, -errno on failure. 
*/ + +#define DFL_HE_CACHE_GET_REGION_INFO \ + _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 2) + struct dfl_he_cache_region_info { /* Input */ __u32 argsz; /* Structure length */ @@ -73,44 +74,25 @@ struct dfl_he_cache_region_info { __u64 offset; /* Region offset from start of device fd */ }; -#define DFL_HE_CACHE_SET_DSM_INFO _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 3) - -struct dfl_he_cache_dsm_info { - /* Input */ - __u32 argsz; /* Structure length */ - __u64 user_addr; /* Process virtual address */ - __u64 length; /* Length of mapping (bytes)*/ -}; - -#define DFL_HE_CACHE_CLEAR_DSM_INFO \ - _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 4) - -#define DFL_HE_CACHE_ALLOC_ADDR_TABLE \ - _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 5) -#define DFL_HE_CACHE_FREE_ADDR_TABLE \ - _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 6) -#define DFL_HE_CACHE_APPEND_ADDR_TABLE \ - _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 7) - -#define DFL_HE_CACHE_NUM_LINES_MIN 1 -#define DFL_HE_CACHE_NUM_LINES_MAX 0xffff - -struct dfl_he_cache_addr_table { - /* Input */ - __u32 argsz; /* Structure length */ - __u32 flags; /* Address Table ID */ -#define DFL_HE_CACHE_READ_ADDR_TABLE (1 << 0) -#define DFL_HE_CACHE_WRITE_ADDR_TABLE (1 << 1) - __u32 cache_lines; /* Buffer size/offset in cache lines */ -}; - -#define DFL_HE_CACHE_NUMA_DMA_MAP \ - _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 12) -#define DFL_HE_CACHE_NUMA_DMA_UNMAP \ - _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 13) +/** +* DFL_HE_CACHE_NUMA_DMA_MAP - _IOWR(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 3, +* struct dfl_he_cache_dma_map) +* +* Map the dma memory per user_addr,length and numa node which are provided by +caller. +* The driver allocates memory on the numa node, converts the user's virtual +address +* to a continuous physical address, and writes the physical address to +* the host executor's read/write address table CSR. + +* This interface only accepts page-size aligned user memory for dma mapping. 
+* Return: 0 on success, -errno on failure. +*/ #define DFL_ARRAY_MAX_SIZE 0x10 +#define DFL_HE_CACHE_NUMA_DMA_MAP _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 3) + struct dfl_he_cache_dma_map { /* Input */ __u32 argsz; /* Structure length */ @@ -121,13 +103,26 @@ struct dfl_he_cache_dma_map { __u64 csr_array[DFL_ARRAY_MAX_SIZE]; /* CSR */ }; +/** + * DFL_HE_CACHE_NUMA_DMA_UNMAP - _IOWR(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + + * 4, struct dfl_he_cache_dma_unmap) + * + * Unmpas the dma memory per user_addr and length which are provided by caller. + * The driver deletes the physical pages of the user address and writes a zero + * to the read/write address table CSR. + * Return: 0 on success, -errno on failure. + */ + +#define DFL_HE_CACHE_NUMA_DMA_UNMAP \ + _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 4) + struct dfl_he_cache_dma_unmap { /* Input */ __u32 argsz; /* Structure length */ __u32 flags; /* flags */ __u64 user_addr; /* Process virtual address */ __u64 length; /* Length of mapping (bytes)*/ - __u64 csr_array[DFL_ARRAY_MAX_SIZE]; /* CSR */ + __u64 csr_array[DFL_ARRAY_MAX_SIZE]; /* CSR */ }; #endif /* _UAPI_LINUX_HE_CACHE_DFL_H */ diff --git a/samples/cxl_host_exerciser/he_cache_test.h b/samples/cxl_host_exerciser/he_cache_test.h index 1e3b4d503c60..db4d3f340d06 100644 --- a/samples/cxl_host_exerciser/he_cache_test.h +++ b/samples/cxl_host_exerciser/he_cache_test.h @@ -144,7 +144,6 @@ bool sysfs_read_u64(const char *path, uint64_t *value) { namespace opae { namespace afu_test { -namespace fpga = fpga::types; template inline bool parse_match_int(const char *s, regmatch_t m, T &v, int radix = 10) { From 79fe277cd1004e0542e98ba6729c73fe6624bc94 Mon Sep 17 00:00:00 2001 From: anandaravuri Date: Wed, 20 Sep 2023 10:20:25 -0700 Subject: [PATCH 03/11] fix: cmakefile typo Signed-off-by: anandaravuri --- samples/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index 
4f3661b4be4e..ab942e774b65 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -69,6 +69,6 @@ opae_add_subdirectory(mem_tg) opae_add_subdirectory(host_exerciser) opae_add_subdirectory(n5010-test) opae_add_subdirectory(n5010-ctl) -opae_add_subdirectory(clx_mem_tg) +opae_add_subdirectory(cxl_mem_tg) opae_add_subdirectory(cxl_host_exerciser) From 83b05bc7a3b8ac7912394668daed604f7d4fc957 Mon Sep 17 00:00:00 2001 From: anandaravuri Date: Wed, 20 Sep 2023 13:50:54 -0700 Subject: [PATCH 04/11] fix: ci build errors Signed-off-by: anandaravuri --- samples/cxl_host_exerciser/CMakeLists.txt | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/samples/cxl_host_exerciser/CMakeLists.txt b/samples/cxl_host_exerciser/CMakeLists.txt index ed8c2f5534dc..b9ee688717ea 100644 --- a/samples/cxl_host_exerciser/CMakeLists.txt +++ b/samples/cxl_host_exerciser/CMakeLists.txt @@ -24,22 +24,33 @@ ## ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ## POSSIBILITY OF SUCH DAMAGE. 
-if (OPAE_WITH_CLI11 AND OPAE_WITH_SPDLOG) +if (OPAE_WITH_CLI11 AND OPAE_WITH_SPDLOG AND OPAE_WITH_NUMA) + + if (fmt_LIBRARIES) + # if we found fmt before (from CMakeLists.txt) + # then we need to find it again from this directory + # so we can "import" the fmt::fmt link target + find_package(fmt) + endif (fmt_LIBRARIES) + opae_add_executable(TARGET cxl_host_exerciser SOURCE cxl_host_exerciser.cpp LIBS - opae-c opae-cxx-core + opae-c ${spdlog_LIBRARIES} ${json-c_LIBRARIES} ${uuid_LIBRARIES} ${numa_LIBRARIES} + ${fmt_LIBRARIES} COMPONENT samplebin ) target_include_directories(cxl_host_exerciser PRIVATE ${OPAE_INCLUDE_PATHS} ${CMAKE_CURRENT_SOURCE_DIR} - ${CLI11_INCLUDE_DIRS} - ${spdlog_INCLUDE_DIRS}) -endif(OPAE_WITH_CLI11 AND OPAE_WITH_SPDLOG) + ${CLI11_INCLUDE_DIRS} + ${numa_INCLUDE_DIRS} + ${spdlog_INCLUDE_DIRS}) + +endif(OPAE_WITH_CLI11 AND OPAE_WITH_SPDLOG AND OPAE_WITH_NUMA) From 16dbe5b644babdcd0cc62e3075311e09410d25c2 Mon Sep 17 00:00:00 2001 From: anandaravuri Date: Wed, 20 Sep 2023 14:13:42 -0700 Subject: [PATCH 05/11] fix: ci build error Signed-off-by: anandaravuri --- samples/cxl_host_exerciser/he_cache_test.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/samples/cxl_host_exerciser/he_cache_test.h b/samples/cxl_host_exerciser/he_cache_test.h index db4d3f340d06..ce9c33b6046b 100644 --- a/samples/cxl_host_exerciser/he_cache_test.h +++ b/samples/cxl_host_exerciser/he_cache_test.h @@ -26,22 +26,23 @@ #pragma once -#include #include #include #include #include #include #include -#include -#include -#include #include #include #include #include #include #include +#include +#include +#include +#include +#include #include "dfl-he-cache.h" From 688631f4fc7170aedf650bc56b38646621f66b6f Mon Sep 17 00:00:00 2001 From: anandaravuri Date: Wed, 20 Sep 2023 14:53:37 -0700 Subject: [PATCH 06/11] fix: ci build error Signed-off-by: anandaravuri --- samples/cxl_host_exerciser/CMakeLists.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git 
a/samples/cxl_host_exerciser/CMakeLists.txt b/samples/cxl_host_exerciser/CMakeLists.txt index b9ee688717ea..7298b63b62d9 100644 --- a/samples/cxl_host_exerciser/CMakeLists.txt +++ b/samples/cxl_host_exerciser/CMakeLists.txt @@ -45,6 +45,7 @@ if (OPAE_WITH_CLI11 AND OPAE_WITH_SPDLOG AND OPAE_WITH_NUMA) ${fmt_LIBRARIES} COMPONENT samplebin ) + target_include_directories(cxl_host_exerciser PRIVATE ${OPAE_INCLUDE_PATHS} @@ -53,4 +54,12 @@ if (OPAE_WITH_CLI11 AND OPAE_WITH_SPDLOG AND OPAE_WITH_NUMA) ${numa_INCLUDE_DIRS} ${spdlog_INCLUDE_DIRS}) + target_compile_options(cxl_host_exerciser PUBLIC + -Wno-unused-result + ) + + target_compile_definitions(cxl_host_exerciser PUBLIC + ${spdlog_DEFINITIONS} + ) + endif(OPAE_WITH_CLI11 AND OPAE_WITH_SPDLOG AND OPAE_WITH_NUMA) From f96980774b2cf1a98dd2faf290e275680af18c26 Mon Sep 17 00:00:00 2001 From: anandaravuri Date: Wed, 20 Sep 2023 15:12:35 -0700 Subject: [PATCH 07/11] fix: ci build errors Signed-off-by: anandaravuri --- samples/cxl_host_exerciser/he_cache_test.h | 1 + 1 file changed, 1 insertion(+) diff --git a/samples/cxl_host_exerciser/he_cache_test.h b/samples/cxl_host_exerciser/he_cache_test.h index ce9c33b6046b..4841eb9267ab 100644 --- a/samples/cxl_host_exerciser/he_cache_test.h +++ b/samples/cxl_host_exerciser/he_cache_test.h @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include From 62a889447f0b913f847928ecec4bad17777edd5f Mon Sep 17 00:00:00 2001 From: Thanneeru Srinivasulu Date: Sat, 23 Sep 2023 03:52:19 +0530 Subject: [PATCH 08/11] fix: he cache ioctl Signed-off-by: Thanneeru Srinivasulu --- libraries/plugins/xfpga/fpga-dfl.h | 169 +++++++++-- samples/cxl_host_exerciser/cxl_he_cache_cmd.h | 269 ++++-------------- samples/cxl_host_exerciser/cxl_he_cmd.h | 56 ++-- .../cxl_host_exerciser/cxl_host_exerciser.h | 5 +- samples/cxl_host_exerciser/dfl-he-cache.h | 128 --------- samples/cxl_host_exerciser/he_cache_test.h | 242 ++++++++-------- 6 files changed, 351 insertions(+), 518 deletions(-) 
delete mode 100644 samples/cxl_host_exerciser/dfl-he-cache.h diff --git a/libraries/plugins/xfpga/fpga-dfl.h b/libraries/plugins/xfpga/fpga-dfl.h index fa5af9ae87bc..36fe3b98671d 100644 --- a/libraries/plugins/xfpga/fpga-dfl.h +++ b/libraries/plugins/xfpga/fpga-dfl.h @@ -1,28 +1,17 @@ -// Copyright(c) 2017-2020, Intel Corporation -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// * Neither the name of Intel Corporation nor the names of its contributors -// may be used to endorse or promote products derived from this software -// without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. 
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Header File for FPGA DFL User API + * + * Copyright (C) 2017-2018 Intel Corporation, Inc. + * + * Authors: + * Kang Luwei + * Zhang Yi + * Wu Hao + * Xiao Guangrong + * Tim Whisonant + * Ananda Ravuri + */ #ifndef _UAPI_LINUX_FPGA_DFL_H #define _UAPI_LINUX_FPGA_DFL_H @@ -44,6 +33,8 @@ #define DFL_FPGA_BASE 0 #define DFL_PORT_BASE 0x40 #define DFL_FME_BASE 0x80 +#define DFL_PCI_SVA_BASE 0xf8 +#define DFL_CXL_CACHE_BASE 0xA0 /* Common IOCTLs for both FME and AFU file descriptor */ @@ -134,12 +125,20 @@ struct dfl_fpga_port_region_info { * Map the dma memory per user_addr and length which are provided by caller. * Driver fills the iova in provided struct afu_port_dma_map. * This interface only accepts page-size aligned user memory for dma mapping. + * + * Setting only one of DFL_DMA_MAP_FLAG_READ or WRITE limits FPGA-initiated + * DMA requests to only reads or only writes. To be back-compatiable with + * legacy driver, setting neither flag is equivalent to setting both flags: + * both read and write are requests permitted. + * * Return: 0 on success, -errno on failure. */ struct dfl_fpga_port_dma_map { /* Input */ __u32 argsz; /* Structure length */ - __u32 flags; /* Zero for now */ + __u32 flags; +#define DFL_DMA_MAP_FLAG_READ (1 << 0)/* readable from device */ +#define DFL_DMA_MAP_FLAG_WRITE (1 << 1)/* writable from device */ __u64 user_addr; /* Process virtual address */ __u64 length; /* Length of mapping (bytes)*/ /* Output */ @@ -169,7 +168,7 @@ struct dfl_fpga_port_dma_unmap { * * @start: Index of the first irq. * @count: The number of eventfd handler. - * @evtfds: Eventfd handler. + * @evtfds: Eventfd handlers. 
*/ struct dfl_fpga_irq_set { __u32 start; @@ -289,4 +288,120 @@ struct dfl_fpga_fme_port_pr { DFL_FME_BASE + 4, \ struct dfl_fpga_irq_set) +/** + * DFL_PCI_SVA_BIND_DEV - _IO(DFL_FPGA_MAGIC, DFL_PCI_SVA_BASE + 0) + * + * Ensure that a PASID is present in the user process and enable the + * PASID on the IOMMU domain of the device associated with the file handle. + * Returns the PASID on success, -errno on failure. + */ +#define DFL_PCI_SVA_BIND_DEV _IO(DFL_FPGA_MAGIC, \ + DFL_PCI_SVA_BASE + 0) + +/** + * DFL_PCI_SVA_UNBIND_DEV - _IO(DFL_FPGA_MAGIC, DFL_PCI_SVA_BASE + 1) + * + * Unbind the current PASID from the device. + */ +#define DFL_PCI_SVA_UNBIND_DEV _IO(DFL_FPGA_MAGIC, \ + DFL_PCI_SVA_BASE + 1) + + /** + * DFL_CXL_CACHE_GET_REGION_INFO - _IOWR(DFL_FPGA_MAGIC, DFL_CXL_CACHE_BASE + 0, + * struct dfl_cxl_cache_region_info) + * + * Retrieve information about a device memory region. + * Caller provides struct dfl_cxl_cache_region_info with flags. + * Driver returns the region info in other fields. + * Return: 0 on success, -errno on failure. 
+ */ + +#define DFL_CXL_CACHE_GET_REGION_INFO _IO(DFL_FPGA_MAGIC, DFL_CXL_CACHE_BASE + 0) + + /** + * struct dfl_cxl_cache_region_info - CXL cache region information + * @argsz: structure length + * @flags: access permission + * @size: region size (bytes) + * @offset: region offset from start of device fd + * + * to retrieve information about a device memory region + */ +struct dfl_cxl_cache_region_info { + __u32 argsz; + __u32 flags; +#define DFL_CXL_CACHE_REGION_READ BIT(0) +#define DFL_CXL_CACHE_REGION_WRITE BIT(1) +#define DFL_CXL_CACHE_REGION_MMAP BIT(2) + __u64 size; + __u64 offset; +}; + +/** + * DFL_CXL_CACHE_NUMA_DMA_MAP - _IOWR(DFL_FPGA_MAGIC, DFL_CXL_CACHE_BASE + 1, + * struct dfl_cxl_cache_dma_map) + * + * Map the dma memory per user_addr, length and numa node which are provided by caller + * The driver allocates memory on the numa node, converts the user's virtual address + * to a continuous physical address, and writes the physical address to + * the cxl cache read/write address table CSR. + + * This interface only accepts page-size aligned user memory for dma mapping. + * Return: 0 on success, -errno on failure. + */ + +#define DFL_ARRAY_MAX_SIZE 0x10 + +#define DFL_CXL_CACHE_NUMA_DMA_MAP _IO(DFL_FPGA_MAGIC, DFL_CXL_CACHE_BASE + 1) + + /** + * struct dfl_cxl_cache_dma_map - maps user address to physical address. + * @argsz: structure length + * @flags: flags + * @user_addr: user mmap virtual address + * @length: length of mapping (bytes) + * @numa_node: Numa node number + * @csr_array: array of region address offset + * + * maps user allocated virtual address to physical address. 
+ */ +struct dfl_cxl_cache_dma_map { + __u32 argsz; + __u32 flags; + __u64 user_addr; + __u64 length; + __u32 numa_node; + __u64 csr_array[DFL_ARRAY_MAX_SIZE]; +}; + +/** + * DFL_CXL_CACHE_NUMA_DMA_UNMAP - _IOWR(DFL_FPGA_MAGIC, DFL_CXL_CACHE_BASE + 1, + * struct dfl_cxl_cache_dma_unmap) + * + * Unmaps the dma memory per user_addr and length which are provided by caller + * The driver deletes the physical pages of the user address and writes a zero + * to the read/write address table CSR. + * Return: 0 on success, -errno on failure. + */ + +#define DFL_CXL_CACHE_NUMA_DMA_UNMAP _IO(DFL_FPGA_MAGIC, DFL_CXL_CACHE_BASE + 2) + + /** + * struct dfl_cxl_cache_dma_unmap - unmaps user allocated memory. + * @argsz: structure length + * @flags: flags + * @user_addr: user mmap virtual address + * @length: length of mapping (bytes) + * @csr_array: array of region address offset + * + * unmaps user allocated memory. + */ +struct dfl_cxl_cache_dma_unmap { + __u32 argsz; + __u32 flags; + __u64 user_addr; + __u64 length; + __u64 csr_array[DFL_ARRAY_MAX_SIZE]; +}; + #endif /* _UAPI_LINUX_FPGA_DFL_H */ diff --git a/samples/cxl_host_exerciser/cxl_he_cache_cmd.h b/samples/cxl_host_exerciser/cxl_he_cache_cmd.h index 81460d81bd6d..dd0631b04325 100644 --- a/samples/cxl_host_exerciser/cxl_he_cache_cmd.h +++ b/samples/cxl_host_exerciser/cxl_he_cache_cmd.h @@ -39,7 +39,7 @@ volatile static bool g_stop_thread = false; void he_sig_handler(int) { g_he_exit = true; g_stop_thread = true; - printf("HE signal handler exit app \n"); + cout << "HE signal handler exit app" << endl; } namespace host_exerciser { @@ -69,9 +69,11 @@ class he_cache_cmd : public he_cmd { virtual uint64_t guidh() const override { return MEM_TG_FEATURE_GUIDH; } virtual void add_options(CLI::App *app) override { + + // test mode app->add_option( "--test", he_test_, - "host exerciser cache test {fpgardcachehit, fpgawrcachehit, all}") + "host exerciser cache test") ->transform(CLI::CheckedTransformer(he_test_modes)) 
->default_val("fpgardcachehit"); @@ -121,12 +123,12 @@ class he_cache_cmd : public he_cmd { // HE_INFO // Set Read number Lines he_info_.value = host_exe_->read64(HE_INFO); - cout << "Read address table size:" << he_info_.read_addr_table_size << endl; - - cout << "Numa node:" << numa_node_ << endl; host_exe_->write64(HE_RD_NUM_LINES, FPGA_32KB_CACHE_LINES); + cout << "Read number Lines:" << FPGA_32KB_CACHE_LINES << endl; cout << "Line Repeat Count:" << he_linerep_count_ << endl; + cout << "Read address table size:" << he_info_.read_addr_table_size << endl; + cout << "Numa node:" << numa_node_ << endl; // set RD_CONFIG RdShared (CXL) he_rd_cfg_.value = 0; @@ -172,8 +174,7 @@ class he_cache_cmd : public he_cmd { he_perf_counters(); cout << "********** AFU Copied host cache to FPGA Cache successfully " - "********** " - << endl; + "********** " << endl; // set RD_CONFIG RdShared (CXL) he_rd_cfg_.value = 0; @@ -207,17 +208,15 @@ class he_cache_cmd : public he_cmd { host_exe_->free_dsm(); host_exe_->free_cache_read(); - cout - << "********** AFU reads cache from FPGA Cache successfully ********** " - << endl; - + cout << "********** AFU reads cache from FPGA Cache successfully" + " **********" << endl; cout << "********** FPGA Read cache hit test end**********" << endl; return 0; } int he_run_fpga_wr_cache_hit_test() { - cout << "********** FPGA Write cache hit test start**********" << endl; + cout << "********** FPGA Write cache hit test start**********" << endl; /* STEPS 1) Allocate DSM, Read buffer, Write buffer // flush @@ -233,13 +232,13 @@ class he_cache_cmd : public he_cmd { // HE_INFO // Set Read number Lines he_info_.value = host_exe_->read64(HE_INFO); - cout << "Read address table size:" << he_info_.read_addr_table_size << endl; - cout << "Write address table size:" << he_info_.write_addr_table_size - << endl; - host_exe_->write64(HE_RD_NUM_LINES, FPGA_32KB_CACHE_LINES); + cout << "Read/write number Lines:" << FPGA_32KB_CACHE_LINES << endl; cout << "Line 
Repeat Count:" << he_linerep_count_ << endl; + cout << "Read address table size:" << he_info_.read_addr_table_size << endl; + cout << "Write address table size:" << he_info_.write_addr_table_size + << endl; // set RD_CONFIG RdShared (CXL) he_rd_cfg_.value = 0; @@ -255,7 +254,7 @@ class he_cache_cmd : public he_cmd { // Allocate DSM buffer if (!host_exe_->allocate_dsm()) { - cerr << "alloc dsm failed" << endl; + cerr << "allocate dsm failed" << endl; return -1; } @@ -285,8 +284,7 @@ class he_cache_cmd : public he_cmd { he_perf_counters(); cout << "********** AFU Copied host cache to FPGA Cache successfully " - "********** " - << endl; + "********** " << endl; // set W_CONFIG he_wr_cfg_.value = 0; @@ -300,7 +298,7 @@ class he_cache_cmd : public he_cmd { wr_table_ctl_.enable_address_stride = 1; host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); - host_exe_->write64(HE_WR_NUM_LINES, FPGA_32KB_CACHE_LINES - 1); + host_exe_->write64(HE_WR_NUM_LINES, FPGA_32KB_CACHE_LINES); // Start test he_ctl_.Start = 1; host_exe_->write64(HE_CTL, he_ctl_.value); @@ -330,6 +328,7 @@ class he_cache_cmd : public he_cmd { } int he_run_fpga_rd_cache_miss_test() { + cout << "********** FPGA Read cache miss test start**********" << endl; /* STEPS @@ -342,11 +341,11 @@ class he_cache_cmd : public he_cmd { // HE_INFO // Set Read number Lines he_info_.value = host_exe_->read64(HE_INFO); - cout << "Read address table size:" << he_info_.read_addr_table_size << endl; + host_exe_->write64(HE_RD_NUM_LINES, FPGA_2MB_CACHE_LINES); - host_exe_->write64(HE_RD_NUM_LINES, FPGA_2MB_CACHE_LINES - 1); - cout << "Read number Lines:" << FPGA_2MB_CACHE_LINES - 1 << endl; + cout << "Read number Lines:" << FPGA_2MB_CACHE_LINES << endl; cout << "Line Repeat Count:" << he_linerep_count_ << endl; + cout << "Read address table size:" << he_info_.read_addr_table_size << endl; // set RD_CONFIG RdShared (CXL) he_rd_cfg_.value = 0; @@ -362,13 +361,13 @@ class he_cache_cmd : public he_cmd { // Allocate DSM 
buffer if (!host_exe_->allocate_dsm()) { - cerr << "alloc dsm failed" << endl; + cerr << "allocate dsm failed" << endl; return -1; } - // Allocate Read, Write buffer - if (!host_exe_->allocate_cache_read_write(BUFFER_SIZE_2MB, numa_node_)) { - cerr << "allocate cache read write failed" << endl; + // Allocate Read buffer + if (!host_exe_->allocate_cache_read(BUFFER_SIZE_2MB, numa_node_)) { + cerr << "allocate cache read failed" << endl; host_exe_->free_dsm(); return -1; } @@ -384,25 +383,24 @@ class he_cache_cmd : public he_cmd { cerr << "timeout error" << endl; he_perf_counters(); host_exerciser_errors(); - host_exe_->free_cache_read_write(); + host_exe_->free_cache_read(); host_exe_->free_dsm(); return -1; } he_perf_counters(); - host_exe_->free_cache_read_write(); + host_exe_->free_cache_read(); host_exe_->free_dsm(); cout << "********** AFU Read FPGA Cache Miss successfully ********** " << endl; - cout << "********** FPGA Read cache miss test end**********" << endl; return 0; } int he_run_fpga_wr_cache_miss_test() { - cout << "********** FPGA write cache miss test start**********" << endl; + cout << "********** FPGA write cache miss test start**********" << endl; /* STEPS 1) Allocate DSM, Read buffer, Write buffer @@ -414,13 +412,13 @@ class he_cache_cmd : public he_cmd { // HE_INFO // Set Read number Lines he_info_.value = host_exe_->read64(HE_INFO); - cout << "Read address table size:" << he_info_.read_addr_table_size << endl; - cout << "Write address table size:" << he_info_.write_addr_table_size - << endl; - host_exe_->write64(HE_WR_NUM_LINES, FPGA_2MB_CACHE_LINES); + cout << "Read/write number Lines:" << FPGA_2MB_CACHE_LINES << endl; cout << "Line Repeat Count:" << he_linerep_count_ << endl; + cout << "Read address table size:" << he_info_.read_addr_table_size << endl; + cout << "Write address table size:" << he_info_.write_addr_table_size + << endl; // set W_CONFIG he_wr_cfg_.value = 0; @@ -436,7 +434,7 @@ class he_cache_cmd : public he_cmd { // Allocate 
DSM buffer if (!host_exe_->allocate_dsm()) { - cerr << "alloc dsm failed" << endl; + cerr << "allocate dsm failed" << endl; return -1; } @@ -469,14 +467,13 @@ class he_cache_cmd : public he_cmd { cout << "********** AFU Write FPGA Cache Miss successfully ********** " << endl; - cout << "********** FPGA Write cache miss test end**********" << endl; return 0; } int he_run_host_rd_cache_hit_test() { - cout << "********** 1 Host LLC Read cache hit test start**********" << endl; + cout << "********** 1 Host LLC Read cache hit test start**********" << endl; /* STEPS 1) Allocate DSM, Read buffer @@ -488,13 +485,13 @@ class he_cache_cmd : public he_cmd { // HE_INFO // Set Read number Lines he_info_.value = host_exe_->read64(HE_INFO); - cout << "Read address table size:" << he_info_.read_addr_table_size << endl; - cout << "Write address table size:" << he_info_.write_addr_table_size - << endl; + host_exe_->write64(HE_RD_NUM_LINES, FPGA_32KB_CACHE_LINES); - host_exe_->write64(HE_RD_NUM_LINES, FPGA_32KB_CACHE_LINES - 1); - cout << "Read number Lines:" << FPGA_32KB_CACHE_LINES - 1 << endl; + cout << "Read number Lines:" << FPGA_32KB_CACHE_LINES << endl; cout << "Line Repeat Count:" << he_linerep_count_ << endl; + cout << "Read address table size:" << he_info_.read_addr_table_size << endl; + cout << "Write address table size:" << he_info_.write_addr_table_size + << endl; // set RD_CONFIG RdShared (CXL) he_rd_cfg_.value = 0; @@ -545,8 +542,6 @@ class he_cache_cmd : public he_cmd { return -1; } - he_perf_counters(); - g_stop_thread = true; t1.join(); @@ -556,14 +551,13 @@ class he_cache_cmd : public he_cmd { host_exe_->free_dsm(); cout << "********** AFU Copied host cache to FPGA Cache successfully " - "********** " - << endl; - + "********** " << endl; cout << "********** Host LLC cache hit test end**********" << endl; return 0; } int he_run_host_wr_cache_hit_test() { + cout << "********** Host LLC Write cache hit test start**********" << endl; /* @@ -577,13 +571,13 @@ class 
he_cache_cmd : public he_cmd { // HE_INFO // Set Read number Lines he_info_.value = host_exe_->read64(HE_INFO); - cout << "Read address table size:" << he_info_.read_addr_table_size << endl; - cout << "Write address table size:" << he_info_.write_addr_table_size - << endl; - host_exe_->write64(HE_WR_NUM_LINES, FPGA_32KB_CACHE_LINES - 1); - cout << "Write number Lines:" << FPGA_32KB_CACHE_LINES - 1 << endl; + host_exe_->write64(HE_WR_NUM_LINES, FPGA_32KB_CACHE_LINES); + cout << "Write number Lines:" << FPGA_32KB_CACHE_LINES << endl; cout << "Line Repeat Count:" << he_linerep_count_ << endl; + cout << "Read address table size:" << he_info_.read_addr_table_size << endl; + cout << "Write address table size:" << he_info_.write_addr_table_size + << endl; // set RD_CONFIG he_wr_cfg_.value = 0; @@ -661,13 +655,12 @@ class he_cache_cmd : public he_cmd { // HE_INFO // Set Read number Lines he_info_.value = host_exe_->read64(HE_INFO); - cout << "Read address table size:" << he_info_.read_addr_table_size << endl; - cout << "Write address table size:" << he_info_.write_addr_table_size - << endl; - host_exe_->write64(HE_RD_NUM_LINES, FPGA_32KB_CACHE_LINES - 1); cout << "Read/write number Lines:" << FPGA_32KB_CACHE_LINES - 1 << endl; cout << "Line Repeat Count:" << he_linerep_count_ << endl; + cout << "Read address table size:" << he_info_.read_addr_table_size << endl; + cout << "Write address table size:" << he_info_.write_addr_table_size + << endl; // set RD_CONFIG he_rd_cfg_.value = 0; @@ -694,9 +687,6 @@ class he_cache_cmd : public he_cmd { return -1; } - // flush host cache - // int status = cacheflush((host_exe_->get_read(), BUFFER_SIZE_2MB, BCACHE); - // start he_ctl_.Start = 1; host_exe_->write64(HE_CTL, he_ctl_.value); @@ -714,7 +704,6 @@ class he_cache_cmd : public he_cmd { } he_perf_counters(); - host_exe_->free_cache_read(); host_exe_->free_dsm(); @@ -726,6 +715,7 @@ class he_cache_cmd : public he_cmd { } int he_run_host_wr_cache_miss_test() { + cout << "********** 
Host LLC Write cache miss test start**********" << endl; /* @@ -739,13 +729,12 @@ class he_cache_cmd : public he_cmd { // HE_INFO // Set write number Lines he_info_.value = host_exe_->read64(HE_INFO); + host_exe_->write64(HE_WR_NUM_LINES, 1); + cout << "Write number Lines:" << 1 << endl; + cout << "Line Repeat Count:" << he_linerep_count_ << endl; cout << "Read address table size:" << he_info_.read_addr_table_size << endl; cout << "Write address table size:" << he_info_.write_addr_table_size - << endl; - - host_exe_->write64(HE_WR_NUM_LINES, FPGA_32KB_CACHE_LINES - 1); - cout << "Write number Lines:" << FPGA_32KB_CACHE_LINES - 1 << endl; - cout << "Line Repeat Count:" << he_linerep_count_ << endl; + << endl; // set RD_CONFIG he_wr_cfg_.value = 0; @@ -765,7 +754,7 @@ class he_cache_cmd : public he_cmd { return -1; } - // Allocate Read buffer + // Allocate write buffer if (!host_exe_->allocate_cache_write(BUFFER_SIZE_2MB, numa_node_)) { cerr << "allocate cache read failed" << endl; host_exe_->free_dsm(); @@ -789,7 +778,6 @@ class he_cache_cmd : public he_cmd { } he_perf_counters(); - host_exe_->free_cache_write(); host_exe_->free_dsm(); @@ -800,139 +788,6 @@ class he_cache_cmd : public he_cmd { return 0; } - // Convert number of transactions to bandwidth (GB/s) - double he_num_xfers_to_bw(uint64_t num_lines, uint64_t num_ticks) { - return (double)(num_lines * 64) / ((1000.0 / he_clock_mhz_ * num_ticks)); - } - - void he_perf_counters() { - volatile he_cache_dsm_status *dsm_status = NULL; - - dsm_status = reinterpret_cast( - (uint8_t *)(host_exe_->get_dsm())); - if (!dsm_status) - return; - - cout << "\n********* DSM Status CSR Start *********" << std::endl; - - cout << "test completed :" << dsm_status->test_completed << endl; - cout << "dsm number:" << dsm_status->dsm_number << endl; - cout << "error vector:" << dsm_status->err_vector << endl; - cout << "num ticks:" << dsm_status->num_ticks << endl; - cout << "num reads:" << dsm_status->num_reads << endl; - cout << 
"num writes:" << dsm_status->num_writes << endl; - cout << "penalty start:" << dsm_status->penalty_start << endl; - cout << "penalty end:" << dsm_status->penalty_end << endl; - cout << "actual data:" << dsm_status->actual_data << endl; - cout << "expected data:" << dsm_status->expected_data << endl; - - // print bandwidth - if (dsm_status->num_ticks > 0) { - double perf_data = - he_num_xfers_to_bw(dsm_status->num_reads + dsm_status->num_writes, - dsm_status->num_ticks); - host_exe_->logger_->info("Bandwidth: {0:0.3f} GB/s", perf_data); - } - - std::cout << "********* DSM Status CSR end *********" << std::endl; - } - - void host_exerciser_errors() { - he_err_status err_status; - uint64_t err = 0; - if (host_exe_ == NULL) - return; - - err_status.value = host_exe_->read64(HE_ERROR_STATUS); - if (err_status.data_error == 1) { - cout << "Data Integrity Check error occured" << endl; - } - - if (err_status.err_index > 0) { - cout << "Error occurred at cache line address:" << err_status.err_index - << endl; - } - - err = host_exe_->read64(HE_ERROR_EXP_DATA); - cout << "Error Expected Data:" << err << endl; - - err = host_exe_->read64(HE_ERROR_ACT_DATA0); - cout << "Error Expected Data0:" << err << endl; - - err = host_exe_->read64(HE_ERROR_ACT_DATA1); - cout << "Error Expected Data1:" << err << endl; - - err = host_exe_->read64(HE_ERROR_ACT_DATA2); - cout << "Error Expected Data2:" << err << endl; - - err = host_exe_->read64(HE_ERROR_ACT_DATA3); - cout << "Error Expected Data3:" << err << endl; - - err = host_exe_->read64(HE_ERROR_ACT_DATA4); - cout << "Error Expected Data4:" << err << endl; - - err = host_exe_->read64(HE_ERROR_ACT_DATA5); - cout << "Error Expected Data5:" << err << endl; - - err = host_exe_->read64(HE_ERROR_ACT_DATA6); - cout << "Error Expected Data6:" << err << endl; - - err = host_exe_->read64(HE_ERROR_ACT_DATA7); - cout << "Error Expected Data7:" << err << endl; - } - - int parse_input_options() { - - if (!host_exe_) - return -1; - - return 0; - } - - 
bool he_wait_test_completion() { - /* Wait for test completion */ - uint32_t timeout = HELPBK_TEST_TIMEOUT; - - volatile uint8_t *status_ptr = host_exe_->get_dsm(); - while (0 == ((*status_ptr) & 0x1)) { - usleep(HELPBK_TEST_SLEEP_INVL); - if (--timeout == 0) { - cout << "HE LPBK TIME OUT" << std::endl; - - return false; - } - } - return true; - } - - bool verify_numa_node() { - - if (numa_available() < 0) { - printf("System does not support NUMA API!\n"); - return false; - } - int n = numa_max_node(); - printf("There are %d nodes on your system\n", n + 1); - - int cup_num = sched_getcpu(); - printf("cup_num:%d\n", cup_num); - - int node = numa_node_of_cpu(cup_num); - printf("node:%d\n", node); - - if (he_target_ == HE_TARGET_HOST) { - numa_node_ = node; - printf("HE_TARGET_HOST numa_node_:%d\n", numa_node_); - - } else { - // find fpga numa node numebr - numa_node_ = 2; - printf("HE_TARGET_FPGA numa_node_:%d\n", numa_node_); - } - - return true; - } - virtual int run(test_afu *afu, CLI::App *app) { (void)app; int ret = 0; @@ -1047,18 +902,18 @@ class he_cache_cmd : public he_cmd { }; void he_cache_thread(uint8_t *buf_ptr, uint64_t len) { + + uint64_t value; + UNUSED_PARAM(value); + uint64_t cache_lines = len / CL; + uint64_t i = 0; + if (buf_ptr == NULL || len == 0) { return; } - uint64_t value; - UNUSED_PARAM(value); - uint64_t cache_lines = len / CL; - uint64_t i = 0; while (true) { - if (g_stop_thread == true) { - // cout << "he_cache_thread g_stop_thread " << endl; return; } if (i < cache_lines) { diff --git a/samples/cxl_host_exerciser/cxl_he_cmd.h b/samples/cxl_host_exerciser/cxl_he_cmd.h index 048937fd2dcf..9596feec7930 100644 --- a/samples/cxl_host_exerciser/cxl_he_cmd.h +++ b/samples/cxl_host_exerciser/cxl_he_cmd.h @@ -61,18 +61,17 @@ class he_cmd : public test_command { if (!dsm_status) return; - std::cout << "\n********* DSM Status CSR Start *********" << std::endl; - - std::cout << "test completed :" << dsm_status->test_completed << std::endl; - 
std::cout << "dsm number:" << dsm_status->dsm_number << std::endl; - std::cout << "error vector:" << dsm_status->err_vector << std::endl; - std::cout << "num ticks:" << dsm_status->num_ticks << std::endl; - std::cout << "num reads:" << dsm_status->num_reads << std::endl; - std::cout << "num writes:" << dsm_status->num_writes << std::endl; - std::cout << "penalty start:" << dsm_status->penalty_start << std::endl; - std::cout << "penalty end:" << dsm_status->penalty_end << std::endl; - std::cout << "actual data:" << dsm_status->actual_data << std::endl; - std::cout << "expected data:" << dsm_status->expected_data << std::endl; + cout << "\n********* DSM Status CSR Start *********" << endl; + cout << "test completed :" << dsm_status->test_completed << endl; + cout << "dsm number:" << dsm_status->dsm_number << endl; + cout << "error vector:" << dsm_status->err_vector << endl; + cout << "num ticks:" << dsm_status->num_ticks << endl; + cout << "num reads:" << dsm_status->num_reads << endl; + cout << "num writes:" << dsm_status->num_writes << endl; + cout << "penalty start:" << dsm_status->penalty_start << endl; + cout << "penalty end:" << dsm_status->penalty_end << endl; + cout << "actual data:" << dsm_status->actual_data << endl; + cout << "expected data:" << dsm_status->expected_data << endl; // print bandwidth if (dsm_status->num_ticks > 0) { @@ -82,7 +81,7 @@ class he_cmd : public test_command { host_exe_->logger_->info("Bandwidth: {0:0.3f} GB/s", perf_data); } - std::cout << "********* DSM Status CSR end *********" << std::endl; + cout << "********* DSM Status CSR end *********" << endl; } void host_exerciser_errors() { @@ -145,7 +144,7 @@ class he_cmd : public test_command { while (0 == ((*status_ptr) & 0x1)) { usleep(HELPBK_TEST_SLEEP_INVL); if (--timeout == 0) { - cout << "HE LPBK TIME OUT" << std::endl; + cout << "HE LPBK TIME OUT" << endl; return false; } @@ -156,37 +155,28 @@ class he_cmd : public test_command { bool verify_numa_node() { if (numa_available() < 
0) { - printf("System does not support NUMA API!\n"); + cerr << "System does not support NUMA API" << endl; return false; } - printf("SUpported NUMA API!\n"); - int n = numa_max_node(); - printf("There are %d nodes on your system\n", n + 1); + cout << "There are %d nodes on your system:" << n + 1 << endl; - int cup_num = sched_getcpu(); - printf("cup_num:%d\n", cup_num); + int cpu_num = sched_getcpu(); + cout << "cpu num:" << cpu_num << endl; - int node = numa_node_of_cpu(cup_num); - printf("node:%d\n", node); + int numa_node = numa_node_of_cpu(cpu_num); + cout << "numa node:" << numa_node << endl; if (he_target_ == HE_TARGET_HOST) { - numa_node_ = node; - printf("HE_TARGET_HOST numa_node_:%d\n", numa_node_); - + numa_node_ = numa_node; + cout << "HE_TARGET_HOST numa node:" << numa_node_ << endl; } else { - // find fpga numa node numebr + // find fpga numa node number numa_node_ = 2; - printf("HE_TARGET_FPGA numa_node_:%d\n", numa_node_); + cout << "HE_TARGET_FPGA numa node:" << numa_node_ << endl; } - int num_config_cpu = numa_num_configured_cpus(); - printf("num_config_cpu:%d\n", num_config_cpu); - - int num_task_nodes = numa_num_task_nodes(); - printf("num_task_nodes:%d\n", num_task_nodes); - return true; } diff --git a/samples/cxl_host_exerciser/cxl_host_exerciser.h b/samples/cxl_host_exerciser/cxl_host_exerciser.h index adae83320674..917e59f798a3 100644 --- a/samples/cxl_host_exerciser/cxl_host_exerciser.h +++ b/samples/cxl_host_exerciser/cxl_host_exerciser.h @@ -365,7 +365,7 @@ using test_command = opae::afu_test::command; class host_exerciser : public test_afu { public: host_exerciser() - : test_afu("host_exerciser", nullptr, "warning"), count_(1) {} + : test_afu("host_exerciser", nullptr, "info"), count_(1) {} virtual int run(CLI::App *app, test_command::ptr_t test) override { int res = exit_codes::not_run; @@ -374,7 +374,8 @@ class host_exerciser : public test_afu { // Info prints details of an individual run. 
Turn it on if doing only one // test and the user hasn't changed level from the default. if ((log_level_.compare("warning") == 0)) - logger_->set_level(spdlog::level::info); + logger_->set_level(spdlog::level::info); + logger_->info("starting test run, count of {0:d}", count_); uint32_t count = 0; diff --git a/samples/cxl_host_exerciser/dfl-he-cache.h b/samples/cxl_host_exerciser/dfl-he-cache.h deleted file mode 100644 index d6036c832dbf..000000000000 --- a/samples/cxl_host_exerciser/dfl-he-cache.h +++ /dev/null @@ -1,128 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Header File for host exerciser cache DFL User API - * - * Copyright (C) 2023 Intel Corporation, Inc. - * - * Authors: - * Tim Whisonant - * Ananda Ravuri - * Russell H. Weight - */ - -#ifndef _UAPI_LINUX_HE_CACHE_DFL_H -#define _UAPI_LINUX_HE_CACHE_DFL_H - -#include -#include - -#define DFL_HE_CACHE_API_VERSION 0 - -/* - * The IOCTL interface for DFL based HE CACHE is designed for extensibility by - * embedding the structure length (argsz) and flags into structures passed - * between kernel and userspace. This design referenced the VFIO IOCTL - * interface (include/uapi/linux/vfio.h). - */ - -#define DFL_HE_CACHE_MAGIC 0xB6 - -#define DFL_HE_CACHE_BASE 0 - -/** - * DFL_HE_CACHE_GET_API_VERSION - _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 0) - * - * Report the version of the driver API. - * Return: Driver API Version. - */ - -#define DFL_HE_CACHE_GET_API_VERSION \ - _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 0) - -/** - * DFL_HE_CACHE_CHECK_EXTENSION - _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 1) - * - * Check whether an extension is supported. - * Return: 0 if not supported, otherwise the extension is supported. 
- */ - -#define DFL_HE_CACHE_CHECK_EXTENSION \ - _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 1) - -/** - * DFL_HE_CACHE_GET_REGION_INFO - _IOWR(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + - * 2, struct dfl_he_cache_region_info) - * - * Retrieve information about a device memory region. - * Caller provides struct dfl_he_cache_region_info with flags. - * Driver returns the region info in other fields. - * Return: 0 on success, -errno on failure. - */ - -#define DFL_HE_CACHE_GET_REGION_INFO \ - _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 2) - -struct dfl_he_cache_region_info { - /* Input */ - __u32 argsz; /* Structure length */ - /* Output */ - __u32 flags; /* Access permission */ -#define DFL_HE_CACHE_REGION_READ (1 << 0) /* Region is readable */ -#define DFL_HE_CACHE_REGION_WRITE (1 << 1) /* Region is writable */ -#define DFL_HE_CACHE_REGION_MMAP (1 << 2) /* Can be mmaped to userspace */ - __u64 size; /* Region size (bytes) */ - __u64 offset; /* Region offset from start of device fd */ -}; - -/** -* DFL_HE_CACHE_NUMA_DMA_MAP - _IOWR(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 3, -* struct dfl_he_cache_dma_map) -* -* Map the dma memory per user_addr,length and numa node which are provided by -caller. -* The driver allocates memory on the numa node, converts the user's virtual -address -* to a continuous physical address, and writes the physical address to -* the host executor's read/write address table CSR. - -* This interface only accepts page-size aligned user memory for dma mapping. -* Return: 0 on success, -errno on failure. 
-*/ - -#define DFL_ARRAY_MAX_SIZE 0x10 - -#define DFL_HE_CACHE_NUMA_DMA_MAP _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 3) - -struct dfl_he_cache_dma_map { - /* Input */ - __u32 argsz; /* Structure length */ - __u32 flags; /* flags */ - __u64 user_addr; /* Process virtual address */ - __u64 length; /* Length of mapping (bytes)*/ - __u32 numa_node; /* Node 0,1 2 */ - __u64 csr_array[DFL_ARRAY_MAX_SIZE]; /* CSR */ -}; - -/** - * DFL_HE_CACHE_NUMA_DMA_UNMAP - _IOWR(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + - * 4, struct dfl_he_cache_dma_unmap) - * - * Unmpas the dma memory per user_addr and length which are provided by caller. - * The driver deletes the physical pages of the user address and writes a zero - * to the read/write address table CSR. - * Return: 0 on success, -errno on failure. - */ - -#define DFL_HE_CACHE_NUMA_DMA_UNMAP \ - _IO(DFL_HE_CACHE_MAGIC, DFL_HE_CACHE_BASE + 4) - -struct dfl_he_cache_dma_unmap { - /* Input */ - __u32 argsz; /* Structure length */ - __u32 flags; /* flags */ - __u64 user_addr; /* Process virtual address */ - __u64 length; /* Length of mapping (bytes)*/ - __u64 csr_array[DFL_ARRAY_MAX_SIZE]; /* CSR */ -}; - -#endif /* _UAPI_LINUX_HE_CACHE_DFL_H */ diff --git a/samples/cxl_host_exerciser/he_cache_test.h b/samples/cxl_host_exerciser/he_cache_test.h index 4841eb9267ab..9c60e5ad5d62 100644 --- a/samples/cxl_host_exerciser/he_cache_test.h +++ b/samples/cxl_host_exerciser/he_cache_test.h @@ -45,7 +45,7 @@ #include #include -#include "dfl-he-cache.h" +#include "../../libraries/plugins/xfpga/fpga-dfl.h" using namespace std; @@ -87,16 +87,16 @@ enum { MATCHES_SIZE = 6 }; #define MiB(x) ((x)*1024 * 1024) #define GiB(x) ((x)*1024 * 1024 * 1024) -#define DFL_HE_CACHE_DSM_BASE 0x030 -#define DFL_HE_CACHE_WR_ADDR_TABLE_DATA 0x068 -#define DFL_HE_CACHE_RD_ADDR_TABLE_DATA 0x088 +#define DFL_CXL_CACHE_DSM_BASE 0x030 +#define DFL_CXL_CACHE_WR_ADDR_TABLE_DATA 0x068 +#define DFL_CXL_CACHE_RD_ADDR_TABLE_DATA 0x088 void *alloc_2mb_hugepage(void) { void 
*addr; addr = mmap(ADDR, MiB(2), PROTECTION, FLAGS_2M, 0, 0); if (addr == MAP_FAILED) { - printf("alloc_2mb_hugepage() failed: %s\n", strerror(errno)); + cerr << "alloc_2mb_hugepage() failed:" << strerror(errno) << endl; addr = NULL; } @@ -109,7 +109,7 @@ void *alloc_32kb_hugepage(void) { addr = mmap(ADDR, KiB(32), PROTECTION, FLAGS_4K, 0, 0); if (addr == MAP_FAILED) { - printf("alloc_1kb_hugepage() failed: %s\n", strerror(errno)); + cerr << "alloc_32kb_hugepage() failed:" << strerror(errno) << endl;; addr = NULL; } @@ -121,7 +121,7 @@ void *alloc_4kb_hugepage(void) { addr = mmap(ADDR, KiB(4), PROTECTION, FLAGS_4K, 0, 0); if (addr == MAP_FAILED) { - printf("alloc_1kb_hugepage() failed: %s\n", strerror(errno)); + cerr << "alloc_4kb_hugepage() failed:" << strerror(errno) << endl;; addr = NULL; } @@ -267,6 +267,9 @@ class afu { ->default_str(std::to_string(timeout_msec_)); } virtual ~afu() { + + if (fd_ > 0) + close(fd_); if (logger_) spdlog::drop(logger_->name()); } @@ -314,7 +317,7 @@ class afu { string substr_dev(str.substr(0, str.rfind("/"))); globfree(&pglob); - substr_dev.append("/he-cache/he-cache*"); + substr_dev.append("/dfl-cxl-cache/dfl-cxl-cache*"); gres = glob(substr_dev.c_str(), GLOB_NOSORT, NULL, &pglob); if (gres) { cerr << "Failed pattern match" << substr_dev.c_str() << ":" @@ -325,7 +328,7 @@ class afu { string str1(pglob.gl_pathv[0]); globfree(&pglob); dev_path_.append("/dev"); - dev_path_.append(str1.substr(str1.rfind("/"), 13)); + dev_path_.append(str1.substr(str1.rfind("/"), 16)); return 0; } @@ -357,7 +360,7 @@ class afu { int open_handle() { int res = 0; - cout << "dev_path_:" << dev_path_ << endl; + logger_->debug("dev_path_:{0}", dev_path_); fd_ = open(dev_path_.c_str(), O_RDWR); if (fd_ < 0) { @@ -367,16 +370,15 @@ class afu { memset(&rinfo_, 0, sizeof(rinfo_)); rinfo_.argsz = sizeof(rinfo_); - res = ioctl(fd_, DFL_HE_CACHE_GET_REGION_INFO, &rinfo_); + res = ioctl(fd_, DFL_CXL_CACHE_GET_REGION_INFO, &rinfo_); if (res) { - cerr << "ioctl() 
DFL_HE_CACHE_GET_REGION_INFO failed:" << strerror(errno) + cerr << "ioctl() DFL_CXL_CACHE_GET_REGION_INFO failed:" << strerror(errno) << endl; close(fd_); return 2; } - - printf("MMIO region flags: 0x%x size: %llu offset: %llu\n", rinfo_.flags, - rinfo_.size, rinfo_.offset); + logger_->debug("MMIO region flags:0x:{0:x} size:0x {1:x} offset:0x {2:x}", + rinfo_.flags, rinfo_.size, rinfo_.offset); if (!map_mmio()) { cerr << "mmap failed:" << strerror(errno) << endl; @@ -385,10 +387,10 @@ class afu { } volatile uint64_t *u64 = (volatile uint64_t *)mmio_base_; - printf("DFH : 0x%016" PRIx64 "\n", *u64); - printf("DFH + 8 : 0x%016" PRIx64 "\n", *(u64 + 1)); - printf("DFH + 16: 0x%016" PRIx64 "\n", *(u64 + 2)); - printf("DFH + 24: 0x%016" PRIx64 "\n", *(u64 + 3)); + logger_->debug("DFH : 0x:{0:X}", *u64); + logger_->debug("DFH + 8 : 0x:{0:X}", *(u64 + 1)); + logger_->debug("DFH + 16: 0x:{0:X}", *(u64 + 2)); + logger_->debug("DFH + 24: 0x:{0:X}", *(u64 + 3)); return exit_codes::not_run; } @@ -483,54 +485,39 @@ class afu { command::ptr_t current_command() const { return current_command_; } - bool open_device() { - - // std::cerr << "open\n" << dev_str; - fd_ = open(dev_path_.c_str(), O_RDWR); - if (fd_ < 0) { - printf("open() failed: %s\n", strerror(errno)); - return false; - } - - return true; - } - - bool close_device() { - if (fd_ > 0) - close(fd_); - return true; - } + bool allocate_dsm(size_t len = KiB(4), uint32_t numa_node = 0) { - bool allocate_dsm(size_t len = KiB(4), uint32_t node = 0) { int res = 0; void *ptr = NULL; - struct dfl_he_cache_dma_map dma_map; - // cout << "allocate_dsm\n"; + struct dfl_cxl_cache_dma_map dma_map; memset(&dma_map, 0, sizeof(dma_map)); - ptr = alloc_4kb_hugepage(); if (!ptr) { - cerr << "failed to allocate 4k huge page:" << strerror(errno) << endl; + cerr << "Fails to allocate 4k huge page:" << strerror(errno) << endl; return false; } + cout << "DSM buffer numa node: " << numa_node << endl; dma_map.argsz = sizeof(dma_map); 
dma_map.user_addr = (__u64)ptr; dma_map.length = len; - dma_map.numa_node = node; - dma_map.csr_array[0] = DFL_HE_CACHE_DSM_BASE; // 0x030 + dma_map.numa_node = numa_node; + dma_map.csr_array[0] = DFL_CXL_CACHE_DSM_BASE; // 0x030 + + logger_->debug("Allocate DSM buffer user addr 0x:{0:x} length : {1:d} numa node : {2:d}", + dma_map.user_addr, dma_map.length, dma_map.numa_node); volatile uint64_t *u64 = - (volatile uint64_t *)(mmio_base_ + DFL_HE_CACHE_DSM_BASE); + (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_DSM_BASE); - res = ioctl(fd_, DFL_HE_CACHE_NUMA_DMA_MAP, &dma_map); + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_DMA_MAP, &dma_map); if (res) { - cerr << "ioctl DFL_HE_CACHE_NUMA_NODE_DSM_INFO failed" << strerror(errno) + cerr << "ioctl DFL_CXL_CACHE_NUMA_NODE_DSM_INFO failed" << strerror(errno) << endl; goto out_free; } - printf("DSM_BASE: 0x%016" PRIx64 "\n", *u64); + logger_->debug("DSM_BASE : 0x:{0:x}", *u64); dsm_buffer_ = (uint8_t *)ptr; dsm_buf_len_ = len; @@ -542,28 +529,30 @@ class afu { } bool free_dsm() { - struct dfl_he_cache_dma_unmap dma_unmap; + int res = 0; + struct dfl_cxl_cache_dma_unmap dma_unmap; - // cout << "free_dsm\n" << endl; memset(&dma_unmap, 0, sizeof(dma_unmap)); - dma_unmap.argsz = sizeof(dma_unmap); dma_unmap.user_addr = (__u64)dsm_buffer_; dma_unmap.length = dsm_buf_len_; - dma_unmap.csr_array[0] = DFL_HE_CACHE_DSM_BASE; // 0x030 + dma_unmap.csr_array[0] = DFL_CXL_CACHE_DSM_BASE; // 0x030 + + logger_->debug("free dsm user addr 0x:{0:x} length : {1:d} ", + dma_unmap.user_addr, dma_unmap.length); volatile uint64_t *u64 = - (volatile uint64_t *)(mmio_base_ + DFL_HE_CACHE_DSM_BASE); + (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_DSM_BASE); - res = ioctl(fd_, DFL_HE_CACHE_NUMA_DMA_UNMAP, &dma_unmap); + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_DMA_UNMAP, &dma_unmap); if (res) { - cerr << "ioctl DFL_HE_CACHE_NUMA_DMA_UNMAP failed" << strerror(errno) + cerr << "ioctl DFL_CXL_CACHE_NUMA_DMA_UNMAP failed" << strerror(errno) << endl; } - 
printf("DSM_BASE: 0x%016" PRIx64 "\n", *u64); - free_memory(dsm_buffer_, dsm_buf_len_); + logger_->debug("DSM_BASE : 0x:{0:x}", *u64); + free_memory(dsm_buffer_, dsm_buf_len_); return true; } @@ -571,37 +560,37 @@ class afu { int res = 0; void *ptr = NULL; - struct dfl_he_cache_dma_map dma_map; - - // cout << "allocate_cache_read\n"; + struct dfl_cxl_cache_dma_map dma_map; memset(&dma_map, 0, sizeof(dma_map)); - ptr = alloc_2mb_hugepage(); if (!ptr) { - cerr << "failed to allocate huge pages\n" << endl; + cerr << "Fails to allocate 2MB huge pages" << endl; return false; } - cout << "numa_node: " << numa_node << endl; + cout << "Read buffer numa node: " << numa_node << endl; dma_map.argsz = sizeof(dma_map); dma_map.user_addr = (__u64)ptr; dma_map.length = len; dma_map.numa_node = numa_node; - dma_map.csr_array[0] = DFL_HE_CACHE_RD_ADDR_TABLE_DATA; // 0x88 + dma_map.csr_array[0] = DFL_CXL_CACHE_RD_ADDR_TABLE_DATA; // 0x88 + + logger_->debug("Allocate read buffer user addr 0x:{0:x} length : {1:d} numa node : {2:d}", + dma_map.user_addr, dma_map.length, dma_map.numa_node); volatile uint64_t *u64 = - (volatile uint64_t *)(mmio_base_ + DFL_HE_CACHE_RD_ADDR_TABLE_DATA); + (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_RD_ADDR_TABLE_DATA); - res = ioctl(fd_, DFL_HE_CACHE_NUMA_DMA_MAP, &dma_map); + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_DMA_MAP, &dma_map); if (res) { - cerr << "ioctl DFL_HE_CACHE_NUMA_DMA_MAP failed" << strerror(errno) + cerr << "ioctl DFL_CXL_CACHE_NUMA_DMA_MAP failed" << strerror(errno) << endl; goto out_free; } - printf("DFL_HE_CACHE_RD_ADDR_TABLE_DATA: 0x%016" PRIx64 "\n", *u64); + logger_->debug("DFL_CXL_CACHE_RD_ADDR_TABLE_DATA : 0x:{0:x}", *u64); rd_buffer_ = (uint8_t *)ptr; rd_buf_len_ = len; return true; @@ -612,64 +601,70 @@ class afu { } bool free_cache_read() { - struct dfl_he_cache_dma_unmap dma_unmap; + int res = 0; + struct dfl_cxl_cache_dma_unmap dma_unmap; memset(&dma_unmap, 0, sizeof(dma_unmap)); - dma_unmap.argsz = sizeof(dma_unmap); 
dma_unmap.user_addr = (__u64)rd_buffer_; dma_unmap.length = rd_buf_len_; - dma_unmap.csr_array[0] = DFL_HE_CACHE_RD_ADDR_TABLE_DATA; // 0x88 + dma_unmap.csr_array[0] = DFL_CXL_CACHE_RD_ADDR_TABLE_DATA; // 0x88 + + logger_->debug("free read user addr 0x:{0:x} length : {1:d} ", + dma_unmap.user_addr, dma_unmap.length); volatile uint64_t *u64 = - (volatile uint64_t *)(mmio_base_ + DFL_HE_CACHE_RD_ADDR_TABLE_DATA); - res = ioctl(fd_, DFL_HE_CACHE_NUMA_DMA_UNMAP, &dma_unmap); + (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_RD_ADDR_TABLE_DATA); + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_DMA_UNMAP, &dma_unmap); if (res) { - cerr << "ioctl DFL_HE_CACHE_NUMA_DMA_UNMAP failed" << strerror(errno) + cerr << "ioctl DFL_CXL_CACHE_NUMA_DMA_UNMAP failed" << strerror(errno) << endl; } - printf("DFL_HE_CACHE_RD_ADDR_TABLE_DATA: 0x%016" PRIx64 "\n", *u64); + logger_->debug("DFL_CXL_CACHE_RD_ADDR_TABLE_DATA : 0x:{0:x}", *u64); free_memory(rd_buffer_, rd_buf_len_); return true; } bool allocate_cache_write(size_t len = MiB(2), uint32_t numa_node = 0) { - int res; - void *ptr; - struct dfl_he_cache_dma_map dma_map; - // std::cout << "allocate_cache_write" << endl; + int res = 0; + void *ptr = NULL; + struct dfl_cxl_cache_dma_map dma_map; memset(&dma_map, 0, sizeof(dma_map)); - ptr = alloc_2mb_hugepage(); if (!ptr) { - cerr << "failed to allocate huge pages\n" << endl; + cerr << "Fails to allocate 2MB huge pages" << endl; return false; } + cout << "Write buffer numa node: " << numa_node << endl; dma_map.argsz = sizeof(dma_map); dma_map.user_addr = (__u64)ptr; dma_map.length = len; dma_map.numa_node = numa_node; - dma_map.csr_array[0] = DFL_HE_CACHE_WR_ADDR_TABLE_DATA; // 0x68; + dma_map.csr_array[0] = DFL_CXL_CACHE_WR_ADDR_TABLE_DATA; // 0x68; + + logger_->debug("Allocate write buffer user addr 0x:{0:x}\ + length : {1:d} numa node : {2:d}", + dma_map.user_addr, dma_map.length, dma_map.numa_node); volatile uint64_t *u64 = - (volatile uint64_t *)(mmio_base_ + DFL_HE_CACHE_WR_ADDR_TABLE_DATA); 
+ (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_WR_ADDR_TABLE_DATA); - res = ioctl(fd_, DFL_HE_CACHE_NUMA_DMA_MAP, &dma_map); + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_DMA_MAP, &dma_map); if (res) { - cerr << "ioctl DFL_HE_CACHE_NUMA_DMA_MAP failed" << strerror(errno) + cerr << "ioctl DFL_CXL_CACHE_NUMA_DMA_MAP failed" << strerror(errno) << endl; goto out_free; } - printf("\nDFL_HE_CACHE_WR_ADDR_TABLE_DATA: 0x%016" PRIx64 "\n", *u64); + logger_->debug("DFL_CXL_CACHE_WR_ADDR_TABLE_DATA : 0x:{0:x}", *u64); wr_buffer_ = (uint8_t *)ptr; - + wr_buf_len_ = len; return true; out_free: @@ -678,28 +673,29 @@ class afu { } bool free_cache_write() { - struct dfl_he_cache_dma_unmap dma_unmap; - int res; - // cout << "free_cache_write" << endl; - memset(&dma_unmap, 0, sizeof(dma_unmap)); + int res = 0; + struct dfl_cxl_cache_dma_unmap dma_unmap; + memset(&dma_unmap, 0, sizeof(dma_unmap)); dma_unmap.argsz = sizeof(dma_unmap); dma_unmap.user_addr = (__u64)wr_buffer_; dma_unmap.length = wr_buf_len_; - dma_unmap.csr_array[0] = DFL_HE_CACHE_WR_ADDR_TABLE_DATA; // 0x68; + dma_unmap.csr_array[0] = DFL_CXL_CACHE_WR_ADDR_TABLE_DATA; // 0x68; + + logger_->debug("free write user addr 0x:{0:x} length : {1:d} ", + dma_unmap.user_addr, dma_unmap.length); volatile uint64_t *u64 = - (volatile uint64_t *)(mmio_base_ + DFL_HE_CACHE_WR_ADDR_TABLE_DATA); - res = ioctl(fd_, DFL_HE_CACHE_NUMA_DMA_UNMAP, &dma_unmap); + (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_WR_ADDR_TABLE_DATA); + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_DMA_UNMAP, &dma_unmap); if (res) { - cerr << "ioctl DFL_HE_CACHE_NUMA_DMA_UNMAP failed" << strerror(errno) + cerr << "ioctl DFL_CXL_CACHE_NUMA_DMA_UNMAP failed" << strerror(errno) << endl; } - printf("\nDFL_HE_CACHE_WR_ADDR_TABLE_DATA: 0x%016" PRIx64 "\n", *u64); + logger_->debug("DFL_CXL_CACHE_WR_ADDR_TABLE_DATA : 0x:{0:x}", *u64); free_memory(wr_buffer_, wr_buf_len_); - return true; } @@ -707,38 +703,41 @@ class afu { int res = 0; void *ptr = NULL; - struct dfl_he_cache_dma_map 
dma_map; - - // cout<< "allocate_cache_read_write"; + struct dfl_cxl_cache_dma_map dma_map; memset(&dma_map, 0, sizeof(dma_map)); ptr = alloc_2mb_hugepage(); if (!ptr) { - cerr << "failed to allocate huge pages\n" << endl; + cerr << "Fails to allocate 2MB huge pages" << endl; return false; } + cout << "Read/Write buffer numa node: " << numa_node << endl; dma_map.argsz = sizeof(dma_map); dma_map.user_addr = (__u64)ptr; dma_map.length = len; dma_map.numa_node = numa_node; - dma_map.csr_array[0] = DFL_HE_CACHE_RD_ADDR_TABLE_DATA; // 0x88; - dma_map.csr_array[1] = DFL_HE_CACHE_WR_ADDR_TABLE_DATA; // 0x68; + dma_map.csr_array[0] = DFL_CXL_CACHE_RD_ADDR_TABLE_DATA; // 0x88; + dma_map.csr_array[1] = DFL_CXL_CACHE_WR_ADDR_TABLE_DATA; // 0x68; + + logger_->debug("Allocate read/write buffer user addr 0x:{0:x}\ + length : {1:d} numa node : {2:d}", + dma_map.user_addr, dma_map.length, dma_map.numa_node); volatile uint64_t *u64_wr = - (volatile uint64_t *)(mmio_base_ + DFL_HE_CACHE_WR_ADDR_TABLE_DATA); + (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_WR_ADDR_TABLE_DATA); volatile uint64_t *u64_rd = - (volatile uint64_t *)(mmio_base_ + DFL_HE_CACHE_RD_ADDR_TABLE_DATA); + (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_RD_ADDR_TABLE_DATA); - res = ioctl(fd_, DFL_HE_CACHE_NUMA_DMA_MAP, &dma_map); + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_DMA_MAP, &dma_map); if (res) { - cerr << "ioctl DFL_HE_CACHE_NUMA_DMA_MAP failed" << strerror(errno) + cerr << "ioctl DFL_CXL_CACHE_NUMA_DMA_MAP failed" << strerror(errno) << endl; goto out_free; } - printf("\nDFL_HE_CACHE_WR_ADDR_TABLE_DATA: 0x%016" PRIx64 "\n", *u64_wr); - printf("\nDFL_HE_CACHE_RD_ADDR_TABLE_DATAs: 0x%016" PRIx64 "\n", *u64_rd); + logger_->debug("nDFL_CXL_CACHE_WR_ADDR_TABLE_DATA : 0x:{0:x}", *u64_rd); + logger_->debug("DFL_CXL_CACHE_WR_ADDR_TABLE_DATA : 0x:{0:x}", *u64_wr); rd_wr_buffer_ = (uint8_t *)ptr; rd_wr_buf_len_ = len; @@ -751,32 +750,33 @@ class afu { } bool free_cache_read_write() { - struct dfl_he_cache_dma_unmap 
dma_unmap; - int res; - // cout << "free_cache_read_write\n" << endl; + int res = 0 ; + struct dfl_cxl_cache_dma_unmap dma_unmap; memset(&dma_unmap, 0, sizeof(dma_unmap)); - dma_unmap.argsz = sizeof(dma_unmap); dma_unmap.user_addr = (__u64)rd_wr_buffer_; dma_unmap.length = rd_wr_buf_len_; - dma_unmap.csr_array[0] = DFL_HE_CACHE_RD_ADDR_TABLE_DATA; // 0x88; - dma_unmap.csr_array[1] = DFL_HE_CACHE_WR_ADDR_TABLE_DATA; // 0x68; + dma_unmap.csr_array[0] = DFL_CXL_CACHE_RD_ADDR_TABLE_DATA; // 0x88; + dma_unmap.csr_array[1] = DFL_CXL_CACHE_WR_ADDR_TABLE_DATA; // 0x68; + + logger_->debug("free read/write user addr 0x:{0:x} length : {1:d} ", + dma_unmap.user_addr, dma_unmap.length); volatile uint64_t *u64_wr = - (volatile uint64_t *)(mmio_base_ + DFL_HE_CACHE_WR_ADDR_TABLE_DATA); + (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_WR_ADDR_TABLE_DATA); volatile uint64_t *u64_rd = - (volatile uint64_t *)(mmio_base_ + DFL_HE_CACHE_RD_ADDR_TABLE_DATA); + (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_RD_ADDR_TABLE_DATA); - res = ioctl(fd_, DFL_HE_CACHE_NUMA_DMA_UNMAP, &dma_unmap); + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_DMA_UNMAP, &dma_unmap); if (res) { - cerr << "ioctl DFL_HE_CACHE_NUMA_DMA_UNMAP failed" << strerror(errno) + cerr << "ioctl DFL_CXL_CACHE_NUMA_DMA_UNMAP failed" << strerror(errno) << endl; } - printf("\nDFL_HE_CACHE_WR_ADDR_TABLE_DATA: 0x%016" PRIx64 "\n", *u64_wr); - printf("\nDFL_HE_CACHE_RD_ADDR_TABLE_DATAs: 0x%016" PRIx64 "\n", *u64_rd); + logger_->debug("nDFL_CXL_CACHE_WR_ADDR_TABLE_DATA : 0x:{0:x}", *u64_rd); + logger_->debug("DFL_CXL_CACHE_WR_ADDR_TABLE_DATA : 0x:{0:x}", *u64_wr); free_memory(rd_wr_buffer_, rd_wr_buf_len_); rd_wr_buffer_ = NULL; @@ -815,7 +815,7 @@ class afu { uint8_t *rd_wr_buffer_; uint64_t rd_wr_buf_len_; - struct dfl_he_cache_region_info rinfo_; + struct dfl_cxl_cache_region_info rinfo_; std::string dev_path_; From e44dc942a664ad0567bbb97edccd829e90de0781 Mon Sep 17 00:00:00 2001 From: anandaravuri Date: Tue, 26 Sep 2023 11:23:02 -0700 
Subject: [PATCH 09/11] fix: update cxl cache ioctl and review comments Signed-off-by: anandaravuri --- libraries/plugins/xfpga/fpga-dfl.h | 71 +++--- samples/cxl_host_exerciser/CMakeLists.txt | 1 + samples/cxl_host_exerciser/cxl_he_cache_cmd.h | 13 -- samples/cxl_host_exerciser/cxl_he_cmd.h | 11 +- .../cxl_host_exerciser/cxl_host_exerciser.cpp | 5 +- samples/cxl_host_exerciser/he_cache_test.h | 210 ++++++++++-------- 6 files changed, 158 insertions(+), 153 deletions(-) diff --git a/libraries/plugins/xfpga/fpga-dfl.h b/libraries/plugins/xfpga/fpga-dfl.h index cec3023c1496..c218f7e27df6 100644 --- a/libraries/plugins/xfpga/fpga-dfl.h +++ b/libraries/plugins/xfpga/fpga-dfl.h @@ -33,9 +33,8 @@ #define DFL_FPGA_BASE 0 #define DFL_PORT_BASE 0x40 #define DFL_FME_BASE 0x80 -#define DFL_PCI_SVA_BASE 0xf8 #define DFL_CXL_CACHE_BASE 0xA0 - +#define DFL_PCI_SVA_BASE 0xf8 /* Common IOCTLs for both FME and AFU file descriptor */ @@ -308,26 +307,26 @@ struct dfl_fpga_fme_port_pr { DFL_PCI_SVA_BASE + 1) /** - * DFL_CXL_CACHE_GET_REGION_INFO - _IOWR(DFL_FPGA_MAGIC, DFL_CXL_CACHE_BASE + 0, - * struct dfl_cxl_cache_region_info) - * - * Retrieve information about a device memory region. - * Caller provides struct dfl_cxl_cache_region_info with flags. - * Driver returns the region info in other fields. - * Return: 0 on success, -errno on failure. - */ + * DFL_CXL_CACHE_GET_REGION_INFO - _IOWR(DFL_FPGA_MAGIC, DFL_CXL_CACHE_BASE + 0, + * struct dfl_cxl_cache_region_info) + * + * Retrieve information about a device memory region. + * Caller provides struct dfl_cxl_cache_region_info with flags. + * Driver returns the region info in other fields. + * Return: 0 on success, -errno on failure. 
+ */ #define DFL_CXL_CACHE_GET_REGION_INFO _IO(DFL_FPGA_MAGIC, DFL_CXL_CACHE_BASE + 0) - /** - * struct dfl_cxl_cache_region_info - CXL cache region information - * @argsz: structure length - * @flags: access permission - * @size: region size (bytes) - * @offset: region offset from start of device fd - * - * to retrieve information about a device memory region - */ + /** + * struct dfl_cxl_cache_region_info - CXL cache region information + * @argsz: structure length + * @flags: access permission + * @size: region size (bytes) + * @offset: region offset from start of device fd + * + * to retrieve information about a device memory region + */ struct dfl_cxl_cache_region_info { __u32 argsz; __u32 flags; @@ -339,24 +338,24 @@ struct dfl_cxl_cache_region_info { }; /** - * DFL_CXL_CACHE_NUMA_DMA_MAP - _IOWR(DFL_FPGA_MAGIC, DFL_CXL_CACHE_BASE + 1, - * struct dfl_cxl_cache_dma_map) + * DFL_CXL_CACHE_NUMA_BUFFER_MAP - _IOWR(DFL_FPGA_MAGIC, DFL_CXL_CACHE_BASE + 1, + * struct dfl_cxl_cache_buffer_map) * - * Map the dma memory per user_addr, length and numa node which are provided by caller - * The driver allocates memory on the numa node, converts the user's virtual address - * to a continuous physical address, and writes the physical address to - * the cxl cache read/write address table CSR. - - * This interface only accepts page-size aligned user memory for dma mapping. + * Map the user memory per user_addr, length and numa node which are + * provided by caller. The driver allocates memory on the numa node, + * converts the user's virtual addressto a continuous physical address, + * and writes the physical address to the cxl cache read/write address table CSR. + * + * This interface only accepts page-size aligned user memory for mapping. * Return: 0 on success, -errno on failure. 
*/ #define DFL_ARRAY_MAX_SIZE 0x10 -#define DFL_CXL_CACHE_NUMA_DMA_MAP _IO(DFL_FPGA_MAGIC, DFL_CXL_CACHE_BASE + 1) +#define DFL_CXL_CACHE_NUMA_BUFFER_MAP _IO(DFL_FPGA_MAGIC, DFL_CXL_CACHE_BASE + 1) /** - * struct dfl_cxl_cache_dma_map - maps user address to physical address. + * struct dfl_cxl_cache_buffer_map - maps user address to physical address. * @argsz: structure length * @flags: flags * @user_addr: user mmap virtual address @@ -366,7 +365,7 @@ struct dfl_cxl_cache_region_info { * * maps user allocated virtual address to physical address. */ -struct dfl_cxl_cache_dma_map { +struct dfl_cxl_cache_buffer_map { __u32 argsz; __u32 flags; __u64 user_addr; @@ -376,19 +375,19 @@ struct dfl_cxl_cache_dma_map { }; /** - * DFL_CXL_CACHE_NUMA_DMA_UNMAP - _IOWR(DFL_FPGA_MAGIC, DFL_CXL_CACHE_BASE + 1, - * struct dfl_cxl_cache_dma_unmap) + * DFL_CXL_CACHE_NUMA_BUFFER_UNMAP - _IOWR(DFL_FPGA_MAGIC, DFL_CXL_CACHE_BASE + 1, + * struct dfl_cxl_cache_buffer_unmap) * - * Unmaps the dma memory per user_addr and length which are provided by caller + * Unmaps the user memory per user_addr and length which are provided by caller * The driver deletes the physical pages of the user address and writes a zero * to the read/write address table CSR. * Return: 0 on success, -errno on failure. */ -#define DFL_CXL_CACHE_NUMA_DMA_UNMAP _IO(DFL_FPGA_MAGIC, DFL_CXL_CACHE_BASE + 2) +#define DFL_CXL_CACHE_NUMA_BUFFER_UNMAP _IO(DFL_FPGA_MAGIC, DFL_CXL_CACHE_BASE + 2) /** - * struct dfl_cxl_cache_dma_unmap - unmaps user allocated memory. + * struct dfl_cxl_cache_buffer_unmap - unmaps user allocated memory. * @argsz: structure length * @flags: flags * @user_addr: user mmap virtual address @@ -397,7 +396,7 @@ struct dfl_cxl_cache_dma_map { * * unmaps user allocated memory. 
*/ -struct dfl_cxl_cache_dma_unmap { +struct dfl_cxl_cache_buffer_unmap { __u32 argsz; __u32 flags; __u64 user_addr; diff --git a/samples/cxl_host_exerciser/CMakeLists.txt b/samples/cxl_host_exerciser/CMakeLists.txt index 7298b63b62d9..2bdf25fc0bcd 100644 --- a/samples/cxl_host_exerciser/CMakeLists.txt +++ b/samples/cxl_host_exerciser/CMakeLists.txt @@ -50,6 +50,7 @@ if (OPAE_WITH_CLI11 AND OPAE_WITH_SPDLOG AND OPAE_WITH_NUMA) PRIVATE ${OPAE_INCLUDE_PATHS} ${CMAKE_CURRENT_SOURCE_DIR} + ${OPAE_LIB_SOURCE}/plugins/xfpga/ ${CLI11_INCLUDE_DIRS} ${numa_INCLUDE_DIRS} ${spdlog_INCLUDE_DIRS}) diff --git a/samples/cxl_host_exerciser/cxl_he_cache_cmd.h b/samples/cxl_host_exerciser/cxl_he_cache_cmd.h index dd0631b04325..5272d5333067 100644 --- a/samples/cxl_host_exerciser/cxl_he_cache_cmd.h +++ b/samples/cxl_host_exerciser/cxl_he_cache_cmd.h @@ -163,7 +163,6 @@ class he_cache_cmd : public he_cmd { // wait for completion if (!he_wait_test_completion()) { - cerr << "timeout error" << endl; he_perf_counters(); host_exerciser_errors(); host_exe_->free_cache_read(); @@ -196,7 +195,6 @@ class he_cache_cmd : public he_cmd { // wait for completion if (!he_wait_test_completion()) { - cerr << "timeout error" << endl; he_perf_counters(); host_exerciser_errors(); host_exe_->free_cache_read(); @@ -273,7 +271,6 @@ class he_cache_cmd : public he_cmd { // wait for completion if (!he_wait_test_completion()) { - cerr << "timeout error" << endl; he_perf_counters(); host_exerciser_errors(); host_exe_->free_cache_read_write(); @@ -307,7 +304,6 @@ class he_cache_cmd : public he_cmd { // wait for completion if (!he_wait_test_completion()) { - cerr << "timeout error" << endl; he_perf_counters(); host_exerciser_errors(); host_exe_->free_cache_read_write(); @@ -380,7 +376,6 @@ class he_cache_cmd : public he_cmd { // wait for completion if (!he_wait_test_completion()) { - cerr << "timeout error" << endl; he_perf_counters(); host_exerciser_errors(); host_exe_->free_cache_read(); @@ -453,7 +448,6 @@ 
class he_cache_cmd : public he_cmd { // wait for completion if (!he_wait_test_completion()) { - cerr << "timeout error" << endl; he_perf_counters(); host_exerciser_errors(); host_exe_->free_cache_read_write(); @@ -530,10 +524,8 @@ class he_cache_cmd : public he_cmd { // wait for completion if (!he_wait_test_completion()) { - cerr << "timeout error" << endl; he_perf_counters(); host_exerciser_errors(); - g_stop_thread = true; t1.join(); sleep(1); @@ -616,8 +608,6 @@ class he_cache_cmd : public he_cmd { // wait for completion if (!he_wait_test_completion()) { - cerr << "timeout error" << endl; - he_perf_counters(); host_exerciser_errors(); g_stop_thread = true; @@ -695,7 +685,6 @@ class he_cache_cmd : public he_cmd { // wait for completion if (!he_wait_test_completion()) { - cerr << "timeout error" << endl; he_perf_counters(); host_exerciser_errors(); host_exe_->free_cache_read(); @@ -769,7 +758,6 @@ class he_cache_cmd : public he_cmd { // wait for completion if (!he_wait_test_completion()) { - cerr << "timeout error" << endl; he_perf_counters(); host_exerciser_errors(); host_exe_->free_cache_write(); @@ -896,7 +884,6 @@ class he_cache_cmd : public he_cmd { uint32_t he_contmodetime_; uint32_t he_linerep_count_; uint32_t he_stide_; - uint32_t he_target_; uint32_t he_test_; bool he_test_all_; }; diff --git a/samples/cxl_host_exerciser/cxl_he_cmd.h b/samples/cxl_host_exerciser/cxl_he_cmd.h index 9596feec7930..a5efe4b9f641 100644 --- a/samples/cxl_host_exerciser/cxl_he_cmd.h +++ b/samples/cxl_host_exerciser/cxl_he_cmd.h @@ -140,12 +140,12 @@ class he_cmd : public test_command { /* Wait for test completion */ uint32_t timeout = HELPBK_TEST_TIMEOUT; + cout << "Test started ......" 
<< endl; volatile uint8_t *status_ptr = host_exe_->get_dsm(); while (0 == ((*status_ptr) & 0x1)) { usleep(HELPBK_TEST_SLEEP_INVL); if (--timeout == 0) { - cout << "HE LPBK TIME OUT" << endl; - + cout << "HE Cache time out error" << endl; return false; } } @@ -162,11 +162,8 @@ class he_cmd : public test_command { int n = numa_max_node(); cout << "There are %d nodes on your system:" << n + 1 << endl; - int cpu_num = sched_getcpu(); - cout << "cpu num:" << cpu_num << endl; - - int numa_node = numa_node_of_cpu(cpu_num); - cout << "numa node:" << numa_node << endl; + int numa_node = numa_node_of_cpu(sched_getcpu()); + cout << "HE Cache app numa node:" << numa_node << endl; if (he_target_ == HE_TARGET_HOST) { numa_node_ = numa_node; diff --git a/samples/cxl_host_exerciser/cxl_host_exerciser.cpp b/samples/cxl_host_exerciser/cxl_host_exerciser.cpp index 0f31d9155dce..8fe4eecfad02 100644 --- a/samples/cxl_host_exerciser/cxl_host_exerciser.cpp +++ b/samples/cxl_host_exerciser/cxl_host_exerciser.cpp @@ -40,12 +40,11 @@ int main(int argc, char *argv[]) { app.register_command(); // host exerciser signal handler - struct sigaction act_old, act_new; - memset(&act_old, 0, sizeof(act_old)); + struct sigaction act_new; memset(&act_new, 0, sizeof(act_new)); act_new.sa_handler = he_sig_handler; - sigaction(SIGINT, &act_new, &act_old); + sigaction(SIGINT, &act_new, NULL); return app.main(argc, argv); } diff --git a/samples/cxl_host_exerciser/he_cache_test.h b/samples/cxl_host_exerciser/he_cache_test.h index 9c60e5ad5d62..845e895c5356 100644 --- a/samples/cxl_host_exerciser/he_cache_test.h +++ b/samples/cxl_host_exerciser/he_cache_test.h @@ -45,7 +45,7 @@ #include #include -#include "../../libraries/plugins/xfpga/fpga-dfl.h" +#include "fpga-dfl.h" using namespace std; @@ -91,41 +91,65 @@ enum { MATCHES_SIZE = 6 }; #define DFL_CXL_CACHE_WR_ADDR_TABLE_DATA 0x068 #define DFL_CXL_CACHE_RD_ADDR_TABLE_DATA 0x088 -void *alloc_2mb_hugepage(void) { - void *addr; - addr = mmap(ADDR, MiB(2), 
PROTECTION, FLAGS_2M, 0, 0); - if (addr == MAP_FAILED) { - cerr << "alloc_2mb_hugepage() failed:" << strerror(errno) << endl; - addr = NULL; - } - - return addr; -} -void free_memory(void *addr, uint64_t len) { munmap(addr, len); } +bool buffer_allocate(void** addr, uint64_t len, uint32_t numa_node) +{ + void* addr_local = NULL; + int i = 0; + long status = 0; + unsigned long mask[4]; + unsigned int bits_per_UL = sizeof(unsigned long) * 8; + + for (i = 0; i < 4; i++) mask[i] = 0; + mask[numa_node / bits_per_UL] |= 1UL << (numa_node % bits_per_UL); + + if (len > MiB(2)) + addr_local = mmap(ADDR, len, PROTECTION, FLAGS_1G, 0, 0); + else if (len > KiB(4)) + addr_local = mmap(ADDR, len, PROTECTION, FLAGS_2M, 0, 0); + else + addr_local = mmap(ADDR, len, PROTECTION, FLAGS_4K, 0, 0); + + if (addr_local == MAP_FAILED) { + if (errno == ENOMEM) { + if (len > MiB(2)) + cerr <<"Could not allocate buffer (no free 1 " + "GiB huge pages)"; + if (len > KiB(4)) + cerr << "Could not allocate buffer (no free 2 " + "MiB huge pages)"; + else + cerr <<"Could not allocate buffer (out of " + "memory)"; + return false; + } + cerr << "CXL cache mmap failed:"<< strerror(errno) << endl; + return false; + } -void *alloc_32kb_hugepage(void) { - void *addr; + if (addr_local == NULL) { + cerr << "Unable to mmap" << endl; + return false; + } - addr = mmap(ADDR, KiB(32), PROTECTION, FLAGS_4K, 0, 0); - if (addr == MAP_FAILED) { - cerr << "alloc_32kb_hugepage() failed:" << strerror(errno) << endl;; - addr = NULL; - } + status = syscall(__NR_mbind, addr_local, len, 2, &mask, numa_node + 2, 1); + if (status != 0) { + cerr << "buffer_allocate(): unable to mbind:" + << strerror(errno) << endl; + return false; + } - return addr; + *addr = addr_local; + return true; } -void *alloc_4kb_hugepage(void) { - void *addr; - - addr = mmap(ADDR, KiB(4), PROTECTION, FLAGS_4K, 0, 0); - if (addr == MAP_FAILED) { - cerr << "alloc_4kb_hugepage() failed:" << strerror(errno) << endl;; - addr = NULL; - } - - return addr; 
+bool buffer_release(void* addr, uint64_t len) +{ + if (munmap(addr, len)) { + cerr << "CXL cache unmap failed:", strerror(errno); + return false; + } + return true; } bool sysfs_read_u64(const char *path, uint64_t *value) { @@ -489,13 +513,13 @@ class afu { int res = 0; void *ptr = NULL; - struct dfl_cxl_cache_dma_map dma_map; + struct dfl_cxl_cache_buffer_map dma_map; memset(&dma_map, 0, sizeof(dma_map)); - ptr = alloc_4kb_hugepage(); - if (!ptr) { - cerr << "Fails to allocate 4k huge page:" << strerror(errno) << endl; - return false; + + if (!buffer_allocate(&ptr, len, numa_node)) { + cerr << "Fails to allocate 4k huge page:" << strerror(errno) << endl; + return false; } cout << "DSM buffer numa node: " << numa_node << endl; @@ -503,17 +527,18 @@ class afu { dma_map.user_addr = (__u64)ptr; dma_map.length = len; dma_map.numa_node = numa_node; - dma_map.csr_array[0] = DFL_CXL_CACHE_DSM_BASE; // 0x030 + dma_map.csr_array[0] = DFL_CXL_CACHE_DSM_BASE; - logger_->debug("Allocate DSM buffer user addr 0x:{0:x} length : {1:d} numa node : {2:d}", + logger_->debug("Allocate DSM buffer user addr 0x:{0:x} length :" + "{1:d} numa node : {2:d}", dma_map.user_addr, dma_map.length, dma_map.numa_node); volatile uint64_t *u64 = (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_DSM_BASE); - res = ioctl(fd_, DFL_CXL_CACHE_NUMA_DMA_MAP, &dma_map); + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_BUFFER_MAP, &dma_map); if (res) { - cerr << "ioctl DFL_CXL_CACHE_NUMA_NODE_DSM_INFO failed" << strerror(errno) + cerr << "ioctl DFL_CXL_CACHE_NUMA_BUFFER_MAP failed" << strerror(errno) << endl; goto out_free; } @@ -524,20 +549,20 @@ class afu { return true; out_free: - free_memory(ptr, len); + buffer_release(ptr, len); return false; } bool free_dsm() { int res = 0; - struct dfl_cxl_cache_dma_unmap dma_unmap; + struct dfl_cxl_cache_buffer_unmap dma_unmap; memset(&dma_unmap, 0, sizeof(dma_unmap)); dma_unmap.argsz = sizeof(dma_unmap); dma_unmap.user_addr = (__u64)dsm_buffer_; dma_unmap.length = 
dsm_buf_len_; - dma_unmap.csr_array[0] = DFL_CXL_CACHE_DSM_BASE; // 0x030 + dma_unmap.csr_array[0] = DFL_CXL_CACHE_DSM_BASE; logger_->debug("free dsm user addr 0x:{0:x} length : {1:d} ", dma_unmap.user_addr, dma_unmap.length); @@ -545,14 +570,14 @@ class afu { volatile uint64_t *u64 = (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_DSM_BASE); - res = ioctl(fd_, DFL_CXL_CACHE_NUMA_DMA_UNMAP, &dma_unmap); + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_BUFFER_UNMAP, &dma_unmap); if (res) { - cerr << "ioctl DFL_CXL_CACHE_NUMA_DMA_UNMAP failed" << strerror(errno) - << endl; + cerr << "ioctl DFL_CXL_CACHE_NUMA_BUFFER_UNMAP failed" + << strerror(errno) << endl; } logger_->debug("DSM_BASE : 0x:{0:x}", *u64); - free_memory(dsm_buffer_, dsm_buf_len_); + buffer_release(dsm_buffer_, dsm_buf_len_); return true; } @@ -560,32 +585,32 @@ class afu { int res = 0; void *ptr = NULL; - struct dfl_cxl_cache_dma_map dma_map; + struct dfl_cxl_cache_buffer_map dma_map; memset(&dma_map, 0, sizeof(dma_map)); - ptr = alloc_2mb_hugepage(); - if (!ptr) { - cerr << "Fails to allocate 2MB huge pages" << endl; - return false; - } + if (!buffer_allocate(&ptr, len, numa_node)) { + cerr << "Fails to allocate 2MB huge page:" << strerror(errno) << endl; + return false; + } cout << "Read buffer numa node: " << numa_node << endl; dma_map.argsz = sizeof(dma_map); dma_map.user_addr = (__u64)ptr; dma_map.length = len; dma_map.numa_node = numa_node; - dma_map.csr_array[0] = DFL_CXL_CACHE_RD_ADDR_TABLE_DATA; // 0x88 + dma_map.csr_array[0] = DFL_CXL_CACHE_RD_ADDR_TABLE_DATA; - logger_->debug("Allocate read buffer user addr 0x:{0:x} length : {1:d} numa node : {2:d}", + logger_->debug("Allocate read buffer user addr 0x:{0:x} length :" + "{1:d} numa node : {2:d}", dma_map.user_addr, dma_map.length, dma_map.numa_node); volatile uint64_t *u64 = (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_RD_ADDR_TABLE_DATA); - - res = ioctl(fd_, DFL_CXL_CACHE_NUMA_DMA_MAP, &dma_map); + sleep(1); + res = ioctl(fd_, 
DFL_CXL_CACHE_NUMA_BUFFER_MAP, &dma_map); if (res) { - cerr << "ioctl DFL_CXL_CACHE_NUMA_DMA_MAP failed" << strerror(errno) + cerr << "ioctl DFL_CXL_CACHE_NUMA_BUFFER_MAP failed" << strerror(errno) << endl; goto out_free; } @@ -596,35 +621,34 @@ class afu { return true; out_free: - free_memory(ptr, len); + buffer_release(ptr, len); return false; } bool free_cache_read() { int res = 0; - struct dfl_cxl_cache_dma_unmap dma_unmap; + struct dfl_cxl_cache_buffer_unmap dma_unmap; memset(&dma_unmap, 0, sizeof(dma_unmap)); dma_unmap.argsz = sizeof(dma_unmap); dma_unmap.user_addr = (__u64)rd_buffer_; dma_unmap.length = rd_buf_len_; - dma_unmap.csr_array[0] = DFL_CXL_CACHE_RD_ADDR_TABLE_DATA; // 0x88 + dma_unmap.csr_array[0] = DFL_CXL_CACHE_RD_ADDR_TABLE_DATA; logger_->debug("free read user addr 0x:{0:x} length : {1:d} ", dma_unmap.user_addr, dma_unmap.length); volatile uint64_t *u64 = (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_RD_ADDR_TABLE_DATA); - res = ioctl(fd_, DFL_CXL_CACHE_NUMA_DMA_UNMAP, &dma_unmap); + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_BUFFER_UNMAP, &dma_unmap); if (res) { - cerr << "ioctl DFL_CXL_CACHE_NUMA_DMA_UNMAP failed" << strerror(errno) + cerr << "ioctl DFL_CXL_CACHE_NUMA_BUFFER_UNMAP failed" << strerror(errno) << endl; } logger_->debug("DFL_CXL_CACHE_RD_ADDR_TABLE_DATA : 0x:{0:x}", *u64); - free_memory(rd_buffer_, rd_buf_len_); - + buffer_release(rd_buffer_, rd_buf_len_); return true; } @@ -632,13 +656,12 @@ class afu { int res = 0; void *ptr = NULL; - struct dfl_cxl_cache_dma_map dma_map; + struct dfl_cxl_cache_buffer_map dma_map; memset(&dma_map, 0, sizeof(dma_map)); - ptr = alloc_2mb_hugepage(); - if (!ptr) { - cerr << "Fails to allocate 2MB huge pages" << endl; - return false; + if (!buffer_allocate(&ptr, len, numa_node)) { + cerr << "Fails to allocate 2MB huge page:" << strerror(errno) << endl; + return false; } cout << "Write buffer numa node: " << numa_node << endl; @@ -646,7 +669,7 @@ class afu { dma_map.user_addr = (__u64)ptr; 
dma_map.length = len; dma_map.numa_node = numa_node; - dma_map.csr_array[0] = DFL_CXL_CACHE_WR_ADDR_TABLE_DATA; // 0x68; + dma_map.csr_array[0] = DFL_CXL_CACHE_WR_ADDR_TABLE_DATA; logger_->debug("Allocate write buffer user addr 0x:{0:x}\ length : {1:d} numa node : {2:d}", @@ -655,9 +678,9 @@ class afu { volatile uint64_t *u64 = (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_WR_ADDR_TABLE_DATA); - res = ioctl(fd_, DFL_CXL_CACHE_NUMA_DMA_MAP, &dma_map); + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_BUFFER_MAP, &dma_map); if (res) { - cerr << "ioctl DFL_CXL_CACHE_NUMA_DMA_MAP failed" << strerror(errno) + cerr << "ioctl DFL_CXL_CACHE_NUMA_BUFFER_MAP failed" << strerror(errno) << endl; goto out_free; } @@ -668,34 +691,34 @@ class afu { return true; out_free: - free_memory(ptr, len); + buffer_release(ptr, len); return false; } bool free_cache_write() { int res = 0; - struct dfl_cxl_cache_dma_unmap dma_unmap; + struct dfl_cxl_cache_buffer_unmap dma_unmap; memset(&dma_unmap, 0, sizeof(dma_unmap)); dma_unmap.argsz = sizeof(dma_unmap); dma_unmap.user_addr = (__u64)wr_buffer_; dma_unmap.length = wr_buf_len_; - dma_unmap.csr_array[0] = DFL_CXL_CACHE_WR_ADDR_TABLE_DATA; // 0x68; + dma_unmap.csr_array[0] = DFL_CXL_CACHE_WR_ADDR_TABLE_DATA; logger_->debug("free write user addr 0x:{0:x} length : {1:d} ", dma_unmap.user_addr, dma_unmap.length); volatile uint64_t *u64 = (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_WR_ADDR_TABLE_DATA); - res = ioctl(fd_, DFL_CXL_CACHE_NUMA_DMA_UNMAP, &dma_unmap); + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_BUFFER_UNMAP, &dma_unmap); if (res) { - cerr << "ioctl DFL_CXL_CACHE_NUMA_DMA_UNMAP failed" << strerror(errno) + cerr << "ioctl DFL_CXL_CACHE_NUMA_BUFFER_UNMAP failed" << strerror(errno) << endl; } logger_->debug("DFL_CXL_CACHE_WR_ADDR_TABLE_DATA : 0x:{0:x}", *u64); - free_memory(wr_buffer_, wr_buf_len_); + buffer_release(wr_buffer_, wr_buf_len_); return true; } @@ -703,13 +726,12 @@ class afu { int res = 0; void *ptr = NULL; - struct dfl_cxl_cache_dma_map 
dma_map; + struct dfl_cxl_cache_buffer_map dma_map; memset(&dma_map, 0, sizeof(dma_map)); - ptr = alloc_2mb_hugepage(); - if (!ptr) { - cerr << "Fails to allocate 2MB huge pages" << endl; - return false; + if (!buffer_allocate(&ptr, len, numa_node)) { + cerr << "Fails to allocate 2MB huge page:" << strerror(errno) << endl; + return false; } cout << "Read/Write buffer numa node: " << numa_node << endl; @@ -717,8 +739,8 @@ class afu { dma_map.user_addr = (__u64)ptr; dma_map.length = len; dma_map.numa_node = numa_node; - dma_map.csr_array[0] = DFL_CXL_CACHE_RD_ADDR_TABLE_DATA; // 0x88; - dma_map.csr_array[1] = DFL_CXL_CACHE_WR_ADDR_TABLE_DATA; // 0x68; + dma_map.csr_array[0] = DFL_CXL_CACHE_RD_ADDR_TABLE_DATA; + dma_map.csr_array[1] = DFL_CXL_CACHE_WR_ADDR_TABLE_DATA; logger_->debug("Allocate read/write buffer user addr 0x:{0:x}\ length : {1:d} numa node : {2:d}", @@ -729,9 +751,9 @@ class afu { volatile uint64_t *u64_rd = (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_RD_ADDR_TABLE_DATA); - res = ioctl(fd_, DFL_CXL_CACHE_NUMA_DMA_MAP, &dma_map); + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_BUFFER_MAP, &dma_map); if (res) { - cerr << "ioctl DFL_CXL_CACHE_NUMA_DMA_MAP failed" << strerror(errno) + cerr << "ioctl DFL_CXL_CACHE_NUMA_BUFFER_MAP failed" << strerror(errno) << endl; goto out_free; } @@ -745,21 +767,21 @@ class afu { return true; out_free: - free_memory(ptr, len); + buffer_release(ptr, len); return false; } bool free_cache_read_write() { int res = 0 ; - struct dfl_cxl_cache_dma_unmap dma_unmap; + struct dfl_cxl_cache_buffer_unmap dma_unmap; memset(&dma_unmap, 0, sizeof(dma_unmap)); dma_unmap.argsz = sizeof(dma_unmap); dma_unmap.user_addr = (__u64)rd_wr_buffer_; dma_unmap.length = rd_wr_buf_len_; - dma_unmap.csr_array[0] = DFL_CXL_CACHE_RD_ADDR_TABLE_DATA; // 0x88; - dma_unmap.csr_array[1] = DFL_CXL_CACHE_WR_ADDR_TABLE_DATA; // 0x68; + dma_unmap.csr_array[0] = DFL_CXL_CACHE_RD_ADDR_TABLE_DATA; + dma_unmap.csr_array[1] = DFL_CXL_CACHE_WR_ADDR_TABLE_DATA; 
logger_->debug("free read/write user addr 0x:{0:x} length : {1:d} ", dma_unmap.user_addr, dma_unmap.length); @@ -769,16 +791,16 @@ class afu { volatile uint64_t *u64_rd = (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_RD_ADDR_TABLE_DATA); - res = ioctl(fd_, DFL_CXL_CACHE_NUMA_DMA_UNMAP, &dma_unmap); + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_BUFFER_UNMAP, &dma_unmap); if (res) { - cerr << "ioctl DFL_CXL_CACHE_NUMA_DMA_UNMAP failed" << strerror(errno) + cerr << "ioctl DFL_CXL_CACHE_NUMA_BUFFER_UNMAP failed" << strerror(errno) << endl; } logger_->debug("nDFL_CXL_CACHE_WR_ADDR_TABLE_DATA : 0x:{0:x}", *u64_rd); logger_->debug("DFL_CXL_CACHE_WR_ADDR_TABLE_DATA : 0x:{0:x}", *u64_wr); - free_memory(rd_wr_buffer_, rd_wr_buf_len_); + buffer_release(rd_wr_buffer_, rd_wr_buf_len_); rd_wr_buffer_ = NULL; return true; } From 3cd6e2cb9a633901e448ec7ad0e7f32bc0d3dd9e Mon Sep 17 00:00:00 2001 From: anandaravuri Date: Tue, 26 Sep 2023 11:48:45 -0700 Subject: [PATCH 10/11] fix: ci build errors Signed-off-by: anandaravuri --- libraries/plugins/xfpga/fpga-dfl.h | 78 +++++++++++----------- samples/cxl_host_exerciser/he_cache_test.h | 4 ++ 2 files changed, 43 insertions(+), 39 deletions(-) diff --git a/libraries/plugins/xfpga/fpga-dfl.h b/libraries/plugins/xfpga/fpga-dfl.h index c218f7e27df6..83ab05447bc8 100644 --- a/libraries/plugins/xfpga/fpga-dfl.h +++ b/libraries/plugins/xfpga/fpga-dfl.h @@ -306,27 +306,27 @@ struct dfl_fpga_fme_port_pr { #define DFL_PCI_SVA_UNBIND_DEV _IO(DFL_FPGA_MAGIC, \ DFL_PCI_SVA_BASE + 1) - /** - * DFL_CXL_CACHE_GET_REGION_INFO - _IOWR(DFL_FPGA_MAGIC, DFL_CXL_CACHE_BASE + 0, - * struct dfl_cxl_cache_region_info) - * - * Retrieve information about a device memory region. - * Caller provides struct dfl_cxl_cache_region_info with flags. - * Driver returns the region info in other fields. - * Return: 0 on success, -errno on failure. 
- */ +/** + * DFL_CXL_CACHE_GET_REGION_INFO - _IOWR(DFL_FPGA_MAGIC, DFL_CXL_CACHE_BASE + 0, + * struct dfl_cxl_cache_region_info) + * + * Retrieve information about a device memory region. + * Caller provides struct dfl_cxl_cache_region_info with flags. + * Driver returns the region info in other fields. + * Return: 0 on success, -errno on failure. + */ #define DFL_CXL_CACHE_GET_REGION_INFO _IO(DFL_FPGA_MAGIC, DFL_CXL_CACHE_BASE + 0) - /** - * struct dfl_cxl_cache_region_info - CXL cache region information - * @argsz: structure length - * @flags: access permission - * @size: region size (bytes) - * @offset: region offset from start of device fd - * - * to retrieve information about a device memory region - */ +/** + * struct dfl_cxl_cache_region_info - CXL cache region information + * @argsz: structure length + * @flags: access permission + * @size: region size (bytes) + * @offset: region offset from start of device fd + * + * to retrieve information about a device memory region + */ struct dfl_cxl_cache_region_info { __u32 argsz; __u32 flags; @@ -354,17 +354,17 @@ struct dfl_cxl_cache_region_info { #define DFL_CXL_CACHE_NUMA_BUFFER_MAP _IO(DFL_FPGA_MAGIC, DFL_CXL_CACHE_BASE + 1) - /** - * struct dfl_cxl_cache_buffer_map - maps user address to physical address. - * @argsz: structure length - * @flags: flags - * @user_addr: user mmap virtual address - * @length: length of mapping (bytes) - * @numa_node: Numa node number - * @csr_array: array of region address offset - * - * maps user allocated virtual address to physical address. - */ +/** + * struct dfl_cxl_cache_buffer_map - maps user address to physical address. + * @argsz: structure length + * @flags: flags + * @user_addr: user mmap virtual address + * @length: length of mapping (bytes) + * @numa_node: Numa node number + * @csr_array: array of region address offset + * + * maps user allocated virtual address to physical address. 
+ */ struct dfl_cxl_cache_buffer_map { __u32 argsz; __u32 flags; @@ -386,16 +386,16 @@ struct dfl_cxl_cache_buffer_map { #define DFL_CXL_CACHE_NUMA_BUFFER_UNMAP _IO(DFL_FPGA_MAGIC, DFL_CXL_CACHE_BASE + 2) - /** - * struct dfl_cxl_cache_buffer_unmap - unmaps user allocated memory. - * @argsz: structure length - * @flags: flags - * @user_addr: user mmap virtual address - * @length: length of mapping (bytes) - * @csr_array: array of region address offset - * - * unmaps user allocated memory. - */ +/** + * struct dfl_cxl_cache_buffer_unmap - unmaps user allocated memory. + * @argsz: structure length + * @flags: flags + * @user_addr: user mmap virtual address + * @length: length of mapping (bytes) + * @csr_array: array of region address offset + * + * unmaps user allocated memory. + */ struct dfl_cxl_cache_buffer_unmap { __u32 argsz; __u32 flags; diff --git a/samples/cxl_host_exerciser/he_cache_test.h b/samples/cxl_host_exerciser/he_cache_test.h index 845e895c5356..900e56bf8f7c 100644 --- a/samples/cxl_host_exerciser/he_cache_test.h +++ b/samples/cxl_host_exerciser/he_cache_test.h @@ -39,12 +39,16 @@ #include #include #include +#include +#include #include #include #include #include #include + + #include "fpga-dfl.h" using namespace std; From 4776c2d12dd753f35634241c98c16192afe171bb Mon Sep 17 00:00:00 2001 From: anandaravuri Date: Tue, 26 Sep 2023 15:36:26 -0700 Subject: [PATCH 11/11] fix: replace original license header fpga-dfl.h Signed-off-by: anandaravuri --- libraries/plugins/xfpga/fpga-dfl.h | 39 +++++++++++++++++++----------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/libraries/plugins/xfpga/fpga-dfl.h b/libraries/plugins/xfpga/fpga-dfl.h index 83ab05447bc8..ab54fd40796b 100644 --- a/libraries/plugins/xfpga/fpga-dfl.h +++ b/libraries/plugins/xfpga/fpga-dfl.h @@ -1,17 +1,28 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Header File for FPGA DFL User API - * - * Copyright (C) 2017-2018 Intel Corporation, Inc. 
- * - * Authors: - * Kang Luwei - * Zhang Yi - * Wu Hao - * Xiao Guangrong - * Tim Whisonant - * Ananda Ravuri - */ +// Copyright(c) 2017-2023, Intel Corporation +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Intel Corporation nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. #ifndef _UAPI_LINUX_FPGA_DFL_H #define _UAPI_LINUX_FPGA_DFL_H