diff --git a/libraries/plugins/xfpga/fpga-dfl.h b/libraries/plugins/xfpga/fpga-dfl.h index 215908bf9742..ab54fd40796b 100644 --- a/libraries/plugins/xfpga/fpga-dfl.h +++ b/libraries/plugins/xfpga/fpga-dfl.h @@ -1,4 +1,4 @@ -// Copyright(c) 2017-2020, Intel Corporation +// Copyright(c) 2017-2023, Intel Corporation // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: @@ -44,6 +44,7 @@ #define DFL_FPGA_BASE 0 #define DFL_PORT_BASE 0x40 #define DFL_FME_BASE 0x80 +#define DFL_CXL_CACHE_BASE 0xA0 #define DFL_PCI_SVA_BASE 0xf8 /* Common IOCTLs for both FME and AFU file descriptor */ @@ -135,12 +136,20 @@ struct dfl_fpga_port_region_info { * Map the dma memory per user_addr and length which are provided by caller. * Driver fills the iova in provided struct afu_port_dma_map. * This interface only accepts page-size aligned user memory for dma mapping. + * + * Setting only one of DFL_DMA_MAP_FLAG_READ or WRITE limits FPGA-initiated + * DMA requests to only reads or only writes. To be backward compatible with + * legacy driver, setting neither flag is equivalent to setting both flags: + * both read and write requests are permitted. + * * Return: 0 on success, -errno on failure. */ struct dfl_fpga_port_dma_map { /* Input */ __u32 argsz; /* Structure length */ - __u32 flags; /* Zero for now */ + __u32 flags; +#define DFL_DMA_MAP_FLAG_READ (1 << 0)/* readable from device */ +#define DFL_DMA_MAP_FLAG_WRITE (1 << 1)/* writable from device */ __u64 user_addr; /* Process virtual address */ __u64 length; /* Length of mapping (bytes)*/ /* Output */ @@ -308,4 +317,102 @@ struct dfl_fpga_fme_port_pr { #define DFL_PCI_SVA_UNBIND_DEV _IO(DFL_FPGA_MAGIC, \ DFL_PCI_SVA_BASE + 1) +/** + * DFL_CXL_CACHE_GET_REGION_INFO - _IOWR(DFL_FPGA_MAGIC, DFL_CXL_CACHE_BASE + 0, + * struct dfl_cxl_cache_region_info) + * + * Retrieve information about a device memory region. 
+ * Caller provides struct dfl_cxl_cache_region_info with flags. + * Driver returns the region info in other fields. + * Return: 0 on success, -errno on failure. + */ + +#define DFL_CXL_CACHE_GET_REGION_INFO _IO(DFL_FPGA_MAGIC, DFL_CXL_CACHE_BASE + 0) + +/** + * struct dfl_cxl_cache_region_info - CXL cache region information + * @argsz: structure length + * @flags: access permission + * @size: region size (bytes) + * @offset: region offset from start of device fd + * + * to retrieve information about a device memory region + */ +struct dfl_cxl_cache_region_info { + __u32 argsz; + __u32 flags; +#define DFL_CXL_CACHE_REGION_READ (1 << 0) +#define DFL_CXL_CACHE_REGION_WRITE (1 << 1) +#define DFL_CXL_CACHE_REGION_MMAP (1 << 2) + __u64 size; + __u64 offset; +}; + +/** + * DFL_CXL_CACHE_NUMA_BUFFER_MAP - _IOWR(DFL_FPGA_MAGIC, DFL_CXL_CACHE_BASE + 1, + * struct dfl_cxl_cache_buffer_map) + * + * Map the user memory per user_addr, length and numa node which are + * provided by caller. The driver allocates memory on the numa node, + * converts the user's virtual address to a continuous physical address, + * and writes the physical address to the cxl cache read/write address table CSR. + * + * This interface only accepts page-size aligned user memory for mapping. + * Return: 0 on success, -errno on failure. + */ + +#define DFL_ARRAY_MAX_SIZE 0x10 + +#define DFL_CXL_CACHE_NUMA_BUFFER_MAP _IO(DFL_FPGA_MAGIC, DFL_CXL_CACHE_BASE + 1) + +/** + * struct dfl_cxl_cache_buffer_map - maps user address to physical address. + * @argsz: structure length + * @flags: flags + * @user_addr: user mmap virtual address + * @length: length of mapping (bytes) + * @numa_node: Numa node number + * @csr_array: array of region address offset + * + * maps user allocated virtual address to physical address. 
+ */ +struct dfl_cxl_cache_buffer_map { + __u32 argsz; + __u32 flags; + __u64 user_addr; + __u64 length; + __u32 numa_node; + __u64 csr_array[DFL_ARRAY_MAX_SIZE]; +}; + +/** + * DFL_CXL_CACHE_NUMA_BUFFER_UNMAP - _IOWR(DFL_FPGA_MAGIC, DFL_CXL_CACHE_BASE + 2, + * struct dfl_cxl_cache_buffer_unmap) + * + * Unmaps the user memory per user_addr and length which are provided by caller. + * The driver deletes the physical pages of the user address and writes a zero + * to the read/write address table CSR. + * Return: 0 on success, -errno on failure. + */ + +#define DFL_CXL_CACHE_NUMA_BUFFER_UNMAP _IO(DFL_FPGA_MAGIC, DFL_CXL_CACHE_BASE + 2) + +/** + * struct dfl_cxl_cache_buffer_unmap - unmaps user allocated memory. + * @argsz: structure length + * @flags: flags + * @user_addr: user mmap virtual address + * @length: length of mapping (bytes) + * @csr_array: array of region address offset + * + * unmaps user allocated memory. + */ +struct dfl_cxl_cache_buffer_unmap { + __u32 argsz; + __u32 flags; + __u64 user_addr; + __u64 length; + __u64 csr_array[DFL_ARRAY_MAX_SIZE]; +}; + #endif /* _UAPI_LINUX_FPGA_DFL_H */ diff --git a/opae.spec.fedora b/opae.spec.fedora index 893bcec6416a..dcdbe10e7fa2 100644 --- a/opae.spec.fedora +++ b/opae.spec.fedora @@ -355,6 +355,7 @@ done %{_bindir}/mem_tg %{_bindir}/ofs.uio %{_bindir}/cxl_mem_tg +%{_bindir}/cxl_host_exerciser %{python3_sitearch}/opae.diag* %{python3_sitearch}/opae/diag* diff --git a/packaging/opae/deb/opae-extra-tools.install b/packaging/opae/deb/opae-extra-tools.install index a85827b3f0de..a363035c3704 100644 --- a/packaging/opae/deb/opae-extra-tools.install +++ b/packaging/opae/deb/opae-extra-tools.install @@ -18,6 +18,7 @@ usr/bin/fpga_dma_N3000_test usr/bin/fpga_dma_test usr/bin/host_exerciser usr/bin/cxl_mem_tg +usr/bin/cxl_host_exerciser usr/bin/bist usr/bin/hps usr/bin/hssi diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index db5023c44118..ab942e774b65 100644 --- a/samples/CMakeLists.txt +++ 
b/samples/CMakeLists.txt @@ -70,3 +70,5 @@ opae_add_subdirectory(host_exerciser) opae_add_subdirectory(n5010-test) opae_add_subdirectory(n5010-ctl) opae_add_subdirectory(cxl_mem_tg) +opae_add_subdirectory(cxl_host_exerciser) + diff --git a/samples/cxl_host_exerciser/CMakeLists.txt b/samples/cxl_host_exerciser/CMakeLists.txt new file mode 100644 index 000000000000..2bdf25fc0bcd --- /dev/null +++ b/samples/cxl_host_exerciser/CMakeLists.txt @@ -0,0 +1,66 @@ +## Copyright(c) 2023, Intel Corporation +## +## Redistribution and use in source and binary forms, with or without +## modification, are permitted provided that the following conditions are met: +## +## * Redistributions of source code must retain the above copyright notice, +## this list of conditions and the following disclaimer. +## * Redistributions in binary form must reproduce the above copyright notice, +## this list of conditions and the following disclaimer in the documentation +## and/or other materials provided with the distribution. +## * Neither the name of Intel Corporation nor the names of its contributors +## may be used to endorse or promote products derived from this software +## without specific prior written permission. +## +## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +## AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +## IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +## ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +## LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +## CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +## SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +## INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +## CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +## ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +## POSSIBILITY OF SUCH DAMAGE. + +if (OPAE_WITH_CLI11 AND OPAE_WITH_SPDLOG AND OPAE_WITH_NUMA) + + if (fmt_LIBRARIES) + # if we found fmt before (from CMakeLists.txt) + # then we need to find it again from this directory + # so we can "import" the fmt::fmt link target + find_package(fmt) + endif (fmt_LIBRARIES) + + opae_add_executable(TARGET cxl_host_exerciser + SOURCE cxl_host_exerciser.cpp + LIBS + opae-cxx-core + opae-c + ${spdlog_LIBRARIES} + ${json-c_LIBRARIES} + ${uuid_LIBRARIES} + ${numa_LIBRARIES} + ${fmt_LIBRARIES} + COMPONENT samplebin + ) + + target_include_directories(cxl_host_exerciser + PRIVATE + ${OPAE_INCLUDE_PATHS} + ${CMAKE_CURRENT_SOURCE_DIR} + ${OPAE_LIB_SOURCE}/plugins/xfpga/ + ${CLI11_INCLUDE_DIRS} + ${numa_INCLUDE_DIRS} + ${spdlog_INCLUDE_DIRS}) + + target_compile_options(cxl_host_exerciser PUBLIC + -Wno-unused-result + ) + + target_compile_definitions(cxl_host_exerciser PUBLIC + ${spdlog_DEFINITIONS} + ) + +endif(OPAE_WITH_CLI11 AND OPAE_WITH_SPDLOG AND OPAE_WITH_NUMA) diff --git a/samples/cxl_host_exerciser/cxl_he_cache_cmd.h b/samples/cxl_host_exerciser/cxl_he_cache_cmd.h new file mode 100644 index 000000000000..5272d5333067 --- /dev/null +++ b/samples/cxl_host_exerciser/cxl_he_cache_cmd.h @@ -0,0 +1,918 @@ +// Copyright(c) 2023, Intel Corporation +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source 
code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Intel Corporation nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+#pragma once + +#include "cxl_he_cmd.h" +#include "cxl_host_exerciser.h" +#include "he_cache_test.h" + +#define UNUSED_PARAM(x) ((void)x) + +// HE exit global flag +volatile bool g_he_exit = false; +volatile static bool g_stop_thread = false; + +// host exerciser signal handler +void he_sig_handler(int) { + g_he_exit = true; + g_stop_thread = true; + cout << "HE signal handler exit app" << endl; +} + +namespace host_exerciser { + +void he_cache_thread(uint8_t *buf_ptr, uint64_t len); + +class he_cache_cmd : public he_cmd { +public: + he_cache_cmd() + : he_continuousmode_(false), he_contmodetime_(0), he_linerep_count_(0), + he_stide_(0), he_test_(0), he_test_all_(false) {} + + virtual ~he_cache_cmd() {} + + virtual const char *name() const override { return "cache"; } + + virtual const char *description() const override { + return "run simple cxl he cache test"; + } + + virtual const char *afu_id() const override { return HE_CACHE_AFU_ID; } + + virtual uint64_t featureid() const override { return MEM_TG_FEATURE_ID; } + + virtual uint64_t guidl() const override { return MEM_TG_FEATURE_GUIDL; } + + virtual uint64_t guidh() const override { return MEM_TG_FEATURE_GUIDH; } + + virtual void add_options(CLI::App *app) override { + + // test mode + app->add_option( + "--test", he_test_, + "host exerciser cache test") + ->transform(CLI::CheckedTransformer(he_test_modes)) + ->default_val("fpgardcachehit"); + + // Continuous mode + app->add_option("--continuousmode", he_continuousmode_, + "test rollover or test termination") + ->default_val("false"); + + // Continuous mode time + app->add_option("--contmodetime", he_contmodetime_, + "Continuous mode time in seconds") + ->default_val("1"); + + // target host or fpga + app->add_option("--target", he_target_, + "host exerciser run on host or fpga") + ->transform(CLI::CheckedTransformer(he_targets)) + ->default_val("host"); + + app->add_option("--stride", he_stide_, "Enable stride mode") + ->default_val("0"); + + // Line repeat 
count + app->add_option("--linerepcount", he_linerep_count_, "Line repeat count") + ->transform(CLI::Range(1, 256)) + ->default_val("10"); + + // Test all + app->add_option("--testall", he_test_all_, "Run all tests") + ->default_val("false"); + } + + int he_run_fpga_rd_cache_hit_test() { + cout << "********** FPGA Read cache hit test start**********" << endl; + /* + STEPS + 1) Allocate DSM, Read buffer // flush + 2) set cache lines 32kb/64 + 3) set line repeat count + 4) Set RdShared (CXL) config + 5) Run test ( AFU copies cache from host memory to FPGA cache) + 6) set line repeat count + 7) Set RdShared (CXL) config + 8) Run test ( AFU read cache from FPGA cache) + */ + + // HE_INFO + // Set Read number Lines + he_info_.value = host_exe_->read64(HE_INFO); + host_exe_->write64(HE_RD_NUM_LINES, FPGA_32KB_CACHE_LINES); + + cout << "Read number Lines:" << FPGA_32KB_CACHE_LINES << endl; + cout << "Line Repeat Count:" << he_linerep_count_ << endl; + cout << "Read address table size:" << he_info_.read_addr_table_size << endl; + cout << "Numa node:" << numa_node_ << endl; + + // set RD_CONFIG RdShared (CXL) + he_rd_cfg_.value = 0; + he_rd_cfg_.line_repeat_count = 1; + he_rd_cfg_.read_traffic_enable = 1; + he_rd_cfg_.opcode = RD_LINE_S; + host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); + + // set RD_ADDR_TABLE_CTRL + rd_table_ctl_.value = 0; + rd_table_ctl_.enable_address_stride = 1; + host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); + + // Allocate DSM buffer + if (!host_exe_->allocate_dsm()) { + cerr << "allocate dsm failed" << endl; + return -1; + } + + // Allocate Read buffer + if (!host_exe_->allocate_cache_read(BUFFER_SIZE_2MB, numa_node_)) { + cerr << "allocate cache read failed" << endl; + host_exe_->free_dsm(); + return -1; + } + + // Start test + he_ctl_.Start = 1; + host_exe_->write64(HE_CTL, he_ctl_.value); + he_ctl_.Start = 0; + host_exe_->write64(HE_CTL, he_ctl_.value); + + // wait for completion + if (!he_wait_test_completion()) { + 
he_perf_counters(); + host_exerciser_errors(); + host_exe_->free_cache_read(); + host_exe_->free_dsm(); + return -1; + } + + he_perf_counters(); + + cout << "********** AFU Copied host cache to FPGA Cache successfully " + "********** " << endl; + + // set RD_CONFIG RdShared (CXL) + he_rd_cfg_.value = 0; + he_rd_cfg_.line_repeat_count = he_linerep_count_; + he_rd_cfg_.read_traffic_enable = 1; + he_rd_cfg_.opcode = RD_LINE_S; + host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); + + // set RD_ADDR_TABLE_CTRL + rd_table_ctl_.value = 0; + rd_table_ctl_.enable_address_stride = 1; + host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); + + // Start test + he_ctl_.Start = 1; + host_exe_->write64(HE_CTL, he_ctl_.value); + he_ctl_.Start = 0; + host_exe_->write64(HE_CTL, he_ctl_.value); + + // wait for completion + if (!he_wait_test_completion()) { + he_perf_counters(); + host_exerciser_errors(); + host_exe_->free_cache_read(); + host_exe_->free_dsm(); + return -1; + } + + he_perf_counters(); + host_exe_->free_dsm(); + host_exe_->free_cache_read(); + + cout << "********** AFU reads cache from FPGA Cache successfully" + " **********" << endl; + cout << "********** FPGA Read cache hit test end**********" << endl; + return 0; + } + + int he_run_fpga_wr_cache_hit_test() { + + cout << "********** FPGA Write cache hit test start**********" << endl; + /* + STEPS + 1) Allocate DSM, Read buffer, Write buffer // flush + 2) set cache lines 32kb/64 + 3) set line repeat count + 4) Set RdShared (CXL) config + 5) Run test ( AFU copies cache from host memory to FPGA cache) + 6) set line repeat count + 7) Set WrLine_M/WrPart_M (CXL) config + 8) Run test ( AFU writes to FPGA cache) + */ + + // HE_INFO + // Set Read number Lines + he_info_.value = host_exe_->read64(HE_INFO); + host_exe_->write64(HE_RD_NUM_LINES, FPGA_32KB_CACHE_LINES); + + cout << "Read/write number Lines:" << FPGA_32KB_CACHE_LINES << endl; + cout << "Line Repeat Count:" << he_linerep_count_ << endl; + cout << "Read 
address table size:" << he_info_.read_addr_table_size << endl; + cout << "Write address table size:" << he_info_.write_addr_table_size + << endl; + + // set RD_CONFIG RdShared (CXL) + he_rd_cfg_.value = 0; + he_rd_cfg_.line_repeat_count = 1; + he_rd_cfg_.read_traffic_enable = 1; + he_rd_cfg_.opcode = RD_LINE_S; + host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); + + // set RD_ADDR_TABLE_CTRL + rd_table_ctl_.value = 0; + rd_table_ctl_.enable_address_stride = 1; + host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); + + // Allocate DSM buffer + if (!host_exe_->allocate_dsm()) { + cerr << "allocate dsm failed" << endl; + return -1; + } + + // Allocate Read, Write buffer + if (!host_exe_->allocate_cache_read_write(BUFFER_SIZE_2MB, numa_node_)) { + cerr << "allocate cache read failed" << endl; + host_exe_->free_dsm(); + return -1; + } + + // Start test + he_ctl_.Start = 1; + host_exe_->write64(HE_CTL, he_ctl_.value); + he_ctl_.Start = 0; + host_exe_->write64(HE_CTL, he_ctl_.value); + + // wait for completion + if (!he_wait_test_completion()) { + he_perf_counters(); + host_exerciser_errors(); + host_exe_->free_cache_read_write(); + host_exe_->free_dsm(); + return -1; + } + + he_perf_counters(); + + cout << "********** AFU Copied host cache to FPGA Cache successfully " + "********** " << endl; + + // set W_CONFIG + he_wr_cfg_.value = 0; + he_wr_cfg_.line_repeat_count = he_linerep_count_; + he_wr_cfg_.write_traffic_enable = 1; + he_wr_cfg_.opcode = WR_LINE_M; + host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); + + // Set WR_ADDR_TABLE_CTRL + wr_table_ctl_.value = 0; + wr_table_ctl_.enable_address_stride = 1; + host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); + + host_exe_->write64(HE_WR_NUM_LINES, FPGA_32KB_CACHE_LINES); + // Start test + he_ctl_.Start = 1; + host_exe_->write64(HE_CTL, he_ctl_.value); + he_ctl_.Start = 0; + host_exe_->write64(HE_CTL, he_ctl_.value); + + // wait for completion + if (!he_wait_test_completion()) { + 
he_perf_counters(); + host_exerciser_errors(); + host_exe_->free_cache_read_write(); + host_exe_->free_dsm(); + return -1; + } + + he_perf_counters(); + cout << "********** AFU Write to FPGA Cache successfully ********** " + << endl; + + host_exe_->free_cache_read_write(); + host_exe_->free_dsm(); + + cout << "********** FPGA Write cache hit test end**********" << endl; + + return 0; + } + + int he_run_fpga_rd_cache_miss_test() { + + cout << "********** FPGA Read cache miss test start**********" << endl; + /* + STEPS + 1) Allocate DSM, Read buffer, Write buffer + 2) Write number of lines more then 32kb 2mb/64 + 3) Set RdShared (CXL) config + 4) Run test (Buffer is not present in FPGA - FPGA read Cache miss ) + */ + + // HE_INFO + // Set Read number Lines + he_info_.value = host_exe_->read64(HE_INFO); + host_exe_->write64(HE_RD_NUM_LINES, FPGA_2MB_CACHE_LINES); + + cout << "Read number Lines:" << FPGA_2MB_CACHE_LINES << endl; + cout << "Line Repeat Count:" << he_linerep_count_ << endl; + cout << "Read address table size:" << he_info_.read_addr_table_size << endl; + + // set RD_CONFIG RdShared (CXL) + he_rd_cfg_.value = 0; + he_rd_cfg_.line_repeat_count = he_linerep_count_; + he_rd_cfg_.read_traffic_enable = 1; + he_rd_cfg_.opcode = RD_LINE_S; + host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); + + // set RD_ADDR_TABLE_CTRL + rd_table_ctl_.value = 0; + rd_table_ctl_.enable_address_stride = 1; + host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); + + // Allocate DSM buffer + if (!host_exe_->allocate_dsm()) { + cerr << "allocate dsm failed" << endl; + return -1; + } + + // Allocate Read buffer + if (!host_exe_->allocate_cache_read(BUFFER_SIZE_2MB, numa_node_)) { + cerr << "allocate cache read failed" << endl; + host_exe_->free_dsm(); + return -1; + } + + // start test + he_ctl_.Start = 1; + host_exe_->write64(HE_CTL, he_ctl_.value); + he_ctl_.Start = 0; + host_exe_->write64(HE_CTL, he_ctl_.value); + + // wait for completion + if 
(!he_wait_test_completion()) { + he_perf_counters(); + host_exerciser_errors(); + host_exe_->free_cache_read(); + host_exe_->free_dsm(); + return -1; + } + + he_perf_counters(); + host_exe_->free_cache_read(); + host_exe_->free_dsm(); + + cout << "********** AFU Read FPGA Cache Miss successfully ********** " + << endl; + cout << "********** FPGA Read cache miss test end**********" << endl; + return 0; + } + + int he_run_fpga_wr_cache_miss_test() { + + cout << "********** FPGA write cache miss test start**********" << endl; + /* + STEPS + 1) Allocate DSM, Read buffer, Write buffer + 2) Write number of lines more then 32 kb 2mb/64 + 3) Set WR ItoMWr (CXL) config + 4) Run test ( Buffer is not present in FPGA - FPGA write Cache miss ) + */ + + // HE_INFO + // Set Read number Lines + he_info_.value = host_exe_->read64(HE_INFO); + host_exe_->write64(HE_WR_NUM_LINES, FPGA_2MB_CACHE_LINES); + + cout << "Read/write number Lines:" << FPGA_2MB_CACHE_LINES << endl; + cout << "Line Repeat Count:" << he_linerep_count_ << endl; + cout << "Read address table size:" << he_info_.read_addr_table_size << endl; + cout << "Write address table size:" << he_info_.write_addr_table_size + << endl; + + // set W_CONFIG + he_wr_cfg_.value = 0; + he_wr_cfg_.line_repeat_count = he_linerep_count_; + he_wr_cfg_.write_traffic_enable = 1; + he_wr_cfg_.opcode = WR_LINE_M; + host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); + + // Set WR_ADDR_TABLE_CTRL + wr_table_ctl_.value = 0; + wr_table_ctl_.enable_address_stride = 1; + host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); + + // Allocate DSM buffer + if (!host_exe_->allocate_dsm()) { + cerr << "allocate dsm failed" << endl; + return -1; + } + + // Allocate Read, Write buffer + if (!host_exe_->allocate_cache_read_write(BUFFER_SIZE_2MB, numa_node_)) { + cerr << "allocate cache read failed" << endl; + host_exe_->free_dsm(); + return -1; + } + + // start test + he_ctl_.Start = 1; + host_exe_->write64(HE_CTL, he_ctl_.value); + 
he_ctl_.Start = 0; + host_exe_->write64(HE_CTL, he_ctl_.value); + + // wait for completion + if (!he_wait_test_completion()) { + he_perf_counters(); + host_exerciser_errors(); + host_exe_->free_cache_read_write(); + host_exe_->free_dsm(); + return -1; + } + + he_perf_counters(); + host_exe_->free_cache_read_write(); + host_exe_->free_dsm(); + + cout << "********** AFU Write FPGA Cache Miss successfully ********** " + << endl; + cout << "********** FPGA Write cache miss test end**********" << endl; + return 0; + } + + int he_run_host_rd_cache_hit_test() { + + cout << "********** 1 Host LLC Read cache hit test start**********" << endl; + /* + STEPS + 1) Allocate DSM, Read buffer + 2) create thread read buffer + 3) Set RdLine_I (CXL) config + 4) Run test ( AFU reads from host cache to FPGA cache) + */ + + // HE_INFO + // Set Read number Lines + he_info_.value = host_exe_->read64(HE_INFO); + host_exe_->write64(HE_RD_NUM_LINES, FPGA_32KB_CACHE_LINES); + + cout << "Read number Lines:" << FPGA_32KB_CACHE_LINES << endl; + cout << "Line Repeat Count:" << he_linerep_count_ << endl; + cout << "Read address table size:" << he_info_.read_addr_table_size << endl; + cout << "Write address table size:" << he_info_.write_addr_table_size + << endl; + + // set RD_CONFIG RdShared (CXL) + he_rd_cfg_.value = 0; + he_rd_cfg_.line_repeat_count = he_linerep_count_; + he_rd_cfg_.read_traffic_enable = 1; + he_rd_cfg_.opcode = RD_LINE_I; + host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); + + // set RD_ADDR_TABLE_CTRL + rd_table_ctl_.value = 0; + rd_table_ctl_.enable_address_stride = 1; + host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); + + // Allocate DSM buffer + if (!host_exe_->allocate_dsm()) { + cerr << "alloc dsm failed" << endl; + return -1; + } + + // Allocate Read buffer + if (!host_exe_->allocate_cache_read(BUFFER_SIZE_2MB, numa_node_)) { + cerr << "allocate cache read failed" << endl; + host_exe_->free_dsm(); + return -1; + } + + cout << " create thread - moves 
read buffer to host cache " << endl; + // re-arm the stop flag: every host cache-hit test leaves it set on exit + g_stop_thread = false; + std::thread t1(he_cache_thread, host_exe_->get_read(), BUFFER_SIZE_2MB); + sleep(1); + + // start + he_ctl_.Start = 1; + host_exe_->write64(HE_CTL, he_ctl_.value); + he_ctl_.Start = 0; + host_exe_->write64(HE_CTL, he_ctl_.value); + + // wait for completion + if (!he_wait_test_completion()) { + he_perf_counters(); + host_exerciser_errors(); + g_stop_thread = true; + t1.join(); + sleep(1); + host_exe_->free_cache_read(); + host_exe_->free_dsm(); + return -1; + } + + g_stop_thread = true; + t1.join(); + + he_perf_counters(); + sleep(1); + host_exe_->free_cache_read(); + host_exe_->free_dsm(); + + cout << "********** AFU Copied host cache to FPGA Cache successfully " + "********** " << endl; + cout << "********** Host LLC cache hit test end**********" << endl; + return 0; + } + + int he_run_host_wr_cache_hit_test() { + + cout << "********** Host LLC Write cache hit test start**********" << endl; + + /* + STEPS + 1) Allocate DSM, Write buffer + 2) create thread read buffer + 3) Set ItoMWr (CXL) config + 4) Run test ( AFU write to host cache) + */ + + // HE_INFO + // Set Write number Lines + he_info_.value = host_exe_->read64(HE_INFO); + + host_exe_->write64(HE_WR_NUM_LINES, FPGA_32KB_CACHE_LINES); + cout << "Write number Lines:" << FPGA_32KB_CACHE_LINES << endl; + cout << "Line Repeat Count:" << he_linerep_count_ << endl; + cout << "Read address table size:" << he_info_.read_addr_table_size << endl; + cout << "Write address table size:" << he_info_.write_addr_table_size + << endl; + + // set WR_CONFIG + he_wr_cfg_.value = 0; + he_wr_cfg_.line_repeat_count = he_linerep_count_; + he_wr_cfg_.write_traffic_enable = 1; + he_wr_cfg_.opcode = WR_LINE_I; + host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); + + // set WR_ADDR_TABLE_CTRL + wr_table_ctl_.value = 0; + wr_table_ctl_.enable_address_stride = 1; + host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); + + // Allocate DSM buffer + if (!host_exe_->allocate_dsm()) { + cerr 
<< "alloc dsm failed" << endl; + return -1; + } + + // Allocate Write buffer + if (!host_exe_->allocate_cache_write(BUFFER_SIZE_2MB, numa_node_)) { + cerr << "allocate cache write failed" << endl; + host_exe_->free_dsm(); + return -1; + } + + cout << " create thread - moves read buffer to host cache " << endl; + // re-arm the stop flag: every host cache-hit test leaves it set on exit, + // which would make the warming thread return before touching the buffer + g_stop_thread = false; + std::thread t1(he_cache_thread, host_exe_->get_write(), BUFFER_SIZE_2MB); + sleep(1); + + // start + he_ctl_.Start = 1; + host_exe_->write64(HE_CTL, he_ctl_.value); + he_ctl_.Start = 0; + host_exe_->write64(HE_CTL, he_ctl_.value); + + // wait for completion + if (!he_wait_test_completion()) { + he_perf_counters(); + host_exerciser_errors(); + g_stop_thread = true; + t1.join(); + sleep(1); + host_exe_->free_cache_write(); + host_exe_->free_dsm(); + return -1; + } + + g_stop_thread = true; + t1.join(); + he_perf_counters(); + cout << "********** AFU write host cache successfully ********** " << endl; + + sleep(1); + host_exe_->free_cache_write(); + host_exe_->free_dsm(); + + cout << "********** Host LLC cache hit Write test end**********" << endl; + return 0; + } + + int he_run_host_rd_cache_miss_test() { + cout << "********** Host LLC Read cache miss test start**********" << endl; + + /* + STEPS + 1) Allocate DSM, Read buffer + 2) flush host read buffer cache + 3) Set RdLine_I (CXL) config + 4) Run test ( AFU reads from host cache to FPGA cache) + */ + + // HE_INFO + // Set Read number Lines + he_info_.value = host_exe_->read64(HE_INFO); + host_exe_->write64(HE_RD_NUM_LINES, FPGA_32KB_CACHE_LINES - 1); + cout << "Read/write number Lines:" << FPGA_32KB_CACHE_LINES - 1 << endl; + cout << "Line Repeat Count:" << he_linerep_count_ << endl; + cout << "Read address table size:" << he_info_.read_addr_table_size << endl; + cout << "Write address table size:" << he_info_.write_addr_table_size + << endl; + + // set RD_CONFIG + he_rd_cfg_.value = 0; + he_rd_cfg_.line_repeat_count = he_linerep_count_; + he_rd_cfg_.read_traffic_enable = 1; + he_rd_cfg_.opcode = RD_LINE_I; 
+ host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); + + // set RD_ADDR_TABLE_CTR + rd_table_ctl_.value = 0; + rd_table_ctl_.enable_address_stride = 1; + host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); + + // Allocate DSM buffer + if (!host_exe_->allocate_dsm()) { + cerr << "alloc dsm failed" << endl; + return -1; + } + + // Allocate Read buffer + if (!host_exe_->allocate_cache_read(BUFFER_SIZE_2MB, numa_node_)) { + cerr << "allocate cache read failed" << endl; + host_exe_->free_dsm(); + return -1; + } + + // start + he_ctl_.Start = 1; + host_exe_->write64(HE_CTL, he_ctl_.value); + he_ctl_.Start = 0; + host_exe_->write64(HE_CTL, he_ctl_.value); + + // wait for completion + if (!he_wait_test_completion()) { + he_perf_counters(); + host_exerciser_errors(); + host_exe_->free_cache_read(); + host_exe_->free_dsm(); + return -1; + } + + he_perf_counters(); + host_exe_->free_cache_read(); + host_exe_->free_dsm(); + + cout << "********** Ran Host LLC Read cache miss successfully ********** " + << endl; + + cout << "********** Host LLC Read cache miss test end**********" << endl; + return 0; + } + + int he_run_host_wr_cache_miss_test() { + + cout << "********** Host LLC Write cache miss test start**********" << endl; + + /* + STEPS + 1) Allocate DSM, write buffer + 2) flush host write buffer cachde + 3) Set RdLine_I (CXL) config + 4) Run test ( AFU reads from host cache to FPGA cache) + */ + + // HE_INFO + // Set write number Lines + he_info_.value = host_exe_->read64(HE_INFO); + host_exe_->write64(HE_WR_NUM_LINES, 1); + cout << "Write number Lines:" << 1 << endl; + cout << "Line Repeat Count:" << he_linerep_count_ << endl; + cout << "Read address table size:" << he_info_.read_addr_table_size << endl; + cout << "Write address table size:" << he_info_.write_addr_table_size + << endl; + + // set RD_CONFIG + he_wr_cfg_.value = 0; + he_wr_cfg_.line_repeat_count = he_linerep_count_; + he_wr_cfg_.write_traffic_enable = 1; + he_wr_cfg_.opcode = WR_PUSH_I; + 
host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); + + // set WR_ADDR_TABLE_CTRL + wr_table_ctl_.value = 0; + wr_table_ctl_.enable_address_stride = 1; + host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); + + // Allocate DSM buffer + if (!host_exe_->allocate_dsm()) { + cerr << "alloc dsm failed" << endl; + return -1; + } + + // Allocate write buffer + if (!host_exe_->allocate_cache_write(BUFFER_SIZE_2MB, numa_node_)) { + cerr << "allocate cache write failed" << endl; + host_exe_->free_dsm(); + return -1; + } + + // start + he_ctl_.Start = 1; + host_exe_->write64(HE_CTL, he_ctl_.value); + he_ctl_.Start = 0; + host_exe_->write64(HE_CTL, he_ctl_.value); + + // wait for completion + if (!he_wait_test_completion()) { + he_perf_counters(); + host_exerciser_errors(); + host_exe_->free_cache_write(); + host_exe_->free_dsm(); + return -1; + } + + he_perf_counters(); + host_exe_->free_cache_write(); + host_exe_->free_dsm(); + + cout << "********** Ran Host LLC Write cache miss successfully ********** " + << endl; + + cout << "********** Host LLC Write cache miss test end**********" << endl; + return 0; + } + + virtual int run(test_afu *afu, CLI::App *app) { + (void)app; + int ret = 0; + + host_exe_ = dynamic_cast<decltype(host_exe_)>(afu); + + if (!verify_numa_node()) { + numa_node_ = 0; + cout << "numa nodes are available set numa node to 0" << endl; + }; + + // reset HE cache + he_ctl_.value = 0; + he_ctl_.ResetL = 0; + host_exe_->write64(HE_CTL, he_ctl_.value); + + he_ctl_.value = 0; + he_ctl_.ResetL = 1; + host_exe_->write64(HE_CTL, he_ctl_.value); + + if (he_test_all_ == true) { + int retvalue = 0; + ret = he_run_fpga_rd_cache_hit_test(); + if (ret != 0) { + retvalue = ret; + } + ret = he_run_fpga_wr_cache_hit_test(); + if (ret != 0) { + retvalue = ret; + } + + ret = he_run_fpga_rd_cache_miss_test(); + if (ret != 0) { + retvalue = ret; + } + ret = he_run_fpga_wr_cache_miss_test(); + if (ret != 0) { + retvalue = ret; + } + ret = he_run_host_rd_cache_hit_test(); + if (ret != 0) { + 
retvalue = ret; + } + ret = he_run_host_wr_cache_hit_test(); + if (ret != 0) { + retvalue = ret; + } + + ret = he_run_host_rd_cache_miss_test(); + if (ret != 0) { + retvalue = ret; + } + ret = he_run_host_wr_cache_miss_test(); + if (ret != 0) { + retvalue = ret; + } + + return retvalue; + } + + if (he_test_ == HE_FPGA_RD_CACHE_HIT) { + ret = he_run_fpga_rd_cache_hit_test(); + return ret; + } + + if (he_test_ == HE_FPGA_WR_CACHE_HIT) { + ret = he_run_fpga_wr_cache_hit_test(); + return ret; + } + + if (he_test_ == HE_FPGA_RD_CACHE_MISS) { + ret = he_run_fpga_rd_cache_miss_test(); + return ret; + } + + if (he_test_ == HE_FPGA_WR_CACHE_MISS) { + ret = he_run_fpga_wr_cache_miss_test(); + return ret; + } + + if (he_test_ == HE_HOST_RD_CACHE_HIT) { + ret = he_run_host_rd_cache_hit_test(); + return ret; + } + + if (he_test_ == HE_HOST_WR_CACHE_HIT) { + ret = he_run_host_wr_cache_hit_test(); + return ret; + } + + if (he_test_ == HE_HOST_RD_CACHE_MISS) { + ret = he_run_host_rd_cache_miss_test(); + return ret; + } + + if (he_test_ == HE_HOST_WR_CACHE_MISS) { + ret = he_run_host_wr_cache_miss_test(); + return ret; + } + + return 0; + } + +protected: + bool he_continuousmode_; + uint32_t he_contmodetime_; + uint32_t he_linerep_count_; + uint32_t he_stide_; + uint32_t he_test_; + bool he_test_all_; +}; + +void he_cache_thread(uint8_t *buf_ptr, uint64_t len) { + + uint64_t value; + UNUSED_PARAM(value); + uint64_t cache_lines = len / CL; + uint64_t i = 0; + + if (buf_ptr == NULL || len == 0) { + return; + } + + while (true) { + if (g_stop_thread == true) { + return; + } + if (i < cache_lines) { + value = *((volatile uint64_t *)(buf_ptr + i * 8)); + } + i++; + if (i >= cache_lines) { + i = 0; + } + } + + return; +} + +} // end of namespace host_exerciser diff --git a/samples/cxl_host_exerciser/cxl_he_cache_lpbk_cmd.h b/samples/cxl_host_exerciser/cxl_he_cache_lpbk_cmd.h new file mode 100644 index 000000000000..82f2dcba91a0 --- /dev/null +++ 
b/samples/cxl_host_exerciser/cxl_he_cache_lpbk_cmd.h @@ -0,0 +1,81 @@ +// Copyright(c) 2023, Intel Corporation +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Intel Corporation nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+#pragma once
+
+#include "cxl_he_cmd.h"
+#include "cxl_host_exerciser.h"
+#include "he_cache_test.h"
+
+namespace host_exerciser {
+
+// "lpbk" sub-command of the CXL host exerciser.
+//
+// At present run() only selects the NUMA node and resets the HE cache
+// block; no traffic is started.
+class he_cache_lpbk_cmd : public he_cmd {
+public:
+  he_cache_lpbk_cmd() {}
+  virtual ~he_cache_lpbk_cmd() {}
+
+  virtual const char *name() const override { return "lpbk"; }
+
+  virtual const char *description() const override {
+    return "run simple cxl he lpbk test";
+  }
+
+  virtual const char *afu_id() const override { return HE_CACHE_AFU_ID; }
+
+  virtual uint64_t featureid() const override { return MEM_TG_FEATURE_ID; }
+
+  virtual uint64_t guidl() const override { return MEM_TG_FEATURE_GUIDL; }
+
+  virtual uint64_t guidh() const override { return MEM_TG_FEATURE_GUIDH; }
+
+  // Registers the --target option ("host" or "fpga", default "host"),
+  // stored in he_target_.
+  virtual void add_options(CLI::App *app) override {
+    // target host or fpga
+    app->add_option("--target", he_target_,
+                    "host exerciser run on host or fpga")
+        ->transform(CLI::CheckedTransformer(he_targets))
+        ->default_val("host");
+  }
+
+  // Resets the HE cache block: ResetL is written as 0 and then as 1.
+  // Returns 0 on success, -1 when afu cannot be used as a host_exerciser.
+  virtual int run(test_afu *afu, CLI::App *app) override {
+    (void)app;
+    cout << "HE LPBK run" << endl;
+
+    host_exe_ = dynamic_cast(afu);
+    if (host_exe_ == NULL) {
+      cerr << "he lpbk: afu object is not a host exerciser" << endl;
+      return -1;
+    }
+
+    // verify_numa_node() only fails when the NUMA API cannot be used, so
+    // fall back to node 0 in that case.
+    if (!verify_numa_node()) {
+      numa_node_ = 0;
+      cout << "numa nodes are not available, set numa node to 0" << endl;
+    }
+
+    // reset HE cache
+    he_ctl_.value = 0;
+    he_ctl_.ResetL = 0;
+    host_exe_->write64(HE_CTL, he_ctl_.value);
+
+    he_ctl_.value = 0;
+    he_ctl_.ResetL = 1;
+    host_exe_->write64(HE_CTL, he_ctl_.value);
+    return 0;
+  }
+};
+} // end of namespace host_exerciser
diff --git a/samples/cxl_host_exerciser/cxl_he_cmd.h b/samples/cxl_host_exerciser/cxl_he_cmd.h
new file mode 100644
index 000000000000..a5efe4b9f641
--- /dev/null
+++ b/samples/cxl_host_exerciser/cxl_he_cmd.h
@@ -0,0 +1,193 @@
+// Copyright(c) 2023, Intel Corporation
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the
following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Intel Corporation nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+#pragma once
+#include
+#include
+#include
+
+#include "cxl_he_cmd.h"
+#include "cxl_host_exerciser.h"
+#include "he_cache_test.h"
+
+namespace host_exerciser {
+
+// Common base of the CXL host exerciser sub-commands.
+//
+// Holds the host_exerciser the command runs against, shadow copies of the
+// HE CSRs, and helpers to wait for a test, report its DSM status and dump
+// the error registers.
+class he_cmd : public test_command {
+public:
+  he_cmd() : host_exe_(NULL), he_clock_mhz_(400), numa_node_(0), he_target_(0) {
+
+    he_ctl_.value = 0;
+    he_info_.value = 0;
+    he_rd_cfg_.value = 0;
+    he_wr_cfg_.value = 0;
+    rd_table_ctl_.value = 0;
+    wr_table_ctl_.value = 0;
+  }
+
+  virtual ~he_cmd() {}
+
+  // Convert number of transactions to bandwidth (GB/s).
+  // Each line is 64 bytes and a tick lasts 1000 / he_clock_mhz_ ns.
+  // Returns 0.0 when no time elapsed (or the clock is 0) instead of
+  // dividing by zero.
+  double he_num_xfers_to_bw(uint64_t num_lines, uint64_t num_ticks) {
+    if (num_ticks == 0 || he_clock_mhz_ == 0)
+      return 0.0;
+    return (double)(num_lines * 64) / ((1000.0 / he_clock_mhz_ * num_ticks));
+  }
+
+  // Prints every field of the DSM status block and, when num_ticks is
+  // non-zero, logs the resulting bandwidth. Does nothing without a host
+  // exerciser or a DSM buffer.
+  void he_perf_counters() {
+    volatile he_cache_dsm_status *dsm_status = NULL;
+
+    if (host_exe_ == NULL)
+      return;
+
+    dsm_status = reinterpret_cast(
+        (uint8_t *)(host_exe_->get_dsm()));
+    if (!dsm_status)
+      return;
+
+    cout << "\n********* DSM Status CSR Start *********" << endl;
+    cout << "test completed :" << dsm_status->test_completed << endl;
+    cout << "dsm number:" << dsm_status->dsm_number << endl;
+    cout << "error vector:" << dsm_status->err_vector << endl;
+    cout << "num ticks:" << dsm_status->num_ticks << endl;
+    cout << "num reads:" << dsm_status->num_reads << endl;
+    cout << "num writes:" << dsm_status->num_writes << endl;
+    cout << "penalty start:" << dsm_status->penalty_start << endl;
+    cout << "penalty end:" << dsm_status->penalty_end << endl;
+    cout << "actual data:" << dsm_status->actual_data << endl;
+    cout << "expected data:" << dsm_status->expected_data << endl;
+
+    // print bandwidth
+    if (dsm_status->num_ticks > 0) {
+      double perf_data =
+          he_num_xfers_to_bw(dsm_status->num_reads + dsm_status->num_writes,
+                             dsm_status->num_ticks);
+      host_exe_->logger_->info("Bandwidth: {0:0.3f} GB/s", perf_data);
+    }
+
+    cout << "********* DSM Status CSR end *********" << endl;
+  }
+
+  // Dumps the HE error registers: the error status, the expected data and
+  // the eight actual-data registers.
+  void host_exerciser_errors() {
+    // HE_ERROR_ACT_DATA0..7, in register order
+    static const uint32_t act_data_regs[] = {
+        HE_ERROR_ACT_DATA0, HE_ERROR_ACT_DATA1, HE_ERROR_ACT_DATA2,
+        HE_ERROR_ACT_DATA3, HE_ERROR_ACT_DATA4, HE_ERROR_ACT_DATA5,
+        HE_ERROR_ACT_DATA6, HE_ERROR_ACT_DATA7};
+    he_err_status err_status;
+    uint64_t err = 0;
+    if (host_exe_ == NULL)
+      return;
+
+    err_status.value = host_exe_->read64(HE_ERROR_STATUS);
+    if (err_status.data_error == 1) {
+      cout << "Data Integrity Check error occurred" << endl;
+    }
+
+    if (err_status.err_index > 0) {
+      cout << "Error occurred at cache line address:" << err_status.err_index
+           << endl;
+    }
+
+    err = host_exe_->read64(HE_ERROR_EXP_DATA);
+    cout << "Error Expected Data:" << err << endl;
+
+    // These registers hold the data that was actually observed, so label
+    // them "Actual" rather than "Expected".
+    for (size_t i = 0; i < sizeof(act_data_regs) / sizeof(act_data_regs[0]);
+         i++) {
+      err = host_exe_->read64(act_data_regs[i]);
+      cout << "Error Actual Data" << i << ":" << err << endl;
+    }
+  }
+
+  // Returns 0 when a host exerciser has been set, -1 otherwise.
+  int parse_input_options() {
+
+    if (!host_exe_)
+      return -1;
+
+    return 0;
+  }
+
+  // Polls bit 0 of the first DSM byte until it is set.
+  // Sleeps HELPBK_TEST_SLEEP_INVL microseconds between polls and gives up
+  // after HELPBK_TEST_TIMEOUT polls.
+  // Returns true when the bit was seen set, false on time out or when no
+  // DSM buffer is available.
+  bool he_wait_test_completion() {
+    /* Wait for test completion */
+    uint32_t timeout = HELPBK_TEST_TIMEOUT;
+
+    if (host_exe_ == NULL) {
+      cerr << "HE Cache: host exerciser is not set" << endl;
+      return false;
+    }
+
+    cout << "Test started ......" << endl;
+    volatile uint8_t *status_ptr = host_exe_->get_dsm();
+    if (status_ptr == NULL) {
+      cerr << "HE Cache: DSM buffer is not allocated" << endl;
+      return false;
+    }
+
+    while (0 == ((*status_ptr) & 0x1)) {
+      usleep(HELPBK_TEST_SLEEP_INVL);
+      if (--timeout == 0) {
+        cout << "HE Cache time out error" << endl;
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // Selects numa_node_ for the test buffers.
+  // For HE_TARGET_HOST this is the node of the CPU the caller runs on; for
+  // any other target the node is currently fixed to 2.
+  // Returns false (numa_node_ untouched) when the NUMA API is unavailable
+  // or the node of the current CPU cannot be determined.
+  bool verify_numa_node() {
+
+    if (numa_available() < 0) {
+      cerr << "System does not support NUMA API" << endl;
+      return false;
+    }
+
+    int n = numa_max_node();
+    cout << "There are " << n + 1 << " nodes on your system" << endl;
+
+    int numa_node = numa_node_of_cpu(sched_getcpu());
+    if (numa_node < 0) {
+      // a negative value must not be stored into the unsigned numa_node_
+      cerr << "Failed to find numa node of the current cpu" << endl;
+      return false;
+    }
+    cout << "HE Cache app numa node:" << numa_node << endl;
+
+    if (he_target_ == HE_TARGET_HOST) {
+      numa_node_ = numa_node;
+      cout << "HE_TARGET_HOST numa node:" << numa_node_ << endl;
+    } else {
+      // find fpga numa node number
+      // NOTE(review): hard-coded; confirm this matches the platform.
+      numa_node_ = 2;
+      cout << "HE_TARGET_FPGA numa node:" << numa_node_ << endl;
+    }
+
+    return true;
+  }
+
+protected:
+  host_exerciser *host_exe_;
+  uint32_t he_clock_mhz_;
+  uint32_t numa_node_;
+  uint32_t he_target_;
+
+  he_ctl he_ctl_;
+  he_info he_info_;
+  he_rd_config he_rd_cfg_;
+  he_wr_config he_wr_cfg_;
+  he_rd_addr_table_ctrl rd_table_ctl_;
+  he_wr_addr_table_ctrl wr_table_ctl_;
+};
+} // end of namespace host_exerciser
diff --git a/samples/cxl_host_exerciser/cxl_host_exerciser.cpp b/samples/cxl_host_exerciser/cxl_host_exerciser.cpp
new file mode 100644
index 000000000000..8fe4eecfad02
--- /dev/null
+++ b/samples/cxl_host_exerciser/cxl_host_exerciser.cpp
@@ -0,0 +1,50 @@
+// Copyright(c) 2023, Intel Corporation
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Intel Corporation nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+#include
+#include
+#include
+
+#include "cxl_he_cache_cmd.h"
+#include "cxl_he_cache_lpbk_cmd.h"
+#include "cxl_host_exerciser.h"
+
+void he_sig_handler(int);
+
+// Program entry point: registers the sub-commands, installs the SIGINT
+// handler and hands control to host_exerciser::main().
+int main(int argc, char *argv[]) {
+
+  host_exerciser::host_exerciser app;
+  app.register_command();
+  app.register_command();
+
+  // host exerciser signal handler
+  struct sigaction act_new;
+  memset(&act_new, 0, sizeof(act_new));
+
+  act_new.sa_handler = he_sig_handler;
+  // Not fatal: the tests can still run, only Ctrl-C handling is affected.
+  if (sigaction(SIGINT, &act_new, NULL) != 0) {
+    cerr << "Failed to install SIGINT handler:" << strerror(errno) << endl;
+  }
+
+  return app.main(argc, argv);
+}
diff --git a/samples/cxl_host_exerciser/cxl_host_exerciser.h b/samples/cxl_host_exerciser/cxl_host_exerciser.h
new file mode 100644
index 000000000000..917e59f798a3
--- /dev/null
+++ b/samples/cxl_host_exerciser/cxl_host_exerciser.h
@@ -0,0 +1,412 @@
+// Copyright(c) 2023, Intel Corporation
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of Intel Corporation nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+#pragma once
+
+#include "he_cache_test.h"
+
+// Feature id and GUID halves reported by the commands (featureid(), guidl(),
+// guidh()); HE_CACHE_AFU_ID is the same GUID in string form.
+// NOTE(review): HE_CACHE_AFU_ID is a non-inline definition in a header; this
+// only links cleanly while a single translation unit includes this file.
+#define MEM_TG_FEATURE_ID 0x25
+#define MEM_TG_FEATURE_GUIDL 0x81599b5c2ebd4b23
+#define MEM_TG_FEATURE_GUIDH 0x0118e06b1fa349b9
+const char *HE_CACHE_AFU_ID = "0118E06B-1FA3-49B9-8159-9b5C2EBD4b23";
+
+namespace host_exerciser {
+
+// Completion polling: number of polls and sleep between polls (microseconds,
+// the value is passed to usleep()).
+static const uint64_t HELPBK_TEST_TIMEOUT = 30000;
+static const uint64_t HELPBK_TEST_SLEEP_INVL = 100;
+// Cache line size in bytes
+static const uint64_t CL = 64;
+static const uint64_t KB = 1024;
+static const uint64_t MB = KB * 1024;
+static const uint64_t BUFFER_SIZE_2MB = 2 * 1024 * 1024;
+// Number of 64-byte cache lines in 32 KiB and in 2 MiB
+static const uint64_t FPGA_32KB_CACHE_LINES = (32 * 1024) / 64;
+static const uint64_t FPGA_2MB_CACHE_LINES = (2 * 1024 * 1024) / 64;
+
+// Host exerciser CSR offsets
+enum {
+  HE_DFH = 0x0000,
+  HE_ID_L = 0x0008,
+  HE_ID_H = 0x0010,
+  HE_DFH_RSVD0 = 0x0018,
+  HE_DFH_RSVD1 = 0x0020,
+  HE_SCRATCHPAD0 = 0x028,
+  HE_DSM_BASE = 0x030,
+  HE_CTL = 0x038,
+  HE_INFO = 0x040,
+  HE_WR_NUM_LINES = 0x048,
+  HE_WR_BYTE_ENABLE = 0x050,
+  HE_WR_CONFIG = 0x058,
+  HE_WR_ADDR_TABLE_CTRL = 0x060,
+  HE_WR_ADDR_TABLE_DATA = 0x068,
+  HE_RD_NUM_LINES = 0x070,
+  HE_RD_CONFIG = 0x078,
+  HE_RD_ADDR_TABLE_CTRL = 0x080,
+  HE_RD_ADDR_TABLE_DATA = 0x088,
+  HE_ERROR_STATUS = 0x090,
+  HE_ERROR_EXP_DATA = 0x098,
+  HE_ERROR_ACT_DATA0 = 0x0A0,
+  HE_ERROR_ACT_DATA1 = 0x0A8,
+  HE_ERROR_ACT_DATA2 = 0x0B0,
+  HE_ERROR_ACT_DATA3 = 0x0B8,
+  HE_ERROR_ACT_DATA4 = 0x0C0,
+  HE_ERROR_ACT_DATA5 = 0x0C8,
+  HE_ERROR_ACT_DATA6 = 0x0D0,
+  HE_ERROR_ACT_DATA7 = 0x0D8,
+};
+
+// Read Traffic Opcode
+typedef enum {
+  RD_LINE_I = 0x0,
+  RD_LINE_S = 0x1,
+  RD_LINE_EM = 0x2,
+} he_rd_opcode;
+
+// Write Traffic Opcode
+typedef enum {
+  WR_LINE_I = 0x0,
+  WR_LINE_M = 0x1,
+  WR_PUSH_I = 0x2,
+  WR_BARRIER_FRNCE = 0x3,
+  WR_FLUSH_CL = 0x4,
+  WR_FLUSH_CL_HCOH = 0x5,
+  WR_FLUSH_CL_DCOH = 0x6,
+} he_wr_opcode;
+
+// Each union below overlays the raw 64-bit register value (value) with its
+// bit fields; the nested enum records the register's CSR offset.
+
+// DFH Header
+union he_dfh {
+  enum { offset = HE_DFH };
+  uint64_t value;
+  struct {
+    uint64_t CcipVersionNumber : 12;
+    uint64_t AfuMajVersion : 4;
+    uint64_t NextDfhOffset : 24;
+    uint64_t EOL : 1;
+    uint64_t Reserved : 19;
+    uint64_t FeatureType : 4;
+  };
+};
+
+// DSM BASE
+union he_dsm_base {
+  enum { offset = HE_DSM_BASE };
+  uint64_t value;
+  struct {
+    uint64_t DsmBase : 64;
+  };
+};
+
+// CSR CTL
+union he_ctl {
+  enum { offset = HE_CTL };
+  uint64_t value;
+  struct {
+    uint64_t ResetL : 1;
+    uint64_t Start : 1;
+    uint64_t ForcedTestCmpl : 1;
+    uint64_t bias_support : 1;
+    uint64_t Reserved : 60;
+  };
+};
+
+// CSR INFO
+union he_info {
+  enum { offset = HE_INFO };
+  uint64_t value;
+  struct {
+    uint64_t write_addr_table_size : 4;
+    uint64_t read_addr_table_size : 4;
+    uint64_t Reserved : 56;
+  };
+};
+
+// HE_WR_NUM_LINES
+union he_wr_num_lines {
+  enum { offset = HE_WR_NUM_LINES };
+  uint64_t value;
+  struct {
+    uint64_t write_num_lines : 16;
+    uint64_t reserved : 48;
+  };
+};
+
+// HE_WR_BYTE_ENABLE
+union he_wr_byte_enable {
+  enum { offset = HE_WR_BYTE_ENABLE };
+  uint64_t value;
+  struct {
+    uint64_t write_byte_enable : 64;
+  };
+};
+
+// HE_WR_CONFIG
+union he_wr_config {
+  enum { offset = HE_WR_CONFIG };
+  uint64_t value;
+  struct {
+    uint64_t write_traffic_enable : 1;
+    uint64_t continuous_mode_enable : 1;
+    uint64_t waitfor_completion : 1;
+    uint64_t preread_sync_enable : 1;
+    uint64_t postread_sync_enable : 1;
+    uint64_t data_pattern : 2;
+    uint64_t cl_evict_enable : 1;
+    uint64_t opcode : 4;
+    uint64_t line_repeat_count : 8;
+    uint64_t reserved : 44;
+  };
+};
+
+// HE_WR_ADDR_TABLE_CTRL
+union he_wr_addr_table_ctrl {
+  enum { offset = HE_WR_ADDR_TABLE_CTRL };
+  uint64_t value;
+  struct {
+    uint64_t enable_address_table : 1;
+    uint64_t enable_address_stride : 1;
+    uint64_t stride : 2;
+    uint64_t reserved : 60;
+  };
+};
+
+// HE_WR_ADDR_TABLE_DATA
+union he_wr_addr_table_data {
+  enum { offset = HE_WR_ADDR_TABLE_DATA };
+  uint64_t value;
+  struct {
+    uint64_t address_table_value : 64;
+  };
+};
+
+// HE_RD_NUM_LINES
+union he_rd_num_lines {
+  enum { offset = HE_RD_NUM_LINES };
+  uint64_t value;
+  struct {
+    uint64_t read_num_lines : 16;
+    uint64_t reserved : 48;
+  };
+};
+
+// HE_RD_CONFIG
+union he_rd_config {
+  enum { offset = HE_RD_CONFIG };
+  uint64_t value;
+  struct {
+    uint64_t read_traffic_enable : 1;
+    uint64_t continuous_mode_Enable : 1;
+    uint64_t waitfor_completion : 1;
+    uint64_t prewrite_sync_enable : 1;
+    uint64_t postwrite_sync_enable : 1;
+    uint64_t data_pattern : 2;
+    uint64_t cl_evict_enable : 1;
+    uint64_t opcode : 4;
+    uint64_t line_repeat_count : 8;
+    uint64_t reserved : 44;
+  };
+};
+
+// HE_RD_ADDR_TABLE_CTRL
+union he_rd_addr_table_ctrl {
+  enum { offset = HE_RD_ADDR_TABLE_CTRL };
+  uint64_t value;
+  struct {
+    uint64_t enable_address_table : 1;
+    uint64_t enable_address_stride : 1;
+    uint64_t stride : 2;
+    uint64_t reserved : 60;
+  };
+};
+
+// HE_RD_ADDR_TABLE_DATA
+union he_rd_addr_table_data {
+  enum { offset = HE_RD_ADDR_TABLE_DATA };
+  uint64_t value;
+  struct {
+    uint64_t address_table_value : 64;
+  };
+};
+
+// ERROR_STATUS
+union he_err_status {
+  enum { offset = HE_ERROR_STATUS };
+  uint64_t value;
+  struct {
+    uint64_t data_error : 1;
+    uint64_t rsvd1 : 15;
+    uint64_t err_index : 16;
+    uint64_t rsvd2 : 32;
+  };
+};
+
+// HE DSM status: layout of the status block read back through get_dsm().
+// test_completed is bit 0 of the first byte, which is the bit
+// he_wait_test_completion() polls.
+struct he_cache_dsm_status {
+  uint32_t test_completed : 1;
+  uint32_t dsm_number : 15;
+  uint32_t res1 : 16;
+  uint32_t err_vector : 32;
+  uint64_t num_ticks : 64;
+  uint32_t num_reads : 32;
+  uint32_t num_writes : 32;
+  uint32_t penalty_start : 32;
+  uint32_t penalty_end : 32;
+  uint32_t actual_data : 32;
+  uint32_t expected_data : 32;
+  uint32_t res5[2];
+};
+
+// Test modes. The values are contiguous, 0x0 .. 0x7.
+typedef enum {
+  HE_FPGA_RD_CACHE_HIT = 0x0,
+  HE_FPGA_WR_CACHE_HIT = 0x1,
+
+  HE_FPGA_RD_CACHE_MISS = 0x2,
+  HE_FPGA_WR_CACHE_MISS = 0x3,
+
+  HE_HOST_RD_CACHE_HIT = 0x4,
+  HE_HOST_WR_CACHE_HIT = 0x5,
+
+  HE_HOST_RD_CACHE_MISS = 0x6,
+  HE_HOST_WR_CACHE_MISS = 0x7,
+
+} he_test_mode;
+
+// Test target: host or FPGA
+typedef enum {
+  HE_TARGET_HOST = 0x0,
+  HE_TARGET_FPGA = 0x1,
+} he_target;
+
+// Command line name -> he_test_mode value
+const std::map he_test_modes = {
+    {"fpgardcachehit", HE_FPGA_RD_CACHE_HIT},
+    {"fpgawrcachehit", HE_FPGA_WR_CACHE_HIT},
+    {"fpgardcachemiss", HE_FPGA_RD_CACHE_MISS},
+    {"fpgawrcachemiss", HE_FPGA_WR_CACHE_MISS},
+    {"hostrdcachehit", HE_HOST_RD_CACHE_HIT},
+    {"hostwrcachehit", HE_HOST_WR_CACHE_HIT},
+    {"hostrdcachemiss", HE_HOST_RD_CACHE_MISS},
+    {"hostwrcachemiss", HE_HOST_WR_CACHE_MISS},
+};
+
+// Command line name -> he_target value (used by the --target option)
+const std::map he_targets = {
+    {"host", HE_TARGET_HOST},
+    {"fpga", HE_TARGET_FPGA},
+};
+
+///////////////////////
+// Bias Support
+// NOTE(review): the enumerator names look like misspellings of
+// HOST_BIAS / DEVICE_BIAS; they are kept because they are public names.
+typedef enum {
+  HOST_BIOS = 0x0,
+  DEVIC_BIOA = 0x1,
+} he_ctl_bios_support;
+
+// Address table size encodings; addrtable_size below maps each encoding to
+// its number of entries.
+typedef enum {
+  HE_ADDRTABLE_SIZE4096 = 0xC,
+  HE_ADDRTABLE_SIZE2048 = 0xB,
+  HE_ADDRTABLE_SIZE1024 = 0xA,
+  HE_ADDRTABLE_SIZE512 = 0x9,
+  HE_ADDRTABLE_SIZE256 = 0x8,
+  HE_ADDRTABLE_SIZE128 = 0x7,
+  HE_ADDRTABLE_SIZE64 = 0x6,
+  HE_ADDRTABLE_SIZE32 = 0x5,
+  HE_ADDRTABLE_SIZE16 = 0x4,
+  HE_ADDRTABLE_SIZE8 = 0x3,
+  HE_ADDRTABLE_SIZE4 = 0x2,
+  HE_ADDRTABLE_SIZE2 = 0x1,
+
+} he_addrtable_size;
+
+// Traffic stage selection: enable the traffic stage or skip it
+typedef enum {
+  HE_ENABLE_TRAFFIC_STAGE = 0x0,
+  HE_SIP_SEQ_STAGE = 0x1,
+} he_traffic_enable;
+
+// Command line name -> he_traffic_enable value
+const std::map traffic_enable = {
+    {"enable", HE_ENABLE_TRAFFIC_STAGE},
+    {"skip", HE_SIP_SEQ_STAGE},
+
+};
+
+// he_addrtable_size encoding -> number of address table entries
+std::map addrtable_size = {
+    {HE_ADDRTABLE_SIZE4096, 4096}, {HE_ADDRTABLE_SIZE2048, 2048},
+    {HE_ADDRTABLE_SIZE1024, 1024}, {HE_ADDRTABLE_SIZE512, 512},
+    {HE_ADDRTABLE_SIZE256, 256},   {HE_ADDRTABLE_SIZE128, 128},
+    {HE_ADDRTABLE_SIZE64, 64},     {HE_ADDRTABLE_SIZE32, 32},
+    {HE_ADDRTABLE_SIZE16, 16},     {HE_ADDRTABLE_SIZE8, 8},
+    {HE_ADDRTABLE_SIZE4, 4},       {HE_ADDRTABLE_SIZE2, 2},
+
+};
+
+using test_afu = opae::afu_test::afu;
+using test_command = opae::afu_test::command;
+
+// Application object of the CXL host exerciser sample.
+// Extends the generic test afu with a run loop that repeats the selected
+// command count_ times and logs a PASS/FAIL summary.
+class host_exerciser : public test_afu {
+public:
+  host_exerciser()
+      : test_afu("host_exerciser", nullptr, "info"), count_(1) {}
+
+  // Runs test up to count_ times through test_afu::run(), stopping at the
+  // first non-zero result. An exception thrown inside the loop is logged
+  // and reported as exit_codes::exception.
+  // Returns the last result; anything other than exit_codes::success is
+  // logged as FAIL.
+  virtual int run(CLI::App *app, test_command::ptr_t test) override {
+    int res = exit_codes::not_run;
+
+    logger_->set_pattern(" %v");
+    // Info prints details of an individual run. Turn it on if doing only one
+    // test and the user hasn't changed level from the default.
+    // NOTE(review): the constructor passes "info" as the default level while
+    // this compares against "warning"; confirm which default is intended.
+    if ((log_level_.compare("warning") == 0))
+      logger_->set_level(spdlog::level::info);
+
+
+    logger_->info("starting test run, count of {0:d}", count_);
+    uint32_t count = 0;
+    try {
+      while (count < count_) {
+        logger_->debug("starting iteration: {0:d}", count + 1);
+
+        res = test_afu::run(app, test);
+        count++;
+        logger_->debug("end iteration: {0:d}", count);
+        if (res)
+          break;
+      }
+    } catch (std::exception &ex) {
+      logger_->error(ex.what());
+      res = exit_codes::exception;
+    }
+
+    auto pass = res == exit_codes::success ? "PASS" : "FAIL";
+    logger_->info("Test {}({}): {}", test->name(), count, pass);
+    spdlog::drop_all();
+    return res;
+  }
+
+public:
+  // Number of times run() repeats the test (set to 1 by the constructor)
+  uint32_t count_;
+
+  // Returns true when option_str was given on the command line, i.e. when
+  // app_.count(option_str) is non-zero.
+  bool option_passed(std::string option_str) {
+    if (app_.count(option_str) == 0)
+      return false;
+    return true;
+  }
+};
+} // namespace host_exerciser
diff --git a/samples/cxl_host_exerciser/he_cache_test.h b/samples/cxl_host_exerciser/he_cache_test.h
new file mode 100644
index 000000000000..900e56bf8f7c
--- /dev/null
+++ b/samples/cxl_host_exerciser/he_cache_test.h
@@ -0,0 +1,856 @@
+// Copyright(c) 2023, Intel Corporation
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of Intel Corporation nor the names of its contributors
+// may be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include "fpga-dfl.h" + +using namespace std; + +const char *sbdf_pattern = + "(([0-9a-fA-F]{4}):)?([0-9a-fA-F]{2}):([0-9a-fA-F]{2})\\.([0-9])"; + +enum { MATCHES_SIZE = 6 }; +#define FEATURE_DEV \ + "/sys/bus/pci/devices/%s/" \ + "fpga_region/region*/dfl-fme*/dfl_dev*/feature_id" + +#define MAX_SIZE 256 + +#define PROTECTION (PROT_READ | PROT_WRITE) + +#ifndef MAP_HUGETLB +#define MAP_HUGETLB 0x40000 +#endif +#ifndef MAP_HUGE_SHIFT +#define MAP_HUGE_SHIFT 26 +#endif + +#define MAP_2M_HUGEPAGE (0x15 << MAP_HUGE_SHIFT) /* 2 ^ 0x15 = 2M */ +#define MAP_1G_HUGEPAGE (0x1e << MAP_HUGE_SHIFT) /* 2 ^ 0x1e = 1G */ + +#ifdef __ia64__ +#define ADDR ((void *)(0x8000000000000000UL)) +#define FLAGS_4K (MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED) +#define FLAGS_2M (FLAGS_4K | MAP_2M_HUGEPAGE | MAP_HUGETLB) +#define FLAGS_1G (FLAGS_4K | MAP_1G_HUGEPAGE | MAP_HUGETLB) +#else +#define ADDR ((void *)(0x0UL)) +#define FLAGS_4K (MAP_PRIVATE | MAP_ANONYMOUS) +#define FLAGS_2M (FLAGS_4K | MAP_2M_HUGEPAGE | MAP_HUGETLB) +#define FLAGS_1G (FLAGS_4K | MAP_1G_HUGEPAGE | MAP_HUGETLB) +#endif + +#define KiB(x) ((x)*1024) +#define MiB(x) ((x)*1024 * 1024) +#define GiB(x) ((x)*1024 * 1024 * 1024) + +#define DFL_CXL_CACHE_DSM_BASE 0x030 +#define 
DFL_CXL_CACHE_WR_ADDR_TABLE_DATA 0x068
+#define DFL_CXL_CACHE_RD_ADDR_TABLE_DATA 0x088
+
+
+// Maps an anonymous buffer of len bytes and binds it to numa_node.
+//
+// The mapping flags depend on len: more than 2 MiB uses FLAGS_1G, more than
+// 4 KiB uses FLAGS_2M, anything else uses FLAGS_4K.
+//
+// addr:      receives the mapped address on success
+// len:       buffer length in bytes
+// numa_node: node the buffer is bound to through mbind
+//
+// Returns true on success. On failure *addr is left untouched and nothing
+// stays mapped.
+bool buffer_allocate(void** addr, uint64_t len, uint32_t numa_node)
+{
+    void* addr_local = NULL;
+    long status = 0;
+    unsigned long mask[4] = {0};
+    const unsigned int bits_per_UL = sizeof(unsigned long) * 8;
+    const unsigned int mask_bits =
+        bits_per_UL * (sizeof(mask) / sizeof(mask[0]));
+
+    if (addr == NULL || len == 0) {
+        cerr << "buffer_allocate(): invalid argument" << endl;
+        return false;
+    }
+
+    // The node number indexes the bit mask; reject nodes that do not fit
+    // instead of writing past the end of the array.
+    if (numa_node >= mask_bits) {
+        cerr << "buffer_allocate(): numa node out of range:" << numa_node
+             << endl;
+        return false;
+    }
+    mask[numa_node / bits_per_UL] |= 1UL << (numa_node % bits_per_UL);
+
+    // Anonymous mapping: no file backs it, so pass -1 as the descriptor.
+    if (len > MiB(2))
+        addr_local = mmap(ADDR, len, PROTECTION, FLAGS_1G, -1, 0);
+    else if (len > KiB(4))
+        addr_local = mmap(ADDR, len, PROTECTION, FLAGS_2M, -1, 0);
+    else
+        addr_local = mmap(ADDR, len, PROTECTION, FLAGS_4K, -1, 0);
+
+    if (addr_local == MAP_FAILED) {
+        if (errno == ENOMEM) {
+            // exactly one message, matching the page size that was requested
+            if (len > MiB(2))
+                cerr << "Could not allocate buffer (no free 1 "
+                        "GiB huge pages)" << endl;
+            else if (len > KiB(4))
+                cerr << "Could not allocate buffer (no free 2 "
+                        "MiB huge pages)" << endl;
+            else
+                cerr << "Could not allocate buffer (out of "
+                        "memory)" << endl;
+            return false;
+        }
+        cerr << "CXL cache mmap failed:" << strerror(errno) << endl;
+        return false;
+    }
+
+    if (addr_local == NULL) {
+        cerr << "Unable to mmap" << endl;
+        return false;
+    }
+
+    // mode 2 and flags 1 are the raw mbind() values the original code
+    // passed (MPOL_BIND / MPOL_MF_STRICT in <numaif.h>).
+    status = syscall(__NR_mbind, addr_local, len, 2, mask, numa_node + 2, 1);
+    if (status != 0) {
+        cerr << "buffer_allocate(): unable to mbind:"
+             << strerror(errno) << endl;
+        // do not leak the mapping when the binding fails
+        munmap(addr_local, len);
+        return false;
+    }
+
+    *addr = addr_local;
+    return true;
+}
+
+// Unmaps a buffer obtained from buffer_allocate().
+// Returns true on success, false (after printing the reason) on failure.
+bool buffer_release(void* addr, uint64_t len)
+{
+    if (munmap(addr, len)) {
+        cerr << "CXL cache unmap failed:" << strerror(errno) << endl;
+        return false;
+    }
+    return true;
+}
+
+// Reads the first line of the file at path and parses it as a hexadecimal
+// number into *value.
+// Returns false when an argument is null, the file cannot be opened or
+// read, or the line is not a valid number; *value is then left untouched.
+bool sysfs_read_u64(const char *path, uint64_t *value) {
+  if (path == NULL || value == NULL)
+    return false;
+
+  ifstream fs(path, ios::in);
+  if (!fs.is_open())
+    return false;
+
+  std::string line;
+  if (!std::getline(fs, line))
+    return false;
+
+  try {
+    // stoull throws on empty, malformed or out-of-range input
+    *value = std::stoull(line, 0, 16);
+  } catch (const std::exception &) {
+    return false;
+  }
+  return true;
+}
+
+namespace opae {
+namespace afu_test {
+
+
+// Parses the regex sub-match m of s as an unsigned integer in the given
+// radix and stores it in v.
+// Returns false when the sub-match did not participate in the match or
+// strtoul reported an error through errno.
+template
+inline bool parse_match_int(const char *s, regmatch_t m, T &v, int radix = 10) {
+  if (m.rm_so == -1 || m.rm_eo == -1)
+    return false;
+  errno = 0;
+  v = std::strtoul(s + m.rm_so, NULL, radix);
+  return errno == 0;
+}
+
+// PCIe address (domain:bus:device.function) packed into 32 bits.
+union pcie_address {
+  struct {
+    uint32_t function : 3;
+    uint32_t device : 5;
+    uint32_t bus : 8;
+    uint32_t domain : 16;
+  } fields;
+  uint32_t value;
+
+  // Parses "[dddd:]bb:dd.f" (sbdf_pattern). The domain is optional and
+  // defaults to 0.
+  // Throws std::runtime_error when the pattern cannot be compiled, s does
+  // not match it, or a field cannot be converted.
+  static pcie_address parse(const char *s) {
+    regex_t re;
+    regmatch_t matches[MATCHES_SIZE];
+
+    // Only a successfully compiled regex may be passed to regfree().
+    int reg_res = regcomp(&re, sbdf_pattern, REG_EXTENDED | REG_ICASE);
+    if (reg_res)
+      throw std::runtime_error("could not compile regex");
+
+    reg_res = regexec(&re, s, MATCHES_SIZE, matches, 0);
+    // matches only holds offsets into s, so the compiled pattern can be
+    // released before the fields are converted.
+    regfree(&re);
+    if (reg_res)
+      throw std::runtime_error("pcie address not valid format");
+
+    uint16_t domain, bus, device, function;
+    if (!parse_match_int(s, matches[2], domain, 16))
+      domain = 0;
+    if (!parse_match_int(s, matches[3], bus, 16))
+      throw std::runtime_error("error parsing pcie address");
+    if (!parse_match_int(s, matches[4], device, 16))
+      throw std::runtime_error("error parsing pcie address");
+    if (!parse_match_int(s, matches[5], function))
+      throw std::runtime_error("error parsing; pcie address");
+    pcie_address a;
+    a.fields.domain = domain;
+    a.fields.bus = bus;
+    a.fields.device = device;
+    a.fields.function = function;
+    return a;
+  }
+};
+
+class afu; // forward declaration
+
+// Interface of a sub-command that can be registered with an afu.
+// A command supplies its name, description and feature id/GUID, may add
+// its own command line options, and implements run().
+class command {
+public:
+  typedef std::shared_ptr ptr_t;
+  command() : running_(true) {}
+  virtual ~command() {}
+  virtual const char *name() const = 0;
+  virtual const char *description() const = 0;
+  virtual int run(afu *afu, CLI::App *app) = 0;
+  virtual void add_options(CLI::App *app) { (void)app; }
+  virtual const char *afu_id() const { return nullptr; }
+
+  virtual uint64_t featureid() const = 0;
+  virtual uint64_t guidl() const = 0;
+  virtual uint64_t guidh() const = 0;
+
+  // running() is true from construction until stop() is called.
+  bool running() const { return running_; }
+  void stop() { running_ = false; }
+
+private:
+  std::atomic running_;
+};
+
+#if SPDLOG_VERSION >= 10900
+// spdlog version 1.9.0 defines SPDLOG_LEVEL_NAMES as an array of string_view_t.
+// Convert to vector of std::string to be used in CLI::IsMember().
+inline std::vector spdlog_levels() {
+  std::vector levels_view = SPDLOG_LEVEL_NAMES;
+  std::vector levels_str(levels_view.size());
+  std::transform(levels_view.begin(), levels_view.end(), levels_str.begin(),
+                 [](spdlog::string_view_t sv) {
+                   return std::string(sv.data(), sv.size());
+                 });
+  return levels_str;
+}
+#else
+inline std::vector spdlog_levels() { return SPDLOG_LEVEL_NAMES; }
+#endif // SPDLOG_VERSION
+
+// Base class of a test application.
+// Owns the command line parser and the logger, locates and opens the
+// dfl-cxl-cache device, maps its MMIO region and runs the selected command.
+class afu {
+public:
+  typedef int (*command_fn)(afu *afu, CLI::App *app);
+  enum exit_codes {
+    success = 0,
+    not_run,
+    not_found,
+    no_access,
+    exception,
+    error
+  };
+
+  // name:      application name given to the command line parser
+  // afu_id:    optional GUID; when set, a -g/--guid option is added
+  // log_level: initial log level, "info" when null
+  afu(const char *name, const char *afu_id = nullptr,
+      const char *log_level = nullptr)
+      : name_(name), afu_id_(afu_id ? afu_id : ""), app_(name_), pci_addr_(""),
+        log_level_(log_level ? log_level : "info"), timeout_msec_(60000),
+        current_command_(nullptr) {
+    if (!afu_id_.empty())
+      app_.add_option("-g,--guid", afu_id_, "GUID")->default_str(afu_id_);
+    app_.add_option("-p,--pci-address", pci_addr_,
+                    "[:]:.");
+    app_.add_option("-l,--log-level", log_level_, "stdout logging level")
+        ->default_str(log_level_)
+        ->check(CLI::IsMember(spdlog_levels()));
+    app_.add_option("-t,--timeout", timeout_msec_, "test timeout (msec)")
+        ->default_str(std::to_string(timeout_msec_));
+  }
+  virtual ~afu() {
+
+    if (fd_ > 0)
+      close(fd_);
+    if (logger_)
+      spdlog::drop(logger_->name());
+  }
+
+  CLI::App &cli() { return app_; }
+
+  // Searches sysfs for a DFL device whose feature_id equals the current
+  // command's featureid() and stores the matching /dev node in dev_path_.
+  // The search is limited to pci_addr_ when one was given.
+  //
+  // Returns 0 on success, or:
+  //   1, 2 - the sysfs pattern did not fit the path buffer
+  //   3    - no feature_id file matched the pattern
+  //   4    - no dfl-cxl-cache entry below the matching device
+  //   5    - no device with the requested feature id
+  int find_dev_feature() {
+    glob_t pglob;
+    char feature_path[MAX_SIZE] = {0};
+    int gres = 0;
+    uint64_t value = 0;
+    size_t i = 0;
+
+    // snprintf reports truncation through a return value that is greater
+    // than or equal to the buffer size, not through a negative value.
+    if (!pci_addr_.empty()) {
+      int len = snprintf(feature_path, sizeof(feature_path), FEATURE_DEV,
+                         pci_addr_.c_str());
+      if (len < 0 || (size_t)len >= sizeof(feature_path)) {
+        cerr << "snprintf buffer overflow" << endl;
+        return 1;
+      }
+    } else {
+      int len =
+          snprintf(feature_path, sizeof(feature_path), FEATURE_DEV, "*:*:*.*");
+      if (len < 0 || (size_t)len >= sizeof(feature_path)) {
+        cerr << "snprintf buffer overflow" << endl;
+        return 2;
+      }
+    }
+
+    gres = glob(feature_path, GLOB_NOSORT, NULL, &pglob);
+    if (gres) {
+      cerr << "Failed pattern match" << feature_path << ":" << strerror(errno)
+           << endl;
+      globfree(&pglob);
+      return 3;
+    }
+
+    for (i = 0; i < pglob.gl_pathc; i++) {
+      bool retval = sysfs_read_u64(pglob.gl_pathv[i], &value);
+      if (!retval) {
+        cerr << "Failed to read sysfs value" << endl;
+        continue;
+      }
+
+      if (current_command()->featureid() == value) {
+        // directory that holds the matching feature_id file
+        string str(pglob.gl_pathv[i]);
+        string substr_dev(str.substr(0, str.rfind("/")));
+        globfree(&pglob);
+
+        substr_dev.append("/dfl-cxl-cache/dfl-cxl-cache*");
+        gres = glob(substr_dev.c_str(), GLOB_NOSORT, NULL, &pglob);
+        if (gres) {
+          cerr << "Failed pattern match" << substr_dev.c_str() << ":"
+               << strerror(errno) << endl;
+          globfree(&pglob);
+          return 4;
+        }
+        string str1(pglob.gl_pathv[0]);
+        globfree(&pglob);
+
+        // Use the whole last path component as the device name; a fixed
+        // length of 16 cut off device indices with more than one digit.
+        dev_path_ = "/dev";
+        dev_path_.append(str1.substr(str1.rfind("/")));
+
+        return 0;
+      }
+    }
+
+    // no device matched: release the results of the first glob
+    globfree(&pglob);
+    return 5;
+  }
+
+  // Unmaps the MMIO region if it is mapped.
+  void unmap_mmio() {
+    if (mmio_base_) {
+      if (munmap(mmio_base_, rinfo_.size) == -1)
+        cerr << "Failed to unmap MMIO:" << strerror(errno) << endl;
+      else
+        mmio_base_ = NULL; // never unmap the same region twice
+    }
+  }
+
+  // Maps the region described by rinfo_ (size, offset) from fd_ into
+  // mmio_base_. Returns false when the mapping fails.
+  bool map_mmio() {
+    void *user_v;
+    user_v = mmap(NULL, rinfo_.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_,
+                  rinfo_.offset);
+    if (user_v == MAP_FAILED) {
+      cerr << "Failed to map MMIO:" << strerror(errno) << endl;
+      return false;
+    }
+    mmio_base_ = (uint8_t *)user_v;
+
+    return true;
+  }
+
+  // Opens dev_path_, queries the region info and maps the MMIO region.
+  //
+  // Returns exit_codes::not_run on success (main() treats any other value
+  // as a failure), or:
+  //   1 - open() failed
+  //   2 - the region info ioctl failed
+  //   3 - the MMIO region could not be mapped
+  // On failure the descriptor is closed and fd_ is reset to -1 so that the
+  // destructor does not close it a second time.
+  int open_handle() {
+
+    int res = 0;
+    logger_->debug("dev_path_:{0}", dev_path_);
+
+    fd_ = open(dev_path_.c_str(), O_RDWR);
+    if (fd_ < 0) {
+      cerr << "open() failed:" << strerror(errno) << endl;
+      return 1;
+    }
+
+    memset(&rinfo_, 0, sizeof(rinfo_));
+    rinfo_.argsz = sizeof(rinfo_);
+    res = ioctl(fd_, DFL_CXL_CACHE_GET_REGION_INFO, &rinfo_);
+    if (res) {
+      cerr << "ioctl() DFL_CXL_CACHE_GET_REGION_INFO failed:" << strerror(errno)
+           << endl;
+      close(fd_);
+      fd_ = -1;
+      return 2;
+    }
+    logger_->debug("MMIO region flags:0x:{0:x} size:0x {1:x} offset:0x {2:x}",
+                   rinfo_.flags, rinfo_.size, rinfo_.offset);
+
+    if (!map_mmio()) {
+      cerr << "mmap failed:" << strerror(errno) << endl;
+      close(fd_);
+      fd_ = -1;
+      return 3;
+    }
+
+    volatile uint64_t *u64 = (volatile uint64_t *)mmio_base_;
+    logger_->debug("DFH : 0x:{0:X}", *u64);
+    logger_->debug("DFH + 8 : 0x:{0:X}", *(u64 + 1));
+    logger_->debug("DFH + 16: 0x:{0:X}", *(u64 + 2));
+    logger_->debug("DFH + 24: 0x:{0:X}", *(u64 + 3));
+
+    return exit_codes::not_run;
+  }
+
+  // Parses the command line, selects the sub-command that was given, sets
+  // up logging, locates and opens the device and runs the command.
+  // Returns the command's result or one of the exit/error codes above.
+  int main(int argc, char *argv[]) {
+    if (!commands_.empty())
+      app_.require_subcommand();
+    CLI11_PARSE(app_, argc, argv);
+
+    command::ptr_t test(nullptr);
+    CLI::App *app = nullptr;
+    // the first registered sub-command that was parsed is the one to run
+    for (const auto &kv : commands_) {
+      if (*kv.first) {
+        app = kv.first;
+        test = kv.second;
+        break;
+      }
+    }
+    if (!test) {
+      std::cerr << "no command specified\n";
+      return exit_codes::not_run;
+    }
+
+    auto console_sink = std::make_shared();
+    logger_ = std::make_shared(test->name(), console_sink);
+    spdlog::register_logger(logger_);
+    logger_->set_level(spdlog::level::from_str(log_level_));
+    current_command_ = test;
+    if (find_dev_feature() != 0) {
+      cerr << "fails to find feature" << endl;
+      return exit_codes::exception;
+    }
+
+    int res = open_handle();
+    if (res != exit_codes::not_run) {
+      return res;
+    }
+
+    return run(app, test);
+  }
+
+  // Runs test on a separate thread and waits up to timeout_msec_ for it.
+  // Returns the command's result, or exit_codes::exception when the command
+  // threw or did not finish in time (the command is then asked to stop()).
+  //
+  // NOTE(review): a future returned by std::async blocks in its destructor
+  // until the task ends, so after a time out this still waits for the
+  // command to react to stop().
+  virtual int run(CLI::App *app, command::ptr_t test) {
+    int res = exit_codes::not_run;
+    current_command_ = test;
+
+    try {
+      std::future f = std::async(std::launch::async, [this, test, app]() {
+        return test->run(this, app);
+      });
+      auto status = f.wait_for(std::chrono::milliseconds(timeout_msec_));
+      if (status == std::future_status::timeout) {
+        std::cerr << "Error: test timed out" << std::endl;
+        current_command_->stop();
+        throw std::runtime_error("timeout");
+      }
+      res = f.get();
+    } catch (std::exception &ex) {
+      // report the reason instead of failing silently
+      std::cerr << "Error: " << ex.what() << std::endl;
+      res = exit_codes::exception;
+    }
+
+    current_command_.reset();
+    return res;
+  }
+
+  template CLI::App *register_command() {
+    command::ptr_t
cmd(new T()); + auto sub = app_.add_subcommand(cmd->name(), cmd->description()); + cmd->add_options(sub); + commands_[sub] = cmd; + return sub; + } + + uint64_t read64(uint32_t offset) { + uint64_t value = *((uint64_t *)(mmio_base_ + offset)); + return value; + } + + void write64(uint32_t offset, uint64_t value) { + *((uint64_t *)(mmio_base_ + offset)) = value; + return; + } + + uint32_t read32(uint32_t offset) { + uint32_t value = *((uint64_t *)(mmio_base_ + offset)); + return value; + } + + void write32(uint32_t offset, uint32_t value) { + *((uint32_t *)(mmio_base_ + offset)) = value; + return; + } + + command::ptr_t current_command() const { return current_command_; } + + bool allocate_dsm(size_t len = KiB(4), uint32_t numa_node = 0) { + + int res = 0; + void *ptr = NULL; + struct dfl_cxl_cache_buffer_map dma_map; + + memset(&dma_map, 0, sizeof(dma_map)); + + if (!buffer_allocate(&ptr, len, numa_node)) { + cerr << "Fails to allocate 4k huge page:" << strerror(errno) << endl; + return false; + } + + cout << "DSM buffer numa node: " << numa_node << endl; + dma_map.argsz = sizeof(dma_map); + dma_map.user_addr = (__u64)ptr; + dma_map.length = len; + dma_map.numa_node = numa_node; + dma_map.csr_array[0] = DFL_CXL_CACHE_DSM_BASE; + + logger_->debug("Allocate DSM buffer user addr 0x:{0:x} length :" + "{1:d} numa node : {2:d}", + dma_map.user_addr, dma_map.length, dma_map.numa_node); + + volatile uint64_t *u64 = + (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_DSM_BASE); + + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_BUFFER_MAP, &dma_map); + if (res) { + cerr << "ioctl DFL_CXL_CACHE_NUMA_BUFFER_MAP failed" << strerror(errno) + << endl; + goto out_free; + } + logger_->debug("DSM_BASE : 0x:{0:x}", *u64); + + dsm_buffer_ = (uint8_t *)ptr; + dsm_buf_len_ = len; + return true; + + out_free: + buffer_release(ptr, len); + return false; + } + + bool free_dsm() { + + int res = 0; + struct dfl_cxl_cache_buffer_unmap dma_unmap; + + memset(&dma_unmap, 0, sizeof(dma_unmap)); + 
dma_unmap.argsz = sizeof(dma_unmap); + dma_unmap.user_addr = (__u64)dsm_buffer_; + dma_unmap.length = dsm_buf_len_; + dma_unmap.csr_array[0] = DFL_CXL_CACHE_DSM_BASE; + + logger_->debug("free dsm user addr 0x:{0:x} length : {1:d} ", + dma_unmap.user_addr, dma_unmap.length); + + volatile uint64_t *u64 = + (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_DSM_BASE); + + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_BUFFER_UNMAP, &dma_unmap); + if (res) { + cerr << "ioctl DFL_CXL_CACHE_NUMA_BUFFER_UNMAP failed" + << strerror(errno) << endl; + } + + logger_->debug("DSM_BASE : 0x:{0:x}", *u64); + buffer_release(dsm_buffer_, dsm_buf_len_); + return true; + } + + bool allocate_cache_read(size_t len = MiB(2), uint32_t numa_node = 0) { + + int res = 0; + void *ptr = NULL; + struct dfl_cxl_cache_buffer_map dma_map; + + memset(&dma_map, 0, sizeof(dma_map)); + + if (!buffer_allocate(&ptr, len, numa_node)) { + cerr << "Fails to allocate 2MB huge page:" << strerror(errno) << endl; + return false; + } + cout << "Read buffer numa node: " << numa_node << endl; + + dma_map.argsz = sizeof(dma_map); + dma_map.user_addr = (__u64)ptr; + dma_map.length = len; + dma_map.numa_node = numa_node; + dma_map.csr_array[0] = DFL_CXL_CACHE_RD_ADDR_TABLE_DATA; + + logger_->debug("Allocate read buffer user addr 0x:{0:x} length :" + "{1:d} numa node : {2:d}", + dma_map.user_addr, dma_map.length, dma_map.numa_node); + + volatile uint64_t *u64 = + (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_RD_ADDR_TABLE_DATA); + sleep(1); + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_BUFFER_MAP, &dma_map); + if (res) { + cerr << "ioctl DFL_CXL_CACHE_NUMA_BUFFER_MAP failed" << strerror(errno) + << endl; + goto out_free; + } + + logger_->debug("DFL_CXL_CACHE_RD_ADDR_TABLE_DATA : 0x:{0:x}", *u64); + rd_buffer_ = (uint8_t *)ptr; + rd_buf_len_ = len; + return true; + + out_free: + buffer_release(ptr, len); + return false; + } + + bool free_cache_read() { + + int res = 0; + struct dfl_cxl_cache_buffer_unmap dma_unmap; + + 
memset(&dma_unmap, 0, sizeof(dma_unmap)); + dma_unmap.argsz = sizeof(dma_unmap); + dma_unmap.user_addr = (__u64)rd_buffer_; + dma_unmap.length = rd_buf_len_; + dma_unmap.csr_array[0] = DFL_CXL_CACHE_RD_ADDR_TABLE_DATA; + + logger_->debug("free read user addr 0x:{0:x} length : {1:d} ", + dma_unmap.user_addr, dma_unmap.length); + + volatile uint64_t *u64 = + (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_RD_ADDR_TABLE_DATA); + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_BUFFER_UNMAP, &dma_unmap); + if (res) { + cerr << "ioctl DFL_CXL_CACHE_NUMA_BUFFER_UNMAP failed" << strerror(errno) + << endl; + } + + logger_->debug("DFL_CXL_CACHE_RD_ADDR_TABLE_DATA : 0x:{0:x}", *u64); + buffer_release(rd_buffer_, rd_buf_len_); + return true; + } + + bool allocate_cache_write(size_t len = MiB(2), uint32_t numa_node = 0) { + + int res = 0; + void *ptr = NULL; + struct dfl_cxl_cache_buffer_map dma_map; + + memset(&dma_map, 0, sizeof(dma_map)); + if (!buffer_allocate(&ptr, len, numa_node)) { + cerr << "Fails to allocate 2MB huge page:" << strerror(errno) << endl; + return false; + } + + cout << "Write buffer numa node: " << numa_node << endl; + dma_map.argsz = sizeof(dma_map); + dma_map.user_addr = (__u64)ptr; + dma_map.length = len; + dma_map.numa_node = numa_node; + dma_map.csr_array[0] = DFL_CXL_CACHE_WR_ADDR_TABLE_DATA; + + logger_->debug("Allocate write buffer user addr 0x:{0:x}\ + length : {1:d} numa node : {2:d}", + dma_map.user_addr, dma_map.length, dma_map.numa_node); + + volatile uint64_t *u64 = + (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_WR_ADDR_TABLE_DATA); + + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_BUFFER_MAP, &dma_map); + if (res) { + cerr << "ioctl DFL_CXL_CACHE_NUMA_BUFFER_MAP failed" << strerror(errno) + << endl; + goto out_free; + } + + logger_->debug("DFL_CXL_CACHE_WR_ADDR_TABLE_DATA : 0x:{0:x}", *u64); + wr_buffer_ = (uint8_t *)ptr; + wr_buf_len_ = len; + return true; + + out_free: + buffer_release(ptr, len); + return false; + } + + bool free_cache_write() { + + int 
res = 0; + struct dfl_cxl_cache_buffer_unmap dma_unmap; + + memset(&dma_unmap, 0, sizeof(dma_unmap)); + dma_unmap.argsz = sizeof(dma_unmap); + dma_unmap.user_addr = (__u64)wr_buffer_; + dma_unmap.length = wr_buf_len_; + dma_unmap.csr_array[0] = DFL_CXL_CACHE_WR_ADDR_TABLE_DATA; + + logger_->debug("free write user addr 0x:{0:x} length : {1:d} ", + dma_unmap.user_addr, dma_unmap.length); + + volatile uint64_t *u64 = + (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_WR_ADDR_TABLE_DATA); + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_BUFFER_UNMAP, &dma_unmap); + if (res) { + cerr << "ioctl DFL_CXL_CACHE_NUMA_BUFFER_UNMAP failed" << strerror(errno) + << endl; + } + + logger_->debug("DFL_CXL_CACHE_WR_ADDR_TABLE_DATA : 0x:{0:x}", *u64); + buffer_release(wr_buffer_, wr_buf_len_); + return true; + } + + bool allocate_cache_read_write(size_t len = MiB(2), uint32_t numa_node = 0) { + + int res = 0; + void *ptr = NULL; + struct dfl_cxl_cache_buffer_map dma_map; + + memset(&dma_map, 0, sizeof(dma_map)); + if (!buffer_allocate(&ptr, len, numa_node)) { + cerr << "Fails to allocate 2MB huge page:" << strerror(errno) << endl; + return false; + } + cout << "Read/Write buffer numa node: " << numa_node << endl; + + dma_map.argsz = sizeof(dma_map); + dma_map.user_addr = (__u64)ptr; + dma_map.length = len; + dma_map.numa_node = numa_node; + dma_map.csr_array[0] = DFL_CXL_CACHE_RD_ADDR_TABLE_DATA; + dma_map.csr_array[1] = DFL_CXL_CACHE_WR_ADDR_TABLE_DATA; + + logger_->debug("Allocate read/write buffer user addr 0x:{0:x}\ + length : {1:d} numa node : {2:d}", + dma_map.user_addr, dma_map.length, dma_map.numa_node); + + volatile uint64_t *u64_wr = + (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_WR_ADDR_TABLE_DATA); + volatile uint64_t *u64_rd = + (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_RD_ADDR_TABLE_DATA); + + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_BUFFER_MAP, &dma_map); + if (res) { + cerr << "ioctl DFL_CXL_CACHE_NUMA_BUFFER_MAP failed" << strerror(errno) + << endl; + goto out_free; + } + 
+ logger_->debug("nDFL_CXL_CACHE_WR_ADDR_TABLE_DATA : 0x:{0:x}", *u64_rd); + logger_->debug("DFL_CXL_CACHE_WR_ADDR_TABLE_DATA : 0x:{0:x}", *u64_wr); + + rd_wr_buffer_ = (uint8_t *)ptr; + rd_wr_buf_len_ = len; + + return true; + + out_free: + buffer_release(ptr, len); + return false; + } + + bool free_cache_read_write() { + + int res = 0 ; + struct dfl_cxl_cache_buffer_unmap dma_unmap; + + memset(&dma_unmap, 0, sizeof(dma_unmap)); + dma_unmap.argsz = sizeof(dma_unmap); + dma_unmap.user_addr = (__u64)rd_wr_buffer_; + dma_unmap.length = rd_wr_buf_len_; + dma_unmap.csr_array[0] = DFL_CXL_CACHE_RD_ADDR_TABLE_DATA; + dma_unmap.csr_array[1] = DFL_CXL_CACHE_WR_ADDR_TABLE_DATA; + + logger_->debug("free read/write user addr 0x:{0:x} length : {1:d} ", + dma_unmap.user_addr, dma_unmap.length); + + volatile uint64_t *u64_wr = + (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_WR_ADDR_TABLE_DATA); + volatile uint64_t *u64_rd = + (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_RD_ADDR_TABLE_DATA); + + res = ioctl(fd_, DFL_CXL_CACHE_NUMA_BUFFER_UNMAP, &dma_unmap); + if (res) { + cerr << "ioctl DFL_CXL_CACHE_NUMA_BUFFER_UNMAP failed" << strerror(errno) + << endl; + } + + logger_->debug("nDFL_CXL_CACHE_WR_ADDR_TABLE_DATA : 0x:{0:x}", *u64_rd); + logger_->debug("DFL_CXL_CACHE_WR_ADDR_TABLE_DATA : 0x:{0:x}", *u64_wr); + + buffer_release(rd_wr_buffer_, rd_wr_buf_len_); + rd_wr_buffer_ = NULL; + return true; + } + + uint8_t *get_dsm() const { return dsm_buffer_; } + + uint8_t *get_read() const { return rd_buffer_; } + + uint8_t *get_write() const { return wr_buffer_; } + + uint8_t *get_read_write() const { return rd_wr_buffer_; } + +protected: + std::string name_; + std::string afu_id_; + CLI::App app_; + std::string pci_addr_; + std::string log_level_; + uint32_t timeout_msec_; + + int fd_; + uint8_t *mmio_base_; + uint64_t mmio_len_; + + uint8_t *dsm_buffer_; + uint64_t dsm_buf_len_; + + uint8_t *rd_buffer_; + uint64_t rd_buf_len_; + + uint8_t *wr_buffer_; + uint64_t wr_buf_len_; + + 
uint8_t *rd_wr_buffer_; + uint64_t rd_wr_buf_len_; + + struct dfl_cxl_cache_region_info rinfo_; + + std::string dev_path_; + + command::ptr_t current_command_; + std::map commands_; + +public: + std::shared_ptr logger_; +}; + +} // end of namespace afu_test +} // end of namespace opae