diff --git a/libraries/plugins/xfpga/fpga-dfl.h b/libraries/plugins/xfpga/fpga-dfl.h index ab54fd40796b..43ffaee92cb2 100644 --- a/libraries/plugins/xfpga/fpga-dfl.h +++ b/libraries/plugins/xfpga/fpga-dfl.h @@ -371,7 +371,6 @@ struct dfl_cxl_cache_region_info { * @flags: flags * @user_addr: user mmap virtual address * @length: length of mapping (bytes) - * @numa_node: Numa node number * @csr_array: array of region address offset * * maps user allocated virtual address to physical address. @@ -381,7 +380,6 @@ struct dfl_cxl_cache_buffer_map { __u32 flags; __u64 user_addr; __u64 length; - __u32 numa_node; __u64 csr_array[DFL_ARRAY_MAX_SIZE]; }; diff --git a/samples/cxl_host_exerciser/cxl_he_cache_cmd.h b/samples/cxl_host_exerciser/cxl_he_cache_cmd.h index 5272d5333067..2945ba23810e 100644 --- a/samples/cxl_host_exerciser/cxl_he_cache_cmd.h +++ b/samples/cxl_host_exerciser/cxl_he_cache_cmd.h @@ -50,7 +50,7 @@ class he_cache_cmd : public he_cmd { public: he_cache_cmd() : he_continuousmode_(false), he_contmodetime_(0), he_linerep_count_(0), - he_stide_(0), he_test_(0), he_test_all_(false) {} + he_stide_(0), he_test_(0), he_test_all_(false), he_dev_instance_(0) {} virtual ~he_cache_cmd() {} @@ -93,8 +93,21 @@ class he_cache_cmd : public he_cmd { ->transform(CLI::CheckedTransformer(he_targets)) ->default_val("host"); - app->add_option("--stride", he_stide_, "Enable stride mode") - ->default_val("0"); + app->add_option("--bias", he_bias_, + "host exerciser run on hostmem or fpgamem") + ->transform(CLI::CheckedTransformer(he_bias)) + ->default_val("hostmem"); + + // device cache0 or cache1 + app->add_option("--device", he_dev_instance_, + "run host exerciser device /dev/dfl-cxl-cache.0 (instance 0) \ + or /dev/dfl-cxl-cache.1 (instance 1)") + ->transform(CLI::CheckedTransformer(he_cxl_device)) + ->default_val("/dev/dfl-cxl-cache.0"); + + // Set sride + app->add_option("--stride", he_stide_, "Set stride value") + ->transform(CLI::Range(0, 3))->default_val("0"); // Line 
repeat count app->add_option("--linerepcount", he_linerep_count_, "Line repeat count") @@ -123,9 +136,12 @@ class he_cache_cmd : public he_cmd { // HE_INFO // Set Read number Lines he_info_.value = host_exe_->read64(HE_INFO); - host_exe_->write64(HE_RD_NUM_LINES, FPGA_32KB_CACHE_LINES); + host_exe_->write64(HE_RD_NUM_LINES, FPGA_512CACHE_LINES); - cout << "Read number Lines:" << FPGA_32KB_CACHE_LINES << endl; + // Set Stride to 3 for FPGA read/write cache hit/miss + he_stide_ = 3; + + cout << "Read number Lines:" << FPGA_512CACHE_LINES << endl; cout << "Line Repeat Count:" << he_linerep_count_ << endl; cout << "Read address table size:" << he_info_.read_addr_table_size << endl; cout << "Numa node:" << numa_node_ << endl; @@ -139,7 +155,10 @@ class he_cache_cmd : public he_cmd { // set RD_ADDR_TABLE_CTRL rd_table_ctl_.value = 0; - rd_table_ctl_.enable_address_stride = 1; + if (he_stide_ > 0) { + rd_table_ctl_.enable_address_stride = 1; + rd_table_ctl_.stride = he_stide_; + } host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); // Allocate DSM buffer @@ -156,10 +175,7 @@ class he_cache_cmd : public he_cmd { } // Start test - he_ctl_.Start = 1; - host_exe_->write64(HE_CTL, he_ctl_.value); - he_ctl_.Start = 0; - host_exe_->write64(HE_CTL, he_ctl_.value); + he_start_test(); // wait for completion if (!he_wait_test_completion()) { @@ -170,7 +186,7 @@ class he_cache_cmd : public he_cmd { return -1; } - he_perf_counters(); + he_perf_counters(HE_CXL_RD_LATENCY); cout << "********** AFU Copied host cache to FPGA Cache successfully " "********** " << endl; @@ -184,14 +200,14 @@ class he_cache_cmd : public he_cmd { // set RD_ADDR_TABLE_CTRL rd_table_ctl_.value = 0; - rd_table_ctl_.enable_address_stride = 1; + if (he_stide_ > 0) { + rd_table_ctl_.enable_address_stride = 1; + rd_table_ctl_.stride = he_stide_; + } host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); // Start test - he_ctl_.Start = 1; - host_exe_->write64(HE_CTL, he_ctl_.value); - 
he_ctl_.Start = 0; - host_exe_->write64(HE_CTL, he_ctl_.value); + he_start_test(); // wait for completion if (!he_wait_test_completion()) { @@ -202,7 +218,7 @@ class he_cache_cmd : public he_cmd { return -1; } - he_perf_counters(); + he_perf_counters(HE_CXL_RD_LATENCY); host_exe_->free_dsm(); host_exe_->free_cache_read(); @@ -230,9 +246,12 @@ class he_cache_cmd : public he_cmd { // HE_INFO // Set Read number Lines he_info_.value = host_exe_->read64(HE_INFO); - host_exe_->write64(HE_RD_NUM_LINES, FPGA_32KB_CACHE_LINES); + host_exe_->write64(HE_RD_NUM_LINES, FPGA_512CACHE_LINES); - cout << "Read/write number Lines:" << FPGA_32KB_CACHE_LINES << endl; + // Set Stride to 3 for FPGA read/write cache hit/miss + he_stide_ = 3; + + cout << "Read/write number Lines:" << FPGA_512CACHE_LINES << endl; cout << "Line Repeat Count:" << he_linerep_count_ << endl; cout << "Read address table size:" << he_info_.read_addr_table_size << endl; cout << "Write address table size:" << he_info_.write_addr_table_size @@ -247,7 +266,10 @@ class he_cache_cmd : public he_cmd { // set RD_ADDR_TABLE_CTRL rd_table_ctl_.value = 0; - rd_table_ctl_.enable_address_stride = 1; + if (he_stide_ > 0) { + rd_table_ctl_.enable_address_stride = 1; + rd_table_ctl_.stride = he_stide_; + } host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); // Allocate DSM buffer @@ -264,10 +286,7 @@ class he_cache_cmd : public he_cmd { } // Start test - he_ctl_.Start = 1; - host_exe_->write64(HE_CTL, he_ctl_.value); - he_ctl_.Start = 0; - host_exe_->write64(HE_CTL, he_ctl_.value); + he_start_test(); // wait for completion if (!he_wait_test_completion()) { @@ -290,17 +309,23 @@ class he_cache_cmd : public he_cmd { he_wr_cfg_.opcode = WR_LINE_M; host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); + // set RD_ADDR_TABLE_CTRL + he_rd_cfg_.value = 0; + host_exe_->write64(HE_RD_CONFIG, he_rd_cfg_.value); + rd_table_ctl_.value = 0; + host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); + // Set 
WR_ADDR_TABLE_CTRL wr_table_ctl_.value = 0; - wr_table_ctl_.enable_address_stride = 1; + if (he_stide_ > 0) { + wr_table_ctl_.enable_address_stride = 1; + wr_table_ctl_.stride = he_stide_; + } host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); + host_exe_->write64(HE_WR_NUM_LINES, FPGA_512CACHE_LINES); - host_exe_->write64(HE_WR_NUM_LINES, FPGA_32KB_CACHE_LINES); // Start test - he_ctl_.Start = 1; - host_exe_->write64(HE_CTL, he_ctl_.value); - he_ctl_.Start = 0; - host_exe_->write64(HE_CTL, he_ctl_.value); + he_start_test(); // wait for completion if (!he_wait_test_completion()) { @@ -337,9 +362,12 @@ class he_cache_cmd : public he_cmd { // HE_INFO // Set Read number Lines he_info_.value = host_exe_->read64(HE_INFO); - host_exe_->write64(HE_RD_NUM_LINES, FPGA_2MB_CACHE_LINES); + host_exe_->write64(HE_RD_NUM_LINES, FPGA_512CACHE_LINES); + + // Set Stride to 3 for FPGA read/write cache hit/miss + he_stide_ = 3; - cout << "Read number Lines:" << FPGA_2MB_CACHE_LINES << endl; + cout << "Read number Lines:" << FPGA_512CACHE_LINES << endl; cout << "Line Repeat Count:" << he_linerep_count_ << endl; cout << "Read address table size:" << he_info_.read_addr_table_size << endl; @@ -352,7 +380,10 @@ class he_cache_cmd : public he_cmd { // set RD_ADDR_TABLE_CTRL rd_table_ctl_.value = 0; - rd_table_ctl_.enable_address_stride = 1; + if (he_stide_ > 0) { + rd_table_ctl_.enable_address_stride = 1; + rd_table_ctl_.stride = he_stide_; + } host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); // Allocate DSM buffer @@ -368,11 +399,8 @@ class he_cache_cmd : public he_cmd { return -1; } - // start test - he_ctl_.Start = 1; - host_exe_->write64(HE_CTL, he_ctl_.value); - he_ctl_.Start = 0; - host_exe_->write64(HE_CTL, he_ctl_.value); + // Start test + he_start_test(); // wait for completion if (!he_wait_test_completion()) { @@ -383,7 +411,7 @@ class he_cache_cmd : public he_cmd { return -1; } - he_perf_counters(); + he_perf_counters(HE_CXL_RD_LATENCY); 
host_exe_->free_cache_read(); host_exe_->free_dsm(); @@ -407,9 +435,12 @@ class he_cache_cmd : public he_cmd { // HE_INFO // Set Read number Lines he_info_.value = host_exe_->read64(HE_INFO); - host_exe_->write64(HE_WR_NUM_LINES, FPGA_2MB_CACHE_LINES); + host_exe_->write64(HE_WR_NUM_LINES, FPGA_512CACHE_LINES); - cout << "Read/write number Lines:" << FPGA_2MB_CACHE_LINES << endl; + // Set Stride to 3 for FPGA read/write cache hit/miss + he_stide_ = 0x3; + + cout << "Read/write number Lines:" << FPGA_512CACHE_LINES << endl; cout << "Line Repeat Count:" << he_linerep_count_ << endl; cout << "Read address table size:" << he_info_.read_addr_table_size << endl; cout << "Write address table size:" << he_info_.write_addr_table_size @@ -424,7 +455,10 @@ class he_cache_cmd : public he_cmd { // Set WR_ADDR_TABLE_CTRL wr_table_ctl_.value = 0; - wr_table_ctl_.enable_address_stride = 1; + if (he_stide_ > 0) { + wr_table_ctl_.enable_address_stride = 1; + wr_table_ctl_.stride = he_stide_; + } host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); // Allocate DSM buffer @@ -440,11 +474,8 @@ class he_cache_cmd : public he_cmd { return -1; } - // start test - he_ctl_.Start = 1; - host_exe_->write64(HE_CTL, he_ctl_.value); - he_ctl_.Start = 0; - host_exe_->write64(HE_CTL, he_ctl_.value); + // Start test + he_start_test(); // wait for completion if (!he_wait_test_completion()) { @@ -467,7 +498,7 @@ class he_cache_cmd : public he_cmd { int he_run_host_rd_cache_hit_test() { - cout << "********** 1 Host LLC Read cache hit test start**********" << endl; + cout << "********** Host LLC Read cache hit test start**********" << endl; /* STEPS 1) Allocate DSM, Read buffer @@ -479,9 +510,9 @@ class he_cache_cmd : public he_cmd { // HE_INFO // Set Read number Lines he_info_.value = host_exe_->read64(HE_INFO); - host_exe_->write64(HE_RD_NUM_LINES, FPGA_32KB_CACHE_LINES); + host_exe_->write64(HE_RD_NUM_LINES, FPGA_512CACHE_LINES); - cout << "Read number Lines:" << FPGA_32KB_CACHE_LINES << 
endl; + cout << "Read number Lines:" << FPGA_512CACHE_LINES << endl; cout << "Line Repeat Count:" << he_linerep_count_ << endl; cout << "Read address table size:" << he_info_.read_addr_table_size << endl; cout << "Write address table size:" << he_info_.write_addr_table_size @@ -496,7 +527,10 @@ class he_cache_cmd : public he_cmd { // set RD_ADDR_TABLE_CTRL rd_table_ctl_.value = 0; - rd_table_ctl_.enable_address_stride = 1; + if (he_stide_ > 0) { + rd_table_ctl_.enable_address_stride = 1; + rd_table_ctl_.stride = he_stide_; + } host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); // Allocate DSM buffer @@ -513,14 +547,11 @@ class he_cache_cmd : public he_cmd { } cout << " create thread - moves read buffer to host cache " << endl; - std::thread t1(he_cache_thread, host_exe_->get_read(), BUFFER_SIZE_2MB); + std::thread t1(he_cache_thread, host_exe_->get_read(), BUFFER_SIZE_32KB); sleep(1); - // start - he_ctl_.Start = 1; - host_exe_->write64(HE_CTL, he_ctl_.value); - he_ctl_.Start = 0; - host_exe_->write64(HE_CTL, he_ctl_.value); + // Start test + he_start_test(); // wait for completion if (!he_wait_test_completion()) { @@ -537,7 +568,7 @@ class he_cache_cmd : public he_cmd { g_stop_thread = true; t1.join(); - he_perf_counters(); + he_perf_counters(HE_CXL_RD_LATENCY); sleep(1); host_exe_->free_cache_read(); host_exe_->free_dsm(); @@ -564,8 +595,8 @@ class he_cache_cmd : public he_cmd { // Set Read number Lines he_info_.value = host_exe_->read64(HE_INFO); - host_exe_->write64(HE_WR_NUM_LINES, FPGA_32KB_CACHE_LINES); - cout << "Write number Lines:" << FPGA_32KB_CACHE_LINES << endl; + host_exe_->write64(HE_WR_NUM_LINES, FPGA_512CACHE_LINES); + cout << "Write number Lines:" << FPGA_512CACHE_LINES << endl; cout << "Line Repeat Count:" << he_linerep_count_ << endl; cout << "Read address table size:" << he_info_.read_addr_table_size << endl; cout << "Write address table size:" << he_info_.write_addr_table_size @@ -575,12 +606,15 @@ class he_cache_cmd : public 
he_cmd { he_wr_cfg_.value = 0; he_wr_cfg_.line_repeat_count = he_linerep_count_; he_wr_cfg_.write_traffic_enable = 1; - he_wr_cfg_.opcode = WR_LINE_I; + he_wr_cfg_.opcode = WR_PUSH_I; host_exe_->write64(HE_WR_CONFIG, he_wr_cfg_.value); // set RD_ADDR_TABLE_CTRL wr_table_ctl_.value = 0; - wr_table_ctl_.enable_address_stride = 1; + if (he_stide_ > 0) { + wr_table_ctl_.enable_address_stride = 1; + wr_table_ctl_.stride = he_stide_; + } host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); // Allocate DSM buffer @@ -597,14 +631,11 @@ class he_cache_cmd : public he_cmd { } cout << " create thread - moves read buffer to host cache " << endl; - std::thread t1(he_cache_thread, host_exe_->get_write(), BUFFER_SIZE_2MB); + std::thread t1(he_cache_thread, host_exe_->get_write(), BUFFER_SIZE_32KB); sleep(1); - // start - he_ctl_.Start = 1; - host_exe_->write64(HE_CTL, he_ctl_.value); - he_ctl_.Start = 0; - host_exe_->write64(HE_CTL, he_ctl_.value); + // Start test + he_start_test(); // wait for completion if (!he_wait_test_completion()) { @@ -645,8 +676,8 @@ class he_cache_cmd : public he_cmd { // HE_INFO // Set Read number Lines he_info_.value = host_exe_->read64(HE_INFO); - host_exe_->write64(HE_RD_NUM_LINES, FPGA_32KB_CACHE_LINES - 1); - cout << "Read/write number Lines:" << FPGA_32KB_CACHE_LINES - 1 << endl; + host_exe_->write64(HE_RD_NUM_LINES, FPGA_512CACHE_LINES ); + cout << "Read/write number Lines:" << FPGA_512CACHE_LINES << endl; cout << "Line Repeat Count:" << he_linerep_count_ << endl; cout << "Read address table size:" << he_info_.read_addr_table_size << endl; cout << "Write address table size:" << he_info_.write_addr_table_size @@ -661,7 +692,10 @@ class he_cache_cmd : public he_cmd { // set RD_ADDR_TABLE_CTR rd_table_ctl_.value = 0; - rd_table_ctl_.enable_address_stride = 1; + if (he_stide_ > 0) { + rd_table_ctl_.enable_address_stride = 1; + rd_table_ctl_.stride = he_stide_; + } host_exe_->write64(HE_RD_ADDR_TABLE_CTRL, rd_table_ctl_.value); // Allocate 
DSM buffer @@ -677,11 +711,8 @@ class he_cache_cmd : public he_cmd { return -1; } - // start - he_ctl_.Start = 1; - host_exe_->write64(HE_CTL, he_ctl_.value); - he_ctl_.Start = 0; - host_exe_->write64(HE_CTL, he_ctl_.value); + // Start test + he_start_test(); // wait for completion if (!he_wait_test_completion()) { @@ -692,7 +723,7 @@ class he_cache_cmd : public he_cmd { return -1; } - he_perf_counters(); + he_perf_counters(HE_CXL_RD_LATENCY); host_exe_->free_cache_read(); host_exe_->free_dsm(); @@ -718,8 +749,8 @@ class he_cache_cmd : public he_cmd { // HE_INFO // Set write number Lines he_info_.value = host_exe_->read64(HE_INFO); - host_exe_->write64(HE_WR_NUM_LINES, 1); - cout << "Write number Lines:" << 1 << endl; + host_exe_->write64(HE_WR_NUM_LINES, FPGA_512CACHE_LINES); + cout << "Write number Lines:" << FPGA_512CACHE_LINES << endl; cout << "Line Repeat Count:" << he_linerep_count_ << endl; cout << "Read address table size:" << he_info_.read_addr_table_size << endl; cout << "Write address table size:" << he_info_.write_addr_table_size @@ -734,7 +765,10 @@ class he_cache_cmd : public he_cmd { - // set RD_ADDR_TABLE_CTR + // Set WR_ADDR_TABLE_CTRL wr_table_ctl_.value = 0; - wr_table_ctl_.enable_address_stride = 1; + if (he_stide_ > 0) { + wr_table_ctl_.enable_address_stride = 1; + wr_table_ctl_.stride = he_stide_; + } - host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, rd_table_ctl_.value); + host_exe_->write64(HE_WR_ADDR_TABLE_CTRL, wr_table_ctl_.value); // Allocate DSM buffer @@ -750,11 +784,8 @@ class he_cache_cmd : public he_cmd { return -1; } - // start - he_ctl_.Start = 1; - host_exe_->write64(HE_CTL, he_ctl_.value); - he_ctl_.Start = 0; - host_exe_->write64(HE_CTL, he_ctl_.value); + // Start test + he_start_test(); // wait for completion if (!he_wait_test_completion()) { @@ -792,10 +823,11 @@ class he_cache_cmd : public he_cmd { he_ctl_.ResetL = 0; host_exe_->write64(HE_CTL, he_ctl_.value); - he_ctl_.value = 0; he_ctl_.ResetL = 1; host_exe_->write64(HE_CTL, he_ctl_.value); + print_csr(); + if (he_test_all_ == true) { int retvalue = 0; ret =
he_run_fpga_rd_cache_hit_test(); @@ -886,6 +918,7 @@ class he_cache_cmd : public he_cmd { uint32_t he_stide_; uint32_t he_test_; bool he_test_all_; + uint32_t he_dev_instance_; }; void he_cache_thread(uint8_t *buf_ptr, uint64_t len) { diff --git a/samples/cxl_host_exerciser/cxl_he_cmd.h b/samples/cxl_host_exerciser/cxl_he_cmd.h index a5efe4b9f641..ae6fc6e68965 100644 --- a/samples/cxl_host_exerciser/cxl_he_cmd.h +++ b/samples/cxl_host_exerciser/cxl_he_cmd.h @@ -36,7 +36,8 @@ namespace host_exerciser { class he_cmd : public test_command { public: - he_cmd() : host_exe_(NULL), he_clock_mhz_(400), numa_node_(0), he_target_(0) { + he_cmd() : host_exe_(NULL), he_clock_mhz_(400), numa_node_(0), he_target_(0), + he_bias_(0) { he_ctl_.value = 0; he_info_.value = 0; @@ -53,7 +54,7 @@ class he_cmd : public test_command { return (double)(num_lines * 64) / ((1000.0 / he_clock_mhz_ * num_ticks)); } - void he_perf_counters() { + void he_perf_counters(he_cxl_latency cxl_latency = HE_CXL_LATENCY_NONE) { volatile he_cache_dsm_status *dsm_status = NULL; dsm_status = reinterpret_cast( @@ -81,9 +82,95 @@ class he_cmd : public test_command { host_exe_->logger_->info("Bandwidth: {0:0.3f} GB/s", perf_data); } + if (cxl_latency == HE_CXL_RD_LATENCY) { + if (dsm_status->num_ticks > 0 && dsm_status->num_reads > 0) { + double latency = (double)((dsm_status->num_ticks / (double)dsm_status->num_reads) + * (1000.0 / he_clock_mhz_)); // ns per tick = 1000 / clock MHz, same basis as he_data_bandwidth() + + host_exe_->logger_->info("Read Latency : {0:0.2f} nanoseconds", latency); + } + else { + host_exe_->logger_->info("Read Latency: N/A"); + } + } + cout << "********* DSM Status CSR end *********" << endl; } + void print_csr() { + + host_exe_->logger_->debug("HE_DFH:0x{:x}", host_exe_->read64(HE_DFH)); + host_exe_->logger_->debug("HE_ID_L:0x{:x}", host_exe_->read64(HE_ID_L)); + host_exe_->logger_->debug("HE_ID_H:0x{:x}", host_exe_->read64(HE_ID_H)); + + host_exe_->logger_->debug("HE_SCRATCHPAD0:0x{:x}", + host_exe_->read64(HE_SCRATCHPAD0)); + + 
host_exe_->logger_->debug("HE_DSM_BASE:0x{:x}", host_exe_->read64(HE_DSM_BASE)); + + host_exe_->logger_->debug("HE_CTL:0x{:x}", host_exe_->read64(HE_CTL)); + + host_exe_->logger_->debug("HE_INFO:0x{:x}", host_exe_->read64(HE_INFO)); + + host_exe_->logger_->debug("HE_WR_NUM_LINES:0x{:x}", + host_exe_->read64(HE_WR_NUM_LINES)); + + host_exe_->logger_->debug("HE_WR_BYTE_ENABLE:0x{:x}", + host_exe_->read64(HE_WR_BYTE_ENABLE)); + + host_exe_->logger_->debug("HE_WR_CONFIG:0x{:x}", + host_exe_->read64(HE_WR_CONFIG)); + + host_exe_->logger_->debug("HE_WR_ADDR_TABLE_CTRL:0x{:x}", + host_exe_->read64(HE_WR_ADDR_TABLE_CTRL)); + + host_exe_->logger_->debug("HE_WR_ADDR_TABLE_DATA:0x{:x}", + host_exe_->read64(HE_WR_ADDR_TABLE_DATA)); + + host_exe_->logger_->debug("HE_RD_NUM_LINES:0x{:x}", + host_exe_->read64(HE_RD_NUM_LINES)); + + host_exe_->logger_->debug("HE_RD_CONFIG:0x{:x}", + host_exe_->read64(HE_RD_CONFIG)); + + host_exe_->logger_->debug("HE_RD_ADDR_TABLE_CTRL:0x{:x}", + host_exe_->read64(HE_RD_ADDR_TABLE_CTRL)); + + host_exe_->logger_->debug("HE_RD_ADDR_TABLE_DATA:0x{:x}", + host_exe_->read64(HE_RD_ADDR_TABLE_DATA)); + + host_exe_->logger_->debug("HE_ERROR_STATUS:0x{:x}", + host_exe_->read64(HE_ERROR_STATUS)); + + host_exe_->logger_->debug("HE_ERROR_EXP_DATA:0x{:x}", + host_exe_->read64(HE_ERROR_EXP_DATA)); + + host_exe_->logger_->debug("HE_ERROR_ACT_DATA0:0x{:x}", + host_exe_->read64(HE_ERROR_ACT_DATA0)); + + host_exe_->logger_->debug("HE_ERROR_ACT_DATA1:0x{:x}", + host_exe_->read64(HE_ERROR_ACT_DATA1)); + + host_exe_->logger_->debug("HE_ERROR_ACT_DATA2:0x{:x}", + host_exe_->read64(HE_ERROR_ACT_DATA2)); + + host_exe_->logger_->debug("HE_ERROR_ACT_DATA3:0x{:x}", + host_exe_->read64(HE_ERROR_ACT_DATA3)); + + host_exe_->logger_->debug("HE_ERROR_ACT_DATA4:0x{:x}", + host_exe_->read64(HE_ERROR_ACT_DATA4)); + + host_exe_->logger_->debug("HE_ERROR_ACT_DATA5:0x{:x}", + host_exe_->read64(HE_ERROR_ACT_DATA5)); + + host_exe_->logger_->debug("HE_ERROR_ACT_DATA6:0x{:x}", + 
host_exe_->read64(HE_ERROR_ACT_DATA6)); + + host_exe_->logger_->debug("HE_ERROR_ACT_DATA7:0x{:x}", + host_exe_->read64(HE_ERROR_ACT_DATA7)); + + } + void host_exerciser_errors() { he_err_status err_status; uint64_t err = 0; @@ -152,6 +239,31 @@ class he_cmd : public test_command { return true; } + void he_start_test() { + // start test + + switch (he_bias_) { + case HOSTMEM_BIAS: + he_ctl_.bias_support = 0x0; + break; + + case FPGAMEM_HOST_BIAS: + he_ctl_.bias_support = 0x2; + break; + + case FPGAMEM_DEVICE_BIAS: + he_ctl_.bias_support = 0x3; + break; + default: + he_ctl_.bias_support = 0x0; + } + + he_ctl_.Start = 0; + host_exe_->write64(HE_CTL, he_ctl_.value); + he_ctl_.Start = 1; + host_exe_->write64(HE_CTL, he_ctl_.value); + } + bool verify_numa_node() { if (numa_available() < 0) { @@ -182,6 +294,7 @@ class he_cmd : public test_command { uint32_t he_clock_mhz_; uint32_t numa_node_; uint32_t he_target_; + uint32_t he_bias_; he_ctl he_ctl_; he_info he_info_; diff --git a/samples/cxl_host_exerciser/cxl_host_exerciser.h b/samples/cxl_host_exerciser/cxl_host_exerciser.h index 917e59f798a3..a406e7cc5449 100644 --- a/samples/cxl_host_exerciser/cxl_host_exerciser.h +++ b/samples/cxl_host_exerciser/cxl_host_exerciser.h @@ -39,9 +39,11 @@ static const uint64_t HELPBK_TEST_SLEEP_INVL = 100; static const uint64_t CL = 64; static const uint64_t KB = 1024; static const uint64_t MB = KB * 1024; -static const uint64_t BUFFER_SIZE_2MB = 2 * 1024 * 1024; -static const uint64_t FPGA_32KB_CACHE_LINES = (32 * 1024) / 64; -static const uint64_t FPGA_2MB_CACHE_LINES = (2 * 1024 * 1024) / 64; +static const uint64_t BUFFER_SIZE_2MB = 2 * MB; +static const uint64_t BUFFER_SIZE_32KB = 32* KB; +static const uint64_t FPGA_32KB_CACHE_LINES = (32 * KB) / 64; +static const uint64_t FPGA_2MB_CACHE_LINES = (2 * MB) / 64; +static const uint64_t FPGA_512CACHE_LINES = 512; // Host execiser CSR Offset enum { @@ -124,8 +126,8 @@ union he_ctl { uint64_t ResetL : 1; uint64_t Start : 1; uint64_t 
ForcedTestCmpl : 1; - uint64_t bias_support : 1; - uint64_t Reserved : 60; + uint64_t bias_support : 2; + uint64_t Reserved : 59; }; }; @@ -297,6 +299,15 @@ typedef enum { HE_TARGET_FPGA = 0x1, } he_target; + +// he cxl cache latency +typedef enum { + HE_CXL_LATENCY_NONE = 0x0, + HE_CXL_RD_LATENCY = 0x1, + HE_CXL_WR_LATENCY = 0x2, + HE_CXL_RD_WR_LATENCY = 0x3, +} he_cxl_latency; + const std::map he_test_modes = { {"fpgardcachehit", HE_FPGA_RD_CACHE_HIT}, {"fpgawrcachehit", HE_FPGA_WR_CACHE_HIT}, @@ -308,17 +319,36 @@ const std::map he_test_modes = { {"hostwrcachemiss", HE_HOST_WR_CACHE_MISS}, }; +// Bias Support +typedef enum { + HOSTMEM_BIAS = 0x0, + HOST_BIAS_NA = 0x1, + FPGAMEM_HOST_BIAS = 0x2, + FPGAMEM_DEVICE_BIAS = 0x3, +} he_bisa_support; + const std::map he_targets = { {"host", HE_TARGET_HOST}, {"fpga", HE_TARGET_FPGA}, }; -/////////////////////// -// Bias Support +// Bias support +const std::map he_bias = { + {"hostmem", HOSTMEM_BIAS}, + {"fpgamem_host_bias", FPGAMEM_HOST_BIAS}, + {"fpgamem_device_bias", FPGAMEM_DEVICE_BIAS}, +}; + +// he cxl cache device instance typedef enum { - HOST_BIOS = 0x0, - DEVIC_BIOA = 0x1, -} he_ctl_bios_support; + HE_CXL_DEVICE0 = 0x0, + HE_CXL_DEVICE1 = 0x1, +} he_cxl_dev; + +const std::map he_cxl_device = { + {"/dev/dfl-cxl-cache.0", HE_CXL_DEVICE0}, + {"/dev/dfl-cxl-cache.1", HE_CXL_DEVICE1}, +}; // configures test mode typedef enum { @@ -334,7 +364,6 @@ typedef enum { HE_ADDRTABLE_SIZE8 = 0x3, HE_ADDRTABLE_SIZE4 = 0x2, HE_ADDRTABLE_SIZE2 = 0x1, - } he_addrtable_size; // he test type @@ -346,7 +375,6 @@ typedef enum { const std::map traffic_enable = { {"enable", HE_ENABLE_TRAFFIC_STAGE}, {"skip", HE_SIP_SEQ_STAGE}, - }; std::map addrtable_size = { diff --git a/samples/cxl_host_exerciser/he_cache_test.h b/samples/cxl_host_exerciser/he_cache_test.h index 900e56bf8f7c..1d105fb5c5d9 100644 --- a/samples/cxl_host_exerciser/he_cache_test.h +++ b/samples/cxl_host_exerciser/he_cache_test.h @@ -47,8 +47,6 @@ #include #include - - 
#include "fpga-dfl.h" using namespace std; @@ -62,6 +60,7 @@ enum { MATCHES_SIZE = 6 }; "fpga_region/region*/dfl-fme*/dfl_dev*/feature_id" #define MAX_SIZE 256 +#define MAX_HE_CACHE_DEVICE 2 #define PROTECTION (PROT_READ | PROT_WRITE) @@ -306,10 +305,12 @@ class afu { int find_dev_feature() { glob_t pglob; + glob_t dev_pglob; char feature_path[MAX_SIZE] = {0}; int gres = 0; uint64_t value = 0; size_t i = 0; + size_t dev_index = 0; if (!pci_addr_.empty()) { if (snprintf(feature_path, sizeof(feature_path), FEATURE_DEV, @@ -325,7 +326,7 @@ class afu { } } - gres = glob(feature_path, GLOB_NOSORT, NULL, &pglob); + gres = glob(feature_path, 0, NULL, &pglob); if (gres) { cerr << "Failed pattern match" << feature_path << ":" << strerror(errno) << endl; @@ -341,27 +342,34 @@ class afu { } - if (current_command()->featureid() == value) { + if (current_command()->featureid() == value && dev_index < MAX_HE_CACHE_DEVICE) { // dev_path_ holds at most MAX_HE_CACHE_DEVICE entries - string str(pglob.gl_pathv[i]); - string substr_dev(str.substr(0, str.rfind("/"))); - globfree(&pglob); + string str(pglob.gl_pathv[i]); + string substr_dev(str.substr(0, str.rfind("/"))); + + substr_dev.append("/dfl-cxl-cache/dfl-cxl-cache*"); + gres = glob(substr_dev.c_str(), GLOB_NOSORT, NULL, &dev_pglob); + if (gres) { + cerr << "Failed pattern match" << substr_dev.c_str() << ":" + << strerror(errno) << endl; + globfree(&dev_pglob); + return 4; + } + + string str1(dev_pglob.gl_pathv[0]); + globfree(&dev_pglob); + dev_path_[dev_index].append("/dev"); + dev_path_[dev_index].append(str1.substr(str1.rfind("/"), 16)); + dev_index++; + } + } - substr_dev.append("/dfl-cxl-cache/dfl-cxl-cache*"); - gres = glob(substr_dev.c_str(), GLOB_NOSORT, NULL, &pglob); - if (gres) { - cerr << "Failed pattern match" << substr_dev.c_str() << ":" - << strerror(errno) << endl; - globfree(&pglob); - return 4; - } - string str1(pglob.gl_pathv[0]); + if (pglob.gl_pathv) { globfree(&pglob); - dev_path_.append("/dev"); - dev_path_.append(str1.substr(str1.rfind("/"), 16)); + } + if (dev_index > 0) { return 0; - } } - + return 5; } @@ -385,12 +393,12 @@ class afu { 
return true; } - int open_handle() { + int open_handle(const char *dev) { int res = 0; - logger_->debug("dev_path_:{0}", dev_path_); + logger_->debug("CXL device:{0}", dev); - fd_ = open(dev_path_.c_str(), O_RDWR); + fd_ = open(dev, O_RDWR); if (fd_ < 0) { cerr << "open() failed:" << strerror(errno) << endl; return 1; @@ -452,7 +460,13 @@ class afu { return exit_codes::exception; }; - int res = open_handle(); + int dev_index = 0; + CLI::Option* opt = app->get_option_no_throw("--device"); + if (opt && opt->count() == 1) { + dev_index = stoi(opt->results().at(0)); + } + + int res = open_handle(dev_path_[dev_index].c_str()); if (res != exit_codes::not_run) { return res; } @@ -530,12 +544,10 @@ class afu { dma_map.argsz = sizeof(dma_map); dma_map.user_addr = (__u64)ptr; dma_map.length = len; - dma_map.numa_node = numa_node; dma_map.csr_array[0] = DFL_CXL_CACHE_DSM_BASE; logger_->debug("Allocate DSM buffer user addr 0x:{0:x} length :" - "{1:d} numa node : {2:d}", - dma_map.user_addr, dma_map.length, dma_map.numa_node); + "{1:d}", dma_map.user_addr, dma_map.length); volatile uint64_t *u64 = (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_DSM_BASE); @@ -585,6 +597,10 @@ class afu { return true; } + void reset_dsm() { + memset(dsm_buffer_, 0, dsm_buf_len_); + } + bool allocate_cache_read(size_t len = MiB(2), uint32_t numa_node = 0) { int res = 0; @@ -602,12 +618,10 @@ class afu { dma_map.argsz = sizeof(dma_map); dma_map.user_addr = (__u64)ptr; dma_map.length = len; - dma_map.numa_node = numa_node; dma_map.csr_array[0] = DFL_CXL_CACHE_RD_ADDR_TABLE_DATA; logger_->debug("Allocate read buffer user addr 0x:{0:x} length :" - "{1:d} numa node : {2:d}", - dma_map.user_addr, dma_map.length, dma_map.numa_node); + "{1:d}", dma_map.user_addr, dma_map.length); volatile uint64_t *u64 = (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_RD_ADDR_TABLE_DATA); @@ -672,12 +686,10 @@ class afu { dma_map.argsz = sizeof(dma_map); dma_map.user_addr = (__u64)ptr; dma_map.length = len; - 
dma_map.numa_node = numa_node; dma_map.csr_array[0] = DFL_CXL_CACHE_WR_ADDR_TABLE_DATA; logger_->debug("Allocate write buffer user addr 0x:{0:x}\ - length : {1:d} numa node : {2:d}", - dma_map.user_addr, dma_map.length, dma_map.numa_node); + length : {1:d}", dma_map.user_addr, dma_map.length); volatile uint64_t *u64 = (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_WR_ADDR_TABLE_DATA); @@ -742,13 +754,11 @@ class afu { dma_map.argsz = sizeof(dma_map); dma_map.user_addr = (__u64)ptr; dma_map.length = len; - dma_map.numa_node = numa_node; dma_map.csr_array[0] = DFL_CXL_CACHE_RD_ADDR_TABLE_DATA; dma_map.csr_array[1] = DFL_CXL_CACHE_WR_ADDR_TABLE_DATA; logger_->debug("Allocate read/write buffer user addr 0x:{0:x}\ - length : {1:d} numa node : {2:d}", - dma_map.user_addr, dma_map.length, dma_map.numa_node); + length : {1:d}", dma_map.user_addr, dma_map.length); volatile uint64_t *u64_wr = (volatile uint64_t *)(mmio_base_ + DFL_CXL_CACHE_WR_ADDR_TABLE_DATA); @@ -843,7 +853,7 @@ class afu { struct dfl_cxl_cache_region_info rinfo_; - std::string dev_path_; + std::string dev_path_[MAX_HE_CACHE_DEVICE]; command::ptr_t current_command_; std::map commands_;