From dfabc031e52e8413dbac75835a6a3cb4c5737d8c Mon Sep 17 00:00:00 2001 From: Ananda Ravuri <33236856+anandaravuri@users.noreply.github.com> Date: Thu, 22 Feb 2024 08:50:03 -0800 Subject: [PATCH] host_exerciser:support no FPGA mgmt PF instances (#3108) * host_exerciser:support no FPGA mgmt PF instances Issue: The host exerciser fails to execute on devkits whose bitstream does not support an FPGA management PF. The host exerciser enumerates the FME and quits the program if the bitstream does not support it. The I series-DK and F series-DK have multiple host exerciser AFUs on separate PCIe bus/device/function addresses. Fix: If the bitstream does not support an FME, still perform the host exerciser tests and print a message. host_exerciser --pci-address 0000:03:00.2 lpbk host_exerciser --pci-address 0000:04:00.2 lpbk opae.io ls [0000:03:00.0] (0x8086:0xbcce 0x8086:0x1771) Intel Acceleration Development Platform N6001 (Driver: dfl-pci) [0000:04:00.7] (0x8086:0xbccf 0x8086:0x1771) Intel Acceleration Development Platform N6001 (Driver: None) [0000:04:00.5] (0x8086:0xbccf 0x8086:0x1771) Intel Acceleration Development Platform N6001 (Driver: vfio-pci) [0000:03:00.7] (0x8086:0xbccf 0x8086:0x1771) Intel Acceleration Development Platform N6001 (Driver: vfio-pci) [0000:04:00.3] (0x8086:0xbcce 0x8086:0x1771) Intel Acceleration Development Platform N6001 (Driver: dfl-pci) [0000:03:00.5] (0x8086:0xbccf 0x8086:0x1771) Intel Acceleration Development Platform N6001 (Driver: vfio-pci) [0000:04:00.1] (0x8086:0xbcce 0x8086:0x1771) Intel Acceleration Development Platform N6001 (Driver: dfl-pci) [0000:03:00.3] (0x8086:0xbcce 0x8086:0x1771) Intel Acceleration Development Platform N6001 (Driver: dfl-pci) [0000:03:00.1] (0x8086:0xbcce 0x8086:0x1771) Intel Acceleration Development Platform N6001 (Driver: dfl-pci) [0000:04:00.6] (0x8086:0xbccf 0x8086:0x1771) Intel Acceleration Development Platform N6001 (Driver: None) [0000:04:00.4] (0x8086:0xbcce 0x8086:0x1771) Intel Acceleration Development Platform N6001 (Driver: dfl-pci) 
[0000:03:00.6] (0x8086:0xbccf 0x8086:0x1771) Intel Acceleration Development Platform N6001 (Driver: vfio-pci) [0000:04:00.2] (0x8086:0xbcce 0x8086:0x1771) Intel Acceleration Development Platform N6001 (Driver: vfio-pci) [0000:03:00.4] (0x8086:0xbcce 0x8086:0x1771) Intel Acceleration Development Platform N6001 (Driver: dfl-pci) [0000:04:00.0] (0x8086:0xbcce 0x8086:0x1771) Intel Acceleration Development Platform N6001 (Driver: dfl-pci) [0000:03:00.2] (0x8086:0xbcce 0x8086:0x1771) Intel Acceleration Development Platform N6001 (Driver: vfio-pci) -------- Signed-off-by: anandaravuri Co-authored-by: Peter Colberg --- libraries/afu-test/afu_test.h | 72 +++++++++-------- samples/host_exerciser/host_exerciser.h | 4 +- samples/host_exerciser/host_exerciser_cmd.h | 88 ++++++++++++--------- 3 files changed, 92 insertions(+), 72 deletions(-) diff --git a/libraries/afu-test/afu_test.h b/libraries/afu-test/afu_test.h index 1c6e7875f3a0..8a8f78496919 100644 --- a/libraries/afu-test/afu_test.h +++ b/libraries/afu-test/afu_test.h @@ -172,6 +172,7 @@ class afu { , shared_(false) , timeout_msec_(60000) , handle_(nullptr) + , handle_device_(nullptr) , current_command_(nullptr) { if (!afu_id_.empty()) @@ -196,55 +197,65 @@ class afu { return fpga::properties::get(handle_); } - int open_handle(const char *afu_id) { - + bool enum_fpga_device() + { auto filter = fpga::properties::get(); // Get an empty properties object - // The following code attempts to get a token+handle for the DEVICE. + // The following code attempts to get a token+handle for the DEVICE. // This is to allow access to OPAE-API functions that are only supported // through the xfpga plugin (i.e accessing sysfs entries) // In contrast, the ACCELERATOR token may be underlied by the vfio plugin. 
+ // Set PCIe segment, bus, and device properties to enumerate FPGA DEVICE (FME) if (!pci_addr_.empty()) { - auto p = pcie_address::parse(pci_addr_.c_str()); - filter->segment = p.fields.domain; - filter->bus = p.fields.bus; - filter->device = p.fields.device; + auto p = pcie_address::parse(pci_addr_.c_str()); + filter->segment = p.fields.domain; + filter->bus = p.fields.bus; + filter->device = p.fields.device; } filter->type = FPGA_DEVICE; - auto tokens = fpga::token::enumerate({filter}); + auto tokens = fpga::token::enumerate({ filter }); // Error out if the # of tokens != 1 if (tokens.size() < 1) { - if (pci_addr_.empty()) { - logger_->error("no DEVICE found"); - } else { - logger_->error("no accelerator found at PCIe address {1}", - pci_addr_); - } - return exit_codes::not_found; - } + logger_->info("no FPGA DEVICE found"); + return false; + } if (tokens.size() > 1) { - std::cerr << "more than one DEVICE found matching filter\n"; + logger_->info("more than one FPGA DEVICE found "); + return false; } + int flags = shared_ ? FPGA_OPEN_SHARED : 0; - // Open a handle to the resource try { - handle_device_ = fpga::handle::open(tokens[0], flags); - } catch (fpga::no_access &err) { - std::cerr << err.what() << "\n"; - return exit_codes::no_access; + handle_device_ = fpga::handle::open(tokens[0], flags); } + catch (fpga::no_access& err) { + std::cerr << err.what() << "\n"; + return false; + } + return true; + } - // The following code attempts to get a token + handle for the AFU - // (ACCELERATOR device) matching the given command's afu_id. - // Set PCIe segment, bus, device, and functionproperties to enumerate FPGA ACCELERATOR + int open_handle(const char *afu_id) { + + enum_fpga_device(); + + auto filter = fpga::properties::get(); // Get an empty properties object + + // The following code attempts to get a token+handle for the AFU 
+ // (ACCELERATOR) matching the given command's afu_id. The DEVICE (FME) + // handle is optional: enum_fpga_device() above tries to open it and its + // result is deliberately ignored so bitstreams without an FME still run. + // Set PCIe segment, bus, device, and function properties to enumerate FPGA ACCELERATOR if (!pci_addr_.empty()) { - auto p = pcie_address::parse(pci_addr_.c_str()); - filter->function = p.fields.function; + auto p = pcie_address::parse(pci_addr_.c_str()); + filter->segment = p.fields.domain; + filter->bus = p.fields.bus; + filter->device = p.fields.device; + filter->function = p.fields.function; } auto app_afu_id = afu_id ? afu_id : afu_id_.c_str(); @@ -255,7 +266,7 @@ class afu { return error; } - tokens = fpga::token::enumerate({filter}); + auto tokens = fpga::token::enumerate({filter}); if (tokens.size() < 1) { if (pci_addr_.empty()) { logger_->error("no accelerator found with id: {0}", app_afu_id); @@ -266,10 +277,7 @@ class afu { return exit_codes::not_found; } - if (tokens.size() > 1) { - std::cerr << "more than one accelerator found matching filter\n"; - } - + int flags = shared_ ? 
FPGA_OPEN_SHARED : 0; try { handle_ = fpga::handle::open(tokens[0], flags); } catch (fpga::no_access &err) { diff --git a/samples/host_exerciser/host_exerciser.h b/samples/host_exerciser/host_exerciser.h index 6e05569ca96a..6c9a464dd152 100644 --- a/samples/host_exerciser/host_exerciser.h +++ b/samples/host_exerciser/host_exerciser.h @@ -575,7 +575,9 @@ class host_exerciser : public test_afu { token::ptr_t get_token_device() { - return handle_device_->get_token(); + if (handle_device_) + return handle_device_->get_token(); + return nullptr; } bool option_passed(std::string option_str) diff --git a/samples/host_exerciser/host_exerciser_cmd.h b/samples/host_exerciser/host_exerciser_cmd.h index 47b1fcdd0539..50a6b06a10f5 100644 --- a/samples/host_exerciser/host_exerciser_cmd.h +++ b/samples/host_exerciser/host_exerciser_cmd.h @@ -737,46 +737,8 @@ class host_exerciser_cmd : public test_command host_exe_ = dynamic_cast(afu); token_ = d_afu->get_token(); - token_device_ = d_afu->get_token_device(); - - // Check if memory calibration has failed and error out before proceeding - // with the test. The dfl-emif driver creates sysfs entries to report the - // calibration status for each memory channel. sysobjects are the OPAE-API's - // abstraction for sysfs entries. However, at this time, these are only - // accessible through tokens that use the xfpga plugin and not the vfio - // plugin. Hence our use of the DEVICE token (token_device_). One - // non-ideality of the following implementation is the use of - // MAX_NUM_MEM_CHANNELS. We are essentially doing a brute-force query of - // sysfs entries since we don't know how many mem channels exist on the - // given platform. What about glob wildcards? Why not simply glob for - // "*dfl*/**/inf*_cal_fail" and use the OPAE-API's support for arrays of - // sysobjects? 
The reason is that, at the time of this writing, the - // xfpga-plugin's sysobject implementation does not support arrays - // specifically when the glob contains a recursive wildcard "/**/". It's a - // strange and perhaps unnecessary limitation. Therefore, future work is to - // fix that and clean up the code below. - for (size_t i = 0; i < MAX_NUM_MEM_CHANNELS; i++) { - std::stringstream mem_cal_glob; - // Construct the glob string to search for the cal_fail sysfs entry - // for the i'th mem channel - mem_cal_glob << "*dfl*/**/inf" << i << "_cal_fail"; - // Ask for a sysobject with this glob string - fpga::sysobject::ptr_t testobj = fpga::sysobject::get( - token_device_, mem_cal_glob.str().c_str(), FPGA_OBJECT_GLOB); - - // if test obj !=null, the sysfs entry was found. - // Read the calibration status from the sysfs entry. - // A non-zero value (typically '1') means - // calibration has failed --> we error out. - if (testobj && testobj->read64(0)) { - std::cout - << "This sysfs entry reports that memory calibration has failed:" - << mem_cal_glob.str().c_str() << std::endl; - return -1; - } - } - + fpga_emif_status(afu); // Read HW details uint64_t he_info = host_exe_->read64(HE_INFO0); he_lpbk_api_ver_ = (he_info >> 16); @@ -880,6 +842,54 @@ class host_exerciser_cmd : public test_command return status; } + + void fpga_emif_status(test_afu* afu) + { + auto d_afu = dynamic_cast(afu); + token_device_ = d_afu->get_token_device(); + + if (!token_device_) + return; + + // Check if memory calibration has failed and error out before proceeding + // with the test. The dfl-emif driver creates sysfs entries to report the + // calibration status for each memory channel. sysobjects are the OPAE-API's + // abstraction for sysfs entries. However, at this time, these are only + // accessible through tokens that use the xfpga plugin and not the vfio + // plugin. Hence our use of the DEVICE token (token_device_). 
One + // non-ideality of the following implementation is the use of + // MAX_NUM_MEM_CHANNELS. We are essentially doing a brute-force query of + // sysfs entries since we don't know how many mem channels exist on the + // given platform. What about glob wildcards? Why not simply glob for + // "*dfl*/**/inf*_cal_fail" and use the OPAE-API's support for arrays of + // sysobjects? The reason is that, at the time of this writing, the + // xfpga-plugin's sysobject implementation does not support arrays + // specifically when the glob contains a recursive wildcard "/**/". It's a + // strange and perhaps unnecessary limitation. Therefore, future work is to + // fix that and clean up the code below. + + for (size_t i = 0; i < MAX_NUM_MEM_CHANNELS; i++) { + std::stringstream mem_cal_glob; + // Construct the glob string to search for the cal_fail sysfs entry + // for the i'th mem channel + mem_cal_glob << "*dfl*/**/inf" << i << "_cal_fail"; + // Ask for a sysobject with this glob string + fpga::sysobject::ptr_t testobj = fpga::sysobject::get( + token_device_, mem_cal_glob.str().c_str(), FPGA_OBJECT_GLOB); + + // if test obj !=null, the sysfs entry was found. + // Read the calibration status from the sysfs entry. + // A non-zero value (typically '1') means + // calibration has failed --> we error out. + if (testobj && testobj->read64(0)) { + std::cout + << "This sysfs entry reports that memory calibration has failed:" + << mem_cal_glob.str().c_str() << std::endl; + return; + } + } + + } protected: he_cfg he_lpbk_cfg_;