Skip to content

Commit

Permalink
Removed need for gpudev and added support for accurate TX sending (#174)
Browse files Browse the repository at this point in the history
Signed-off-by: cliffburdick <[email protected]>
  • Loading branch information
cliffburdick authored Feb 7, 2024
1 parent cf54c8f commit 8d46327
Show file tree
Hide file tree
Showing 11 changed files with 189 additions and 68 deletions.
44 changes: 29 additions & 15 deletions applications/adv_networking_bench/adv_networking_bench_rx.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,41 +14,55 @@
# See the License for the specific language governing permissions and
# limitations under the License.
---
multithreaded: true
num_delay_ops: 32
delay: 0.1
delay_step: 0.01

scheduler:
check_recession_period_ms: 0
worker_thread_number: 5
stop_on_deadlock: true
stop_on_deadlock_timeout: 500

advanced_network:
cfg:
version: 1
master_core: 5 # Master CPU core
master_core: 5 # Master CPU core
rx:
- if_name: 3d:00.0 # PCIe BDF of NIC
- if_name: 0005:03:00.1 # PCIe BDF of NIC
flow_isolation: true
queues:
- name: "Default"
id: 0
gpu_direct: false
cpu_cores: "7"
cpu_cores: "10"
max_packet_size: 9000 # Maximum payload size
num_concurrent_batches: 32767 # Number of batches that can be used at any time
batch_size: 1 # Number of packets in a batch
batch_size: 1 # Number of packets in a batch
output_port: "bench_rx_out"
- name: "ADC Samples"
id: 1
gpu_device: 0
gpu_direct: true
split_boundary: 42
cpu_cores: "6"
split_boundary: 0
cpu_cores: "11"
max_packet_size: 9000 # Maximum payload size
num_concurrent_batches: 20 # Number of batches that can be used at any time
batch_size: 1000 # Number of packets in a batch
num_concurrent_batches: 15 # Number of batches that can be used at any time
batch_size: 5120 # Number of packets in a batch
output_port: "bench_rx_out"
flows:
- name: "ADC Samples"
action:
action:
type: queue
id: 1
match:
udp_src: 4096
udp_dst: 4096
udp_src: 4096 #12288
udp_dst: 4096 #12288

bench_rx:
split_boundary: true
batch_size: 10000
max_packet_size: 7680
header_size: 42
split_boundary: false
gpu_direct: true
batch_size: 10240
max_packet_size: 8064
header_size: 64
17 changes: 10 additions & 7 deletions applications/adv_networking_bench/adv_networking_bench_tx.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,34 +21,37 @@ delay_step: 0.01

scheduler:
check_recession_period_ms: 0
worker_thread_number: 4
worker_thread_number: 5
stop_on_deadlock: true
stop_on_deadlock_timeout: 500

advanced_network:
cfg:
version: 1
master_core: 8 # Master CPU core
master_core: 5 # Master CPU core
tx:
- if_name: 0005:03:00.1 # PCIe BDF of NIC
- if_name: 0005:03:00.1 # PCIe BDF of NIC
accurate_send: false
queues:
- name: "ADC Samples"
id: 0
gpu_direct: true
gpu_device: 0
split_boundary: 64
split_boundary: 0
max_packet_size: 8064 # Maximum payload size
num_concurrent_batches: 5 # Number of batches that can be used at any time
batch_size: 10240 # Number of packets in a batch
cpu_cores: "9" # CPU cores for transmitting
cpu_cores: "7" # CPU cores for transmitting

bench_tx:
eth_dst_addr: 00:00:00:00:11:22 # Destination MAC
udp_dst_port: 4096 # UDP destination port
udp_src_port: 4096 # UDP source port
gpu_direct: true
split_boundary: 64
split_boundary: 0
batch_size: 10000
payload_size: 8000
header_size: 64
ip_src_addr: 192.168.0.1 # Source IP send from
ip_dst_addr: 192.168.0.2 # Destination IP to send to
ip_dst_addr: 192.168.0.2 # Destination IP to send to
port_id: 0
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ advanced_network:
master_core: 5 # Master CPU core
tx:
- if_name: 0005:03:00.1 # PCIe BDF of NIC
accurate_send: false
queues:
- name: "ADC Samples"
id: 0
Expand Down
2 changes: 0 additions & 2 deletions operators/advanced_network/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ RUN cd /tmp && tar xf dpdk-${DPDK_VERSION}.tar.xz
COPY ./dpdk_patches/*.patch /tmp/dpdk-stable-${DPDK_VERSION}
WORKDIR /tmp/dpdk-stable-${DPDK_VERSION}/
RUN patch --ignore-whitespace --fuzz 3 config/arm/meson.build /tmp/dpdk-stable-${DPDK_VERSION}/dpdk.nvidia.patch
RUN patch --ignore-whitespace --fuzz 3 drivers/gpu/cuda/devices.h /tmp/dpdk-stable-${DPDK_VERSION}/devices.h.patch
RUN patch --ignore-whitespace --fuzz 3 drivers/gpu/cuda/cuda.c /tmp/dpdk-stable-${DPDK_VERSION}/cuda.c.patch
RUN CFLAGS=-I/usr/local/cuda/include meson build -Dplatform=generic -Dc_args=-I/usr/local/cuda/include \
-Ddisable_drivers=baseband/*,bus/ifpga/*,common/cpt,common/dpaax,common/iavf,common/octeontx,common/octeontx2,crypto/nitrox,net/ark,net/atlantic,net/avp,net/axgbe,net/bnx2x,net/bnxt,net/cxgbe,net/e1000,net/ena,net/enic,net/fm10k,net/hinic,net/hns3,net/i40e,net/ixgbe,vdpa/ifc,net/igc,net/liquidio,net/netvsc,net/nfp,net/qede,net/sfc,net/thunderx,net/vdev_netvsc,net/vmxnet3,regex/octeontx2,
RUN ninja -C build install
Expand Down
12 changes: 8 additions & 4 deletions operators/advanced_network/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ but raw headers can also be constructed.
- Linux
- A DPDK-compatible network card. For GPUDirect, only NVIDIA NICs are supported.
- System tuning as described below
- DPDK 22.11 installed with gpudev support compiled in
- DPDK 22.11
- MOFED 5.8-1.0.1.1 or later

#### Features
Expand Down Expand Up @@ -74,8 +74,8 @@ translations that have to be actively maintained in MMUs. 1GB hugepages are idea
available. To configure 1GB hugepages:

```
mkdir /mnt/huge
mount -t hugetlbfs nodev /mnt/huge
sudo mkdir /mnt/huge
sudo mount -t hugetlbfs nodev /mnt/huge
sudo sh -c "echo nodev /mnt/huge hugetlbfs pagesize=1GB 0 0 >> /etc/fstab"
```

Expand All @@ -86,7 +86,7 @@ only available at the boot command since they must be provided before the kernel
editing the boot command can be done with the following configuration:

```
vim /boot/extlinux/extlinux.conf
sudo vim /boot/extlinux/extlinux.conf
# Find the line starting with APPEND and add the following
# For Orin IGX:
Expand All @@ -98,6 +98,8 @@ isolcpus=4-7 nohz_full=4-7 irqaffinity=0-3 rcu_nocbs=4-7 rcu_nocb_poll tsc=relia

The settings above isolate CPU cores 6-11 on the Orin and 4-7 on the Clara, and turn 1GB hugepages on.

For systems other than IGX or AGX, consult your system's documentation for how to change the boot command.

##### Setting the CPU governor

The CPU governor reduces power consumption by decreasing the clock frequency of the CPU when cores are idle. While this is useful
Expand Down Expand Up @@ -185,6 +187,8 @@ unnecessarily use excess CPU and/or GPU memory.

- **`if_name`**: Name of the interface or PCIe BDF to use
- type: `string`
- **`accurate_send`**: Boolean flag to turn on accurate TX scheduling
- type: `boolean`
- **`queues`**: Array of queues
- type: `array`
- **`name`**: Name of queue
Expand Down
9 changes: 9 additions & 0 deletions operators/advanced_network/adv_network_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,15 @@ AdvNetStatus adv_net_set_pkt_len(std::shared_ptr<AdvNetBurstParams> burst,
return adv_net_set_pkt_len(burst.get(), idx, cpu_len, gpu_len);
}

// Raw-pointer overload: hands the request to the global manager (g_ano_mgr) and returns
// whatever status the manager's set_pkt_tx_time() reports, unmodified.
AdvNetStatus adv_net_set_pkt_tx_time(AdvNetBurstParams *burst, int idx, uint64_t time) {
  const AdvNetStatus status = g_ano_mgr->set_pkt_tx_time(burst, idx, time);
  return status;
}

// shared_ptr overload: unwraps the burst and defers to the raw-pointer overload, returning
// its status as-is. The shared_ptr is only read here; ownership is not transferred.
AdvNetStatus adv_net_set_pkt_tx_time(std::shared_ptr<AdvNetBurstParams> burst,
                                     int idx, uint64_t time) {
  AdvNetBurstParams *const raw_burst = burst.get();
  return adv_net_set_pkt_tx_time(raw_burst, idx, time);
}

// Returns the packet count stored in the burst header (hdr.hdr.num_pkts).
// The burst pointer is dereferenced unconditionally, so it must not be null.
int64_t adv_net_get_num_pkts(AdvNetBurstParams *burst) {
  const auto &burst_hdr = burst->hdr.hdr;
  return burst_hdr.num_pkts;
}
Expand Down
25 changes: 25 additions & 0 deletions operators/advanced_network/adv_network_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,25 @@ AdvNetStatus adv_net_set_pkt_len(std::shared_ptr<AdvNetBurstParams> burst,
int cpu_len,
int gpu_len);

/**
 * @brief Set packet TX time
 *
 * Requests a transmit time (expressed in PTP time) for the packet at index `idx` of the
 * burst. Every packet transmitted after this one in the same queue will be transmitted
 * no earlier than the time passed to this call. This feature is only available on
 * ConnectX-7 or BlueField 3 and higher cards.
 *
 * Two overloads are provided: one taking a raw burst pointer and one taking a
 * std::shared_ptr to the burst.
 *
 * NOTE(review): presumably the TX interface must be configured with `accurate_send: true`
 * for the requested time to be honored -- confirm against the manager implementation.
 *
 * @param burst Burst structure containing packet lists
 * @param idx Index of packet
 * @param time PTP time to transmit (NOTE(review): units are not stated here -- confirm)
 * @return AdvNetStatus indicating status. Valid values are:
 *   SUCCESS: Time set successfully
 */
AdvNetStatus adv_net_set_pkt_tx_time(AdvNetBurstParams *burst, int idx, uint64_t time);
AdvNetStatus adv_net_set_pkt_tx_time(std::shared_ptr<AdvNetBurstParams> burst,
int idx,
uint64_t time);

/**
* @brief Frees a single packet
*
Expand Down Expand Up @@ -449,6 +468,12 @@ struct YAML::convert<holoscan::ops::AdvNetConfigYaml> {
holoscan::ops::AdvNetTxConfig tx_cfg;
tx_cfg.if_name_ = tx_item["if_name"].as<std::string>();

try {
tx_cfg.accurate_send_ = tx_item["accurate_send"].as<bool>();
} catch (const std::exception& e) {
tx_cfg.accurate_send_ = false;
}

for (const auto &q_item : tx_item["queues"]) {
holoscan::ops::TxQueueConfig q;
q.common_.name_ = q_item["name"].as<std::string>();
Expand Down
1 change: 1 addition & 0 deletions operators/advanced_network/adv_network_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ struct AdvNetRxConfig {

struct AdvNetTxConfig {
std::string if_name_;
bool accurate_send_;
uint16_t port_id_;
bool empty;
std::vector<TxQueueConfig> queues_;
Expand Down
1 change: 1 addition & 0 deletions operators/advanced_network/managers/adv_network_mgr.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class ANOMgr {
virtual void free_pkts(void **pkts, int len) = 0;
virtual void free_rx_burst(AdvNetBurstParams *burst) = 0;
virtual void free_tx_burst(AdvNetBurstParams *burst) = 0;
virtual AdvNetStatus set_pkt_tx_time(AdvNetBurstParams *burst, int idx, uint64_t time) = 0;
virtual void shutdown() = 0;
virtual void print_stats() = 0;

Expand Down
Loading

0 comments on commit 8d46327

Please sign in to comment.