Skip to content

RDMA FLUSH and ATOMIC WRITE with ODP #1580

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions kernel-headers/rdma/ib_user_verbs.h
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,22 @@ struct ib_uverbs_ex_query_device {
__u32 reserved;
};

/*
 * Bit flags for the general ODP (on-demand paging) capability mask.
 * The values match IBV_ODP_SUPPORT and IBV_ODP_SUPPORT_IMPLICIT in
 * libibverbs/verbs.h.
 */
enum ib_uverbs_odp_general_cap_bits {
	IB_UVERBS_ODP_SUPPORT		= 1 << 0,
	IB_UVERBS_ODP_SUPPORT_IMPLICIT	= 1 << 1,
};

/*
 * Per-transport ODP (on-demand paging) capability bits: one bit per
 * operation type. The values match the IBV_ODP_SUPPORT_* transport
 * bits in libibverbs/verbs.h bit for bit.
 */
enum ib_uverbs_odp_transport_cap_bits {
	IB_UVERBS_ODP_SUPPORT_SEND		= 1 << 0,
	IB_UVERBS_ODP_SUPPORT_RECV		= 1 << 1,
	IB_UVERBS_ODP_SUPPORT_WRITE		= 1 << 2,
	IB_UVERBS_ODP_SUPPORT_READ		= 1 << 3,
	IB_UVERBS_ODP_SUPPORT_ATOMIC		= 1 << 4,
	IB_UVERBS_ODP_SUPPORT_SRQ_RECV		= 1 << 5,
	IB_UVERBS_ODP_SUPPORT_FLUSH		= 1 << 6,
	IB_UVERBS_ODP_SUPPORT_ATOMIC_WRITE	= 1 << 7,
};

struct ib_uverbs_odp_caps {
__aligned_u64 general_caps;
struct {
Expand Down
8 changes: 7 additions & 1 deletion libibverbs/examples/devinfo.c
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,9 @@ static void print_odp_trans_caps(uint32_t trans)
IBV_ODP_SUPPORT_WRITE |
IBV_ODP_SUPPORT_READ |
IBV_ODP_SUPPORT_ATOMIC |
IBV_ODP_SUPPORT_SRQ_RECV);
IBV_ODP_SUPPORT_SRQ_RECV |
IBV_ODP_SUPPORT_FLUSH |
IBV_ODP_SUPPORT_ATOMIC_WRITE);

if (!trans) {
printf("\t\t\t\t\tNO SUPPORT\n");
Expand All @@ -347,6 +349,10 @@ static void print_odp_trans_caps(uint32_t trans)
printf("\t\t\t\t\tSUPPORT_ATOMIC\n");
if (trans & IBV_ODP_SUPPORT_SRQ_RECV)
printf("\t\t\t\t\tSUPPORT_SRQ\n");
if (trans & IBV_ODP_SUPPORT_FLUSH)
printf("\t\t\t\t\tSUPPORT_FLUSH\n");
if (trans & IBV_ODP_SUPPORT_ATOMIC_WRITE)
printf("\t\t\t\t\tSUPPORT_ATOMIC_WRITE\n");
if (trans & unknown_transport_caps)
printf("\t\t\t\t\tUnknown flags: 0x%" PRIX32 "\n",
trans & unknown_transport_caps);
Expand Down
2 changes: 2 additions & 0 deletions libibverbs/man/ibv_query_device_ex.3
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ enum ibv_odp_transport_cap_bits {
IBV_ODP_SUPPORT_READ = 1 << 3, /* RDMA-Read operations support on-demand paging */
IBV_ODP_SUPPORT_ATOMIC = 1 << 4, /* RDMA-Atomic operations support on-demand paging */
IBV_ODP_SUPPORT_SRQ_RECV = 1 << 5, /* SRQ receive operations support on-demand paging */
IBV_ODP_SUPPORT_FLUSH = 1 << 6, /* RDMA-Flush operations support on-demand paging */
IBV_ODP_SUPPORT_ATOMIC_WRITE = 1 << 7, /* RDMA-Atomic-write operations support on-demand paging */
};

struct ibv_tso_caps {
Expand Down
12 changes: 7 additions & 5 deletions libibverbs/verbs.h
Original file line number Diff line number Diff line change
Expand Up @@ -228,13 +228,20 @@ struct ibv_query_device_ex_input {
uint32_t comp_mask;
};

/* Bit flags for the general ODP (on-demand paging) capability mask. */
enum ibv_odp_general_caps {
	IBV_ODP_SUPPORT			= 1 << 0,
	IBV_ODP_SUPPORT_IMPLICIT	= 1 << 1,
};

enum ibv_odp_transport_cap_bits {
IBV_ODP_SUPPORT_SEND = 1 << 0,
IBV_ODP_SUPPORT_RECV = 1 << 1,
IBV_ODP_SUPPORT_WRITE = 1 << 2,
IBV_ODP_SUPPORT_READ = 1 << 3,
IBV_ODP_SUPPORT_ATOMIC = 1 << 4,
IBV_ODP_SUPPORT_SRQ_RECV = 1 << 5,
IBV_ODP_SUPPORT_FLUSH = 1 << 6,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why aren't these bits coming from UAPI?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay. I will send a kernel patch to add IB_UVERBS_ODP_* flags in include/uapi/rdma/ib_user_verbs.h and update this PR accordingly.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rleon
I've submitted the kernel patch and updated this PR.

Thanks

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I revised the kernel-headers commit after all the kernel-side patches were merged.

IBV_ODP_SUPPORT_ATOMIC_WRITE = 1 << 7,
};

struct ibv_odp_caps {
Expand All @@ -246,11 +253,6 @@ struct ibv_odp_caps {
} per_transport_caps;
};

/* Bit flags for the general ODP (on-demand paging) capability mask. */
enum ibv_odp_general_caps {
	IBV_ODP_SUPPORT			= 1 << 0,
	IBV_ODP_SUPPORT_IMPLICIT	= 1 << 1,
};

struct ibv_tso_caps {
uint32_t max_tso;
uint32_t supported_qpts;
Expand Down
4 changes: 3 additions & 1 deletion pyverbs/device.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,9 @@ cdef class ODPCaps(PyverbsObject):
e.IBV_ODP_SUPPORT_WRITE: 'IBV_ODP_SUPPORT_WRITE',
e.IBV_ODP_SUPPORT_READ: 'IBV_ODP_SUPPORT_READ',
e.IBV_ODP_SUPPORT_ATOMIC: 'IBV_ODP_SUPPORT_ATOMIC',
e.IBV_ODP_SUPPORT_SRQ_RECV: 'IBV_ODP_SUPPORT_SRQ_RECV'}
e.IBV_ODP_SUPPORT_SRQ_RECV: 'IBV_ODP_SUPPORT_SRQ_RECV',
e.IBV_ODP_SUPPORT_FLUSH: 'IBV_ODP_SUPPORT_FLUSH',
e.IBV_ODP_SUPPORT_ATOMIC_WRITE: 'IBV_ODP_SUPPORT_ATOMIC_WRITE'}

print_format = '{}: {}\n'
return print_format.format('ODP General caps', str_from_flags(self.general_caps, general_caps)) +\
Expand Down
2 changes: 2 additions & 0 deletions pyverbs/libibverbs_enums.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,8 @@ cdef extern from '<infiniband/verbs.h>':
IBV_ODP_SUPPORT_READ
IBV_ODP_SUPPORT_ATOMIC
IBV_ODP_SUPPORT_SRQ_RECV
IBV_ODP_SUPPORT_FLUSH
IBV_ODP_SUPPORT_ATOMIC_WRITE

cpdef enum ibv_device_cap_flags:
IBV_DEVICE_RESIZE_MAX_WR
Expand Down
73 changes: 73 additions & 0 deletions tests/test_odp.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from pyverbs.mr import MR
import pyverbs.enums as e
import tests.utils as u
import unittest

HUGE_PAGE_SIZE = 0x200000

Expand Down Expand Up @@ -112,6 +113,56 @@ def create_mr(self):
flags=MAP_ANONYMOUS_| MAP_PRIVATE_)
self.mr = u.create_custom_mr(self, e.IBV_ACCESS_ON_DEMAND, user_addr=self.user_addr)

# RC test resources for ODP (on-demand paging) exercised through an extended QP
# built with u.create_qp_ex(). The odp_caps bits select both the QP send-ops
# flags (create_qps) and how the MR is registered (create_mr). Only the
# ATOMIC_WRITE and FLUSH caps are handled; any other value raises SkipTest.
class OdpQpExRC(RCResources):
def __init__(self, dev_name, ib_port, gid_index, is_huge=False,
request_user_addr=False, use_mr_prefetch=None, is_implicit=False,
prefetch_advice=e._IBV_ADVISE_MR_ADVICE_PREFETCH_WRITE,
msg_size=8, odp_caps=e.IBV_ODP_SUPPORT_SEND | e.IBV_ODP_SUPPORT_RECV,
use_mixed_mr=False):

''' For object descriptions, refer to OdpRC class '''
# NOTE(review): is_huge, use_mr_prefetch, prefetch_advice and use_mixed_mr are
# accepted but never read in this class as shown - presumably kept so the
# signature matches OdpRC; confirm.
# NOTE(review): the default odp_caps (SEND | RECV) has neither the ATOMIC_WRITE
# nor the FLUSH bit, so create_qps()/create_mr() would raise SkipTest for it.
self.request_user_addr = request_user_addr
self.is_implicit = is_implicit
self.odp_caps = odp_caps
# Access flags used by create_mr() for the atomic-write MR.
self.access = e.IBV_ACCESS_LOCAL_WRITE | e.IBV_ACCESS_ON_DEMAND | \
e.IBV_ACCESS_REMOTE_ATOMIC | e.IBV_ACCESS_REMOTE_READ | \
e.IBV_ACCESS_REMOTE_WRITE
# Stays None unless create_mr() maps a user buffer (request_user_addr).
self.user_addr = None
# The attributes above are assigned before the base constructor runs -
# presumably because it invokes create_mr()/create_qps(); TODO confirm.
super(OdpQpExRC, self).__init__(dev_name=dev_name, ib_port=ib_port,
gid_index=gid_index)
self.msg_size = msg_size

# Flush attributes are only defined when the FLUSH cap was requested;
# presumably consumed by the flush traffic helper - confirm against utils.
if self.odp_caps & e.IBV_ODP_SUPPORT_FLUSH:
self.ptype = e.IBV_FLUSH_GLOBAL
self.level = e.IBV_FLUSH_RANGE

# Create the extended RC QP; ATOMIC_WRITE is checked first, so it takes
# precedence when both ATOMIC_WRITE and FLUSH bits are set.
def create_qps(self):
if self.odp_caps & e.IBV_ODP_SUPPORT_ATOMIC_WRITE:
u.create_qp_ex(self, e.IBV_QPT_RC, e.IBV_QP_EX_WITH_ATOMIC_WRITE)
elif self.odp_caps & e.IBV_ODP_SUPPORT_FLUSH:
u.create_qp_ex(self, e.IBV_QPT_RC, e.IBV_QP_EX_WITH_FLUSH | e.IBV_QP_EX_WITH_RDMA_WRITE)
else:
raise unittest.SkipTest('There is no qpex test for the specified ODP caps.')

# Register the ODP MR that matches the requested cap (same precedence as
# create_qps).
def create_mr(self):
# Capability check for RC with the requested caps - presumably skips the test
# when the device lacks them; confirm in tests/utils.
u.odp_supported(self.ctx, 'rc', self.odp_caps)
if self.odp_caps & e.IBV_ODP_SUPPORT_ATOMIC_WRITE:
access = self.access
# Optionally back the MR with an anonymous private mapping of msg_size bytes.
if self.request_user_addr:
mmap_flags = MAP_ANONYMOUS_| MAP_PRIVATE_
length = self.msg_size
self.user_addr = mmap(length=length, flags=mmap_flags)
self.mr = MR(self.pd, self.msg_size, access, address=self.user_addr,
implicit=self.is_implicit)
elif self.odp_caps & e.IBV_ODP_SUPPORT_FLUSH:
# EINVAL from MR creation is turned into a skip; any other error is re-raised.
# NOTE(review): PyverbsRDMAError and errno must be imported by this module -
# the import block is not visible here; confirm.
try:
self.mr = u.create_custom_mr(self, e.IBV_ACCESS_FLUSH_GLOBAL | e.IBV_ACCESS_REMOTE_WRITE | e.IBV_ACCESS_ON_DEMAND)
except PyverbsRDMAError as ex:
if ex.error_code == errno.EINVAL:
raise unittest.SkipTest('Create mr with IBV_ACCESS_FLUSH_GLOBAL access flag is not supported in kernel')
raise ex
else:
raise unittest.SkipTest('There is no qpex test for the specified ODP caps.')

class OdpTestCase(RDMATestCase):
def setUp(self):
Expand Down Expand Up @@ -150,6 +201,28 @@ def test_odp_rc_mixed_mr(self):
use_mixed_mr=True)
u.traffic(**self.traffic_args)

# ODP atomic-write test: builds OdpQpExRC players with the ATOMIC_WRITE cap,
# forces an 8-byte message size on both sides and runs u.rdma_traffic() with
# new_send=True and IBV_WR_ATOMIC_WRITE as the send opcode.
# NOTE(review): this calls super().create_players() while the neighbouring
# test_odp_rc_atomic_cmp_and_swp calls self.create_players() - confirm the
# difference is intentional.
def test_odp_qp_ex_rc_atomic_write(self):
super().create_players(OdpQpExRC, request_user_addr=self.force_page_faults,
msg_size=8, odp_caps=e.IBV_ODP_SUPPORT_ATOMIC_WRITE)
self.client.msg_size = 8
self.server.msg_size = 8
u.rdma_traffic(**self.traffic_args,
new_send=True, send_op=e.IBV_WR_ATOMIC_WRITE)

# ODP flush test: builds OdpQpExRC players with the FLUSH cap and runs
# u.flush_traffic() twice, raising PyverbsError if the first returned work
# completion is not IBV_WC_SUCCESS.
# NOTE(review): PyverbsError and wc_status_to_str must be imported by this
# module - the import block is not fully visible here; confirm.
def test_odp_qp_ex_rc_flush(self):
super().create_players(OdpQpExRC, request_user_addr=self.force_page_faults,
odp_caps=e.IBV_ODP_SUPPORT_FLUSH)
# First pass uses the level the resources were created with
# (OdpQpExRC sets IBV_FLUSH_RANGE when the FLUSH cap is requested).
wcs = u.flush_traffic(**self.traffic_args, new_send=True,
send_op=e.IBV_WR_FLUSH)
if wcs[0].status != e.IBV_WC_SUCCESS:
raise PyverbsError(f'Unexpected {wc_status_to_str(wcs[0].status)}')

# Second pass repeats the flush with the client's level switched to IBV_FLUSH_MR.
self.client.level = e.IBV_FLUSH_MR
wcs = u.flush_traffic(**self.traffic_args, new_send=True,
send_op=e.IBV_WR_FLUSH)
if wcs[0].status != e.IBV_WC_SUCCESS:
raise PyverbsError(f'Unexpected {wc_status_to_str(wcs[0].status)}')

def test_odp_rc_atomic_cmp_and_swp(self):
self.create_players(OdpRC, request_user_addr=self.force_page_faults,
msg_size=8, odp_caps=e.IBV_ODP_SUPPORT_ATOMIC)
Expand Down
65 changes: 14 additions & 51 deletions tests/test_qpex.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,51 +12,14 @@
from tests.base import UDResources, RCResources, RDMATestCase, XRCResources
import tests.utils as u


# Create an extended QP (QPEx) on agr_obj's context.
#   agr_obj:    resources object providing ctx, cq, pd, ib_port, num_msgs and
#               (for non-XRC-send QPs) the qps/qps_num/psns lists.
#   qp_type:    QP type (IBV_QPT_UD / IBV_QPT_RC / IBV_QPT_XRC_SEND are
#               special-cased below).
#   send_flags: value passed as send_ops_flags in the init attributes.
# For IBV_QPT_XRC_SEND the QP is returned to the caller; for every other type
# it is appended to agr_obj.qps (with its number and a random PSN) and nothing
# is returned. Raises unittest.SkipTest when QP creation fails with EOPNOTSUPP.
def create_qp_ex(agr_obj, qp_type, send_flags):
# An XRC send QP is created without a receive queue (zero recv WRs/SGEs).
if qp_type == e.IBV_QPT_XRC_SEND:
cap = QPCap(max_send_wr=agr_obj.num_msgs, max_recv_wr=0, max_recv_sge=0,
max_send_sge=1)
else:
cap = QPCap(max_send_wr=agr_obj.num_msgs, max_recv_wr=agr_obj.num_msgs,
max_recv_sge=1, max_send_sge=1)
# The same CQ is used for both send and receive completions.
qia = QPInitAttrEx(cap=cap, qp_type=qp_type, scq=agr_obj.cq,
rcq=agr_obj.cq, pd=agr_obj.pd, send_ops_flags=send_flags,
comp_mask=e.IBV_QP_INIT_ATTR_PD |
e.IBV_QP_INIT_ATTR_SEND_OPS_FLAGS)
qp_attr = QPAttr(port_num=agr_obj.ib_port)
# UD QPs take their qkey and pkey index from the resources object.
if qp_type == e.IBV_QPT_UD:
qp_attr.qkey = agr_obj.UD_QKEY
qp_attr.pkey_index = agr_obj.UD_PKEY_INDEX
# RC QPs get remote write/read/atomic plus both flush access flags,
# regardless of which send_flags were requested.
if qp_type == e.IBV_QPT_RC:
qp_attr.qp_access_flags = e.IBV_ACCESS_REMOTE_WRITE | \
e.IBV_ACCESS_REMOTE_READ | \
e.IBV_ACCESS_REMOTE_ATOMIC | \
e.IBV_ACCESS_FLUSH_GLOBAL | \
e.IBV_ACCESS_FLUSH_PERSISTENT
# EOPNOTSUPP from creation is reported as a skip; other errors propagate.
try:
# We don't have capability bits for this
qp = QPEx(agr_obj.ctx, qia, qp_attr)
except PyverbsRDMAError as ex:
if ex.error_code == errno.EOPNOTSUPP:
raise unittest.SkipTest('Extended QP is not supported on this device')
raise ex
# Non-XRC-send QPs are recorded on the resources object together with a
# random 24-bit PSN; the XRC send QP is handed back to the caller instead.
if qp_type != e.IBV_QPT_XRC_SEND:
agr_obj.qps.append(qp)
agr_obj.qps_num.append(qp.qp_num)
agr_obj.psns.append(random.getrandbits(24))
else:
return qp


class QpExUDSend(UDResources):
def create_qps(self):
create_qp_ex(self, e.IBV_QPT_UD, e.IBV_QP_EX_WITH_SEND)
u.create_qp_ex(self, e.IBV_QPT_UD, e.IBV_QP_EX_WITH_SEND)


class QpExRCSend(RCResources):
def create_qps(self):
create_qp_ex(self, e.IBV_QPT_RC, e.IBV_QP_EX_WITH_SEND)
u.create_qp_ex(self, e.IBV_QPT_RC, e.IBV_QP_EX_WITH_SEND)


class QpExXRCSend(XRCResources):
Expand All @@ -72,20 +35,20 @@ def create_qps(self):
recv_qp = QP(self.ctx, attr_ex, qp_attr)
self.rqp_lst.append(recv_qp)

send_qp = create_qp_ex(self, e.IBV_QPT_XRC_SEND, e.IBV_QP_EX_WITH_SEND)
send_qp = u.create_qp_ex(self, e.IBV_QPT_XRC_SEND, e.IBV_QP_EX_WITH_SEND)
self.sqp_lst.append(send_qp)
self.qps_num.append((recv_qp.qp_num, send_qp.qp_num))
self.psns.append(random.getrandbits(24))


class QpExUDSendImm(UDResources):
def create_qps(self):
create_qp_ex(self, e.IBV_QPT_UD, e.IBV_QP_EX_WITH_SEND_WITH_IMM)
u.create_qp_ex(self, e.IBV_QPT_UD, e.IBV_QP_EX_WITH_SEND_WITH_IMM)


class QpExRCSendImm(RCResources):
def create_qps(self):
create_qp_ex(self, e.IBV_QPT_RC, e.IBV_QP_EX_WITH_SEND_WITH_IMM)
u.create_qp_ex(self, e.IBV_QPT_RC, e.IBV_QP_EX_WITH_SEND_WITH_IMM)


class QpExXRCSendImm(XRCResources):
Expand All @@ -101,7 +64,7 @@ def create_qps(self):
recv_qp = QP(self.ctx, attr_ex, qp_attr)
self.rqp_lst.append(recv_qp)

send_qp = create_qp_ex(self, e.IBV_QPT_XRC_SEND,
send_qp = u.create_qp_ex(self, e.IBV_QPT_XRC_SEND,
e.IBV_QP_EX_WITH_SEND_WITH_IMM)
self.sqp_lst.append(send_qp)
self.qps_num.append((recv_qp.qp_num, send_qp.qp_num))
Expand All @@ -112,7 +75,7 @@ class QpExRCFlush(RCResources):
ptype = e.IBV_FLUSH_GLOBAL
level = e.IBV_FLUSH_RANGE
def create_qps(self):
create_qp_ex(self, e.IBV_QPT_RC, e.IBV_QP_EX_WITH_FLUSH | e.IBV_QP_EX_WITH_RDMA_WRITE)
u.create_qp_ex(self, e.IBV_QPT_RC, e.IBV_QP_EX_WITH_FLUSH | e.IBV_QP_EX_WITH_RDMA_WRITE)

def create_mr(self):
try:
Expand All @@ -125,23 +88,23 @@ def create_mr(self):

class QpExRCAtomicWrite(RCResources):
def create_qps(self):
create_qp_ex(self, e.IBV_QPT_RC, e.IBV_QP_EX_WITH_ATOMIC_WRITE)
u.create_qp_ex(self, e.IBV_QPT_RC, e.IBV_QP_EX_WITH_ATOMIC_WRITE)

def create_mr(self):
self.mr = u.create_custom_mr(self, e.IBV_ACCESS_REMOTE_WRITE)


class QpExRCRDMAWrite(RCResources):
def create_qps(self):
create_qp_ex(self, e.IBV_QPT_RC, e.IBV_QP_EX_WITH_RDMA_WRITE)
u.create_qp_ex(self, e.IBV_QPT_RC, e.IBV_QP_EX_WITH_RDMA_WRITE)

def create_mr(self):
self.mr = u.create_custom_mr(self, e.IBV_ACCESS_REMOTE_WRITE)


class QpExRCRDMAWriteImm(RCResources):
def create_qps(self):
create_qp_ex(self, e.IBV_QPT_RC,
u.create_qp_ex(self, e.IBV_QPT_RC,
e.IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM)

def create_mr(self):
Expand All @@ -150,29 +113,29 @@ def create_mr(self):

class QpExRCRDMARead(RCResources):
def create_qps(self):
create_qp_ex(self, e.IBV_QPT_RC, e.IBV_QP_EX_WITH_RDMA_READ)
u.create_qp_ex(self, e.IBV_QPT_RC, e.IBV_QP_EX_WITH_RDMA_READ)

def create_mr(self):
self.mr = u.create_custom_mr(self, e.IBV_ACCESS_REMOTE_READ)


class QpExRCAtomicCmpSwp(RCResources):
def create_qps(self):
create_qp_ex(self, e.IBV_QPT_RC,
u.create_qp_ex(self, e.IBV_QPT_RC,
e.IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP)
self.mr = u.create_custom_mr(self, e.IBV_ACCESS_REMOTE_ATOMIC)


class QpExRCAtomicFetchAdd(RCResources):
def create_qps(self):
create_qp_ex(self, e.IBV_QPT_RC,
u.create_qp_ex(self, e.IBV_QPT_RC,
e.IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD)
self.mr = u.create_custom_mr(self, e.IBV_ACCESS_REMOTE_ATOMIC)


class QpExRCBindMw(RCResources):
def create_qps(self):
create_qp_ex(self, e.IBV_QPT_RC, e.IBV_QP_EX_WITH_RDMA_WRITE |
u.create_qp_ex(self, e.IBV_QPT_RC, e.IBV_QP_EX_WITH_RDMA_WRITE |
e.IBV_QP_EX_WITH_BIND_MW)

def create_mr(self):
Expand Down
Loading