From dbab99c45dd75fe7442812de6b18b7606580f3d6 Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Tue, 22 Mar 2022 17:12:48 +0100 Subject: [PATCH 01/91] [rdmalib] Add support for libfabric --- rdmalib/include/rdmalib/buffer.hpp | 45 ++++ rdmalib/include/rdmalib/connection.hpp | 47 ++++ rdmalib/include/rdmalib/rdmalib.hpp | 43 +++ rdmalib/include/rdmalib/recv_buffer.hpp | 11 + rdmalib/lib/buffer.cpp | 64 +++++ rdmalib/lib/connection.cpp | 341 +++++++++++++++++++++++- rdmalib/lib/rdmalib.cpp | 227 +++++++++++++++- 7 files changed, 774 insertions(+), 4 deletions(-) diff --git a/rdmalib/include/rdmalib/buffer.hpp b/rdmalib/include/rdmalib/buffer.hpp index b7dec48..b9f8288 100644 --- a/rdmalib/include/rdmalib/buffer.hpp +++ b/rdmalib/include/rdmalib/buffer.hpp @@ -7,9 +7,14 @@ #include +#ifdef USE_LIBFABRIC +#include +#include +#else struct ibv_pd; struct ibv_mr; struct ibv_sge; +#endif namespace rdmalib { @@ -25,7 +30,11 @@ namespace rdmalib { uint32_t _bytes; uint32_t _byte_size; void* _ptr; + #ifdef USE_LIBFABRIC + fid_mr* _mr; + #else ibv_mr* _mr; + #endif bool _own_memory; Buffer(); @@ -37,12 +46,24 @@ namespace rdmalib { public: uintptr_t address() const; void* ptr() const; + #ifdef USE_LIBFABRIC + fid_mr* mr() const; + #else ibv_mr* mr() const; + #endif uint32_t data_size() const; uint32_t size() const; uint32_t bytes() const; + #ifdef USE_LIBFABRIC + void register_memory(fid_domain *pd, int access); + #else void register_memory(ibv_pd *pd, int access); + #endif + #ifdef USE_LIBFABRIC + void *lkey() const; + #else uint32_t lkey() const; + #endif uint32_t rkey() const; ScatterGatherElement sge(uint32_t size, uint32_t offset) const; }; @@ -106,11 +127,20 @@ namespace rdmalib { struct ScatterGatherElement { // smallvector in practice + #ifdef USE_LIBFABRIC + mutable std::vector _sges; + mutable std::vector _lkeys; + #else mutable std::vector _sges; + #endif ScatterGatherElement(); + #ifdef USE_LIBFABRIC + ScatterGatherElement(uint64_t addr, uint32_t bytes, void 
*lkey); + #else ScatterGatherElement(uint64_t addr, uint32_t bytes, uint32_t lkey); + #endif template ScatterGatherElement(const Buffer & buf) @@ -121,18 +151,33 @@ namespace rdmalib { template void add(const Buffer & buf) { + #ifdef USE_LIBFABRIC + _sges.push_back({buf.address(), buf.bytes()}); + _lkeys.push_back(buf.lkey()); + #else //emplace_back for structs will be supported in C++20 _sges.push_back({buf.address(), buf.bytes(), buf.lkey()}); + #endif } template void add(const Buffer & buf, uint32_t size, size_t offset = 0) { + #ifdef USE_LIBFABRIC + _sges.push_back({buf.address() + offset, size}); + _lkeys.push_back(buf.lkey()); + #else //emplace_back for structs will be supported in C++20 _sges.push_back({buf.address() + offset, size, buf.lkey()}); + #endif } + #ifdef USE_LIBFABRIC + iovec *array() const; + void **lkeys() const; + #else ibv_sge * array() const; + #endif size_t size() const; }; } diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index 3b3fb08..97b3183 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -2,14 +2,22 @@ #ifndef __RDMALIB_CONNECTION_HPP__ #define __RDMALIB_CONNECTION_HPP__ +#include +#include +#include #include #include #include #include +#ifdef USE_LIBFABRIC +#include +#include +#else #include #include +#endif #include namespace rdmalib { @@ -19,6 +27,7 @@ namespace rdmalib { RECV }; + #ifndef USE_LIBFABRIC struct ConnectionConfiguration { // Configuration of QP ibv_qp_init_attr attr; @@ -26,6 +35,7 @@ namespace rdmalib { ConnectionConfiguration(); }; + #endif enum class ConnectionStatus { // The connection object does not bind to a defined RDMA connection. 
@@ -43,22 +53,36 @@ namespace rdmalib { // b) Queue Pair struct Connection { private: + #ifdef USE_LIBFABRIC + fid_ep* _qp; + fid_cq* _rcv_channel; + fid_cq* _trx_channel; + #else rdma_cm_id* _id; ibv_qp* _qp; ibv_comp_channel* _channel; + #endif int32_t _req_count; int32_t _private_data; bool _passive; ConnectionStatus _status; static const int _wc_size = 32; // FIXME: associate this with RecvBuffer + #ifdef USE_LIBFABRIC + std::array _swc; // fast fix for overlapping polling + std::array _rwc; + fi_cq_err_entry _ewc; + #else std::array _swc; // fast fix for overlapping polling std::array _rwc; + #endif std::array _rwc_sges; int _send_flags; static const int _rbatch = 32; // 32 for faster division in the code + #ifndef USE_LIBFABRIC struct ibv_recv_wr _batch_wrs[_rbatch]; // preallocated and prefilled batched recv. + #endif public: Connection(bool passive = false); @@ -69,18 +93,33 @@ namespace rdmalib { void initialize_batched_recv(const rdmalib::impl::Buffer & sge, size_t offset); void inlining(bool enable); + #ifdef USE_LIBFABRIC + void initialize(fid_domain* pd, fi_info* info, fid_eq* ec); + #else void initialize(rdma_cm_id* id); + #endif void close(); + #ifdef USE_LIBFABRIC + fid* id() const; + fid_ep* qp() const; + fid_cq* receive_completion_channel() const; + fid_cq* transmit_completion_channel() const; + #else rdma_cm_id* id() const; ibv_qp* qp() const; ibv_comp_channel* completion_channel() const; + #endif uint32_t private_data() const; ConnectionStatus status() const; void set_status(ConnectionStatus status); void set_private_data(uint32_t private_data); // Blocking, no timeout + #ifdef USE_LIBFABRIC + std::tuple poll_wc(QueueType, bool blocking = true, int count = -1); + #else std::tuple poll_wc(QueueType, bool blocking = true, int count = -1); + #endif int32_t post_send(const ScatterGatherElement & elem, int32_t id = -1, bool force_inline = false); int32_t post_recv(ScatterGatherElement && elem, int32_t id = -1, int32_t count = 1); @@ -97,11 +136,19 @@ 
namespace rdmalib { int32_t post_atomic_fadd(ScatterGatherElement && elems, const RemoteBuffer & rbuf, uint64_t add); // Register to be notified about all events, including unsolicited ones + #ifdef USE_LIBFABRIC + void wait_events(); + #else void notify_events(bool only_solicited = false); ibv_cq* wait_events(); void ack_events(ibv_cq* cq, int len); + #endif private: + #ifdef USE_LIBFABRIC + int32_t _post_write(ScatterGatherElement && elems, fi_msg_rma & msg, bool force_inline, bool force_solicited); + #else int32_t _post_write(ScatterGatherElement && elems, ibv_send_wr wr, bool force_inline, bool force_solicited); + #endif }; } diff --git a/rdmalib/include/rdmalib/rdmalib.hpp b/rdmalib/include/rdmalib/rdmalib.hpp index 857beb9..4a69807 100644 --- a/rdmalib/include/rdmalib/rdmalib.hpp +++ b/rdmalib/include/rdmalib/rdmalib.hpp @@ -9,7 +9,12 @@ #include #include +#ifdef USE_LIBFABRIC +#include +#include +#else #include +#endif #include #include @@ -18,8 +23,14 @@ namespace rdmalib { // Implemented as IPV4 struct Address { + #ifdef USE_LIBFABRIC + fi_info* addrinfo; + fi_info* hints; + fid_fabric* fabric; + #else rdma_addrinfo *addrinfo; rdma_addrinfo hints; + #endif uint16_t _port; Address(const std::string & ip, int port, bool passive); @@ -29,35 +40,59 @@ namespace rdmalib { }; struct RDMAActive { + #ifndef USE_LIBFABRIC ConnectionConfiguration _cfg; + #endif std::unique_ptr _conn; Address _addr; + #ifdef USE_LIBFABRIC + sockaddr_in _remote_addr; + fid_eq* _ec; + fid_domain* _pd; + #else rdma_event_channel * _ec; ibv_pd* _pd; + #endif RDMAActive(const std::string & ip, int port, int recv_buf = 1, int max_inline_data = 0); ~RDMAActive(); void allocate(); bool connect(uint32_t secret = 0); void disconnect(); + #ifdef USE_LIBFABRIC + fid_domain* pd() const; + #else ibv_pd* pd() const; + #endif Connection & connection(); bool is_connected(); }; struct RDMAPassive { + #ifndef USE_LIBFABRIC ConnectionConfiguration _cfg; + #endif Address _addr; + #ifdef USE_LIBFABRIC + 
fid_eq* _ec; + fid_domain* _pd; + fid_pep* _pep; + #else rdma_event_channel * _ec; rdma_cm_id* _listen_id; ibv_pd* _pd; + #endif // Set of connections that have been std::unordered_set _active_connections; RDMAPassive(const std::string & ip, int port, int recv_buf = 1, bool initialize = true, int max_inline_data = 0); ~RDMAPassive(); void allocate(); + #ifdef USE_LIBFABRIC + fid_domain* pd() const; + #else ibv_pd* pd() const; + #endif // Blocking poll for new rdmacm events. // Returns connection pointer and connection change status. // When connection is REQUESTED and ESTABLISHED, the pointer points to a valid connection. @@ -69,6 +104,14 @@ namespace rdmalib { void accept(Connection* connection); void set_nonblocking_poll(); }; + + #ifdef USE_LIBFABRIC + struct eventEntry { + fid_t fid; + struct fi_info *info; + uint32_t secret; + }; + #endif } #endif diff --git a/rdmalib/include/rdmalib/recv_buffer.hpp b/rdmalib/include/rdmalib/recv_buffer.hpp index 67aa7a7..1c5c91a 100644 --- a/rdmalib/include/rdmalib/recv_buffer.hpp +++ b/rdmalib/include/rdmalib/recv_buffer.hpp @@ -31,6 +31,16 @@ namespace rdmalib { refill(); } + #ifdef USE_LIBFABRIC + inline std::tuple poll(bool blocking = false) + { + auto wc = this->_conn->poll_wc(rdmalib::QueueType::RECV, blocking); + if(std::get<1>(wc)) + SPDLOG_DEBUG("Polled reqs {}, left {}", std::get<1>(wc), _requests); + _requests -= std::get<1>(wc); + return wc; + } + #else inline std::tuple poll(bool blocking = false) { auto wc = this->_conn->poll_wc(rdmalib::QueueType::RECV, blocking); @@ -39,6 +49,7 @@ namespace rdmalib { _requests -= std::get<1>(wc); return wc; } + #endif inline bool refill() { diff --git a/rdmalib/lib/buffer.cpp b/rdmalib/lib/buffer.cpp index 5202de0..b40234e 100644 --- a/rdmalib/lib/buffer.cpp +++ b/rdmalib/lib/buffer.cpp @@ -1,7 +1,13 @@ // mmap #include + +#ifdef USE_LIBFABRIC +#include +#include +#else #include +#endif #include #include @@ -89,11 +95,26 @@ namespace rdmalib { namespace impl { _bytes, 
fmt::ptr(_mr), fmt::ptr(_ptr) ); if(_mr) + #ifdef USE_LIBFABRIC + fi_close(&_mr->fid); + #else ibv_dereg_mr(_mr); + #endif if(_own_memory) munmap(_ptr, _bytes); } + #ifdef USE_LIBFABRIC + void Buffer::register_memory(fid_domain *pd, int access) + { + int ret = fi_mr_reg(pd, _ptr, _bytes, access, 0, 0, 0, &_mr, nullptr); + impl::expect_zero(ret); + SPDLOG_DEBUG( + "Registered {} bytes, mr {}, address {}, lkey {}, rkey {}", + _bytes, fmt::ptr(_mr), fmt::ptr(_ptr), *(uint32_t *)fi_mr_desc(_mr), fi_mr_key(_mr) + ); + } + #else void Buffer::register_memory(ibv_pd* pd, int access) { _mr = ibv_reg_mr(pd, _ptr, _bytes, access); @@ -103,11 +124,19 @@ namespace rdmalib { namespace impl { _bytes, fmt::ptr(_mr), fmt::ptr(_mr->addr), _mr->lkey, _mr->rkey ); } + #endif + #ifdef USE_LIBFABRIC + fid_mr* Buffer::mr() const + { + return this->_mr; + } + #else ibv_mr* Buffer::mr() const { return this->_mr; } + #endif uint32_t Buffer::data_size() const { @@ -124,6 +153,13 @@ namespace rdmalib { namespace impl { return this->_bytes; } + #ifdef USE_LIBFABRIC + void *Buffer::lkey() const + { + assert(this->_mr); + return fi_mr_desc(this->_mr); + } + #else uint32_t Buffer::lkey() const { assert(this->_mr); @@ -131,12 +167,21 @@ namespace rdmalib { namespace impl { return this->_mr->lkey; //return 0; } + #endif + #ifdef USE_LIBFABRIC + uint32_t Buffer::rkey() const + { + assert(this->_mr); + return fi_mr_key(this->_mr); + } + #else uint32_t Buffer::rkey() const { assert(this->_mr); return this->_mr->rkey; } + #endif uintptr_t Buffer::address() const { @@ -162,20 +207,39 @@ namespace rdmalib { { } + #ifdef USE_LIBFABRIC + iovec *ScatterGatherElement::array() const + { + return _sges.data(); + } + void **ScatterGatherElement::lkeys() const + { + return _lkeys.data(); + } + #else ibv_sge * ScatterGatherElement::array() const { return _sges.data(); } + #endif size_t ScatterGatherElement::size() const { return _sges.size(); } + #ifdef USE_LIBFABRIC + 
ScatterGatherElement::ScatterGatherElement(uint64_t addr, uint32_t bytes, void *lkey) + { + _sges.push_back({(void *)addr, bytes}); + _lkeys.push_back(lkey); + } + #else ScatterGatherElement::ScatterGatherElement(uint64_t addr, uint32_t bytes, uint32_t lkey) { _sges.push_back({addr, bytes, lkey}); } + #endif RemoteBuffer::RemoteBuffer(): addr(0), diff --git a/rdmalib/lib/connection.cpp b/rdmalib/lib/connection.cpp index fc4b86d..5f5fb33 100644 --- a/rdmalib/lib/connection.cpp +++ b/rdmalib/lib/connection.cpp @@ -1,5 +1,17 @@ +#include +#include +#include +#include #include +#ifdef USE_LIBFABRIC +#include +#include +#include +#include "rdmalib/buffer.hpp" +#include +#include +#endif #include #include @@ -8,16 +20,23 @@ namespace rdmalib { + #ifndef USE_LIBFABRIC ConnectionConfiguration::ConnectionConfiguration() { memset(&attr, 0, sizeof(attr)); memset(&conn_param, 0 , sizeof(conn_param)); } + #endif Connection::Connection(bool passive): + #ifdef USE_LIBFABRIC + _rcv_channel(nullptr), + _trx_channel(nullptr), + #else _id(nullptr), - _qp(nullptr), _channel(nullptr), + #endif + _qp(nullptr), _req_count(0), _private_data(0), _passive(passive), @@ -25,6 +44,9 @@ namespace rdmalib { { inlining(false); + #ifdef USE_LIBFABRIC + SPDLOG_DEBUG("Allocate a connection with qp fid {}", fmt::ptr(&_qp->fid)); + #else for(int i=0; i < _rbatch; i++){ _batch_wrs[i].wr_id = i; _batch_wrs[i].sg_list = 0; @@ -33,28 +55,41 @@ namespace rdmalib { } _batch_wrs[_rbatch-1].next = NULL; SPDLOG_DEBUG("Allocate a connection with id {}", fmt::ptr(_id)); + #endif } Connection::~Connection() { + #ifdef USE_LIBFABRIC + SPDLOG_DEBUG("Deallocate a connection with qp fid {}", fmt::ptr(&_qp->fid)); + #else SPDLOG_DEBUG("Deallocate a connection with id {}", fmt::ptr(_id)); + #endif close(); } Connection::Connection(Connection&& obj): + #ifdef USE_LIBFABRIC + _rcv_channel(obj._rcv_channel), + _trx_channel(obj._trx_channel), + #else _id(obj._id), - _qp(obj._qp), _channel(obj._channel), + #endif + 
_qp(obj._qp), _req_count(obj._req_count), _private_data(obj._private_data), _passive(obj._passive), _status(obj._status), _send_flags(obj._send_flags) { + #ifndef USE_LIBFABRIC obj._id = nullptr; + #endif obj._qp = nullptr; obj._req_count = 0; + #ifndef USE_LIBFABRIC for(int i=0; i < _rbatch; i++){ _batch_wrs[i].wr_id = i; _batch_wrs[i].sg_list = 0; @@ -62,6 +97,7 @@ namespace rdmalib { _batch_wrs[i].next=&(_batch_wrs[i+1]); } _batch_wrs[_rbatch-1].next = NULL; + #endif } void Connection::initialize_batched_recv(const rdmalib::impl::Buffer & buf, size_t offset) @@ -70,11 +106,36 @@ namespace rdmalib { _rwc_sges[i] = buf.sge(offset, i*offset); //for(auto & sg : _rwc_sges[i]._sges) //sg.addr += i*offset; + #ifndef USE_LIBFABRIC _batch_wrs[i].sg_list = _rwc_sges[i].array(); _batch_wrs[i].num_sge = _rwc_sges[i].size(); + #endif } } + #ifdef USE_LIBFABRIC + void Connection::initialize(fid_domain* pd, fi_info* info, fid_eq* ec) + { + // Create the endpoint + impl::expect_zero(fi_endpoint(pd, info, &_qp, reinterpret_cast(this))); + + // Bind with the completion queues and the event queue + impl::expect_zero(fi_ep_bind(_qp, &ec->fid, 0)); + fi_cq_attr attr; + memset(&attr, 0, sizeof(attr)); + attr.format = FI_CQ_FORMAT_DATA; + attr.wait_obj = FI_WAIT_UNSPEC; + attr.wait_cond = FI_CQ_COND_NONE; + impl::expect_zero(fi_cq_open(pd, &attr, &_rcv_channel, nullptr)); + impl::expect_zero(fi_ep_bind(_qp, &_rcv_channel->fid, FI_RECV)); + impl::expect_zero(fi_cq_open(pd, &attr, &_trx_channel, nullptr)); + impl::expect_zero(fi_ep_bind(_qp, &_trx_channel->fid, FI_TRANSMIT)); + + // Enable the endpoint + impl::expect_zero(fi_enable(_qp)); + SPDLOG_DEBUG("Initialize a connection with"); + } + #else void Connection::initialize(rdma_cm_id* id) { this->_id = id; @@ -82,7 +143,9 @@ namespace rdmalib { this->_qp = this->_id->qp; SPDLOG_DEBUG("Initialize a connection with id {}", fmt::ptr(_id)); } + #endif + #ifndef USE_LIBFABRIC void Connection::inlining(bool enable) { if(enable) @@ -90,9 
+153,20 @@ namespace rdmalib { else _send_flags = IBV_SEND_SIGNALED; } + #endif void Connection::close() { + #ifdef USE_LIBFABRIC + SPDLOG_DEBUG("Connection close called for {} with qp fid {}", fmt::ptr(this), fmt::ptr(&this->_qp->fid)); + if(_qp) { + // We need to close the transmit and receive channels and the endpoint + impl::expect_zero(fi_close(&_rcv_channel->fid)); + impl::expect_zero(fi_close(&_trx_channel->fid)); + impl::expect_zero(fi_close(&_qp->fid)); + _status = ConnectionStatus::DISCONNECTED; + } + #else SPDLOG_DEBUG("Connection close called for {} id {}", fmt::ptr(this), fmt::ptr(this->_id)); if(_id) { // When the connection is allocated on active side @@ -113,22 +187,48 @@ namespace rdmalib { _id = nullptr; _status = ConnectionStatus::DISCONNECTED; } + #endif } + #ifdef USE_LIBFABRIC + fid* Connection::id() const + { + return &this->_qp->fid; + } + #else rdma_cm_id* Connection::id() const { return this->_id; } + #endif + #ifdef USE_LIBFABRIC + fid_ep* Connection::qp() const + { + return this->_qp; + } + #else ibv_qp* Connection::qp() const { return this->_qp; } + #endif + #ifdef USE_LIBFABRIC + fid_cq* Connection::receive_completion_channel() const + { + return this->_rcv_channel; + } + fid_cq* Connection::transmit_completion_channel() const + { + return this->_trx_channel; + } + #else ibv_comp_channel* Connection::completion_channel() const { return this->_channel; } + #endif uint32_t Connection::private_data() const { @@ -152,6 +252,23 @@ namespace rdmalib { int32_t Connection::post_send(const ScatterGatherElement & elems, int32_t id, bool force_inline) { + #ifdef USE_LIBFABRIC + // FIXME: extend with multiple sges + id = id == -1 ? 
_req_count++ - 1 : id; + SPDLOG_DEBUG("Post send to local Local QPN fid {}", &_qp->fid); + fi_addr_t temp; + if(fi_sendv(_qp, elems.array(), elems.lkeys(), elems.size(), temp, reinterpret_cast((uint64_t)id))) { + spdlog::error("Post send unsuccessful, reason {} {}, sges_count {}, wr_id {}", + errno, strerror(errno), elems.size(), id + ); + return -1; + } + SPDLOG_DEBUG( + "Post send successful, sges_count {}, sge[0].addr {}, sge[0].size {}, wr_id {}", + elems.size(), elems.array()[0].iov_base, elems.array()[0].iov_len, id + ); + return _req_count - 1; + #else // FIXME: extend with multiple sges struct ibv_send_wr wr, *bad; wr.wr_id = id == -1 ? _req_count++ : id; @@ -173,10 +290,58 @@ namespace rdmalib { wr.num_sge, wr.sg_list[0].addr, wr.sg_list[0].length, wr.wr_id, wr.send_flags ); return _req_count - 1; + #endif } int32_t Connection::post_batched_empty_recv(int count) { + #ifdef USE_LIBFABRIC + int loops = count / _rbatch; + int reminder = count % _rbatch; + SPDLOG_DEBUG("Batch {} {} to local QPN fid {}", loops, reminder, &_qp->fid); + + int ret = 0; + fi_addr_t temp; + for(int i = 0; i < loops; ++i) { + for(int j = 0; j < _rbatch; ++j) { + auto begin = _rwc_sges[j]; + for (size_t k = 0; k < begin.size(); ++k) { + if(begin.array()[k].iov_len > 0) { + SPDLOG_DEBUG("Batched receive num_sge {} sge[0].ptr {} sge[0].length {}", begin.size(), begin.array()[k].iov_base, begin.array()[k].iov_len); + } else + SPDLOG_DEBUG("Batched receive num_sge {}", begin.size()); + } + ret = fi_recvv(_qp, begin.array(), begin.lkeys(), begin.size(), temp, nullptr); + if(ret) + break; + } + if(ret) + break; + } + + if(ret == 0 && reminder > 0){ + for(int j = 0; j < reminder; ++j) { + auto begin = _rwc_sges[j]; + for (size_t k = 0; k < begin.size(); ++k) { + if(begin.array()[k].iov_len > 0) { + SPDLOG_DEBUG("Batched receive num_sge {} sge[0].ptr {} sge[0].length {}", begin.size(), begin.array()[k].iov_base, begin.array()[k].iov_len); + } else + SPDLOG_DEBUG("Batched receive num_sge {}", 
begin.size()); + } + ret = fi_recvv(_qp, begin.array(), begin.lkeys(), begin.size(), temp, nullptr); + if(ret) + break; + } + } + + if(ret) { + spdlog::error("Batched Post empty recv unsuccessful, reason {} {}", ret, strerror(ret)); + return -1; + } + + SPDLOG_DEBUG("Batched Post empty recv successfull"); + return count; + #else struct ibv_recv_wr* bad = nullptr; int loops = count / _rbatch; int reminder = count % _rbatch; @@ -219,10 +384,36 @@ namespace rdmalib { SPDLOG_DEBUG("Batched Post empty recv succesfull"); return count; + #endif } int32_t Connection::post_recv(ScatterGatherElement && elem, int32_t id, int count) { + #ifdef USE_LIBFABRIC + fi_addr_t temp; + id = id == -1 ? _req_count++ : id; + SPDLOG_DEBUG("post recv to local Local QPN fid {}", &_qp->fid); + + int ret; + for(int i = 0; i < count; ++i) { + ret = fi_recvv(_qp, elem.array(), elem.lkeys(), count, temp, reinterpret_cast((uint64_t)id)); + if(ret) + break; + } + if(ret) { + spdlog::error("Post receive unsuccessful, reason {} {}", ret, strerror(ret)); + return -1; + } + if(elem.size() > 0) + SPDLOG_DEBUG( + "Post recv successfull, sges_count {}, sge[0].addr {}, sge[0].size {}, wr_id {}", + elem.size(), elem.array()[0].iov_base, elem.array()[0].iov_len, id + ); + else + SPDLOG_DEBUG("Post recv successfull"); + return id; + } + #else // FIXME: extend with multiple sges struct ibv_recv_wr wr, *bad; @@ -251,7 +442,43 @@ namespace rdmalib { SPDLOG_DEBUG("Post recv succesfull"); return wr.wr_id; } + #endif + #ifdef USE_LIBFABRIC + int32_t Connection::_post_write(ScatterGatherElement && elems, fi_msg_rma &msg, bool force_inline, bool force_solicited) + { + fi_addr_t temp; + int32_t id = _req_count++; + size_t count = elems.size(); + if(elems.size() == 1 && elems.array()[0].iov_len == 0) + count = 0; + + msg.msg_iov = elems.array(); + msg.desc = elems.lkeys(); + msg.iov_count = count; + msg.addr = temp; + msg.context = reinterpret_cast((uint64_t)id); + + int ret = fi_writemsg(_qp, &msg, 0); + if(ret) { + 
spdlog::error("Post write unsuccessful, reason {} {}, sges_count {}, wr_id {}, remote addr {}, remote rkey {}, imm data {}", + ret, strerror(ret), count, id, msg.rma_iov->addr, msg.rma_iov->key, ntohl(msg.data) + ); + return -1; + } + if(elems.size() > 0) + SPDLOG_DEBUG( + "Post write succesfull id: {}, sge size: {}, first lkey {} len {}, remote addr {}, remote rkey {}, imm data {}", + count, elems.size(), elems.lkeys()[0], elems.array()[0].iov_len, msg.rma_iov->addr, msg.rma_iov->key, ntohl(msg.data) + ); + else + SPDLOG_DEBUG( + "Post write succesfull id: {}, remote addr {}, remote rkey {}, imm data {}", id, msg.rma_iov->addr, msg.rma_iov->key, ntohl(msg.data) + ); + return _req_count - 1; + + } + #else int32_t Connection::_post_write(ScatterGatherElement && elems, ibv_send_wr wr, bool force_inline, bool force_solicited) { ibv_send_wr* bad; @@ -288,19 +515,44 @@ namespace rdmalib { return _req_count - 1; } + #endif int32_t Connection::post_write(ScatterGatherElement && elems, const RemoteBuffer & rbuf, bool force_inline) { + #ifdef USE_LIBFABRIC + fi_msg_rma msg; + fi_rma_iov iov; + memset(&msg, 0, sizeof(msg)); + iov.addr = rbuf.addr; + iov.key = rbuf.rkey; + iov.len = rbuf.size; + msg.rma_iov = &iov; + msg.rma_iov_count = 1; + return _post_write(std::forward(elems), msg, force_inline, false); + #else ibv_send_wr wr; memset(&wr, 0, sizeof(wr)); wr.opcode = IBV_WR_RDMA_WRITE; wr.wr.rdma.remote_addr = rbuf.addr; wr.wr.rdma.rkey = rbuf.rkey; return _post_write(std::forward(elems), wr, force_inline, false); + #endif } int32_t Connection::post_write(ScatterGatherElement && elems, const RemoteBuffer & rbuf, uint32_t immediate, bool force_inline, bool force_solicited) { + #ifdef USE_LIBFABRIC + fi_msg_rma msg; + fi_rma_iov iov; + memset(&msg, 0, sizeof(msg)); + iov.addr = rbuf.addr; + iov.key = rbuf.rkey; + iov.len = rbuf.size; + msg.rma_iov = &iov; + msg.rma_iov_count = 1; + msg.data = htonl(immediate); + return _post_write(std::forward(elems), msg, force_inline, 
force_solicited); + #else ibv_send_wr wr; memset(&wr, 0, sizeof(wr)); wr.opcode = IBV_WR_RDMA_WRITE_WITH_IMM; @@ -308,10 +560,25 @@ namespace rdmalib { wr.wr.rdma.remote_addr = rbuf.addr; wr.wr.rdma.rkey = rbuf.rkey; return _post_write(std::forward(elems), wr, force_inline, force_solicited); + #endif } int32_t Connection::post_cas(ScatterGatherElement && elems, const RemoteBuffer & rbuf, uint64_t compare, uint64_t swap) { + #ifdef USE_LIBFABRIC + // TODO check if + fi_addr_t temp; + int32_t id = _req_count++; + memcpy(elems.array()[0].iov_base, &swap, sizeof(swap)); + memcpy(elems.array()[1].iov_base, &compare, sizeof(compare)); + int ret = fi_compare_atomic(_qp, elems.array()[0].iov_base, 1, elems.lkeys()[0], elems.array()[1].iov_base, elems.lkeys()[1], elems.array()[1].iov_base, elems.lkeys()[1], temp, rbuf.addr, rbuf.rkey, FI_UINT64, FI_CSWAP, reinterpret_cast((uint64_t)id)); + if(ret) { + spdlog::error("Post write unsuccessful, reason {} {}", errno, strerror(errno)); + return -1; + } + SPDLOG_DEBUG("Post write id {} successful", id); + return _req_count - 1; + #else ibv_send_wr wr, *bad; memset(&wr, 0, sizeof(wr)); wr.wr_id = _req_count++; @@ -332,10 +599,25 @@ namespace rdmalib { } SPDLOG_DEBUG("Post write succesfull"); return _req_count - 1; + #endif } int32_t Connection::post_atomic_fadd(ScatterGatherElement && elems, const RemoteBuffer & rbuf, uint64_t add) { + #ifdef USE_LIBFABRIC + fi_addr_t temp; + int32_t id = _req_count++; + memcpy(elems.array()[0].iov_base, &add, sizeof(add)); + int ret = fi_atomic(_qp, &elems.array()[0], 1, elems.lkeys()[0], temp, rbuf.addr, rbuf.rkey, FI_UINT64, FI_SUM, reinterpret_cast((uint64_t)id)); + if(ret) { + spdlog::error("Post write unsuccesful, reason {} {}", errno, strerror(errno)); + return -1; + } + SPDLOG_DEBUG( + "Post atomic fadd succesfull id: {}, remote addr {}, remote rkey {}, val {}", id, rbuf.addr, rbuf.rkey, add + ); + return _req_count - 1; + #else ibv_send_wr wr, *bad; memset(&wr, 0, sizeof(wr)); wr.wr_id = 
_req_count++; @@ -357,8 +639,47 @@ namespace rdmalib { "Post atomic fadd succesfull id: {}, remote addr {}, remote rkey {}, val {}", wr.wr_id, wr.wr.rdma.remote_addr, wr.wr.rdma.rkey, wr.wr.atomic.compare_add ); return _req_count - 1; + #endif } + #ifdef USE_LIBFABRIC + std::tuple Connection::poll_wc(QueueType type, bool blocking, int count) + { + int ret = 0; + fi_cq_data_entry* wcs = (type == QueueType::RECV ? _rwc.data() : _swc.data()); + + //spdlog::error("{} {} {}", fmt::ptr(_qp), fmt::ptr(_qp->recv_cq), fmt::ptr(wcs)); + do { + ret = fi_cq_read( + type == QueueType::RECV ? _rcv_channel : _trx_channel, + wcs, + count == -1 ? _wc_size : count + ); + if (ret == -FI_EAVAIL) { + ret = fi_cq_readerr(type == QueueType::RECV ? _rcv_channel : _trx_channel, &_ewc, 0); + if (ret != 1) + ret = -1; + else + spdlog::error( + "Queue {} WC {} finished with an error {}", + type == QueueType::RECV ? "recv" : "send", + reinterpret_cast(_ewc.op_context), + fi_strerror(_ewc.err) + ); + } + } while(blocking && (ret == 0 || ret == -EAGAIN)); + + if(ret < 0) { + spdlog::error("Failure of polling events from: {} queue! Return value {}, errno {}", type == QueueType::RECV ? "recv" : "send", fi_strerror(ret), errno); + return std::make_tuple(nullptr, -1); + } + if(ret) + for(int i = 0; i < ret; ++i) { + SPDLOG_DEBUG("Queue {} Ret {}/{} WC {}", type == QueueType::RECV ? 
"recv" : "send", i + 1, ret, reinterpret_cast(wcs[i].op_context)); + } + return std::make_tuple(wcs, ret); + } + #else std::tuple Connection::poll_wc(QueueType type, bool blocking, int count) { int ret = 0; @@ -390,12 +711,25 @@ namespace rdmalib { } return std::make_tuple(wcs, ret); } + #endif + #ifndef USE_LIBFABRIC void Connection::notify_events(bool only_solicited) { impl::expect_zero(ibv_req_notify_cq(_qp->recv_cq, only_solicited)); } + #endif + #ifdef USE_LIBFABRIC + void Connection::wait_events() + { + int ret; + fi_cq_data_entry entry; + do { + ret = fi_cq_sread(_rcv_channel, &entry, 1, 0, -1); + } while (ret != 1 || entry.flags != FI_REMOTE_WRITE); + } + #else ibv_cq* Connection::wait_events() { ibv_cq* ev_cq = nullptr; @@ -403,10 +737,13 @@ namespace rdmalib { impl::expect_zero(ibv_get_cq_event(_channel, &ev_cq, &ev_ctx)); return ev_cq; } + #endif + #ifndef USE_LIBFABRIC void Connection::ack_events(ibv_cq* cq, int len) { ibv_ack_cq_events(cq, len); } + #endif } diff --git a/rdmalib/lib/rdmalib.cpp b/rdmalib/lib/rdmalib.cpp index b21dc43..a267dce 100644 --- a/rdmalib/lib/rdmalib.cpp +++ b/rdmalib/lib/rdmalib.cpp @@ -1,6 +1,7 @@ #include "rdmalib/connection.hpp" #include +#include // inet_ntoa #include #include @@ -9,6 +10,13 @@ #include #include +#ifdef USE_LIBFABRIC +#include +#include +#include +#include +#include +#endif #include #include @@ -18,14 +26,33 @@ namespace rdmalib { + // FIXME: Add credential support Address::Address(const std::string & ip, int port, bool passive) { + #ifdef USE_LIBFABRIC + // Set the hints and addrinfo to clear structures + hints = fi_allocinfo(); + addrinfo = fi_allocinfo(); + + // Set the hints to have ability to conduct MSG, Atomic and RMA operations + hints->caps |= FI_MSG | FI_RMA | FI_ATOMIC; + // Set the hints to indicate that we will register the local buffers + hints->mode |= FI_LOCAL_MR; + hints->addr_format = FI_SOCKADDR_IN; + if(passive) + impl::expect_zero(fi_getinfo(FI_VERSION(1, 13), ip.c_str(), 
std::to_string(port).c_str(), FI_SOURCE, hints, &addrinfo)); + else + impl::expect_zero(fi_getinfo(FI_VERSION(1, 13), nullptr, nullptr, 0, hints, &addrinfo)); + fi_freeinfo(hints); + fi_fabric(addrinfo->fabric_attr, &fabric, nullptr); + #else memset(&hints, 0, sizeof hints); hints.ai_port_space = RDMA_PS_TCP; if(passive) hints.ai_flags = RAI_PASSIVE; impl::expect_zero(rdma_getaddrinfo(ip.c_str(), std::to_string(port).c_str(), &hints, &addrinfo)); + #endif this->_port = port; } @@ -44,6 +71,27 @@ namespace rdmalib { local_in.sin_family = AF_INET; inet_pton(AF_INET, sip.c_str(), &local_in.sin_addr); + #ifdef USE_LIBFABRIC + // Set the hints and addrinfo to clear structures + hints = fi_allocinfo(); + addrinfo = fi_allocinfo(); + + // Set the hints to have ability to conduct MSG, Atomic and RMA operations + hints->caps |= FI_MSG | FI_RMA | FI_ATOMIC; + // Set the hints to indicate that we will register the local buffers + hints->mode |= FI_LOCAL_MR; + + // Set addresses and their format + hints->addr_format = FI_SOCKADDR_IN; + hints->src_addrlen = sizeof(local_in); + hints->dest_addrlen = sizeof(server_in); + hints->src_addr = &local_in; + hints->dest_addr = &server_in; + + impl::expect_zero(fi_getinfo(FI_VERSION(1, 13), nullptr, nullptr, 0, hints, &addrinfo)); + fi_freeinfo(hints); + fi_fabric(addrinfo->fabric_attr, &fabric, nullptr); + #else memset(&hints, 0, sizeof hints); hints.ai_port_space = RDMA_PS_TCP; hints.ai_src_len = sizeof(local_in); @@ -52,13 +100,19 @@ namespace rdmalib { hints.ai_dst_addr = (struct sockaddr *)(&server_in); impl::expect_zero(rdma_getaddrinfo(NULL, NULL, &hints, &addrinfo)); + #endif this->_port = port; } Address::~Address() { + #ifdef USE_LIBFABRIC + fi_close(&fabric->fid); + fi_freeinfo(addrinfo); + #else rdma_freeaddrinfo(addrinfo); + #endif } RDMAActive::RDMAActive(const std::string & ip, int port, int recv_buf, int max_inline_data): @@ -67,6 +121,12 @@ namespace rdmalib { _ec(nullptr), _pd(nullptr) { + #ifdef USE_LIBFABRIC + 
memset(&_remote_addr, 0, sizeof(_remote_addr)); + _remote_addr.sin_family = AF_INET; + _remote_addr.sin_port = htons(port); + inet_pton(AF_INET, ip.c_str(), &_remote_addr.sin_addr); + #else // Size of Queue Pair // Maximum requests in send queue // FIXME: configurable -> parallel workers @@ -88,12 +148,18 @@ namespace rdmalib { _cfg.conn_param.initiator_depth = 4; _cfg.conn_param.retry_count = 3; _cfg.conn_param.rnr_retry_count = 3; + #endif SPDLOG_DEBUG("Create RDMAActive"); } RDMAActive::~RDMAActive() { + #ifdef USE_LIBFABRIC + fi_close(&_pd->fid); + fi_close(&_ec->fid); + #else //ibv_dealloc_pd(this->_pd); + #endif SPDLOG_DEBUG("Destroy RDMAActive"); } @@ -101,11 +167,19 @@ namespace rdmalib { { if(!_conn) { _conn = std::unique_ptr(new Connection()); + #ifdef USE_LIBFABRIC + // Create a domain + impl::expect_zero(fi_domain(_addr.fabric, _addr.addrinfo, &_pd, nullptr)); + + // Create and enable the endpoint together with all the accompanying queues + _conn->initialize(_pd, _addr.addrinfo, _ec); + #else rdma_cm_id* id; impl::expect_zero(rdma_create_ep(&id, _addr.addrinfo, nullptr, nullptr)); impl::expect_zero(rdma_create_qp(id, _pd, &_cfg.attr)); _conn->initialize(id); _pd = _conn->id()->pd; + #endif //struct ibv_qp_attr attr; //struct ibv_qp_init_attr init_attr; @@ -160,6 +234,28 @@ namespace rdmalib { bool RDMAActive::connect(uint32_t secret) { allocate(); + #ifdef USE_LIBFABRIC + uint32_t *param = nullptr; + size_t paramlen = 0; + if(secret) { + param = &secret; + paramlen = sizeof(secret); + SPDLOG_DEBUG("Setting connection secret {} of length {}", secret, sizeof(uint32_t)); + } + if(fi_connect(_conn->qp(), &_remote_addr, param, paramlen)) { + spdlog::error("Connection unsuccesful, reason {} {}", errno, strerror(errno)); + _conn.reset(); + _pd = nullptr; + return false; + } else { + char address[INET_ADDRSTRLEN]; + inet_ntop(AF_INET, &_remote_addr.sin_addr, address, INET_ADDRSTRLEN); + spdlog::debug( + "[RDMAActive] Connection succesful to {}:{}", + address, 
_addr._port + ); + } + #else if(secret) { _cfg.conn_param.private_data = &secret; _cfg.conn_param.private_data_len = sizeof(uint32_t); @@ -176,6 +272,7 @@ namespace rdmalib { _addr._port, _addr._port, ibv_get_device_name(this->_conn->id()->verbs->device) ); } + #endif //struct ibv_qp_attr attr; //struct ibv_qp_init_attr init_attr; @@ -187,16 +284,30 @@ namespace rdmalib { void RDMAActive::disconnect() { - spdlog::debug("[RDMAActive] Disonnecting connection with id {}", fmt::ptr(_conn->id())); + #ifdef USE_LIBFABRIC + // TODO: Add the disconnectin id + spdlog::debug("[RDMAActive] Disconnecting connection with id {}", fmt::ptr(&_conn->qp()->fid)); + _conn.reset(); + _pd = nullptr; + #else + spdlog::debug("[RDMAActive] Disonnecting connection with id {}", fmt::ptr(_conn->id())); impl::expect_zero(rdma_disconnect(_conn->id())); _conn.reset(); _pd = nullptr; + #endif } + #ifdef USE_LIBFABRIC + fid_domain* RDMAActive::pd() const + { + return this->_pd; + } + #else ibv_pd* RDMAActive::pd() const { return this->_pd; } + #endif Connection & RDMAActive::connection() { @@ -211,9 +322,12 @@ namespace rdmalib { RDMAPassive::RDMAPassive(const std::string & ip, int port, int recv_buf, bool initialize, int max_inline_data): _addr(ip, port, true), _ec(nullptr), + #ifndef USE_LIBFABRIC _listen_id(nullptr), + #endif _pd(nullptr) { + #ifndef USE_LIBFABRIC // Size of Queue Pair // FIXME: configurable -> parallel workers _cfg.attr.cap.max_send_wr = 40; @@ -229,6 +343,7 @@ namespace rdmalib { _cfg.conn_param.initiator_depth = 4; _cfg.conn_param.retry_count = 3; _cfg.conn_param.rnr_retry_count = 3; + #endif if(initialize) this->allocate(); @@ -236,13 +351,34 @@ namespace rdmalib { RDMAPassive::~RDMAPassive() { + #ifdef USE_LIBFABRIC + fi_close(&_pd->fid); + fi_close(&_ec->fid); + #else rdma_destroy_id(this->_listen_id); rdma_destroy_event_channel(this->_ec); + #endif } void RDMAPassive::allocate() { + #ifdef USE_LIBFABRIC // Start listening + fi_eq_attr attr; + memset(&attr, 0, 
sizeof(attr)); + attr.size = 42; + attr.wait_obj = FI_WAIT_UNSPEC; + impl::expect_zero(fi_eq_open(_addr.fabric, nullptr, &_ec, nullptr)); + impl::expect_zero(fi_pep_bind(_pep, &_ec->fid, 0)); + impl::expect_zero(fi_listen(_pep)); + char address[INET_ADDRSTRLEN]; + inet_ntop(AF_INET, _addr.addrinfo->src_addr, address, INET_ADDRSTRLEN); + spdlog::info( + "Listening on address ", + address + ); + #else + // Start listening impl::expect_nonzero(this->_ec = rdma_create_event_channel()); impl::expect_zero(rdma_create_id(this->_ec, &this->_listen_id, NULL, RDMA_PS_TCP)); impl::expect_zero(rdma_bind_addr(this->_listen_id, this->_addr.addrinfo->ai_src_addr)); @@ -257,13 +393,22 @@ namespace rdmalib { "[RDMAPassive]: listening id {}, protection domain {}", fmt::ptr(this->_listen_id), _pd->handle ); + #endif } + #ifdef USE_LIBFABRIC + fid_domain* RDMAPassive::pd() const + { + return this->_pd; + } + #else ibv_pd* RDMAPassive::pd() const { return this->_pd; } + #endif + #ifndef USE_LIBFABRIC void RDMAPassive::set_nonblocking_poll() { int fd = this->_ec->fd; @@ -274,9 +419,18 @@ namespace rdmalib { return; } } + #endif bool RDMAPassive::nonblocking_poll_events(int timeout) { + #ifdef USE_LIBFABRIC + uint32_t event; + fi_eq_entry entry; + int ret = fi_eq_read(_ec, &event, &entry, sizeof(entry), FI_PEEK); + if (ret < 0 && ret != -FI_EAGAIN && ret != -FI_EAVAIL) + spdlog::error("RDMA event poll failed"); + return ret > 0 || ret == -FI_EAVAIL; + #else pollfd my_pollfd; my_pollfd.fd = this->_ec->fd; my_pollfd.events = POLLIN; @@ -287,10 +441,78 @@ namespace rdmalib { return false; } return rc > 0; + #endif } std::tuple RDMAPassive::poll_events(bool share_cqs) { + #ifdef USE_LIBFABRIC + uint32_t event; + eventEntry entry; + Connection* connection = nullptr; + ConnectionStatus status = ConnectionStatus::UNKNOWN; + + // Poll rdma cm events. 
+ int ret = fi_eq_read(_ec, &event, &entry, sizeof(entry), 0); + if(ret < 0 && ret != -FI_EAGAIN && ret != -FI_EAVAIL) { + spdlog::error("Event poll unsuccesful, reason {} {}", errno, strerror(errno)); + return std::make_tuple(nullptr, ConnectionStatus::UNKNOWN); + } + SPDLOG_DEBUG( + "[RDMAPassive] received event: {}", + fi_tostr(&event, FI_TYPE_EQ_EVENT) + ); + + switch (event) { + case FI_CONNREQ: + connection = new Connection{true}; + + // Read the secret + if(ret == sizeof(entry)) { + uint32_t data = *reinterpret_cast(entry.secret); + connection->set_private_data(data); + SPDLOG_DEBUG("[RDMAPassive] Connection request with private data {}", data); + } + else + SPDLOG_DEBUG("[RDMAPassive] Connection request with no private data"); + + // Check if we have a domain open for the connection already + if (!entry.info->domain_attr->domain) + fi_domain(_addr.fabric, entry.info, &_pd, NULL); + + // Enable the endpoint + connection->initialize(_pd, entry.info, _ec); + SPDLOG_DEBUG( + "[RDMAPassive] Created connection fid {} qp {}", + fmt::ptr(connection->id()), fmt::ptr(&connection->qp()->fid) + ); + + status = ConnectionStatus::REQUESTED; + _active_connections.insert(connection); + break; + case FI_CONNECTED: + SPDLOG_DEBUG( + "[RDMAPassive] Connection is established for id {}, and connection {}", + fmt::ptr(entry.fid), fmt::ptr(entry.fid->context) + ); + connection = reinterpret_cast(entry.fid->context); + status = ConnectionStatus::ESTABLISHED; + break; + case FI_SHUTDOWN: + SPDLOG_DEBUG( + "[RDMAPassive] Disconnect for id {}, and connection {}", + fmt::ptr(entry.fid), fmt::ptr(entry.fid->context) + ); + connection = reinterpret_cast(entry.fid->context); + //connection->close(); + status = ConnectionStatus::DISCONNECTED; + _active_connections.erase(connection); + break; + default: + spdlog::error("[RDMAPassive] Not any interesting event"); + break; + } + #else rdma_cm_event* event = nullptr; Connection* connection = nullptr; ConnectionStatus status = 
ConnectionStatus::UNKNOWN; @@ -381,12 +603,13 @@ namespace rdmalib { break; } rdma_ack_cm_event(event); + #endif return std::make_tuple(connection, status); } void RDMAPassive::accept(Connection* connection) { - if(rdma_accept(connection->id(), &_cfg.conn_param)) { + if(fi_accept(connection->qp(), nullptr, 0)) { spdlog::error("Conection accept unsuccesful, reason {} {}", errno, strerror(errno)); connection = nullptr; } From da74b44584b6576d241aace37b7264698893a936 Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Tue, 22 Mar 2022 19:43:41 +0100 Subject: [PATCH 02/91] [rdmalib] Add support for wait sets and clean some bugs --- rdmalib/include/rdmalib/connection.hpp | 4 +- rdmalib/lib/connection.cpp | 51 ++++++++++++++++++-------- rdmalib/lib/rdmalib.cpp | 21 +++++++---- 3 files changed, 52 insertions(+), 24 deletions(-) diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index 97b3183..90fe27f 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -57,6 +57,7 @@ namespace rdmalib { fid_ep* _qp; fid_cq* _rcv_channel; fid_cq* _trx_channel; + fid_wait* _wait_set; #else rdma_cm_id* _id; ibv_qp* _qp; @@ -94,7 +95,7 @@ namespace rdmalib { void initialize_batched_recv(const rdmalib::impl::Buffer & sge, size_t offset); void inlining(bool enable); #ifdef USE_LIBFABRIC - void initialize(fid_domain* pd, fi_info* info, fid_eq* ec); + void initialize(fid_fabric* fabric, fid_domain* pd, fi_info* info, fid_eq* ec); #else void initialize(rdma_cm_id* id); #endif @@ -102,6 +103,7 @@ namespace rdmalib { #ifdef USE_LIBFABRIC fid* id() const; fid_ep* qp() const; + fid_wait* wait_set() const; fid_cq* receive_completion_channel() const; fid_cq* transmit_completion_channel() const; #else diff --git a/rdmalib/lib/connection.cpp b/rdmalib/lib/connection.cpp index 5f5fb33..311db54 100644 --- a/rdmalib/lib/connection.cpp +++ b/rdmalib/lib/connection.cpp @@ -29,14 +29,15 @@ namespace rdmalib { #endif 
Connection::Connection(bool passive): + _qp(nullptr), #ifdef USE_LIBFABRIC _rcv_channel(nullptr), _trx_channel(nullptr), + _wait_set(nullptr), #else _id(nullptr), _channel(nullptr), #endif - _qp(nullptr), _req_count(0), _private_data(0), _passive(passive), @@ -62,6 +63,10 @@ namespace rdmalib { { #ifdef USE_LIBFABRIC SPDLOG_DEBUG("Deallocate a connection with qp fid {}", fmt::ptr(&_qp->fid)); + impl::expect_zero(fi_close(&_rcv_channel->fid)); + impl::expect_zero(fi_close(&_trx_channel->fid)); + impl::expect_zero(fi_close(&_wait_set->fid)); + impl::expect_zero(fi_close(&_qp->fid)); #else SPDLOG_DEBUG("Deallocate a connection with id {}", fmt::ptr(_id)); #endif @@ -69,14 +74,15 @@ namespace rdmalib { } Connection::Connection(Connection&& obj): + _qp(obj._qp), #ifdef USE_LIBFABRIC _rcv_channel(obj._rcv_channel), _trx_channel(obj._trx_channel), + _wait_set(nullptr), #else _id(obj._id), _channel(obj._channel), #endif - _qp(obj._qp), _req_count(obj._req_count), _private_data(obj._private_data), _passive(obj._passive), @@ -114,23 +120,33 @@ namespace rdmalib { } #ifdef USE_LIBFABRIC - void Connection::initialize(fid_domain* pd, fi_info* info, fid_eq* ec) + void Connection::initialize(fid_fabric* fabric, fid_domain* pd, fi_info* info, fid_eq* ec) { // Create the endpoint impl::expect_zero(fi_endpoint(pd, info, &_qp, reinterpret_cast(this))); + // Open the waitset + fi_wait_attr wait_attr; + wait_attr.wait_obj = FI_WAIT_UNSPEC; + wait_attr.flags = 0; + fi_wait_open(fabric, &wait_attr, &_wait_set); + // Bind with the completion queues and the event queue impl::expect_zero(fi_ep_bind(_qp, &ec->fid, 0)); - fi_cq_attr attr; - memset(&attr, 0, sizeof(attr)); - attr.format = FI_CQ_FORMAT_DATA; - attr.wait_obj = FI_WAIT_UNSPEC; - attr.wait_cond = FI_CQ_COND_NONE; - impl::expect_zero(fi_cq_open(pd, &attr, &_rcv_channel, nullptr)); - impl::expect_zero(fi_ep_bind(_qp, &_rcv_channel->fid, FI_RECV)); - impl::expect_zero(fi_cq_open(pd, &attr, &_trx_channel, nullptr)); + fi_cq_attr 
cq_attr; + memset(&cq_attr, 0, sizeof(cq_attr)); + cq_attr.format = FI_CQ_FORMAT_DATA; + cq_attr.wait_obj = FI_WAIT_UNSPEC; + cq_attr.wait_cond = FI_CQ_COND_NONE; + cq_attr.wait_set = nullptr; + impl::expect_zero(fi_cq_open(pd, &cq_attr, &_trx_channel, nullptr)); impl::expect_zero(fi_ep_bind(_qp, &_trx_channel->fid, FI_TRANSMIT)); + // Connect the wait set to the receive queue + cq_attr.wait_set = _wait_set; + impl::expect_zero(fi_cq_open(pd, &cq_attr, &_rcv_channel, nullptr)); + impl::expect_zero(fi_ep_bind(_qp, &_rcv_channel->fid, FI_RECV)); + // Enable the endpoint impl::expect_zero(fi_enable(_qp)); SPDLOG_DEBUG("Initialize a connection with"); @@ -214,6 +230,13 @@ namespace rdmalib { } #endif + #ifdef USE_LIBFABRIC + fid_wait* Connection::wait_set() const + { + return this->_wait_set; + } + #endif + #ifdef USE_LIBFABRIC fid_cq* Connection::receive_completion_channel() const { @@ -723,11 +746,7 @@ namespace rdmalib { #ifdef USE_LIBFABRIC void Connection::wait_events() { - int ret; - fi_cq_data_entry entry; - do { - ret = fi_cq_sread(_rcv_channel, &entry, 1, 0, -1); - } while (ret != 1 || entry.flags != FI_REMOTE_WRITE); + impl::expect_zero(fi_wait(_wait_set, -1)); } #else ibv_cq* Connection::wait_events() diff --git a/rdmalib/lib/rdmalib.cpp b/rdmalib/lib/rdmalib.cpp index a267dce..ad12af5 100644 --- a/rdmalib/lib/rdmalib.cpp +++ b/rdmalib/lib/rdmalib.cpp @@ -108,7 +108,7 @@ namespace rdmalib { Address::~Address() { #ifdef USE_LIBFABRIC - fi_close(&fabric->fid); + impl::expect_zero(fi_close(&fabric->fid)); fi_freeinfo(addrinfo); #else rdma_freeaddrinfo(addrinfo); @@ -155,8 +155,8 @@ namespace rdmalib { RDMAActive::~RDMAActive() { #ifdef USE_LIBFABRIC - fi_close(&_pd->fid); - fi_close(&_ec->fid); + impl::expect_zero(fi_close(&_pd->fid)); + impl::expect_zero(fi_close(&_ec->fid)); #else //ibv_dealloc_pd(this->_pd); #endif @@ -172,7 +172,7 @@ namespace rdmalib { impl::expect_zero(fi_domain(_addr.fabric, _addr.addrinfo, &_pd, nullptr)); // Create and enable the 
endpoint together with all the accompanying queues - _conn->initialize(_pd, _addr.addrinfo, _ec); + _conn->initialize(_addr.fabric, _pd, _addr.addrinfo, _ec); #else rdma_cm_id* id; impl::expect_zero(rdma_create_ep(&id, _addr.addrinfo, nullptr, nullptr)); @@ -352,8 +352,8 @@ namespace rdmalib { RDMAPassive::~RDMAPassive() { #ifdef USE_LIBFABRIC - fi_close(&_pd->fid); - fi_close(&_ec->fid); + impl::expect_zero(fi_close(&_pd->fid)); + impl::expect_zero(fi_close(&_ec->fid)); #else rdma_destroy_id(this->_listen_id); rdma_destroy_event_channel(this->_ec); @@ -481,7 +481,7 @@ namespace rdmalib { fi_domain(_addr.fabric, entry.info, &_pd, NULL); // Enable the endpoint - connection->initialize(_pd, entry.info, _ec); + connection->initialize(_addr.fabric, _pd, entry.info, _ec); SPDLOG_DEBUG( "[RDMAPassive] Created connection fid {} qp {}", fmt::ptr(connection->id()), fmt::ptr(&connection->qp()->fid) @@ -609,10 +609,17 @@ namespace rdmalib { } void RDMAPassive::accept(Connection* connection) { + #ifdef USE_LIBFABRIC if(fi_accept(connection->qp(), nullptr, 0)) { spdlog::error("Conection accept unsuccesful, reason {} {}", errno, strerror(errno)); connection = nullptr; } + #else + if(rdma_accept(connection->id(), &_cfg.conn_param)) { + spdlog::error("Conection accept unsuccesful, reason {} {}", errno, strerror(errno)); + connection = nullptr; + } + #endif SPDLOG_DEBUG("[RDMAPassive] Connection accepted at QP {}", fmt::ptr(connection->qp())); } } From f78e1cebc3514a2a7ec854229ca4a13dca0ab820 Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Tue, 22 Mar 2022 19:44:44 +0100 Subject: [PATCH 03/91] [rfaas] Add the support for libfabric --- rfaas/include/rfaas/executor.hpp | 20 +++++++++++++++++ rfaas/lib/executor.cpp | 37 ++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/rfaas/include/rfaas/executor.hpp b/rfaas/include/rfaas/executor.hpp index 0b3e68d..fcfffbc 100644 --- a/rfaas/include/rfaas/executor.hpp +++ b/rfaas/include/rfaas/executor.hpp @@ -174,7 
+174,11 @@ namespace rfaas { _connections[0].conn->poll_wc(rdmalib::QueueType::SEND, true); auto wc = _connections[0]._rcv_buffer.poll(true); + #ifdef USE_LIBFABRIC + uint32_t val = ntohl(std::get<0>(wc)[0].data); + #else uint32_t val = ntohl(std::get<0>(wc)[0].imm_data); + #endif int return_val = val & 0x0000FFFF; int finished_invoc_id = val >> 16; if(return_val == 0) { @@ -229,14 +233,22 @@ namespace rfaas { while(!found_result) { auto wc = _connections[0]._rcv_buffer.poll(true); for(int i = 0; i < std::get<1>(wc); ++i) { + #ifdef USE_LIBFABRIC + uint32_t val = ntohl(std::get<0>(wc)[i].data); + #else uint32_t val = ntohl(std::get<0>(wc)[i].imm_data); + #endif int return_val = val & 0x0000FFFF; int finished_invoc_id = val >> 16; if(finished_invoc_id == invoc_id) { found_result = true; return_value = return_val; + #ifdef USE_LIBFABRIC + out_size = std::get<0>(wc)[i].len; + #else out_size = std::get<0>(wc)[i].byte_len; + #endif //spdlog::info("Result for id {}", finished_invoc_id); } else { auto it = _futures.find(finished_invoc_id); @@ -255,7 +267,11 @@ namespace rfaas { // because we still hold the atomic // Thus, we later unset the variable since we're done for(int i = 0; i < std::get<1>(wc); ++i) { + #ifdef USE_LIBFABRIC + uint32_t val = ntohl(std::get<0>(wc)[i].data); + #else uint32_t val = ntohl(std::get<0>(wc)[i].imm_data); + #endif int return_val = val & 0x0000FFFF; int finished_invoc_id = val >> 16; auto it = _futures.find(finished_invoc_id); @@ -323,7 +339,11 @@ namespace rfaas { auto wc = _connections[0]._rcv_buffer.poll(true); expected -= std::get<1>(wc); for(int i = 0; i < std::get<1>(wc); ++i) { + #ifdef USE_LIBFABRIC + uint32_t val = ntohl(std::get<0>(wc)[i].data); + #else uint32_t val = ntohl(std::get<0>(wc)[i].imm_data); + #endif int return_val = val & 0x0000FFFF; int finished_invoc_id = val >> 16; if(return_val == 0) { diff --git a/rfaas/lib/executor.cpp b/rfaas/lib/executor.cpp index 603288f..f6f293f 100644 --- a/rfaas/lib/executor.cpp +++ 
b/rfaas/lib/executor.cpp @@ -49,7 +49,11 @@ namespace rfaas { _invoc_id(0), _max_inlined_msg(max_inlined_msg) { + #ifdef USE_LIBFABRIC + _execs_buf.register_memory(_state.pd(), FI_WRITE | FI_REMOTE_WRITE); + #else _execs_buf.register_memory(_state.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); + #endif events = 0; _active_polling = false; _end_requested = false; @@ -74,7 +78,11 @@ namespace rfaas { rewind(file); rdmalib::Buffer functions(len); rdmalib::impl::expect_true(fread(functions.data(), 1, len, file) == len); + #ifdef USE_LIBFABRIC + functions.register_memory(_state.pd(), FI_WRITE); + #else functions.register_memory(_state.pd(), IBV_ACCESS_LOCAL_WRITE); + #endif fclose(file); // FIXME: same function as in server/functions.cpp - merge? @@ -136,7 +144,9 @@ namespace rfaas { } _exec_manager->disconnect(); _exec_manager.reset(nullptr); + #ifndef USE_LIBFABRIC _state._cfg.attr.send_cq = _state._cfg.attr.recv_cq = 0; + #endif // Clear up old connections _connections.clear(); @@ -147,6 +157,9 @@ namespace rfaas { { // FIXME: hide the details in rdmalib spdlog::info("Background thread starts waiting for events"); + #ifdef USE_LIBFABRIC + int rc; + #else _connections[0].conn->notify_events(true); int flags = fcntl(_connections[0].conn->completion_channel()->fd, F_GETFL); int rc = fcntl(_connections[0].conn->completion_channel()->fd, F_SETFL, flags | O_NONBLOCK); @@ -154,8 +167,19 @@ namespace rfaas { fprintf(stderr, "Failed to change file descriptor of completion event channel\n"); return; } + #endif while(!_end_requested && _connections.size()) { + #ifdef USE_LIBFABRIC + fi_cq_data_entry entry; + do { + rc = fi_wait(_connections[0].conn->wait_set(), 100); + if(_end_requested) { + spdlog::info("Background thread stops waiting for events"); + return; + } + } while (rc != 0); + #else pollfd my_pollfd; my_pollfd.fd = _connections[0].conn->completion_channel()->fd; my_pollfd.events = POLLIN; @@ -167,17 +191,24 @@ namespace rfaas { return; } } while (rc == 0); + 
#endif if (rc < 0) { fprintf(stderr, "poll failed\n"); return; } if(!_end_requested) { + #ifndef USE_LIBFABRIC auto cq = _connections[0].conn->wait_events(); _connections[0].conn->notify_events(true); _connections[0].conn->ack_events(cq, 1); + #endif auto wc = _connections[0]._rcv_buffer.poll(false); for(int i = 0; i < std::get<1>(wc); ++i) { + #ifdef USE_LIBFABRIC + uint32_t val = ntohl(std::get<0>(wc)[i].data); + #else uint32_t val = ntohl(std::get<0>(wc)[i].imm_data); + #endif int return_val = val & 0x0000FFFF; int finished_invoc_id = val >> 16; auto it = _futures.find(finished_invoc_id); @@ -338,7 +369,11 @@ namespace rfaas { while(received < numcores) { auto wcs = this->_connections[0].conn->poll_wc(rdmalib::QueueType::RECV, true); for(int i = 0; i < std::get<1>(wcs); ++i) { + #ifdef USE_LIBFABRIC + int id = reinterpret_cast(std::get<0>(wcs)[i].op_context); + #else int id = std::get<0>(wcs)[i].wr_id; + #endif SPDLOG_DEBUG( "Received buffer details for thread, addr {}, rkey {}", _execs_buf.data()[id].r_addr, _execs_buf.data()[id].r_key @@ -355,7 +390,9 @@ namespace rfaas { _active_polling = false; // Ensure that we are able to process asynchronous replies // before we start any submissionk. 
+ #ifndef USE_LIBFABRIC _connections[0].conn->notify_events(true); + #endif // FIXME: extend to multiple connections _background_thread.reset( new std::thread{ From 135e02a472609cfb8b95afd35aa6de10bdedc6b5 Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Tue, 22 Mar 2022 19:53:30 +0100 Subject: [PATCH 04/91] Add the support for libfabric and GNI --- CMakeLists.txt | 72 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 60 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 37ccf58..aaa9a66 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -62,6 +62,20 @@ else() set(RFAAS_WITH_TESTING OFF) endif() +### +# Select the networking support +### +option(WITH_LIBFABRIC "Enable libfabric backend instead of ibverbs" Off) +if(${WITH_LIBFABRIC}) + message(STATUS "Enabling libfabric support") + add_definitions(-DUSE_LIBFABRIC) +endif() +option(WITH_GNI_AUTH "Enable the GNI authentication backend" Off) +if(${WITH_GNI_AUTH}) + message(STATUS "Enabling the GNI authentication backend") + add_definitions(-DUSE_GNI_AUTH) +endif() + ### # Optional: use existing installations ### @@ -85,14 +99,30 @@ set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads) ### -# librdmacm +# Networking ### find_package(PkgConfig REQUIRED) -pkg_check_modules(rdmacm REQUIRED IMPORTED_TARGET librdmacm) -### -# libibverbs -### -pkg_check_modules(ibverbs REQUIRED IMPORTED_TARGET libibverbs) +if (${WITH_LIBFABRIC}) + ### + # libfabric + ### + pkg_check_modules(fabric REQUIRED IMPORTED_TARGET libfabric) + if (${WITH_GNI_AUTH}) + ### + # cray-drc (aka rmda-credentials) + ### + pkg_check_modules(drc REQUIRED IMPORTED_TARGET cray-drc) + endif() +else() + ### + # librdmacm + ### + pkg_check_modules(rdmacm REQUIRED IMPORTED_TARGET librdmacm) + ### + # libibverbs + ### + pkg_check_modules(ibverbs REQUIRED IMPORTED_TARGET libibverbs) +endif() ### # pistache @@ -112,14 +142,28 @@ add_library(rdmalib STATIC ${rdmalib_files}) add_dependencies(rdmalib spdlog) 
add_dependencies(rdmalib cereal) target_include_directories(rdmalib PUBLIC "rdmalib/include") -target_include_directories(rdmalib SYSTEM PUBLIC $) -target_include_directories(rdmalib SYSTEM PUBLIC $) +if( ${WITH_LIBFABRIC} ) + target_include_directories(rdmalib SYSTEM PUBLIC $) + if( ${WITH_GNI_AUTH} ) + target_include_directories(rdmalib SYSTEM PUBLIC $) + endif() +else() + target_include_directories(rdmalib SYSTEM PUBLIC $) + target_include_directories(rdmalib SYSTEM PUBLIC $) +endif() target_include_directories(rdmalib SYSTEM PUBLIC $) target_include_directories(rdmalib SYSTEM PRIVATE $) set_target_properties(rdmalib PROPERTIES POSITION_INDEPENDENT_CODE On) set_target_properties(rdmalib PROPERTIES LIBRARY_OUTPUT_DIRECTORY lib) -target_link_libraries(rdmalib PUBLIC PkgConfig::rdmacm) -target_link_libraries(rdmalib PUBLIC PkgConfig::ibverbs) +if( ${WITH_LIBFABRIC} ) + target_link_libraries(rdmalib PUBLIC PkgConfig::fabric) + if( ${WITH_GNI_AUTH} ) + target_link_libraries(rdmalib PUBLIC PkgConfig::drc) + endif() +else() + target_link_libraries(rdmalib PUBLIC PkgConfig::rdmacm) + target_link_libraries(rdmalib PUBLIC PkgConfig::ibverbs) +endif() target_link_libraries(rdmalib PRIVATE spdlog::spdlog) target_link_libraries(rdmalib PRIVATE cereal) @@ -135,15 +179,19 @@ target_include_directories(rfaaslib PUBLIC "rfaas/include") target_include_directories(rfaaslib PRIVATE $) target_include_directories(rfaaslib SYSTEM PUBLIC $) target_include_directories(rfaaslib SYSTEM PUBLIC $) +#if (${WITH_LIBFABRIC} AND ${WITH_GNI_AUTH}) +#target_include_directories(rfaaslib SYSTEM PUBLIC $) +#endif() set_target_properties(rfaaslib PROPERTIES POSITION_INDEPENDENT_CODE On) set_target_properties(rfaaslib PROPERTIES LIBRARY_OUTPUT_DIRECTORY lib) target_link_libraries(rfaaslib PUBLIC rdmalib) -target_link_libraries(rfaaslib PUBLIC PkgConfig::rdmacm) -target_link_libraries(rfaaslib PUBLIC PkgConfig::ibverbs) target_link_libraries(rfaaslib PUBLIC spdlog::spdlog) 
target_link_libraries(rfaaslib PRIVATE cereal) target_link_libraries(rfaaslib PUBLIC dl) target_link_libraries(rfaaslib PUBLIC Threads::Threads) +#if (${WITH_LIBFABRIC} AND ${WITH_GNI_AUTH}) +#target_link_libraries(rfaaslib PUBLIC drc) +#endif() ### # Server From 80840578da4d8a2eba6ffd2233ec72ea33e21991 Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Tue, 22 Mar 2022 23:47:32 +0100 Subject: [PATCH 05/91] [server] Add libfabric support --- server/executor/fast_executor.cpp | 46 +++++++++++++++++++++++++++++ server/executor/fast_executor.hpp | 12 ++++---- server/executor_manager/cli.cpp | 2 ++ server/executor_manager/client.cpp | 20 ++++++++++++- server/executor_manager/client.hpp | 4 +++ server/executor_manager/manager.cpp | 4 +++ 6 files changed, 81 insertions(+), 7 deletions(-) diff --git a/server/executor/fast_executor.cpp b/server/executor/fast_executor.cpp index 45c2e57..64f9971 100644 --- a/server/executor/fast_executor.cpp +++ b/server/executor/fast_executor.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -71,12 +72,17 @@ namespace server { for(int i = 0; i < std::get<1>(wcs); ++i) { //server_processing_times.start(); + #ifdef USE_LIBFABRIC + fi_cq_data_entry* wc = &std::get<0>(wcs)[i]; + int info = ntohl(wc->data); + #else ibv_wc* wc = &std::get<0>(wcs)[i]; if(wc->status) { spdlog::error("Failed work completion! 
Reason: {}", ibv_wc_status_str(wc->status)); continue; } int info = ntohl(wc->imm_data); + #endif int func_id = info & invocation_mask; int invoc_id = info >> 16; bool solicited = info & solicited_mask; @@ -87,9 +93,15 @@ namespace server { // Measure hot polling time until we started execution auto now = std::chrono::high_resolution_clock::now(); + #ifdef USE_LIBFABRIC + auto func_end = work(invoc_id, func_id, solicited, + wc->len - rdmalib::functions::Submission::DATA_HEADER_SIZE + ); + #else auto func_end = work(invoc_id, func_id, solicited, wc->byte_len - rdmalib::functions::Submission::DATA_HEADER_SIZE ); + #endif _accounting.update_polling_time(start, now); i = 0; start = func_end; @@ -112,7 +124,9 @@ namespace server { if(_polling_state != PollingState::HOT_ALWAYS && time_passed >= timeout) { _polling_state = PollingState::WARM; // FIXME: can we miss an event here? + #ifndef USE_LIBFABRIC conn->notify_events(); + #endif SPDLOG_DEBUG("Switching to warm polling after {} us with no invocations", time_passed); return; } @@ -135,12 +149,17 @@ namespace server { for(int i = 0; i < std::get<1>(wcs); ++i) { //server_processing_times.start(); + #ifdef USE_LIBFABRIC + fi_cq_data_entry* wc = &std::get<0>(wcs)[i]; + int info = ntohl(wc->data); + #else ibv_wc* wc = &std::get<0>(wcs)[i]; if(wc->status) { spdlog::error("Failed work completion! 
Reason: {}", ibv_wc_status_str(wc->status)); continue; } int info = ntohl(wc->imm_data); + #endif int func_id = info & invocation_mask; bool solicited = info & solicited_mask; int invoc_id = info >> 16; @@ -149,7 +168,11 @@ namespace server { id, invoc_id, func_id, repetitions ); + #ifdef USE_LIBFABRIC + work(invoc_id, func_id, solicited, wc->len - rdmalib::functions::Submission::DATA_HEADER_SIZE); + #else work(invoc_id, func_id, solicited, wc->byte_len - rdmalib::functions::Submission::DATA_HEADER_SIZE); + #endif //sum += server_processing_times.end(); conn->poll_wc(rdmalib::QueueType::SEND, true); @@ -166,9 +189,13 @@ namespace server { // Do waiting after a single polling - avoid missing an events that // arrived before we called notify_events if(repetitions < max_repetitions) { + #ifdef USE_LIBFABRIC + conn->wait_events(); + #else auto cq = conn->wait_events(); conn->ack_events(cq, 1); conn->notify_events(); + #endif } } SPDLOG_DEBUG("Thread {} Stopped warm polling", id); @@ -179,7 +206,11 @@ namespace server { rdmalib::RDMAActive mgr_connection(_mgr_conn.addr, _mgr_conn.port, wc_buffer._rcv_buf_size, max_inline_data); mgr_connection.allocate(); this->_mgr_connection = &mgr_connection.connection(); + #ifdef USE_LIBFABRIC + _accounting_buf.register_memory(mgr_connection.pd(), FI_WRITE | FI_REMOTE_WRITE); + #else _accounting_buf.register_memory(mgr_connection.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_ATOMIC); + #endif if(!mgr_connection.connect(_mgr_conn.secret)) return; spdlog::info("Thread {} Established connection to the manager!", id); @@ -192,7 +223,11 @@ namespace server { this->conn = &active.connection(); // Receive function data from the client - this WC must be posted first // We do it before connection to ensure that client does not start sending before us + #ifdef USE_LIBFABRIC + func_buffer.register_memory(active.pd(), FI_WRITE | FI_REMOTE_WRITE); + #else func_buffer.register_memory(active.pd(), IBV_ACCESS_LOCAL_WRITE | 
IBV_ACCESS_REMOTE_WRITE); + #endif this->conn->post_recv(func_buffer); // Request notification before connecting - avoid missing a WC! @@ -204,21 +239,32 @@ namespace server { } else { _polling_state = PollingState::HOT; } + #ifndef USE_LIBFABRIC if(_polling_state == PollingState::WARM_ALWAYS || _polling_state == PollingState::WARM) conn->notify_events(); + #endif if(!active.connect()) return; // Now generic receives for function invocations + #ifdef USE_LIBFABRIC + send.register_memory(active.pd(), FI_WRITE); + rcv.register_memory(active.pd(), FI_WRITE | FI_REMOTE_WRITE); + #else send.register_memory(active.pd(), IBV_ACCESS_LOCAL_WRITE); rcv.register_memory(active.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); + #endif this->wc_buffer.connect(this->conn); spdlog::info("Thread {} Established connection to client!", id); // Send to the client information about thread buffer rdmalib::Buffer buf(1); + #ifdef USE_LIBFABRIC + buf.register_memory(active.pd(), FI_WRITE); + #else buf.register_memory(active.pd(), IBV_ACCESS_LOCAL_WRITE); + #endif buf.data()[0].r_addr = rcv.address(); buf.data()[0].r_key = rcv.rkey(); SPDLOG_DEBUG("Thread {} Sends buffer details to client!", id); diff --git a/server/executor/fast_executor.hpp b/server/executor/fast_executor.hpp index a0435df..5a25e79 100644 --- a/server/executor/fast_executor.hpp +++ b/server/executor/fast_executor.hpp @@ -129,14 +129,14 @@ namespace server { constexpr static int HOT_POLLING_VERIFICATION_PERIOD = 10000; PollingState _polling_state; - Thread(std::string addr, int port, int id, int functions_size, - int buf_size, int recv_buffer_size, int max_inline_data, + Thread(std::string addr_, int port_, int id_, int functions_size, + int buf_size, int recv_buffer_size, int max_inline_data_, const executor::ManagerConnection & mgr_conn): _functions(functions_size), - addr(addr), - port(port), - max_inline_data(max_inline_data), - id(id), + addr(addr_), + port(port_), + max_inline_data(max_inline_data_), + 
id(id_), repetitions(0), max_repetitions(0), sum(0), diff --git a/server/executor_manager/cli.cpp b/server/executor_manager/cli.cpp index 4fb4eb9..69a21b6 100644 --- a/server/executor_manager/cli.cpp +++ b/server/executor_manager/cli.cpp @@ -25,11 +25,13 @@ void signal_handler(int) int main(int argc, char ** argv) { + #ifndef USE_LIBFABRIC int rc = ibv_fork_init(); if(rc) { spdlog::error("ibv_fork_init failed, cannot continue! Error code {}", rc); exit(rc); } + #endif auto opts = rfaas::executor_manager::opts(argc, argv); if(opts.verbose) diff --git a/server/executor_manager/client.cpp b/server/executor_manager/client.cpp index 109830d..4f7c013 100644 --- a/server/executor_manager/client.cpp +++ b/server/executor_manager/client.cpp @@ -2,6 +2,8 @@ #include #include +#include +#include #include #include #include @@ -12,7 +14,11 @@ namespace rfaas::executor_manager { - Client::Client(rdmalib::Connection* conn, ibv_pd* pd): //, Accounting & _acc): + #ifdef USE_LIBFABRIC + Client::Client(rdmalib::Connection* conn, fid_domain* pd): + #else + Client::Client(rdmalib::Connection* conn, ibv_pd* pd): + #endif connection(conn), allocation_requests(RECV_BUF_SIZE), rcv_buffer(RECV_BUF_SIZE), @@ -23,9 +29,17 @@ namespace rfaas::executor_manager { { // Make the buffer accessible to clients memset(accounting.data(), 0, accounting.data_size()); + #ifdef USE_LIBFABRIC + accounting.register_memory(pd, FI_WRITE | FI_REMOTE_WRITE); + #else accounting.register_memory(pd, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_ATOMIC); + #endif // Make the buffer accessible to clients + #ifdef USE_LIBFABRIC + allocation_requests.register_memory(pd, FI_WRITE | FI_REMOTE_WRITE); + #else allocation_requests.register_memory(pd, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); + #endif // Initialize batch receive WCs connection->initialize_batched_recv(allocation_requests, sizeof(rdmalib::AllocationRequest)); rcv_buffer.connect(connection); @@ -46,7 +60,11 @@ namespace 
rfaas::executor_manager { void Client::disable(int id) { + #ifdef USE_LIBFABRIC + fi_shutdown(connection->qp(), 0); + #else rdma_disconnect(connection->id()); + #endif SPDLOG_DEBUG( "[Client] Disconnect client with connection {} id {}", fmt::ptr(connection), fmt::ptr(connection->id()) diff --git a/server/executor_manager/client.hpp b/server/executor_manager/client.hpp index b535ad4..d8dd25f 100644 --- a/server/executor_manager/client.hpp +++ b/server/executor_manager/client.hpp @@ -30,7 +30,11 @@ namespace rfaas::executor_manager { uint32_t allocation_time; bool _active; + #ifdef USE_LIBFABRIC + Client(rdmalib::Connection* conn, fid_domain* pd); + #else Client(rdmalib::Connection* conn, ibv_pd* pd); + #endif void reload_queue(); void disable(int); bool active(); diff --git a/server/executor_manager/manager.cpp b/server/executor_manager/manager.cpp index 3e48758..c738cb9 100644 --- a/server/executor_manager/manager.cpp +++ b/server/executor_manager/manager.cpp @@ -174,9 +174,13 @@ namespace rfaas::executor_manager { for(int j = 0; j < std::get<1>(wcs); ++j) { auto wc = std::get<0>(wcs)[j]; + #ifdef USE_LIBFABRIC + uint64_t id = reinterpret_cast(wc.op_context); + #else if(wc.status != 0) continue; uint64_t id = wc.wr_id; + #endif int16_t cores = client.allocation_requests.data()[id].cores; char * client_address = client.allocation_requests.data()[id].listen_address; int client_port = client.allocation_requests.data()[id].listen_port; From fa415b74bb508d5e161d22c2460f9a771a1e0f9f Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Wed, 23 Mar 2022 00:10:27 +0100 Subject: [PATCH 06/91] [benchmarks] Add support for libfabric --- benchmarks/cold_benchmark.cpp | 9 +++++++++ benchmarks/cpp_interface.cpp | 8 ++++++++ benchmarks/parallel_invocations.cpp | 9 +++++++++ benchmarks/warm_benchmark.cpp | 6 ++++++ 4 files changed, 32 insertions(+) diff --git a/benchmarks/cold_benchmark.cpp b/benchmarks/cold_benchmark.cpp index c9a6cbe..7fb7f68 100644 --- 
a/benchmarks/cold_benchmark.cpp +++ b/benchmarks/cold_benchmark.cpp @@ -1,5 +1,6 @@ #include +#include #include #include @@ -61,7 +62,11 @@ int main(int argc, char ** argv) std::vector> out; for(int i = 0; i < opts.cores; ++i) { in.emplace_back(opts.input_size, rdmalib::functions::Submission::DATA_HEADER_SIZE); + #ifdef USE_LIBFABRIC + in.back().register_memory(executor._state.pd(), FI_WRITE); + #else in.back().register_memory(executor._state.pd(), IBV_ACCESS_LOCAL_WRITE); + #endif memset(in.back().data(), 0, opts.input_size); for(int i = 0; i < opts.input_size; ++i) { ((char*)in.back().data())[i] = 1; @@ -69,7 +74,11 @@ int main(int argc, char ** argv) } for(int i = 0; i < opts.cores; ++i) { out.emplace_back(opts.input_size); + #ifdef USE_LIBFABRIC + out.back().register_memory(executor._state.pd(), FI_WRITE | FI_REMOTE_WRITE); + #else out.back().register_memory(executor._state.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); + #endif memset(out.back().data(), 0, opts.input_size); } diff --git a/benchmarks/cpp_interface.cpp b/benchmarks/cpp_interface.cpp index 6275ed3..2b07921 100644 --- a/benchmarks/cpp_interface.cpp +++ b/benchmarks/cpp_interface.cpp @@ -1,5 +1,6 @@ #include +#include #include #include @@ -68,10 +69,17 @@ int main(int argc, char ** argv) } rdmalib::Buffer in(opts.input_size, rdmalib::functions::Submission::DATA_HEADER_SIZE), out(opts.input_size); rdmalib::Buffer in2(opts.input_size, rdmalib::functions::Submission::DATA_HEADER_SIZE), out2(opts.input_size); + #ifdef USE_LIBFABRIC + in.register_memory(executor._state.pd(), FI_WRITE); + out.register_memory(executor._state.pd(), FI_WRITE | FI_REMOTE_WRITE); + in2.register_memory(executor._state.pd(), FI_WRITE); + out2.register_memory(executor._state.pd(), FI_WRITE | FI_REMOTE_WRITE); + #else in.register_memory(executor._state.pd(), IBV_ACCESS_LOCAL_WRITE); out.register_memory(executor._state.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); in2.register_memory(executor._state.pd(), 
IBV_ACCESS_LOCAL_WRITE); out2.register_memory(executor._state.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); + #endif std::vector> ins; ins.push_back(std::move(in)); ins.push_back(std::move(in2)); diff --git a/benchmarks/parallel_invocations.cpp b/benchmarks/parallel_invocations.cpp index dd10ca0..4adb7ad 100644 --- a/benchmarks/parallel_invocations.cpp +++ b/benchmarks/parallel_invocations.cpp @@ -1,5 +1,6 @@ #include +#include #include #include @@ -72,7 +73,11 @@ int main(int argc, char ** argv) std::vector> out; for(int i = 0; i < opts.numcores; ++i) { in.emplace_back(opts.input_size, rdmalib::functions::Submission::DATA_HEADER_SIZE); + #ifdef USE_LIBFABRIC + in.back().register_memory(executor._state.pd(), FI_WRITE); + #else in.back().register_memory(executor._state.pd(), IBV_ACCESS_LOCAL_WRITE); + #endif memset(in.back().data(), 0, opts.input_size); for(int i = 0; i < opts.input_size; ++i) { ((char*)in.back().data())[i] = 1; @@ -80,7 +85,11 @@ int main(int argc, char ** argv) } for(int i = 0; i < opts.numcores; ++i) { out.emplace_back(opts.input_size); + #ifdef USE_LIBFABRIC + out.back().register_memory(executor._state.pd(), FI_WRITE | FI_REMOTE_WRITE); + #else out.back().register_memory(executor._state.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); + #endif } rdmalib::Benchmarker<1> benchmarker{settings.benchmark.repetitions}; diff --git a/benchmarks/warm_benchmark.cpp b/benchmarks/warm_benchmark.cpp index d16f36b..d6e0e06 100644 --- a/benchmarks/warm_benchmark.cpp +++ b/benchmarks/warm_benchmark.cpp @@ -1,5 +1,6 @@ #include +#include #include #include @@ -69,8 +70,13 @@ int main(int argc, char ** argv) // FIXME: move me to a memory allocator rdmalib::Buffer in(opts.input_size, rdmalib::functions::Submission::DATA_HEADER_SIZE), out(opts.input_size); + #ifdef USE_LIBFABRIC + in.register_memory(executor._state.pd(), FI_WRITE); + out.register_memory(executor._state.pd(), FI_WRITE | FI_REMOTE_WRITE); + #else 
in.register_memory(executor._state.pd(), IBV_ACCESS_LOCAL_WRITE); out.register_memory(executor._state.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); + #endif memset(in.data(), 0, opts.input_size); for(int i = 0; i < opts.input_size; ++i) { ((char*)in.data())[i] = 1; From 88984a7df2fba6f5a22fd3e28184a5d72c7c18b4 Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Wed, 23 Mar 2022 00:11:12 +0100 Subject: [PATCH 07/91] Repair compile bugs --- rdmalib/include/rdmalib/buffer.hpp | 4 ++-- rdmalib/include/rdmalib/connection.hpp | 2 ++ rdmalib/lib/connection.cpp | 20 +++++++++++--------- server/executor/server.hpp | 9 +++++++++ 4 files changed, 24 insertions(+), 11 deletions(-) diff --git a/rdmalib/include/rdmalib/buffer.hpp b/rdmalib/include/rdmalib/buffer.hpp index b9f8288..9168cd7 100644 --- a/rdmalib/include/rdmalib/buffer.hpp +++ b/rdmalib/include/rdmalib/buffer.hpp @@ -152,7 +152,7 @@ namespace rdmalib { void add(const Buffer & buf) { #ifdef USE_LIBFABRIC - _sges.push_back({buf.address(), buf.bytes()}); + _sges.push_back({(void *)buf.address(), (size_t)buf.bytes()}); _lkeys.push_back(buf.lkey()); #else //emplace_back for structs will be supported in C++20 @@ -164,7 +164,7 @@ namespace rdmalib { void add(const Buffer & buf, uint32_t size, size_t offset = 0) { #ifdef USE_LIBFABRIC - _sges.push_back({buf.address() + offset, size}); + _sges.push_back({(void *)(buf.address() + offset), (size_t)size}); _lkeys.push_back(buf.lkey()); #else //emplace_back for structs will be supported in C++20 diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index 90fe27f..99f235c 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -93,7 +93,9 @@ namespace rdmalib { Connection(Connection&&); void initialize_batched_recv(const rdmalib::impl::Buffer & sge, size_t offset); + #ifndef USE_LIBFABRIC void inlining(bool enable); + #endif #ifdef USE_LIBFABRIC void initialize(fid_fabric* fabric, 
fid_domain* pd, fi_info* info, fid_eq* ec); #else diff --git a/rdmalib/lib/connection.cpp b/rdmalib/lib/connection.cpp index 311db54..7913c74 100644 --- a/rdmalib/lib/connection.cpp +++ b/rdmalib/lib/connection.cpp @@ -43,7 +43,9 @@ namespace rdmalib { _passive(passive), _status(ConnectionStatus::UNKNOWN) { + #ifndef USE_LIBFABRIC inlining(false); + #endif #ifdef USE_LIBFABRIC SPDLOG_DEBUG("Allocate a connection with qp fid {}", fmt::ptr(&_qp->fid)); @@ -278,8 +280,8 @@ namespace rdmalib { #ifdef USE_LIBFABRIC // FIXME: extend with multiple sges id = id == -1 ? _req_count++ - 1 : id; - SPDLOG_DEBUG("Post send to local Local QPN fid {}", &_qp->fid); - fi_addr_t temp; + SPDLOG_DEBUG("Post send to local Local QPN fid {}", fmt::ptr(&_qp->fid)); + fi_addr_t temp = 0; if(fi_sendv(_qp, elems.array(), elems.lkeys(), elems.size(), temp, reinterpret_cast((uint64_t)id))) { spdlog::error("Post send unsuccessful, reason {} {}, sges_count {}, wr_id {}", errno, strerror(errno), elems.size(), id @@ -321,10 +323,10 @@ namespace rdmalib { #ifdef USE_LIBFABRIC int loops = count / _rbatch; int reminder = count % _rbatch; - SPDLOG_DEBUG("Batch {} {} to local QPN fid {}", loops, reminder, &_qp->fid); + SPDLOG_DEBUG("Batch {} {} to local QPN fid {}", loops, reminder, fmt::ptr(&_qp->fid)); int ret = 0; - fi_addr_t temp; + fi_addr_t temp = 0; for(int i = 0; i < loops; ++i) { for(int j = 0; j < _rbatch; ++j) { auto begin = _rwc_sges[j]; @@ -413,9 +415,9 @@ namespace rdmalib { int32_t Connection::post_recv(ScatterGatherElement && elem, int32_t id, int count) { #ifdef USE_LIBFABRIC - fi_addr_t temp; + fi_addr_t temp = 0; id = id == -1 ? 
_req_count++ : id; - SPDLOG_DEBUG("post recv to local Local QPN fid {}", &_qp->fid); + SPDLOG_DEBUG("post recv to local Local QPN fid {}", fmt::ptr(&_qp->fid)); int ret; for(int i = 0; i < count; ++i) { @@ -470,7 +472,7 @@ namespace rdmalib { #ifdef USE_LIBFABRIC int32_t Connection::_post_write(ScatterGatherElement && elems, fi_msg_rma &msg, bool force_inline, bool force_solicited) { - fi_addr_t temp; + fi_addr_t temp = 0; int32_t id = _req_count++; size_t count = elems.size(); if(elems.size() == 1 && elems.array()[0].iov_len == 0) @@ -590,7 +592,7 @@ namespace rdmalib { { #ifdef USE_LIBFABRIC // TODO check if - fi_addr_t temp; + fi_addr_t temp = 0; int32_t id = _req_count++; memcpy(elems.array()[0].iov_base, &swap, sizeof(swap)); memcpy(elems.array()[1].iov_base, &compare, sizeof(compare)); @@ -628,7 +630,7 @@ namespace rdmalib { int32_t Connection::post_atomic_fadd(ScatterGatherElement && elems, const RemoteBuffer & rbuf, uint64_t add) { #ifdef USE_LIBFABRIC - fi_addr_t temp; + fi_addr_t temp = 0; int32_t id = _req_count++; memcpy(elems.array()[0].iov_base, &add, sizeof(add)); int ret = fi_atomic(_qp, &elems.array()[0], 1, elems.lkeys()[0], temp, rbuf.addr, rbuf.rkey, FI_UINT64, FI_SUM, reinterpret_cast((uint64_t)id)); diff --git a/server/executor/server.hpp b/server/executor/server.hpp index 3980657..c9f536f 100644 --- a/server/executor/server.hpp +++ b/server/executor/server.hpp @@ -1,4 +1,5 @@ +#include #include #include #include @@ -133,10 +134,18 @@ namespace server { void register_buffer(rdmalib::Buffer & buf, bool is_recv_buffer) { if(is_recv_buffer) { + #ifdef USE_LIBFABRIC + buf.register_memory(_state.pd(), FI_WRITE | FI_REMOTE_WRITE); + #else buf.register_memory(_state.pd(), IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE); + #endif _status.add_buffer(buf); } else { + #ifdef USE_LIBFABRIC + buf.register_memory(_state.pd(), FI_WRITE); + #else buf.register_memory(_state.pd(), IBV_ACCESS_LOCAL_WRITE); + #endif } } From 
55cca977687ad242b3c1d6b497d37494a389afbf Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Wed, 23 Mar 2022 13:23:16 +0100 Subject: [PATCH 08/91] Fix bugs and add better printing --- rdmalib/include/rdmalib/util.hpp | 6 +++++ rdmalib/lib/rdmalib.cpp | 35 +++++++++++++++++------------ server/executor_manager/manager.cpp | 4 ++-- server/executor_manager/manager.hpp | 5 +++-- 4 files changed, 32 insertions(+), 18 deletions(-) diff --git a/rdmalib/include/rdmalib/util.hpp b/rdmalib/include/rdmalib/util.hpp index 1ce80a5..78f41ec 100644 --- a/rdmalib/include/rdmalib/util.hpp +++ b/rdmalib/include/rdmalib/util.hpp @@ -2,6 +2,8 @@ #ifndef __RDMALIB_UTIL_HPP__ #define __RDMALIB_UTIL_HPP__ +#include +#include #include namespace rdmalib { namespace impl { @@ -15,7 +17,11 @@ namespace rdmalib { namespace impl { void expect_zero(U && u) { if(u) { + #ifdef USE_LIBFABRIC + spdlog::error("Expected zero, found: {}, message {}, errno {}, message {}", u, fi_strerror(u), errno, strerror(errno)); + #else spdlog::error("Expected zero, found: {}, errno {}, message {}", u, errno, strerror(errno)); + #endif traceback(); } assert(!u); diff --git a/rdmalib/lib/rdmalib.cpp b/rdmalib/lib/rdmalib.cpp index ad12af5..87c1c59 100644 --- a/rdmalib/lib/rdmalib.cpp +++ b/rdmalib/lib/rdmalib.cpp @@ -39,10 +39,13 @@ namespace rdmalib { // Set the hints to indicate that we will register the local buffers hints->mode |= FI_LOCAL_MR; hints->addr_format = FI_SOCKADDR_IN; + free(hints->fabric_attr->prov_name); + hints->fabric_attr->prov_name = strdup("sockets"); + std::cout << ip.c_str() << ":" << std::to_string(port).c_str() << " " << passive << std::endl; if(passive) - impl::expect_zero(fi_getinfo(FI_VERSION(1, 13), ip.c_str(), std::to_string(port).c_str(), FI_SOURCE, hints, &addrinfo)); + impl::expect_zero(fi_getinfo(FI_VERSION(1, 5), ip.c_str(), std::to_string(port).c_str(), FI_SOURCE, hints, &addrinfo)); else - impl::expect_zero(fi_getinfo(FI_VERSION(1, 13), nullptr, nullptr, 0, hints, &addrinfo)); 
+ impl::expect_zero(fi_getinfo(FI_VERSION(1, 5), nullptr, nullptr, 0, hints, &addrinfo)); fi_freeinfo(hints); fi_fabric(addrinfo->fabric_attr, &fabric, nullptr); #else @@ -79,7 +82,9 @@ namespace rdmalib { // Set the hints to have ability to conduct MSG, Atomic and RMA operations hints->caps |= FI_MSG | FI_RMA | FI_ATOMIC; // Set the hints to indicate that we will register the local buffers - hints->mode |= FI_LOCAL_MR; + hints->mode |= FI_LOCAL_MR; + free(hints->fabric_attr->prov_name); + hints->fabric_attr->prov_name = strdup("verbs"); // Set addresses and their format hints->addr_format = FI_SOCKADDR_IN; @@ -126,6 +131,8 @@ namespace rdmalib { _remote_addr.sin_family = AF_INET; _remote_addr.sin_port = htons(port); inet_pton(AF_INET, ip.c_str(), &_remote_addr.sin_addr); + // Create a domain + impl::expect_zero(fi_domain(_addr.fabric, _addr.addrinfo, &_pd, nullptr)); #else // Size of Queue Pair // Maximum requests in send queue @@ -168,9 +175,6 @@ namespace rdmalib { if(!_conn) { _conn = std::unique_ptr(new Connection()); #ifdef USE_LIBFABRIC - // Create a domain - impl::expect_zero(fi_domain(_addr.fabric, _addr.addrinfo, &_pd, nullptr)); - // Create and enable the endpoint together with all the accompanying queues _conn->initialize(_addr.fabric, _pd, _addr.addrinfo, _ec); #else @@ -327,7 +331,9 @@ namespace rdmalib { #endif _pd(nullptr) { - #ifndef USE_LIBFABRIC + #ifdef USE_LIBFABRIC + impl::expect_zero(fi_domain(_addr.fabric, _addr.addrinfo, &_pd, nullptr)); + #else // Size of Queue Pair // FIXME: configurable -> parallel workers _cfg.attr.cap.max_send_wr = 40; @@ -364,17 +370,18 @@ namespace rdmalib { { #ifdef USE_LIBFABRIC // Start listening - fi_eq_attr attr; - memset(&attr, 0, sizeof(attr)); - attr.size = 42; - attr.wait_obj = FI_WAIT_UNSPEC; - impl::expect_zero(fi_eq_open(_addr.fabric, nullptr, &_ec, nullptr)); - impl::expect_zero(fi_pep_bind(_pep, &_ec->fid, 0)); + fi_eq_attr eq_attr; + memset(&eq_attr, 0, sizeof(eq_attr)); + eq_attr.size = 42; + 
eq_attr.wait_obj = FI_WAIT_UNSPEC; + impl::expect_zero(fi_eq_open(_addr.fabric, &eq_attr, &_ec, NULL)); + impl::expect_zero(fi_passive_ep(_addr.fabric, _addr.addrinfo, &_pep, NULL)); + impl::expect_zero(fi_pep_bind(_pep, &(_ec->fid), 0)); impl::expect_zero(fi_listen(_pep)); char address[INET_ADDRSTRLEN]; inet_ntop(AF_INET, _addr.addrinfo->src_addr, address, INET_ADDRSTRLEN); spdlog::info( - "Listening on address ", + "Listening on address {}", address ); #else diff --git a/server/executor_manager/manager.cpp b/server/executor_manager/manager.cpp index c738cb9..575a05b 100644 --- a/server/executor_manager/manager.cpp +++ b/server/executor_manager/manager.cpp @@ -22,13 +22,13 @@ namespace rfaas::executor_manager { Manager::Manager(Settings & settings, bool skip_rm): _q1(100), _q2(100), _ids(0), + _state(settings.device->ip_address, settings.rdma_device_port, + settings.device->default_receive_buffer_size, true), _res_mgr_connection( settings.resource_manager_address, settings.resource_manager_port, settings.device->default_receive_buffer_size ), - _state(settings.device->ip_address, settings.rdma_device_port, - settings.device->default_receive_buffer_size, true), _settings(settings), // FIXME: randomly generated _secret(0x1234), diff --git a/server/executor_manager/manager.hpp b/server/executor_manager/manager.hpp index 5d72445..b2fad51 100644 --- a/server/executor_manager/manager.hpp +++ b/server/executor_manager/manager.hpp @@ -47,14 +47,15 @@ namespace rfaas::executor_manager { std::mutex clients; std::map _clients; int _ids; + + rdmalib::RDMAPassive _state; + //rdmalib::server::ServerStatus _status; //std::vector _clients; //std::atomic _clients_active; rdmalib::RDMAActive _res_mgr_connection; //std::unique_ptr _res_mgr_connection; - rdmalib::RDMAPassive _state; - //rdmalib::server::ServerStatus _status; Settings _settings; //rdmalib::Buffer _accounting_data; uint32_t _secret; From 24fb54eec7fd745f7419b013514aaff79b130dcd Mon Sep 17 00:00:00 2001 From: Marcin 
Chrapek Date: Thu, 24 Mar 2022 13:30:56 +0100 Subject: [PATCH 09/91] [rfaas] Add lacking libfabric support --- rfaas/lib/connection.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/rfaas/lib/connection.cpp b/rfaas/lib/connection.cpp index 1041def..70c21bc 100644 --- a/rfaas/lib/connection.cpp +++ b/rfaas/lib/connection.cpp @@ -1,5 +1,7 @@ +#ifndef USE_LIBFABRIC #include +#endif #include #include @@ -30,7 +32,11 @@ namespace rfaas { return false; } _rcv_buffer.connect(&_active.connection()); - _allocation_buffer.register_memory(_active.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); + #ifdef USE_LIBFABRIC + _allocation_buffer.register_memory(_active.pd(), FI_WRITE | FI_REMOTE_WRITE); + #else + _allocation_buffer.register_memory(_active.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); + #endif // Initialize batch receive WCs _active.connection().initialize_batched_recv(_allocation_buffer, sizeof(rdmalib::AllocationRequest)); return ret; From 93f3dfc1dad2db102b71a86ec374a2f994f4cd7d Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Thu, 24 Mar 2022 13:32:00 +0100 Subject: [PATCH 10/91] Add proper deallocation and repair bugs --- rdmalib/include/rdmalib/rdmalib.hpp | 20 +++++---- rdmalib/include/rdmalib/util.hpp | 2 +- rdmalib/lib/connection.cpp | 15 +++++-- rdmalib/lib/rdmalib.cpp | 68 ++++++++++++++++------------- server/executor_manager/manager.cpp | 10 ++++- 5 files changed, 69 insertions(+), 46 deletions(-) diff --git a/rdmalib/include/rdmalib/rdmalib.hpp b/rdmalib/include/rdmalib/rdmalib.hpp index 4a69807..5aa8410 100644 --- a/rdmalib/include/rdmalib/rdmalib.hpp +++ b/rdmalib/include/rdmalib/rdmalib.hpp @@ -24,9 +24,10 @@ namespace rdmalib { // Implemented as IPV4 struct Address { #ifdef USE_LIBFABRIC - fi_info* addrinfo; - fi_info* hints; - fid_fabric* fabric; + fi_info* addrinfo = nullptr; + fi_info* hints = nullptr; + fid_fabric* fabric = nullptr; + std::string _ip; #else rdma_addrinfo *addrinfo; rdma_addrinfo 
hints; @@ -35,6 +36,7 @@ namespace rdmalib { Address(const std::string & ip, int port, bool passive); Address(const std::string & sip, const std::string & dip, int port); + Address(); ~Address(); }; @@ -46,15 +48,15 @@ namespace rdmalib { std::unique_ptr _conn; Address _addr; #ifdef USE_LIBFABRIC - sockaddr_in _remote_addr; - fid_eq* _ec; - fid_domain* _pd; + fid_eq* _ec = nullptr; + fid_domain* _pd = nullptr; #else rdma_event_channel * _ec; ibv_pd* _pd; #endif RDMAActive(const std::string & ip, int port, int recv_buf = 1, int max_inline_data = 0); + RDMAActive(); ~RDMAActive(); void allocate(); bool connect(uint32_t secret = 0); @@ -74,9 +76,9 @@ namespace rdmalib { #endif Address _addr; #ifdef USE_LIBFABRIC - fid_eq* _ec; - fid_domain* _pd; - fid_pep* _pep; + fid_eq* _ec = nullptr; + fid_domain* _pd = nullptr; + fid_pep* _pep = nullptr; #else rdma_event_channel * _ec; rdma_cm_id* _listen_id; diff --git a/rdmalib/include/rdmalib/util.hpp b/rdmalib/include/rdmalib/util.hpp index 78f41ec..f82c7fb 100644 --- a/rdmalib/include/rdmalib/util.hpp +++ b/rdmalib/include/rdmalib/util.hpp @@ -18,7 +18,7 @@ namespace rdmalib { namespace impl { { if(u) { #ifdef USE_LIBFABRIC - spdlog::error("Expected zero, found: {}, message {}, errno {}, message {}", u, fi_strerror(u), errno, strerror(errno)); + spdlog::error("Expected zero, found: {}, message {}, errno {}, message {}", u, fi_strerror(std::abs(u)), errno, strerror(errno)); #else spdlog::error("Expected zero, found: {}, errno {}, message {}", u, errno, strerror(errno)); #endif diff --git a/rdmalib/lib/connection.cpp b/rdmalib/lib/connection.cpp index 7913c74..67166cf 100644 --- a/rdmalib/lib/connection.cpp +++ b/rdmalib/lib/connection.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -65,10 +66,16 @@ namespace rdmalib { { #ifdef USE_LIBFABRIC SPDLOG_DEBUG("Deallocate a connection with qp fid {}", fmt::ptr(&_qp->fid)); - impl::expect_zero(fi_close(&_rcv_channel->fid)); - 
impl::expect_zero(fi_close(&_trx_channel->fid)); - impl::expect_zero(fi_close(&_wait_set->fid)); - impl::expect_zero(fi_close(&_qp->fid)); + if (_rcv_channel) + impl::expect_zero(fi_close(&_rcv_channel->fid)); + if (_trx_channel) + impl::expect_zero(fi_close(&_trx_channel->fid)); + if (_wait_set) + impl::expect_zero(fi_close(&_wait_set->fid)); + if (_qp) { + impl::expect_zero(fi_shutdown(_qp, 0)); + impl::expect_zero(fi_close(&_qp->fid)); + } #else SPDLOG_DEBUG("Deallocate a connection with id {}", fmt::ptr(_id)); #endif diff --git a/rdmalib/lib/rdmalib.cpp b/rdmalib/lib/rdmalib.cpp index 87c1c59..c1a6aac 100644 --- a/rdmalib/lib/rdmalib.cpp +++ b/rdmalib/lib/rdmalib.cpp @@ -37,17 +37,12 @@ namespace rdmalib { // Set the hints to have ability to conduct MSG, Atomic and RMA operations hints->caps |= FI_MSG | FI_RMA | FI_ATOMIC; // Set the hints to indicate that we will register the local buffers - hints->mode |= FI_LOCAL_MR; - hints->addr_format = FI_SOCKADDR_IN; - free(hints->fabric_attr->prov_name); - hints->fabric_attr->prov_name = strdup("sockets"); - std::cout << ip.c_str() << ":" << std::to_string(port).c_str() << " " << passive << std::endl; - if(passive) - impl::expect_zero(fi_getinfo(FI_VERSION(1, 5), ip.c_str(), std::to_string(port).c_str(), FI_SOURCE, hints, &addrinfo)); - else - impl::expect_zero(fi_getinfo(FI_VERSION(1, 5), nullptr, nullptr, 0, hints, &addrinfo)); + hints->domain_attr->mr_mode = FI_MR_BASIC; // FI_MR_LOCAL | FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY; + hints->ep_attr->type = FI_EP_MSG; + hints->fabric_attr->prov_name = strdup("GNI"); + impl::expect_zero(fi_getinfo(FI_VERSION(1, 9), ip.c_str(), std::to_string(port).c_str(), passive ? 
FI_SOURCE : 0, hints, &addrinfo)); fi_freeinfo(hints); - fi_fabric(addrinfo->fabric_attr, &fabric, nullptr); + impl::expect_zero(fi_fabric(addrinfo->fabric_attr, &fabric, nullptr)); #else memset(&hints, 0, sizeof hints); hints.ai_port_space = RDMA_PS_TCP; @@ -57,6 +52,7 @@ namespace rdmalib { impl::expect_zero(rdma_getaddrinfo(ip.c_str(), std::to_string(port).c_str(), &hints, &addrinfo)); #endif this->_port = port; + this->_ip = ip; } Address::Address(const std::string & sip, const std::string & dip, int port) @@ -109,12 +105,15 @@ namespace rdmalib { this->_port = port; } + Address::Address() {} Address::~Address() { #ifdef USE_LIBFABRIC - impl::expect_zero(fi_close(&fabric->fid)); - fi_freeinfo(addrinfo); + if (fabric) + impl::expect_zero(fi_close(&fabric->fid)); + if (addrinfo) + fi_freeinfo(addrinfo); #else rdma_freeaddrinfo(addrinfo); #endif @@ -127,10 +126,6 @@ namespace rdmalib { _pd(nullptr) { #ifdef USE_LIBFABRIC - memset(&_remote_addr, 0, sizeof(_remote_addr)); - _remote_addr.sin_family = AF_INET; - _remote_addr.sin_port = htons(port); - inet_pton(AF_INET, ip.c_str(), &_remote_addr.sin_addr); // Create a domain impl::expect_zero(fi_domain(_addr.fabric, _addr.addrinfo, &_pd, nullptr)); #else @@ -159,11 +154,15 @@ namespace rdmalib { SPDLOG_DEBUG("Create RDMAActive"); } + RDMAActive::RDMAActive() {} + RDMAActive::~RDMAActive() { #ifdef USE_LIBFABRIC - impl::expect_zero(fi_close(&_pd->fid)); - impl::expect_zero(fi_close(&_ec->fid)); + if (_pd) + impl::expect_zero(fi_close(&_pd->fid)); + if (_ec) + impl::expect_zero(fi_close(&_ec->fid)); #else //ibv_dealloc_pd(this->_pd); #endif @@ -246,17 +245,15 @@ namespace rdmalib { paramlen = sizeof(secret); SPDLOG_DEBUG("Setting connection secret {} of length {}", secret, sizeof(uint32_t)); } - if(fi_connect(_conn->qp(), &_remote_addr, param, paramlen)) { - spdlog::error("Connection unsuccesful, reason {} {}", errno, strerror(errno)); + if(fi_connect(_conn->qp(), &_addr.addrinfo->dest_addr, param, paramlen)) { + 
spdlog::error("Connection unsuccessful, reason {} {}", errno, strerror(errno)); _conn.reset(); _pd = nullptr; return false; } else { - char address[INET_ADDRSTRLEN]; - inet_ntop(AF_INET, &_remote_addr.sin_addr, address, INET_ADDRSTRLEN); spdlog::debug( - "[RDMAActive] Connection succesful to {}:{}", - address, _addr._port + "[RDMAActive] Connection successful to {}:{}", + _addr._ip, _addr._port ); } #else @@ -358,8 +355,12 @@ namespace rdmalib { RDMAPassive::~RDMAPassive() { #ifdef USE_LIBFABRIC - impl::expect_zero(fi_close(&_pd->fid)); - impl::expect_zero(fi_close(&_ec->fid)); + if (_pd) + impl::expect_zero(fi_close(&_pd->fid)); + if (_pep) + impl::expect_zero(fi_close(&_pep->fid)); + if (_ec) + impl::expect_zero(fi_close(&_ec->fid)); #else rdma_destroy_id(this->_listen_id); rdma_destroy_event_channel(this->_ec); @@ -374,16 +375,21 @@ namespace rdmalib { memset(&eq_attr, 0, sizeof(eq_attr)); eq_attr.size = 42; eq_attr.wait_obj = FI_WAIT_UNSPEC; - impl::expect_zero(fi_eq_open(_addr.fabric, &eq_attr, &_ec, NULL)); impl::expect_zero(fi_passive_ep(_addr.fabric, _addr.addrinfo, &_pep, NULL)); + impl::expect_zero(fi_eq_open(_addr.fabric, &eq_attr, &_ec, NULL)); impl::expect_zero(fi_pep_bind(_pep, &(_ec->fid), 0)); impl::expect_zero(fi_listen(_pep)); - char address[INET_ADDRSTRLEN]; - inet_ntop(AF_INET, _addr.addrinfo->src_addr, address, INET_ADDRSTRLEN); + size_t size = 100; + char address[size]; + impl::expect_zero(fi_getname(&_pep->fid, address, &size)); spdlog::info( - "Listening on address {}", - address + "Listening on address length {}:", + size ); + std::cout << "\b \b"; + for(size_t i = 0; i < size; ++i) + std::cout << std::hex << (int)address[i]; + std::cout << std::endl; #else // Start listening impl::expect_nonzero(this->_ec = rdma_create_event_channel()); diff --git a/server/executor_manager/manager.cpp b/server/executor_manager/manager.cpp index 575a05b..76f0d3a 100644 --- a/server/executor_manager/manager.cpp +++ b/server/executor_manager/manager.cpp @@ 
-24,19 +24,27 @@ namespace rfaas::executor_manager { _ids(0), _state(settings.device->ip_address, settings.rdma_device_port, settings.device->default_receive_buffer_size, true), + #ifndef USE_LIBFABRIC _res_mgr_connection( settings.resource_manager_address, settings.resource_manager_port, settings.device->default_receive_buffer_size ), + #endif _settings(settings), // FIXME: randomly generated _secret(0x1234), _skip_rm(skip_rm), _shutdown(false) { - if(!_skip_rm) + if(!_skip_rm) { + rdmalib::RDMAActive _res_mgr_connection( + settings.resource_manager_address, + settings.resource_manager_port, + settings.device->default_receive_buffer_size + ); _res_mgr_connection.allocate(); + } } void Manager::shutdown() From f415a30be34872ef5c7a146c338a5ed057d79e46 Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Thu, 24 Mar 2022 13:36:40 +0100 Subject: [PATCH 11/91] Repair compilation errors --- CMakeLists.txt | 7 ++++++- server/executor/functions.cpp | 6 ++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index aaa9a66..9fc9e82 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -95,8 +95,13 @@ endif() ### # threads ### +set(CMAKE_THREAD_LIBS_INIT "-lpthread") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") +set(CMAKE_HAVE_THREADS_LIBRARY 1) +set(CMAKE_USE_WIN32_THREADS_INIT 0) +set(CMAKE_USE_PTHREADS_INIT 1) set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(Threads) +find_package(Threads REQUIRED) ### # Networking diff --git a/server/executor/functions.cpp b/server/executor/functions.cpp index 5967248..c4bb6d2 100644 --- a/server/executor/functions.cpp +++ b/server/executor/functions.cpp @@ -10,6 +10,8 @@ // FIXME: works only on Linux #include +#include +#include #include #include #include @@ -59,7 +61,11 @@ namespace server { _library_handle(nullptr) { // FIXME: works only on Linux + #ifdef USE_LIBFABRIC#ifdef USE_LIBFABRIC + rdmalib::impl::expect_nonnegative(_fd = syscall(SYS_memfd_create, "test", MFD_CLOEXEC)); + #else 
rdmalib::impl::expect_nonnegative(_fd = memfd_create("libfunction", 0)); + #endif rdmalib::impl::expect_zero(ftruncate(_fd, size)); rdmalib::impl::expect_nonnull( From 2f1699ef7aee954b9760afe523c5d81deb312426 Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Fri, 25 Mar 2022 15:00:10 +0100 Subject: [PATCH 12/91] Remove bugs --- rdmalib/lib/connection.cpp | 100 +++++++++---------- rdmalib/lib/rdmalib.cpp | 42 ++++++-- rfaas/lib/connection.cpp | 2 +- rfaas/lib/executor.cpp | 9 +- server/executor/fast_executor.cpp | 3 +- server/executor_manager/executor_process.cpp | 1 + 6 files changed, 90 insertions(+), 67 deletions(-) diff --git a/rdmalib/lib/connection.cpp b/rdmalib/lib/connection.cpp index 67166cf..e65e3b6 100644 --- a/rdmalib/lib/connection.cpp +++ b/rdmalib/lib/connection.cpp @@ -49,7 +49,7 @@ namespace rdmalib { #endif #ifdef USE_LIBFABRIC - SPDLOG_DEBUG("Allocate a connection with qp fid {}", fmt::ptr(&_qp->fid)); + SPDLOG_DEBUG("Allocate a connection {}", fmt::ptr(this)); #else for(int i=0; i < _rbatch; i++){ _batch_wrs[i].wr_id = i; @@ -65,17 +65,7 @@ namespace rdmalib { Connection::~Connection() { #ifdef USE_LIBFABRIC - SPDLOG_DEBUG("Deallocate a connection with qp fid {}", fmt::ptr(&_qp->fid)); - if (_rcv_channel) - impl::expect_zero(fi_close(&_rcv_channel->fid)); - if (_trx_channel) - impl::expect_zero(fi_close(&_trx_channel->fid)); - if (_wait_set) - impl::expect_zero(fi_close(&_wait_set->fid)); - if (_qp) { - impl::expect_zero(fi_shutdown(_qp, 0)); - impl::expect_zero(fi_close(&_qp->fid)); - } + SPDLOG_DEBUG("Deallocate connection {} with qp fid {}", fmt::ptr(this), fmt::ptr(&_qp->fid)); #else SPDLOG_DEBUG("Deallocate a connection with id {}", fmt::ptr(_id)); #endif @@ -158,7 +148,7 @@ namespace rdmalib { // Enable the endpoint impl::expect_zero(fi_enable(_qp)); - SPDLOG_DEBUG("Initialize a connection with"); + SPDLOG_DEBUG("Initialize connection {}", fmt::ptr(this)); } #else void Connection::initialize(rdma_cm_id* id) @@ -184,10 +174,15 @@ namespace 
rdmalib { { #ifdef USE_LIBFABRIC SPDLOG_DEBUG("Connection close called for {} with qp fid {}", fmt::ptr(this), fmt::ptr(&this->_qp->fid)); - if(_qp) { - // We need to close the transmit and receive channels and the endpoint + // We need to close the transmit and receive channels and the endpoint + if (_rcv_channel) impl::expect_zero(fi_close(&_rcv_channel->fid)); + if (_trx_channel) impl::expect_zero(fi_close(&_trx_channel->fid)); + if (_wait_set) + impl::expect_zero(fi_close(&_wait_set->fid)); + if (_qp) { + impl::expect_zero(fi_shutdown(_qp, 0)); impl::expect_zero(fi_close(&_qp->fid)); _status = ConnectionStatus::DISCONNECTED; } @@ -286,18 +281,18 @@ namespace rdmalib { { #ifdef USE_LIBFABRIC // FIXME: extend with multiple sges - id = id == -1 ? _req_count++ - 1 : id; - SPDLOG_DEBUG("Post send to local Local QPN fid {}", fmt::ptr(&_qp->fid)); - fi_addr_t temp = 0; - if(fi_sendv(_qp, elems.array(), elems.lkeys(), elems.size(), temp, reinterpret_cast((uint64_t)id))) { - spdlog::error("Post send unsuccessful, reason {} {}, sges_count {}, wr_id {}", - errno, strerror(errno), elems.size(), id + id = id == -1 ? 
_req_count++ : id; + SPDLOG_DEBUG("Post send to local Local QPN on connection {} fid {}", fmt::ptr(this), fmt::ptr(&_qp->fid)); + int ret = fi_sendv(_qp, elems.array(), elems.lkeys(), elems.size(), NULL, reinterpret_cast((uint64_t)id)); + if(ret) { + spdlog::error("Post send unsuccessful on connection {} reason {} message {} errno {} message {}, sges_count {}, wr_id {}", + fmt::ptr(this), ret, fi_strerror(std::abs(ret)), errno, strerror(errno), elems.size(), id ); return -1; } SPDLOG_DEBUG( - "Post send successful, sges_count {}, sge[0].addr {}, sge[0].size {}, wr_id {}", - elems.size(), elems.array()[0].iov_base, elems.array()[0].iov_len, id + "Post send successful on connection {}, sges_count {}, sge[0].addr {}, sge[0].size {}, wr_id {}", + fmt::ptr(this), elems.size(), elems.array()[0].iov_base, elems.array()[0].iov_len, id ); return _req_count - 1; #else @@ -330,20 +325,19 @@ namespace rdmalib { #ifdef USE_LIBFABRIC int loops = count / _rbatch; int reminder = count % _rbatch; - SPDLOG_DEBUG("Batch {} {} to local QPN fid {}", loops, reminder, fmt::ptr(&_qp->fid)); + SPDLOG_DEBUG("Batch {} {} to local QPN on connection {} fid {}", loops, reminder, fmt::ptr(this), fmt::ptr(&_qp->fid)); int ret = 0; - fi_addr_t temp = 0; for(int i = 0; i < loops; ++i) { for(int j = 0; j < _rbatch; ++j) { auto begin = _rwc_sges[j]; for (size_t k = 0; k < begin.size(); ++k) { if(begin.array()[k].iov_len > 0) { - SPDLOG_DEBUG("Batched receive num_sge {} sge[0].ptr {} sge[0].length {}", begin.size(), begin.array()[k].iov_base, begin.array()[k].iov_len); + SPDLOG_DEBUG("Batched receive on connection {} num_sge {} sge[0].ptr {} sge[0].length {}", fmt::ptr(this), begin.size(), begin.array()[k].iov_base, begin.array()[k].iov_len); } else - SPDLOG_DEBUG("Batched receive num_sge {}", begin.size()); + SPDLOG_DEBUG("Batched receive on connection {} num_sge {}", fmt::ptr(this), begin.size()); } - ret = fi_recvv(_qp, begin.array(), begin.lkeys(), begin.size(), temp, nullptr); + ret = 
fi_recvv(_qp, begin.array(), begin.lkeys(), begin.size(), NULL, nullptr); if(ret) break; } @@ -356,22 +350,22 @@ namespace rdmalib { auto begin = _rwc_sges[j]; for (size_t k = 0; k < begin.size(); ++k) { if(begin.array()[k].iov_len > 0) { - SPDLOG_DEBUG("Batched receive num_sge {} sge[0].ptr {} sge[0].length {}", begin.size(), begin.array()[k].iov_base, begin.array()[k].iov_len); + SPDLOG_DEBUG("Batched receive on connection {} num_sge {} sge[0].ptr {} sge[0].length {}", fmt::ptr(this), begin.size(), begin.array()[k].iov_base, begin.array()[k].iov_len); } else - SPDLOG_DEBUG("Batched receive num_sge {}", begin.size()); + SPDLOG_DEBUG("Batched receive on connection {} num_sge {}", fmt::ptr(this), begin.size()); } - ret = fi_recvv(_qp, begin.array(), begin.lkeys(), begin.size(), temp, nullptr); + ret = fi_recvv(_qp, begin.array(), begin.lkeys(), begin.size(), NULL, nullptr); if(ret) break; } } if(ret) { - spdlog::error("Batched Post empty recv unsuccessful, reason {} {}", ret, strerror(ret)); + spdlog::error("Batched Post empty recv unsuccessful on connection {} reason {} {}", fmt::ptr(this), ret, fi_strerror(std::abs(ret))); return -1; } - SPDLOG_DEBUG("Batched Post empty recv successfull"); + SPDLOG_DEBUG("Batched Post empty recv successfull on connection {}", fmt::ptr(this)); return count; #else struct ibv_recv_wr* bad = nullptr; @@ -424,7 +418,7 @@ namespace rdmalib { #ifdef USE_LIBFABRIC fi_addr_t temp = 0; id = id == -1 ? 
_req_count++ : id; - SPDLOG_DEBUG("post recv to local Local QPN fid {}", fmt::ptr(&_qp->fid)); + SPDLOG_DEBUG("post recv to local Local QPN fid {} connection {}", fmt::ptr(&_qp->fid), fmt::ptr(this)); int ret; for(int i = 0; i < count; ++i) { @@ -433,16 +427,16 @@ namespace rdmalib { break; } if(ret) { - spdlog::error("Post receive unsuccessful, reason {} {}", ret, strerror(ret)); + spdlog::error("Post receive unsuccessful on connection {}, reason {} {}", fmt::ptr(this), ret, strerror(ret)); return -1; } if(elem.size() > 0) SPDLOG_DEBUG( - "Post recv successfull, sges_count {}, sge[0].addr {}, sge[0].size {}, wr_id {}", - elem.size(), elem.array()[0].iov_base, elem.array()[0].iov_len, id + "Post recv successfull on connection {}, sges_count {}, sge[0].addr {}, sge[0].size {}, wr_id {}", + fmt::ptr(this), elem.size(), elem.array()[0].iov_base, elem.array()[0].iov_len, id ); else - SPDLOG_DEBUG("Post recv successfull"); + SPDLOG_DEBUG("Post recv successfull on connection {}", fmt::ptr(this)); return id; } #else @@ -493,19 +487,19 @@ namespace rdmalib { int ret = fi_writemsg(_qp, &msg, 0); if(ret) { - spdlog::error("Post write unsuccessful, reason {} {}, sges_count {}, wr_id {}, remote addr {}, remote rkey {}, imm data {}", - ret, strerror(ret), count, id, msg.rma_iov->addr, msg.rma_iov->key, ntohl(msg.data) + spdlog::error("Post write unsuccessful, reason {} {}, sges_count {}, wr_id {}, remote addr {}, remote rkey {}, imm data {}, connection {}", + ret, strerror(ret), count, id, msg.rma_iov->addr, msg.rma_iov->key, ntohl(msg.data), fmt::ptr(this) ); return -1; } if(elems.size() > 0) SPDLOG_DEBUG( - "Post write succesfull id: {}, sge size: {}, first lkey {} len {}, remote addr {}, remote rkey {}, imm data {}", - count, elems.size(), elems.lkeys()[0], elems.array()[0].iov_len, msg.rma_iov->addr, msg.rma_iov->key, ntohl(msg.data) + "Post write succesfull id: {}, sge size: {}, first lkey {} len {}, remote addr {}, remote rkey {}, imm data {}, connection {}", + count, 
elems.size(), elems.lkeys()[0], elems.array()[0].iov_len, msg.rma_iov->addr, msg.rma_iov->key, ntohl(msg.data), fmt::ptr(this) ); else SPDLOG_DEBUG( - "Post write succesfull id: {}, remote addr {}, remote rkey {}, imm data {}", id, msg.rma_iov->addr, msg.rma_iov->key, ntohl(msg.data) + "Post write succesfull id: {}, remote addr {}, remote rkey {}, imm data {}, connection {}", id, msg.rma_iov->addr, msg.rma_iov->key, ntohl(msg.data), fmt::ptr(this) ); return _req_count - 1; @@ -605,10 +599,10 @@ namespace rdmalib { memcpy(elems.array()[1].iov_base, &compare, sizeof(compare)); int ret = fi_compare_atomic(_qp, elems.array()[0].iov_base, 1, elems.lkeys()[0], elems.array()[1].iov_base, elems.lkeys()[1], elems.array()[1].iov_base, elems.lkeys()[1], temp, rbuf.addr, rbuf.rkey, FI_UINT64, FI_CSWAP, reinterpret_cast((uint64_t)id)); if(ret) { - spdlog::error("Post write unsuccessful, reason {} {}", errno, strerror(errno)); + spdlog::error("Post write unsuccessful on connection {}, reason {} {}", fmt::ptr(this), errno, strerror(errno)); return -1; } - SPDLOG_DEBUG("Post write id {} successful", id); + SPDLOG_DEBUG("Post write id {} successful on connection", id, fmt::ptr(this)); return _req_count - 1; #else ibv_send_wr wr, *bad; @@ -646,7 +640,7 @@ namespace rdmalib { return -1; } SPDLOG_DEBUG( - "Post atomic fadd succesfull id: {}, remote addr {}, remote rkey {}, val {}", id, rbuf.addr, rbuf.rkey, add + "Post atomic fadd succesfull id: {}, remote addr {}, remote rkey {}, val {}, connection {}", id, rbuf.addr, rbuf.rkey, add, fmt::ptr(this) ); return _req_count - 1; #else @@ -693,23 +687,23 @@ namespace rdmalib { ret = -1; else spdlog::error( - "Queue {} WC {} finished with an error {}", - type == QueueType::RECV ? "recv" : "send", + "Queue {} connection {} WC {} finished with an error {}", + type == QueueType::RECV ? 
"recv" : "send", fmt::ptr(this), reinterpret_cast(_ewc.op_context), fi_strerror(_ewc.err) ); } } while(blocking && (ret == 0 || ret == -EAGAIN)); - if(ret < 0) { - spdlog::error("Failure of polling events from: {} queue! Return value {}, errno {}", type == QueueType::RECV ? "recv" : "send", fi_strerror(ret), errno); + if(ret < 0 && ret != -EAGAIN) { + spdlog::error("Failure of polling events from: {} queue connection {}! Return value {} message {} errno {}", type == QueueType::RECV ? "recv" : "send", fmt::ptr(this), ret, fi_strerror(std::abs(ret)), errno); return std::make_tuple(nullptr, -1); } - if(ret) + if(ret > 0) for(int i = 0; i < ret; ++i) { - SPDLOG_DEBUG("Queue {} Ret {}/{} WC {}", type == QueueType::RECV ? "recv" : "send", i + 1, ret, reinterpret_cast(wcs[i].op_context)); + SPDLOG_DEBUG("Connection {} Queue {} Ret {}/{} WC {}", fmt::ptr(this), type == QueueType::RECV ? "recv" : "send", i + 1, ret, reinterpret_cast(wcs[i].op_context)); } - return std::make_tuple(wcs, ret); + return std::make_tuple(wcs, ret == -EAGAIN ? 
0 : ret); } #else std::tuple Connection::poll_wc(QueueType type, bool blocking, int count) diff --git a/rdmalib/lib/rdmalib.cpp b/rdmalib/lib/rdmalib.cpp index c1a6aac..0b94b5b 100644 --- a/rdmalib/lib/rdmalib.cpp +++ b/rdmalib/lib/rdmalib.cpp @@ -4,6 +4,7 @@ #include // inet_ntoa #include +#include #include // poll on file descriptors @@ -126,7 +127,7 @@ namespace rdmalib { _pd(nullptr) { #ifdef USE_LIBFABRIC - // Create a domain + // Create a domain (need to do that now so that we can register memory for the domain) impl::expect_zero(fi_domain(_addr.fabric, _addr.addrinfo, &_pd, nullptr)); #else // Size of Queue Pair @@ -174,6 +175,12 @@ namespace rdmalib { if(!_conn) { _conn = std::unique_ptr(new Connection()); #ifdef USE_LIBFABRIC + // Enable the event queue + fi_eq_attr eq_attr; + memset(&eq_attr, 0, sizeof(eq_attr)); + eq_attr.size = 42; + eq_attr.wait_obj = FI_WAIT_UNSPEC; + impl::expect_zero(fi_eq_open(_addr.fabric, &eq_attr, &_ec, NULL)); // Create and enable the endpoint together with all the accompanying queues _conn->initialize(_addr.fabric, _pd, _addr.addrinfo, _ec); #else @@ -245,16 +252,28 @@ namespace rdmalib { paramlen = sizeof(secret); SPDLOG_DEBUG("Setting connection secret {} of length {}", secret, sizeof(uint32_t)); } - if(fi_connect(_conn->qp(), &_addr.addrinfo->dest_addr, param, paramlen)) { - spdlog::error("Connection unsuccessful, reason {} {}", errno, strerror(errno)); + int ret = fi_connect(_conn->qp(), _addr.addrinfo->dest_addr, param, paramlen); + if(ret) { + spdlog::error("Connection unsuccessful, reason {} message {} errno {} message {}", ret, fi_strerror(ret), errno, strerror(errno)); _conn.reset(); _pd = nullptr; return false; - } else { + } + uint32_t event; + fi_eq_entry entry; + do { + ret = fi_eq_read(_ec, &event, &entry, sizeof(entry), 0); + } while (ret == -FI_EAGAIN); + if (event == FI_CONNECTED) spdlog::debug( "[RDMAActive] Connection successful to {}:{}", _addr._ip, _addr._port ); + else { + spdlog::error("Connection 
unsuccessful, reason {} message {} errno {} message {}", ret, fi_strerror(ret), errno, strerror(errno)); + _conn.reset(); + _pd = nullptr; + return false; } #else if(secret) { @@ -375,8 +394,8 @@ namespace rdmalib { memset(&eq_attr, 0, sizeof(eq_attr)); eq_attr.size = 42; eq_attr.wait_obj = FI_WAIT_UNSPEC; - impl::expect_zero(fi_passive_ep(_addr.fabric, _addr.addrinfo, &_pep, NULL)); impl::expect_zero(fi_eq_open(_addr.fabric, &eq_attr, &_ec, NULL)); + impl::expect_zero(fi_passive_ep(_addr.fabric, _addr.addrinfo, &_pep, NULL)); impl::expect_zero(fi_pep_bind(_pep, &(_ec->fid), 0)); impl::expect_zero(fi_listen(_pep)); size_t size = 100; @@ -466,14 +485,17 @@ namespace rdmalib { ConnectionStatus status = ConnectionStatus::UNKNOWN; // Poll rdma cm events. - int ret = fi_eq_read(_ec, &event, &entry, sizeof(entry), 0); - if(ret < 0 && ret != -FI_EAGAIN && ret != -FI_EAVAIL) { - spdlog::error("Event poll unsuccesful, reason {} {}", errno, strerror(errno)); + int ret; + do + ret = fi_eq_read(_ec, &event, &entry, sizeof(entry), 0); + while (ret == -FI_EAGAIN); + if(ret < 0) { + spdlog::error("Event poll unsuccessful, return {} message {} errno {} message {}", ret, fi_strerror(ret), errno, strerror(errno)); return std::make_tuple(nullptr, ConnectionStatus::UNKNOWN); } SPDLOG_DEBUG( - "[RDMAPassive] received event: {}", - fi_tostr(&event, FI_TYPE_EQ_EVENT) + "[RDMAPassive] received event: {} in text {}", + event, fi_tostr(&event, FI_TYPE_EQ_EVENT) ); switch (event) { diff --git a/rfaas/lib/connection.cpp b/rfaas/lib/connection.cpp index 70c21bc..d9dd5c3 100644 --- a/rfaas/lib/connection.cpp +++ b/rfaas/lib/connection.cpp @@ -31,7 +31,6 @@ namespace rfaas { spdlog::error("Couldn't connect to manager at {}:{}", _address, _port); return false; } - _rcv_buffer.connect(&_active.connection()); #ifdef USE_LIBFABRIC _allocation_buffer.register_memory(_active.pd(), FI_WRITE | FI_REMOTE_WRITE); #else @@ -39,6 +38,7 @@ namespace rfaas { #endif // Initialize batch receive WCs 
_active.connection().initialize_batched_recv(_allocation_buffer, sizeof(rdmalib::AllocationRequest)); + _rcv_buffer.connect(&_active.connection()); return ret; } diff --git a/rfaas/lib/executor.cpp b/rfaas/lib/executor.cpp index f6f293f..7da31d0 100644 --- a/rfaas/lib/executor.cpp +++ b/rfaas/lib/executor.cpp @@ -337,7 +337,12 @@ namespace rfaas { conn, _rcv_buf_size ); + #ifdef USE_LIBFABRIC + this->_connections.back().conn->post_recv(_execs_buf.sge(obj_size, requested*obj_size), requested); + this->_connections.back().conn->initialize_batched_recv(_execs_buf, 0); + #else this->_connections.back().conn->post_recv(_execs_buf.sge(obj_size, requested*obj_size), requested); + #endif // FIXME: this should be in a function // FIXME: here it won't work if rcv_bufer_size < numcores this->_connections.back()._rcv_buffer.connect(this->_connections.back().conn.get()); @@ -375,8 +380,8 @@ namespace rfaas { int id = std::get<0>(wcs)[i].wr_id; #endif SPDLOG_DEBUG( - "Received buffer details for thread, addr {}, rkey {}", - _execs_buf.data()[id].r_addr, _execs_buf.data()[id].r_key + "Received buffer details for thread, id {}, addr {}, rkey {}", + id, _execs_buf.data()[id].r_addr, _execs_buf.data()[id].r_key ); _connections[id].remote_input = rdmalib::RemoteBuffer( _execs_buf.data()[id].r_addr, diff --git a/server/executor/fast_executor.cpp b/server/executor/fast_executor.cpp index 64f9971..26a3d7f 100644 --- a/server/executor/fast_executor.cpp +++ b/server/executor/fast_executor.cpp @@ -251,6 +251,7 @@ namespace server { #ifdef USE_LIBFABRIC send.register_memory(active.pd(), FI_WRITE); rcv.register_memory(active.pd(), FI_WRITE | FI_REMOTE_WRITE); + conn->initialize_batched_recv(rcv, 0); #else send.register_memory(active.pd(), IBV_ACCESS_LOCAL_WRITE); rcv.register_memory(active.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); @@ -267,7 +268,7 @@ namespace server { #endif buf.data()[0].r_addr = rcv.address(); buf.data()[0].r_key = rcv.rkey(); - SPDLOG_DEBUG("Thread {} 
Sends buffer details to client!", id); + SPDLOG_DEBUG("Thread {} Sends buffer details to client! Addr {} rkey {}", id, buf.data()[0].r_addr, buf.data()[0].r_key); this->conn->post_send(buf, 0, buf.size() <= max_inline_data); this->conn->poll_wc(rdmalib::QueueType::SEND, true, 1); SPDLOG_DEBUG("Thread {} Sent buffer details to client!", id); diff --git a/server/executor_manager/executor_process.cpp b/server/executor_manager/executor_process.cpp index afdbf2d..b00fb71 100644 --- a/server/executor_manager/executor_process.cpp +++ b/server/executor_manager/executor_process.cpp @@ -120,6 +120,7 @@ namespace rfaas::executor_manager { "--mgr-secret", mgr_secret.c_str(), "--mgr-buf-addr", mgr_buf_addr.c_str(), "--mgr-buf-rkey", mgr_buf_rkey.c_str(), + "-v", nullptr }; int ret = execvp(argv[0], const_cast(&argv[0])); From 0b5252830d94e20ec4723662e87980b9febf0a62 Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Fri, 25 Mar 2022 18:23:25 +0100 Subject: [PATCH 13/91] Resolve some bugs and change rkey to uint64_t --- rdmalib/include/rdmalib/allocation.hpp | 4 ++++ rdmalib/include/rdmalib/buffer.hpp | 6 +++--- rdmalib/include/rdmalib/functions.hpp | 4 ++++ rdmalib/lib/buffer.cpp | 14 +++++++++++--- rdmalib/lib/connection.cpp | 3 ++- rfaas/include/rfaas/executor.hpp | 16 ++++++++++++++++ rfaas/lib/executor.cpp | 3 +++ server/common.hpp | 4 ++++ server/executor/fast_executor.cpp | 4 ++-- server/executor/opts.cpp | 4 ++++ 10 files changed, 53 insertions(+), 9 deletions(-) diff --git a/rdmalib/include/rdmalib/allocation.hpp b/rdmalib/include/rdmalib/allocation.hpp index bc7e15a..fc1f460 100644 --- a/rdmalib/include/rdmalib/allocation.hpp +++ b/rdmalib/include/rdmalib/allocation.hpp @@ -23,7 +23,11 @@ namespace rdmalib { struct BufferInformation { uint64_t r_addr; + #ifdef USE_LIBFABRIC + uint64_t r_key; + #else uint32_t r_key; + #endif }; } diff --git a/rdmalib/include/rdmalib/buffer.hpp b/rdmalib/include/rdmalib/buffer.hpp index 9168cd7..f821757 100644 --- 
a/rdmalib/include/rdmalib/buffer.hpp +++ b/rdmalib/include/rdmalib/buffer.hpp @@ -64,7 +64,7 @@ namespace rdmalib { #else uint32_t lkey() const; #endif - uint32_t rkey() const; + uint64_t rkey() const; ScatterGatherElement sge(uint32_t size, uint32_t offset) const; }; @@ -72,12 +72,12 @@ namespace rdmalib { struct RemoteBuffer { uintptr_t addr; - uint32_t rkey; + uint64_t rkey; uint32_t size; RemoteBuffer(); // When accessing the remote buffer, we might not need to know the size. - RemoteBuffer(uintptr_t addr, uint32_t rkey, uint32_t size = 0); + RemoteBuffer(uintptr_t addr, uint64_t rkey, uint32_t size = 0); template void serialize(Archive & ar) diff --git a/rdmalib/include/rdmalib/functions.hpp b/rdmalib/include/rdmalib/functions.hpp index 30bb6ea..353a2b1 100644 --- a/rdmalib/include/rdmalib/functions.hpp +++ b/rdmalib/include/rdmalib/functions.hpp @@ -9,7 +9,11 @@ namespace rdmalib { namespace functions { struct Submission { uint64_t r_address; + #ifdef USE_LIBFABRIC + uint64_t r_key; + #else uint32_t r_key; + #endif static constexpr int DATA_HEADER_SIZE = 12; }; diff --git a/rdmalib/lib/buffer.cpp b/rdmalib/lib/buffer.cpp index b40234e..2a2034b 100644 --- a/rdmalib/lib/buffer.cpp +++ b/rdmalib/lib/buffer.cpp @@ -111,7 +111,7 @@ namespace rdmalib { namespace impl { impl::expect_zero(ret); SPDLOG_DEBUG( "Registered {} bytes, mr {}, address {}, lkey {}, rkey {}", - _bytes, fmt::ptr(_mr), fmt::ptr(_ptr), *(uint32_t *)fi_mr_desc(_mr), fi_mr_key(_mr) + _bytes, fmt::ptr(_mr), fmt::ptr(_ptr), fmt::ptr(fi_mr_desc(_mr)), fi_mr_key(_mr) ); } #else @@ -170,7 +170,7 @@ namespace rdmalib { namespace impl { #endif #ifdef USE_LIBFABRIC - uint32_t Buffer::rkey() const + uint64_t Buffer::rkey() const { assert(this->_mr); return fi_mr_key(this->_mr); @@ -247,10 +247,18 @@ namespace rdmalib { size(0) {} - RemoteBuffer::RemoteBuffer(uintptr_t addr, uint32_t rkey, uint32_t size): + #ifdef USE_LIBFABRIC + RemoteBuffer::RemoteBuffer(uintptr_t addr, uint64_t rkey, uint32_t size): + 
addr(addr), + rkey(rkey), + size(size) + {} + #else + RemoteBuffer::RemoteBuffer(uintptr_t addr, uint32_t rkey, uint32_t size): addr(addr), rkey(rkey), size(size) {} + #endif } diff --git a/rdmalib/lib/connection.cpp b/rdmalib/lib/connection.cpp index e65e3b6..e90a0a7 100644 --- a/rdmalib/lib/connection.cpp +++ b/rdmalib/lib/connection.cpp @@ -485,7 +485,8 @@ namespace rdmalib { msg.addr = temp; msg.context = reinterpret_cast((uint64_t)id); - int ret = fi_writemsg(_qp, &msg, 0); + // int ret = fi_writemsg(_qp, &msg, 0); + int ret = fi_writev(_qp, elems.array(), elems.lkeys(), count, temp, msg.rma_iov->addr, msg.rma_iov->key, reinterpret_cast((uint64_t)id)); if(ret) { spdlog::error("Post write unsuccessful, reason {} {}, sges_count {}, wr_id {}, remote addr {}, remote rkey {}, imm data {}, connection {}", ret, strerror(ret), count, id, msg.rma_iov->addr, msg.rma_iov->key, ntohl(msg.data), fmt::ptr(this) diff --git a/rfaas/include/rfaas/executor.hpp b/rfaas/include/rfaas/executor.hpp index fcfffbc..9559964 100644 --- a/rfaas/include/rfaas/executor.hpp +++ b/rfaas/include/rfaas/executor.hpp @@ -98,7 +98,11 @@ namespace rfaas { char* data = static_cast(in.ptr()); // TODO: we assume here uintptr_t is 8 bytes *reinterpret_cast(data) = out.address(); + #ifdef USE_LIBFABRIC + *reinterpret_cast(data + 8) = out.rkey(); + #else *reinterpret_cast(data + 8) = out.rkey(); + #endif int invoc_id = this->_invoc_id++; //_futures[invoc_id] = std::move(std::promise{}); @@ -151,7 +155,11 @@ namespace rfaas { char* data = static_cast(in[i].ptr()); // TODO: we assume here uintptr_t is 8 bytes *reinterpret_cast(data) = out[i].address(); + #ifdef USE_LIBFABRIC + *reinterpret_cast(data + 8) = out[i].rkey(); + #else *reinterpret_cast(data + 8) = out[i].rkey(); + #endif SPDLOG_DEBUG("Invoke function {} with invocation id {}", func_idx, _invoc_id); _connections[i].conn->post_write( @@ -211,7 +219,11 @@ namespace rfaas { char* data = static_cast(in.ptr()); // TODO: we assume here uintptr_t is 8 
bytes *reinterpret_cast(data) = out.address(); + #ifdef USE_LIBFABRIC + *reinterpret_cast(data + 8) = out.rkey(); + #else *reinterpret_cast(data + 8) = out.rkey(); + #endif int invoc_id = this->_invoc_id++; SPDLOG_DEBUG( @@ -312,7 +324,11 @@ namespace rfaas { char* data = static_cast(in[i].ptr()); // TODO: we assume here uintptr_t is 8 bytes *reinterpret_cast(data) = out[i].address(); + #ifdef USE_LIBFABRIC + *reinterpret_cast(data + 8) = out[i].rkey(); + #else *reinterpret_cast(data + 8) = out[i].rkey(); + #endif SPDLOG_DEBUG("Invoke function {} with invocation id {}", func_idx, _invoc_id); _connections[i].conn->post_write( diff --git a/rfaas/lib/executor.cpp b/rfaas/lib/executor.cpp index 7da31d0..30b8e0f 100644 --- a/rfaas/lib/executor.cpp +++ b/rfaas/lib/executor.cpp @@ -379,6 +379,9 @@ namespace rfaas { #else int id = std::get<0>(wcs)[i].wr_id; #endif + for (int j = 0; j < _execs_buf.data_size(); j++) + std::cout << std::hex << ((char *)_execs_buf.data())[j]; + std::cout << std::endl; SPDLOG_DEBUG( "Received buffer details for thread, id {}, addr {}, rkey {}", id, _execs_buf.data()[id].r_addr, _execs_buf.data()[id].r_key diff --git a/server/common.hpp b/server/common.hpp index 66ceec0..0279cab 100644 --- a/server/common.hpp +++ b/server/common.hpp @@ -12,7 +12,11 @@ namespace executor { int port; int secret; uint64_t r_addr; + #ifdef USE_LIBFABRIC + uint64_t r_key; + #else uint32_t r_key; + #endif }; } diff --git a/server/executor/fast_executor.cpp b/server/executor/fast_executor.cpp index 26a3d7f..b2a0078 100644 --- a/server/executor/fast_executor.cpp +++ b/server/executor/fast_executor.cpp @@ -249,7 +249,7 @@ namespace server { // Now generic receives for function invocations #ifdef USE_LIBFABRIC - send.register_memory(active.pd(), FI_WRITE); + send.register_memory(active.pd(), FI_WRITE | FI_READ); rcv.register_memory(active.pd(), FI_WRITE | FI_REMOTE_WRITE); conn->initialize_batched_recv(rcv, 0); #else @@ -262,7 +262,7 @@ namespace server { // Send to the 
client information about thread buffer rdmalib::Buffer buf(1); #ifdef USE_LIBFABRIC - buf.register_memory(active.pd(), FI_WRITE); + buf.register_memory(active.pd(), FI_WRITE | FI_READ); #else buf.register_memory(active.pd(), IBV_ACCESS_LOCAL_WRITE); #endif diff --git a/server/executor/opts.cpp b/server/executor/opts.cpp index 5e5adc8..e2c4d13 100644 --- a/server/executor/opts.cpp +++ b/server/executor/opts.cpp @@ -29,7 +29,11 @@ namespace server { ("mgr-port", "Use selected port", cxxopts::value()) ("mgr-secret", "Use selected port", cxxopts::value()) ("mgr-buf-addr", "Use selected port", cxxopts::value()) + #ifdef USE_LIBFABRIC + ("mgr-buf-rkey", "Use selected port", cxxopts::value()) + #else ("mgr-buf-rkey", "Use selected port", cxxopts::value()) + #endif ; auto parsed_options = options.parse(argc, argv); From 0c44daac7ffa67ead33490c4f8789b3fff6238d0 Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Fri, 25 Mar 2022 22:41:23 +0100 Subject: [PATCH 14/91] Solve bugs --- rdmalib/include/rdmalib/functions.hpp | 3 ++- rdmalib/lib/connection.cpp | 16 +++++++++++----- rdmalib/lib/rdmalib.cpp | 4 ++-- server/executor/opts.cpp | 4 ++++ 4 files changed, 19 insertions(+), 8 deletions(-) diff --git a/rdmalib/include/rdmalib/functions.hpp b/rdmalib/include/rdmalib/functions.hpp index 353a2b1..9fd4d4f 100644 --- a/rdmalib/include/rdmalib/functions.hpp +++ b/rdmalib/include/rdmalib/functions.hpp @@ -11,10 +11,11 @@ namespace rdmalib { namespace functions { uint64_t r_address; #ifdef USE_LIBFABRIC uint64_t r_key; + static constexpr int DATA_HEADER_SIZE = 16; #else uint32_t r_key; - #endif static constexpr int DATA_HEADER_SIZE = 12; + #endif }; constexpr int Submission::DATA_HEADER_SIZE; diff --git a/rdmalib/lib/connection.cpp b/rdmalib/lib/connection.cpp index e90a0a7..d829341 100644 --- a/rdmalib/lib/connection.cpp +++ b/rdmalib/lib/connection.cpp @@ -121,8 +121,10 @@ namespace rdmalib { #ifdef USE_LIBFABRIC void Connection::initialize(fid_fabric* fabric, fid_domain* pd, 
fi_info* info, fid_eq* ec) { - // Create the endpoint + // Create the endpoint and set its flags up so that we get completions on RDM impl::expect_zero(fi_endpoint(pd, info, &_qp, reinterpret_cast(this))); + uint64_t flags = FI_RECV | FI_COMPLETION; + impl::expect_zero(fi_control(&_qp->fid, FI_SETOPSFLAG, (void *)&flags)); // Open the waitset fi_wait_attr wait_attr; @@ -144,7 +146,7 @@ namespace rdmalib { // Connect the wait set to the receive queue cq_attr.wait_set = _wait_set; impl::expect_zero(fi_cq_open(pd, &cq_attr, &_rcv_channel, nullptr)); - impl::expect_zero(fi_ep_bind(_qp, &_rcv_channel->fid, FI_RECV)); + impl::expect_zero(fi_ep_bind(_qp, &_rcv_channel->fid, FI_RECV | FI_SELECTIVE_COMPLETION)); // Enable the endpoint impl::expect_zero(fi_enable(_qp)); @@ -476,17 +478,21 @@ namespace rdmalib { fi_addr_t temp = 0; int32_t id = _req_count++; size_t count = elems.size(); - if(elems.size() == 1 && elems.array()[0].iov_len == 0) + if(elems.size() == 1 && elems.array()[0].iov_len == 0) { count = 0; + SPDLOG_DEBUG( + "Zero elems" + ); + } msg.msg_iov = elems.array(); msg.desc = elems.lkeys(); msg.iov_count = count; msg.addr = temp; msg.context = reinterpret_cast((uint64_t)id); + msg.data = 100; - // int ret = fi_writemsg(_qp, &msg, 0); - int ret = fi_writev(_qp, elems.array(), elems.lkeys(), count, temp, msg.rma_iov->addr, msg.rma_iov->key, reinterpret_cast((uint64_t)id)); + int ret = fi_writemsg(_qp, &msg, FI_COMPLETION); if(ret) { spdlog::error("Post write unsuccessful, reason {} {}, sges_count {}, wr_id {}, remote addr {}, remote rkey {}, imm data {}, connection {}", ret, strerror(ret), count, id, msg.rma_iov->addr, msg.rma_iov->key, ntohl(msg.data), fmt::ptr(this) diff --git a/rdmalib/lib/rdmalib.cpp b/rdmalib/lib/rdmalib.cpp index 0b94b5b..8bae4b5 100644 --- a/rdmalib/lib/rdmalib.cpp +++ b/rdmalib/lib/rdmalib.cpp @@ -36,7 +36,7 @@ namespace rdmalib { addrinfo = fi_allocinfo(); // Set the hints to have ability to conduct MSG, Atomic and RMA operations - 
hints->caps |= FI_MSG | FI_RMA | FI_ATOMIC; + hints->caps |= FI_MSG | FI_RMA | FI_ATOMIC | FI_RMA_EVENT; // Set the hints to indicate that we will register the local buffers hints->domain_attr->mr_mode = FI_MR_BASIC; // FI_MR_LOCAL | FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY; hints->ep_attr->type = FI_EP_MSG; @@ -77,7 +77,7 @@ namespace rdmalib { addrinfo = fi_allocinfo(); // Set the hints to have ability to conduct MSG, Atomic and RMA operations - hints->caps |= FI_MSG | FI_RMA | FI_ATOMIC; + hints->caps |= FI_MSG | FI_RMA | FI_ATOMIC | FI_RMA_EVENT; // Set the hints to indicate that we will register the local buffers hints->mode |= FI_LOCAL_MR; free(hints->fabric_attr->prov_name); diff --git a/server/executor/opts.cpp b/server/executor/opts.cpp index e2c4d13..7c4797f 100644 --- a/server/executor/opts.cpp +++ b/server/executor/opts.cpp @@ -56,7 +56,11 @@ namespace server { result.mgr_port = parsed_options["mgr-port"].as(); result.mgr_secret = parsed_options["mgr-secret"].as(); result.accounting_buffer_addr = parsed_options["mgr-buf-addr"].as(); + #ifdef USE_LIBFABRIC + result.accounting_buffer_rkey = parsed_options["mgr-buf-rkey"].as(); + #else result.accounting_buffer_rkey = parsed_options["mgr-buf-rkey"].as(); + #endif std::string polling_mgr = parsed_options["polling-mgr"].as(); if(polling_mgr == "server") { From 7083150c3ebe32fff5c5c3cbb2dcc24d40d5f57f Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Sat, 26 Mar 2022 14:13:51 +0100 Subject: [PATCH 15/91] Change the ordering to upper immediate data bits --- rdmalib/lib/connection.cpp | 33 +++++++++++++++---------------- rdmalib/lib/rdmalib.cpp | 17 ++++------------ rfaas/include/rfaas/executor.hpp | 11 ++++++----- rfaas/lib/executor.cpp | 6 +++++- server/executor/fast_executor.cpp | 4 ++-- 5 files changed, 33 insertions(+), 38 deletions(-) diff --git a/rdmalib/lib/connection.cpp b/rdmalib/lib/connection.cpp index d829341..95a06c4 100644 --- a/rdmalib/lib/connection.cpp +++ 
b/rdmalib/lib/connection.cpp @@ -123,8 +123,6 @@ namespace rdmalib { { // Create the endpoint and set its flags up so that we get completions on RDM impl::expect_zero(fi_endpoint(pd, info, &_qp, reinterpret_cast(this))); - uint64_t flags = FI_RECV | FI_COMPLETION; - impl::expect_zero(fi_control(&_qp->fid, FI_SETOPSFLAG, (void *)&flags)); // Open the waitset fi_wait_attr wait_attr; @@ -146,7 +144,7 @@ namespace rdmalib { // Connect the wait set to the receive queue cq_attr.wait_set = _wait_set; impl::expect_zero(fi_cq_open(pd, &cq_attr, &_rcv_channel, nullptr)); - impl::expect_zero(fi_ep_bind(_qp, &_rcv_channel->fid, FI_RECV | FI_SELECTIVE_COMPLETION)); + impl::expect_zero(fi_ep_bind(_qp, &_rcv_channel->fid, FI_RECV)); // Enable the endpoint impl::expect_zero(fi_enable(_qp)); @@ -478,35 +476,34 @@ namespace rdmalib { fi_addr_t temp = 0; int32_t id = _req_count++; size_t count = elems.size(); - if(elems.size() == 1 && elems.array()[0].iov_len == 0) { - count = 0; - SPDLOG_DEBUG( - "Zero elems" - ); - } - + // if(elems.size() == 1 && elems.array()[0].iov_len == 0) { + // count = 0; + // SPDLOG_DEBUG( + // "Zero elems" + // ); + // } msg.msg_iov = elems.array(); msg.desc = elems.lkeys(); msg.iov_count = count; msg.addr = temp; msg.context = reinterpret_cast((uint64_t)id); - msg.data = 100; - - int ret = fi_writemsg(_qp, &msg, FI_COMPLETION); + if (msg.data == 0) + spdlog::error("Data equal to zero will result in no completion on the receiver side!"); + int ret = fi_writemsg(_qp, &msg, FI_COMPLETION | FI_REMOTE_CQ_DATA); if(ret) { spdlog::error("Post write unsuccessful, reason {} {}, sges_count {}, wr_id {}, remote addr {}, remote rkey {}, imm data {}, connection {}", - ret, strerror(ret), count, id, msg.rma_iov->addr, msg.rma_iov->key, ntohl(msg.data), fmt::ptr(this) + ret, strerror(ret), count, id, msg.rma_iov->addr, msg.rma_iov->key, msg.data, fmt::ptr(this) ); return -1; } if(elems.size() > 0) SPDLOG_DEBUG( "Post write succesfull id: {}, sge size: {}, first lkey {} 
len {}, remote addr {}, remote rkey {}, imm data {}, connection {}", - count, elems.size(), elems.lkeys()[0], elems.array()[0].iov_len, msg.rma_iov->addr, msg.rma_iov->key, ntohl(msg.data), fmt::ptr(this) + count, elems.size(), elems.lkeys()[0], elems.array()[0].iov_len, msg.rma_iov->addr, msg.rma_iov->key, msg.data, fmt::ptr(this) ); else SPDLOG_DEBUG( - "Post write succesfull id: {}, remote addr {}, remote rkey {}, imm data {}, connection {}", id, msg.rma_iov->addr, msg.rma_iov->key, ntohl(msg.data), fmt::ptr(this) + "Post write succesfull id: {}, remote addr {}, remote rkey {}, imm data {}, connection {}", id, msg.rma_iov->addr, msg.rma_iov->key, msg.data, fmt::ptr(this) ); return _req_count - 1; @@ -561,6 +558,8 @@ namespace rdmalib { iov.len = rbuf.size; msg.rma_iov = &iov; msg.rma_iov_count = 1; + // Add constant ignored at the other end as a data equal to zero here does not generate the completion event + msg.data = 0x1; return _post_write(std::forward(elems), msg, force_inline, false); #else ibv_send_wr wr; @@ -583,7 +582,7 @@ namespace rdmalib { iov.len = rbuf.size; msg.rma_iov = &iov; msg.rma_iov_count = 1; - msg.data = htonl(immediate); + msg.data = ((uint64_t) immediate << 32) + 0x1; return _post_write(std::forward(elems), msg, force_inline, force_solicited); #else ibv_send_wr wr; diff --git a/rdmalib/lib/rdmalib.cpp b/rdmalib/lib/rdmalib.cpp index 8bae4b5..86e1af4 100644 --- a/rdmalib/lib/rdmalib.cpp +++ b/rdmalib/lib/rdmalib.cpp @@ -39,8 +39,10 @@ namespace rdmalib { hints->caps |= FI_MSG | FI_RMA | FI_ATOMIC | FI_RMA_EVENT; // Set the hints to indicate that we will register the local buffers hints->domain_attr->mr_mode = FI_MR_BASIC; // FI_MR_LOCAL | FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY; - hints->ep_attr->type = FI_EP_MSG; - hints->fabric_attr->prov_name = strdup("GNI"); + hints->ep_attr->type = FI_EP_UNSPEC; + hints->tx_attr->caps = FI_MSG | FI_RMA | FI_ATOMIC; + hints->rx_attr->caps = FI_MSG | FI_RMA | FI_ATOMIC; + 
hints->fabric_attr->prov_name = strdup("sockets"); impl::expect_zero(fi_getinfo(FI_VERSION(1, 9), ip.c_str(), std::to_string(port).c_str(), passive ? FI_SOURCE : 0, hints, &addrinfo)); fi_freeinfo(hints); impl::expect_zero(fi_fabric(addrinfo->fabric_attr, &fabric, nullptr)); @@ -398,17 +400,6 @@ namespace rdmalib { impl::expect_zero(fi_passive_ep(_addr.fabric, _addr.addrinfo, &_pep, NULL)); impl::expect_zero(fi_pep_bind(_pep, &(_ec->fid), 0)); impl::expect_zero(fi_listen(_pep)); - size_t size = 100; - char address[size]; - impl::expect_zero(fi_getname(&_pep->fid, address, &size)); - spdlog::info( - "Listening on address length {}:", - size - ); - std::cout << "\b \b"; - for(size_t i = 0; i < size; ++i) - std::cout << std::hex << (int)address[i]; - std::cout << std::endl; #else // Start listening impl::expect_nonzero(this->_ec = rdma_create_event_channel()); diff --git a/rfaas/include/rfaas/executor.hpp b/rfaas/include/rfaas/executor.hpp index 9559964..455cb4c 100644 --- a/rfaas/include/rfaas/executor.hpp +++ b/rfaas/include/rfaas/executor.hpp @@ -183,7 +183,7 @@ namespace rfaas { auto wc = _connections[0]._rcv_buffer.poll(true); #ifdef USE_LIBFABRIC - uint32_t val = ntohl(std::get<0>(wc)[0].data); + uint32_t val = std::get<0>(wc)[0].data >> 32; #else uint32_t val = ntohl(std::get<0>(wc)[0].imm_data); #endif @@ -246,7 +246,7 @@ namespace rfaas { auto wc = _connections[0]._rcv_buffer.poll(true); for(int i = 0; i < std::get<1>(wc); ++i) { #ifdef USE_LIBFABRIC - uint32_t val = ntohl(std::get<0>(wc)[i].data); + uint32_t val = std::get<0>(wc)[i].data >> 32; #else uint32_t val = ntohl(std::get<0>(wc)[i].imm_data); #endif @@ -261,7 +261,7 @@ namespace rfaas { #else out_size = std::get<0>(wc)[i].byte_len; #endif - //spdlog::info("Result for id {}", finished_invoc_id); + spdlog::info("Result {} for id {}", return_val, finished_invoc_id); } else { auto it = _futures.find(finished_invoc_id); //spdlog::info("Poll Future for id {}", finished_invoc_id); @@ -280,7 +280,7 @@ 
namespace rfaas { // Thus, we later unset the variable since we're done for(int i = 0; i < std::get<1>(wc); ++i) { #ifdef USE_LIBFABRIC - uint32_t val = ntohl(std::get<0>(wc)[i].data); + uint32_t val = std::get<0>(wc)[i].data >> 32; #else uint32_t val = ntohl(std::get<0>(wc)[i].imm_data); #endif @@ -353,10 +353,11 @@ namespace rfaas { _active_polling = true; while(expected) { auto wc = _connections[0]._rcv_buffer.poll(true); + SPDLOG_DEBUG("Found data"); expected -= std::get<1>(wc); for(int i = 0; i < std::get<1>(wc); ++i) { #ifdef USE_LIBFABRIC - uint32_t val = ntohl(std::get<0>(wc)[i].data); + uint32_t val = std::get<0>(wc)[i].data >> 32; #else uint32_t val = ntohl(std::get<0>(wc)[i].imm_data); #endif diff --git a/rfaas/lib/executor.cpp b/rfaas/lib/executor.cpp index 30b8e0f..d515963 100644 --- a/rfaas/lib/executor.cpp +++ b/rfaas/lib/executor.cpp @@ -46,7 +46,11 @@ namespace rfaas { _port(port), _rcv_buf_size(rcv_buf_size), _executions(0), + #ifdef USE_LIBFABRIC + _invoc_id(1), + #else _invoc_id(0), + #endif _max_inlined_msg(max_inlined_msg) { #ifdef USE_LIBFABRIC @@ -205,7 +209,7 @@ namespace rfaas { auto wc = _connections[0]._rcv_buffer.poll(false); for(int i = 0; i < std::get<1>(wc); ++i) { #ifdef USE_LIBFABRIC - uint32_t val = ntohl(std::get<0>(wc)[i].data); + uint32_t val = std::get<0>(wc)[i].data >> 32; #else uint32_t val = ntohl(std::get<0>(wc)[i].imm_data); #endif diff --git a/server/executor/fast_executor.cpp b/server/executor/fast_executor.cpp index b2a0078..deb4b5d 100644 --- a/server/executor/fast_executor.cpp +++ b/server/executor/fast_executor.cpp @@ -74,7 +74,7 @@ namespace server { //server_processing_times.start(); #ifdef USE_LIBFABRIC fi_cq_data_entry* wc = &std::get<0>(wcs)[i]; - int info = ntohl(wc->data); + int info = wc->data >> 32; #else ibv_wc* wc = &std::get<0>(wcs)[i]; if(wc->status) { @@ -151,7 +151,7 @@ namespace server { //server_processing_times.start(); #ifdef USE_LIBFABRIC fi_cq_data_entry* wc = &std::get<0>(wcs)[i]; - int info = 
ntohl(wc->data); + int info = wc->data >> 32; #else ibv_wc* wc = &std::get<0>(wcs)[i]; if(wc->status) { From 1374a7531f1edd534ee4e03653b535925aa2e0a2 Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Sat, 26 Mar 2022 14:24:39 +0100 Subject: [PATCH 16/91] Repair the provider selection --- rdmalib/lib/rdmalib.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/rdmalib/lib/rdmalib.cpp b/rdmalib/lib/rdmalib.cpp index 86e1af4..f78cc66 100644 --- a/rdmalib/lib/rdmalib.cpp +++ b/rdmalib/lib/rdmalib.cpp @@ -39,10 +39,8 @@ namespace rdmalib { hints->caps |= FI_MSG | FI_RMA | FI_ATOMIC | FI_RMA_EVENT; // Set the hints to indicate that we will register the local buffers hints->domain_attr->mr_mode = FI_MR_BASIC; // FI_MR_LOCAL | FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY; - hints->ep_attr->type = FI_EP_UNSPEC; - hints->tx_attr->caps = FI_MSG | FI_RMA | FI_ATOMIC; - hints->rx_attr->caps = FI_MSG | FI_RMA | FI_ATOMIC; - hints->fabric_attr->prov_name = strdup("sockets"); + hints->ep_attr->type = FI_EP_MSG; + hints->fabric_attr->prov_name = strdup("GNI"); impl::expect_zero(fi_getinfo(FI_VERSION(1, 9), ip.c_str(), std::to_string(port).c_str(), passive ? 
FI_SOURCE : 0, hints, &addrinfo)); fi_freeinfo(hints); impl::expect_zero(fi_fabric(addrinfo->fabric_attr, &fabric, nullptr)); From 2acc757da341e1a9cffdd5947f5fb79dd263bd46 Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Sun, 27 Mar 2022 16:55:15 +0200 Subject: [PATCH 17/91] Resolve the length of the transfer and secret passing bugs --- rdmalib/include/rdmalib/connection.hpp | 2 +- rdmalib/include/rdmalib/rdmalib.hpp | 8 ---- rdmalib/lib/connection.cpp | 56 ++++++++------------------ rdmalib/lib/rdmalib.cpp | 35 ++++++++++------ rfaas/include/rfaas/executor.hpp | 11 +++-- server/executor/fast_executor.cpp | 18 ++++++--- 6 files changed, 59 insertions(+), 71 deletions(-) diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index 99f235c..2acbe49 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -149,7 +149,7 @@ namespace rdmalib { #endif private: #ifdef USE_LIBFABRIC - int32_t _post_write(ScatterGatherElement && elems, fi_msg_rma & msg, bool force_inline, bool force_solicited); + int32_t _post_write(ScatterGatherElement && elems, const RemoteBuffer & rbuf, const uint32_t immediate = 0); #else int32_t _post_write(ScatterGatherElement && elems, ibv_send_wr wr, bool force_inline, bool force_solicited); #endif diff --git a/rdmalib/include/rdmalib/rdmalib.hpp b/rdmalib/include/rdmalib/rdmalib.hpp index 5aa8410..4c4ba43 100644 --- a/rdmalib/include/rdmalib/rdmalib.hpp +++ b/rdmalib/include/rdmalib/rdmalib.hpp @@ -106,14 +106,6 @@ namespace rdmalib { void accept(Connection* connection); void set_nonblocking_poll(); }; - - #ifdef USE_LIBFABRIC - struct eventEntry { - fid_t fid; - struct fi_info *info; - uint32_t secret; - }; - #endif } #endif diff --git a/rdmalib/lib/connection.cpp b/rdmalib/lib/connection.cpp index 95a06c4..5d6782d 100644 --- a/rdmalib/lib/connection.cpp +++ b/rdmalib/lib/connection.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -471,39 
+472,27 @@ namespace rdmalib { #endif #ifdef USE_LIBFABRIC - int32_t Connection::_post_write(ScatterGatherElement && elems, fi_msg_rma &msg, bool force_inline, bool force_solicited) + int32_t Connection::_post_write(ScatterGatherElement && elems, const RemoteBuffer & rbuf, const uint32_t immediate) { fi_addr_t temp = 0; int32_t id = _req_count++; size_t count = elems.size(); - // if(elems.size() == 1 && elems.array()[0].iov_len == 0) { - // count = 0; - // SPDLOG_DEBUG( - // "Zero elems" - // ); - // } - msg.msg_iov = elems.array(); - msg.desc = elems.lkeys(); - msg.iov_count = count; - msg.addr = temp; - msg.context = reinterpret_cast((uint64_t)id); - if (msg.data == 0) - spdlog::error("Data equal to zero will result in no completion on the receiver side!"); - int ret = fi_writemsg(_qp, &msg, FI_COMPLETION | FI_REMOTE_CQ_DATA); + uint64_t data = immediate + (elems.array()[0].iov_len << 32); + int ret = fi_writedata(_qp, elems.array()[0].iov_base, elems.array()[0].iov_len, elems.lkeys()[0], data, temp, rbuf.addr, rbuf.rkey, reinterpret_cast((uint64_t)id)); if(ret) { spdlog::error("Post write unsuccessful, reason {} {}, sges_count {}, wr_id {}, remote addr {}, remote rkey {}, imm data {}, connection {}", - ret, strerror(ret), count, id, msg.rma_iov->addr, msg.rma_iov->key, msg.data, fmt::ptr(this) + ret, strerror(ret), count, id, rbuf.addr, rbuf.rkey, data, fmt::ptr(this) ); return -1; } if(elems.size() > 0) SPDLOG_DEBUG( "Post write succesfull id: {}, sge size: {}, first lkey {} len {}, remote addr {}, remote rkey {}, imm data {}, connection {}", - count, elems.size(), elems.lkeys()[0], elems.array()[0].iov_len, msg.rma_iov->addr, msg.rma_iov->key, msg.data, fmt::ptr(this) + count, count, elems.lkeys()[0], elems.array()[0].iov_len, rbuf.addr, rbuf.rkey, data, fmt::ptr(this) ); else SPDLOG_DEBUG( - "Post write succesfull id: {}, remote addr {}, remote rkey {}, imm data {}, connection {}", id, msg.rma_iov->addr, msg.rma_iov->key, msg.data, fmt::ptr(this) + "Post 
write succesfull id: {}, remote addr {}, remote rkey {}, imm data {}, connection {}", id, rbuf.addr, rbuf.rkey, data, fmt::ptr(this) ); return _req_count - 1; @@ -550,17 +539,11 @@ namespace rdmalib { int32_t Connection::post_write(ScatterGatherElement && elems, const RemoteBuffer & rbuf, bool force_inline) { #ifdef USE_LIBFABRIC - fi_msg_rma msg; - fi_rma_iov iov; - memset(&msg, 0, sizeof(msg)); - iov.addr = rbuf.addr; - iov.key = rbuf.rkey; - iov.len = rbuf.size; - msg.rma_iov = &iov; - msg.rma_iov_count = 1; - // Add constant ignored at the other end as a data equal to zero here does not generate the completion event - msg.data = 0x1; - return _post_write(std::forward(elems), msg, force_inline, false); + if (elems.size() > 1) { + spdlog::error("Post write unsuccessful on connection {}, reason Function not implemented for multiple sges.", fmt::ptr(this)); + return -1; + } + return _post_write(std::forward(elems), rbuf); #else ibv_send_wr wr; memset(&wr, 0, sizeof(wr)); @@ -574,16 +557,11 @@ namespace rdmalib { int32_t Connection::post_write(ScatterGatherElement && elems, const RemoteBuffer & rbuf, uint32_t immediate, bool force_inline, bool force_solicited) { #ifdef USE_LIBFABRIC - fi_msg_rma msg; - fi_rma_iov iov; - memset(&msg, 0, sizeof(msg)); - iov.addr = rbuf.addr; - iov.key = rbuf.rkey; - iov.len = rbuf.size; - msg.rma_iov = &iov; - msg.rma_iov_count = 1; - msg.data = ((uint64_t) immediate << 32) + 0x1; - return _post_write(std::forward(elems), msg, force_inline, force_solicited); + if (elems.size() > 1) { + spdlog::error("Post write unsuccessful on connection {}, reason Function not implemented for multiple sges.", fmt::ptr(this)); + return -1; + } + return _post_write(std::forward(elems), rbuf, immediate); #else ibv_send_wr wr; memset(&wr, 0, sizeof(wr)); diff --git a/rdmalib/lib/rdmalib.cpp b/rdmalib/lib/rdmalib.cpp index f78cc66..59e25f3 100644 --- a/rdmalib/lib/rdmalib.cpp +++ b/rdmalib/lib/rdmalib.cpp @@ -4,12 +4,14 @@ #include // inet_ntoa #include 
+#include #include #include // poll on file descriptors #include #include +#include #ifdef USE_LIBFABRIC #include @@ -250,7 +252,7 @@ namespace rdmalib { if(secret) { param = &secret; paramlen = sizeof(secret); - SPDLOG_DEBUG("Setting connection secret {} of length {}", secret, sizeof(uint32_t)); + SPDLOG_DEBUG("Setting connection secret {} of length {}", *param, paramlen); } int ret = fi_connect(_conn->qp(), _addr.addrinfo->dest_addr, param, paramlen); if(ret) { @@ -469,14 +471,16 @@ namespace rdmalib { { #ifdef USE_LIBFABRIC uint32_t event; - eventEntry entry; + // Need those additional bytes in fi_eq_cm_entry so that we can transfer the secret + int total_size = sizeof(fi_eq_cm_entry) + sizeof(uint32_t); + fi_eq_cm_entry *entry = (fi_eq_cm_entry *)malloc(total_size); Connection* connection = nullptr; ConnectionStatus status = ConnectionStatus::UNKNOWN; // Poll rdma cm events. int ret; do - ret = fi_eq_read(_ec, &event, &entry, sizeof(entry), 0); + ret = fi_eq_read(_ec, &event, entry, total_size, 0); while (ret == -FI_EAGAIN); if(ret < 0) { spdlog::error("Event poll unsuccessful, return {} message {} errno {} message {}", ret, fi_strerror(ret), errno, strerror(errno)); @@ -491,9 +495,11 @@ namespace rdmalib { case FI_CONNREQ: connection = new Connection{true}; + SPDLOG_DEBUG("[RDMAPassive] Connection request with ret {}", ret); + // Read the secret - if(ret == sizeof(entry)) { - uint32_t data = *reinterpret_cast(entry.secret); + if(ret == total_size) { + uint32_t data = *reinterpret_cast(entry->data); connection->set_private_data(data); SPDLOG_DEBUG("[RDMAPassive] Connection request with private data {}", data); } @@ -501,33 +507,36 @@ namespace rdmalib { SPDLOG_DEBUG("[RDMAPassive] Connection request with no private data"); // Check if we have a domain open for the connection already - if (!entry.info->domain_attr->domain) - fi_domain(_addr.fabric, entry.info, &_pd, NULL); + // if (!entry.info->domain_attr->domain) + // fi_domain(_addr.fabric, entry.info, &_pd, 
NULL); // Enable the endpoint - connection->initialize(_addr.fabric, _pd, entry.info, _ec); + connection->initialize(_addr.fabric, _pd, entry->info, _ec); SPDLOG_DEBUG( "[RDMAPassive] Created connection fid {} qp {}", fmt::ptr(connection->id()), fmt::ptr(&connection->qp()->fid) ); + // Free the info + fi_freeinfo(entry->info); + status = ConnectionStatus::REQUESTED; _active_connections.insert(connection); break; case FI_CONNECTED: SPDLOG_DEBUG( "[RDMAPassive] Connection is established for id {}, and connection {}", - fmt::ptr(entry.fid), fmt::ptr(entry.fid->context) + fmt::ptr(entry->fid), fmt::ptr(entry->fid->context) ); - connection = reinterpret_cast(entry.fid->context); + connection = reinterpret_cast(entry->fid->context); status = ConnectionStatus::ESTABLISHED; break; case FI_SHUTDOWN: SPDLOG_DEBUG( "[RDMAPassive] Disconnect for id {}, and connection {}", - fmt::ptr(entry.fid), fmt::ptr(entry.fid->context) + fmt::ptr(entry->fid), fmt::ptr(entry->fid->context) ); - connection = reinterpret_cast(entry.fid->context); + connection = reinterpret_cast(entry->fid->context); //connection->close(); status = ConnectionStatus::DISCONNECTED; _active_connections.erase(connection); @@ -635,7 +644,7 @@ namespace rdmalib { void RDMAPassive::accept(Connection* connection) { #ifdef USE_LIBFABRIC if(fi_accept(connection->qp(), nullptr, 0)) { - spdlog::error("Conection accept unsuccesful, reason {} {}", errno, strerror(errno)); + spdlog::error("Conection accept unsuccessful, reason {} {}", errno, strerror(errno)); connection = nullptr; } #else diff --git a/rfaas/include/rfaas/executor.hpp b/rfaas/include/rfaas/executor.hpp index 455cb4c..16bb722 100644 --- a/rfaas/include/rfaas/executor.hpp +++ b/rfaas/include/rfaas/executor.hpp @@ -246,22 +246,25 @@ namespace rfaas { auto wc = _connections[0]._rcv_buffer.poll(true); for(int i = 0; i < std::get<1>(wc); ++i) { #ifdef USE_LIBFABRIC - uint32_t val = std::get<0>(wc)[i].data >> 32; + uint64_t val = std::get<0>(wc)[i].data; + int 
return_val = val & 0x0000FFFF; + int finished_invoc_id = val >> 16 & 0x0000FFFF; + int len = val >> 32; #else uint32_t val = ntohl(std::get<0>(wc)[i].imm_data); - #endif int return_val = val & 0x0000FFFF; int finished_invoc_id = val >> 16; + #endif if(finished_invoc_id == invoc_id) { found_result = true; return_value = return_val; #ifdef USE_LIBFABRIC - out_size = std::get<0>(wc)[i].len; + out_size = len; #else out_size = std::get<0>(wc)[i].byte_len; #endif - spdlog::info("Result {} for id {}", return_val, finished_invoc_id); + // spdlog::info("Result {} for id {}", return_val, finished_invoc_id); } else { auto it = _futures.find(finished_invoc_id); //spdlog::info("Poll Future for id {}", finished_invoc_id); diff --git a/server/executor/fast_executor.cpp b/server/executor/fast_executor.cpp index deb4b5d..f2ce634 100644 --- a/server/executor/fast_executor.cpp +++ b/server/executor/fast_executor.cpp @@ -74,7 +74,10 @@ namespace server { //server_processing_times.start(); #ifdef USE_LIBFABRIC fi_cq_data_entry* wc = &std::get<0>(wcs)[i]; - int info = wc->data >> 32; + int func_id = wc->data & invocation_mask; + int invoc_id = (wc->data >> 16) & 0x0000FFFF; + bool solicited = wc->data & solicited_mask; + int len = wc->data >> 32; #else ibv_wc* wc = &std::get<0>(wcs)[i]; if(wc->status) { @@ -82,10 +85,10 @@ namespace server { continue; } int info = ntohl(wc->imm_data); - #endif int func_id = info & invocation_mask; int invoc_id = info >> 16; bool solicited = info & solicited_mask; + #endif SPDLOG_DEBUG( "Thread {} Invoc id {} Execute func {} Repetition {}", id, invoc_id, func_id, repetitions @@ -95,7 +98,7 @@ namespace server { auto now = std::chrono::high_resolution_clock::now(); #ifdef USE_LIBFABRIC auto func_end = work(invoc_id, func_id, solicited, - wc->len - rdmalib::functions::Submission::DATA_HEADER_SIZE + len - rdmalib::functions::Submission::DATA_HEADER_SIZE ); #else auto func_end = work(invoc_id, func_id, solicited, @@ -151,7 +154,10 @@ namespace server { 
//server_processing_times.start(); #ifdef USE_LIBFABRIC fi_cq_data_entry* wc = &std::get<0>(wcs)[i]; - int info = wc->data >> 32; + int func_id = wc->data & invocation_mask; + int invoc_id = (wc->data >> 16) & 0x0000FFFF; + bool solicited = wc->data & solicited_mask; + int len = wc->data >> 32; #else ibv_wc* wc = &std::get<0>(wcs)[i]; if(wc->status) { @@ -159,17 +165,17 @@ namespace server { continue; } int info = ntohl(wc->imm_data); - #endif int func_id = info & invocation_mask; bool solicited = info & solicited_mask; int invoc_id = info >> 16; + #endif SPDLOG_DEBUG( "Thread {} Invoc id {} Execute func {} Repetition {}", id, invoc_id, func_id, repetitions ); #ifdef USE_LIBFABRIC - work(invoc_id, func_id, solicited, wc->len - rdmalib::functions::Submission::DATA_HEADER_SIZE); + work(invoc_id, func_id, solicited, len - rdmalib::functions::Submission::DATA_HEADER_SIZE); #else work(invoc_id, func_id, solicited, wc->byte_len - rdmalib::functions::Submission::DATA_HEADER_SIZE); #endif From e59c70fcf2c6207b5f421dc4bca79e3dcf9b40de Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Mon, 28 Mar 2022 13:05:22 +0200 Subject: [PATCH 18/91] Solve the multiple executors, accounting, secret passing and seg fault bugs --- rdmalib/include/rdmalib/connection.hpp | 4 ++++ rdmalib/lib/connection.cpp | 25 ++++++++++++++----------- rdmalib/lib/rdmalib.cpp | 1 + server/executor/fast_executor.cpp | 8 +++++--- server/executor/server.hpp | 4 ++++ server/executor_manager/client.cpp | 10 +++++----- 6 files changed, 33 insertions(+), 19 deletions(-) diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index 2acbe49..fc180ca 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -137,7 +137,11 @@ namespace rdmalib { bool solicited = false ); int32_t post_cas(ScatterGatherElement && elems, const RemoteBuffer & buf, uint64_t compare, uint64_t swap); + #ifdef USE_LIBFABRIC + int32_t post_atomic_fadd(const 
Buffer & _accounting_buf, const RemoteBuffer & rbuf, uint64_t add); + #else int32_t post_atomic_fadd(ScatterGatherElement && elems, const RemoteBuffer & rbuf, uint64_t add); + #endif // Register to be notified about all events, including unsolicited ones #ifdef USE_LIBFABRIC diff --git a/rdmalib/lib/connection.cpp b/rdmalib/lib/connection.cpp index 5d6782d..83ed58b 100644 --- a/rdmalib/lib/connection.cpp +++ b/rdmalib/lib/connection.cpp @@ -338,7 +338,7 @@ namespace rdmalib { } else SPDLOG_DEBUG("Batched receive on connection {} num_sge {}", fmt::ptr(this), begin.size()); } - ret = fi_recvv(_qp, begin.array(), begin.lkeys(), begin.size(), NULL, nullptr); + ret = fi_recv(_qp, begin.array()->iov_base, begin.array()->iov_len, begin.lkeys()[0], NULL, reinterpret_cast(j)); if(ret) break; } @@ -355,7 +355,7 @@ namespace rdmalib { } else SPDLOG_DEBUG("Batched receive on connection {} num_sge {}", fmt::ptr(this), begin.size()); } - ret = fi_recvv(_qp, begin.array(), begin.lkeys(), begin.size(), NULL, nullptr); + ret = fi_recv(_qp, begin.array()->iov_base, begin.array()->iov_len, begin.lkeys()[0], NULL, reinterpret_cast(j)); if(ret) break; } @@ -488,7 +488,7 @@ namespace rdmalib { if(elems.size() > 0) SPDLOG_DEBUG( "Post write succesfull id: {}, sge size: {}, first lkey {} len {}, remote addr {}, remote rkey {}, imm data {}, connection {}", - count, count, elems.lkeys()[0], elems.array()[0].iov_len, rbuf.addr, rbuf.rkey, data, fmt::ptr(this) + id, count, elems.lkeys()[0], elems.array()[0].iov_len, rbuf.addr, rbuf.rkey, data, fmt::ptr(this) ); else SPDLOG_DEBUG( @@ -612,22 +612,24 @@ namespace rdmalib { #endif } - int32_t Connection::post_atomic_fadd(ScatterGatherElement && elems, const RemoteBuffer & rbuf, uint64_t add) + #ifdef USE_LIBFABRIC + int32_t Connection::post_atomic_fadd(const Buffer & _accounting_buf, const RemoteBuffer & rbuf, uint64_t add) { - #ifdef USE_LIBFABRIC - fi_addr_t temp = 0; int32_t id = _req_count++; - memcpy(elems.array()[0].iov_base, &add, 
sizeof(add)); - int ret = fi_atomic(_qp, &elems.array()[0], 1, elems.lkeys()[0], temp, rbuf.addr, rbuf.rkey, FI_UINT64, FI_SUM, reinterpret_cast((uint64_t)id)); + memcpy(_accounting_buf.data(), &add, sizeof(add)); + int ret = fi_atomic(_qp, _accounting_buf.data(), 1, _accounting_buf.lkey(), NULL, rbuf.addr, rbuf.rkey, FI_UINT64, FI_SUM, reinterpret_cast((uint64_t)id)); if(ret) { spdlog::error("Post write unsuccesful, reason {} {}", errno, strerror(errno)); return -1; } SPDLOG_DEBUG( - "Post atomic fadd succesfull id: {}, remote addr {}, remote rkey {}, val {}, connection {}", id, rbuf.addr, rbuf.rkey, add, fmt::ptr(this) + "Post atomic fadd succesfull id: {}, remote addr {}, remote rkey {}, val {}, connection {}", id, rbuf.addr, rbuf.rkey, *_accounting_buf.data(), fmt::ptr(this) ); return _req_count - 1; - #else + } + #else + int32_t Connection::post_atomic_fadd(ScatterGatherElement && elems, const RemoteBuffer & rbuf, uint64_t add) + { ibv_send_wr wr, *bad; memset(&wr, 0, sizeof(wr)); wr.wr_id = _req_count++; @@ -649,8 +651,9 @@ namespace rdmalib { "Post atomic fadd succesfull id: {}, remote addr {}, remote rkey {}, val {}", wr.wr_id, wr.wr.rdma.remote_addr, wr.wr.rdma.rkey, wr.wr.atomic.compare_add ); return _req_count - 1; - #endif } + #endif + #ifdef USE_LIBFABRIC std::tuple Connection::poll_wc(QueueType type, bool blocking, int count) diff --git a/rdmalib/lib/rdmalib.cpp b/rdmalib/lib/rdmalib.cpp index 59e25f3..5c88311 100644 --- a/rdmalib/lib/rdmalib.cpp +++ b/rdmalib/lib/rdmalib.cpp @@ -309,6 +309,7 @@ namespace rdmalib { #ifdef USE_LIBFABRIC // TODO: Add the disconnectin id spdlog::debug("[RDMAActive] Disconnecting connection with id {}", fmt::ptr(&_conn->qp()->fid)); + _conn->close(); _conn.reset(); _pd = nullptr; #else diff --git a/server/executor/fast_executor.cpp b/server/executor/fast_executor.cpp index f2ce634..2b11ab2 100644 --- a/server/executor/fast_executor.cpp +++ b/server/executor/fast_executor.cpp @@ -213,7 +213,7 @@ namespace server { 
mgr_connection.allocate(); this->_mgr_connection = &mgr_connection.connection(); #ifdef USE_LIBFABRIC - _accounting_buf.register_memory(mgr_connection.pd(), FI_WRITE | FI_REMOTE_WRITE); + _accounting_buf.register_memory(mgr_connection.pd(), FI_READ | FI_WRITE | FI_REMOTE_WRITE); #else _accounting_buf.register_memory(mgr_connection.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_ATOMIC); #endif @@ -230,7 +230,7 @@ namespace server { // Receive function data from the client - this WC must be posted first // We do it before connection to ensure that client does not start sending before us #ifdef USE_LIBFABRIC - func_buffer.register_memory(active.pd(), FI_WRITE | FI_REMOTE_WRITE); + func_buffer.register_memory(active.pd(), FI_READ | FI_WRITE | FI_REMOTE_WRITE); #else func_buffer.register_memory(active.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); #endif @@ -296,13 +296,15 @@ namespace server { // Submit final accounting information _accounting.send_updated_execution(_mgr_connection, _accounting_buf, _mgr_conn, true, false); _accounting.send_updated_polling(_mgr_connection, _accounting_buf, _mgr_conn, true, false); + #ifndef USE_LIBFABRIC mgr_connection.connection().poll_wc(rdmalib::QueueType::SEND, true, 2); + #endif spdlog::info( "Thread {} finished work, spent {} ns hot polling and {} ns computation, {} executions.", id, _accounting.total_hot_polling_time , _accounting.total_execution_time, repetitions ); // FIXME: revert after manager starts to detect disconnection events - //mgr_connection.disconnect(); + // mgr_connection.disconnect(); } FastExecutors::FastExecutors(std::string client_addr, int port, diff --git a/server/executor/server.hpp b/server/executor/server.hpp index c9f536f..dab5b6a 100644 --- a/server/executor/server.hpp +++ b/server/executor/server.hpp @@ -58,7 +58,11 @@ namespace server { int mgr_port; int mgr_secret; uint64_t accounting_buffer_addr; + #ifdef USE_LIBFABRIC + uint64_t accounting_buffer_rkey; + #else uint32_t 
accounting_buffer_rkey; + #endif }; Options opts(int argc, char ** argv); diff --git a/server/executor_manager/client.cpp b/server/executor_manager/client.cpp index 4f7c013..651e110 100644 --- a/server/executor_manager/client.cpp +++ b/server/executor_manager/client.cpp @@ -30,13 +30,13 @@ namespace rfaas::executor_manager { // Make the buffer accessible to clients memset(accounting.data(), 0, accounting.data_size()); #ifdef USE_LIBFABRIC - accounting.register_memory(pd, FI_WRITE | FI_REMOTE_WRITE); + accounting.register_memory(pd, FI_READ | FI_WRITE | FI_REMOTE_WRITE); #else accounting.register_memory(pd, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_ATOMIC); #endif // Make the buffer accessible to clients #ifdef USE_LIBFABRIC - allocation_requests.register_memory(pd, FI_WRITE | FI_REMOTE_WRITE); + allocation_requests.register_memory(pd, FI_READ | FI_WRITE | FI_REMOTE_WRITE); #else allocation_requests.register_memory(pd, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); #endif @@ -60,9 +60,7 @@ namespace rfaas::executor_manager { void Client::disable(int id) { - #ifdef USE_LIBFABRIC - fi_shutdown(connection->qp(), 0); - #else + #ifndef USE_LIBFABRIC rdma_disconnect(connection->id()); #endif SPDLOG_DEBUG( @@ -89,7 +87,9 @@ namespace rfaas::executor_manager { //acc.hot_polling_time = acc.execution_time = 0; // SEGFAULT? 
//ibv_dereg_mr(allocation_requests._mr); + #ifndef USE_LIBFABRIC connection->close(); + #endif delete connection; connection = nullptr; _active=false; From 96be04384e72c507eb64651b601da0ef17271d38 Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Mon, 28 Mar 2022 14:10:41 +0200 Subject: [PATCH 19/91] Add better resource cleaning and remove the verbose executor flag --- rdmalib/lib/connection.cpp | 28 +++++++++++++------- server/executor_manager/executor_process.cpp | 1 - 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/rdmalib/lib/connection.cpp b/rdmalib/lib/connection.cpp index 83ed58b..77c891f 100644 --- a/rdmalib/lib/connection.cpp +++ b/rdmalib/lib/connection.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -176,15 +177,24 @@ namespace rdmalib { #ifdef USE_LIBFABRIC SPDLOG_DEBUG("Connection close called for {} with qp fid {}", fmt::ptr(this), fmt::ptr(&this->_qp->fid)); // We need to close the transmit and receive channels and the endpoint - if (_rcv_channel) - impl::expect_zero(fi_close(&_rcv_channel->fid)); - if (_trx_channel) - impl::expect_zero(fi_close(&_trx_channel->fid)); - if (_wait_set) - impl::expect_zero(fi_close(&_wait_set->fid)); - if (_qp) { - impl::expect_zero(fi_shutdown(_qp, 0)); - impl::expect_zero(fi_close(&_qp->fid)); + if (_status != ConnectionStatus::DISCONNECTED) { + if (_rcv_channel) { + impl::expect_zero(fi_close(&_rcv_channel->fid)); + _rcv_channel = nullptr; + } + if (_trx_channel) { + impl::expect_zero(fi_close(&_trx_channel->fid)); + _trx_channel = nullptr; + } + if (_wait_set) { + impl::expect_zero(fi_close(&_wait_set->fid)); + _wait_set = nullptr; + } + if (_qp) { + impl::expect_zero(fi_shutdown(_qp, 0)); + impl::expect_zero(fi_close(&_qp->fid)); + _qp = nullptr; + } _status = ConnectionStatus::DISCONNECTED; } #else diff --git a/server/executor_manager/executor_process.cpp b/server/executor_manager/executor_process.cpp index b00fb71..afdbf2d 100644 --- 
a/server/executor_manager/executor_process.cpp +++ b/server/executor_manager/executor_process.cpp @@ -120,7 +120,6 @@ namespace rfaas::executor_manager { "--mgr-secret", mgr_secret.c_str(), "--mgr-buf-addr", mgr_buf_addr.c_str(), "--mgr-buf-rkey", mgr_buf_rkey.c_str(), - "-v", nullptr }; int ret = execvp(argv[0], const_cast(&argv[0])); From 1d6f1ae745cebc49cf5d300659132edb27654594 Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Tue, 29 Mar 2022 11:46:31 +0200 Subject: [PATCH 20/91] Make write faster and add performance tests --- rdmalib/include/rdmalib/benchmarker.hpp | 35 ++++++++++++++ rdmalib/include/rdmalib/connection.hpp | 20 ++++++++ rfaas/include/rfaas/executor.hpp | 61 +++++++++++++++++++++++++ rfaas/lib/executor.cpp | 7 ++- server/executor/fast_executor.cpp | 28 +++++++++--- server/executor/fast_executor.hpp | 5 +- 6 files changed, 145 insertions(+), 11 deletions(-) diff --git a/rdmalib/include/rdmalib/benchmarker.hpp b/rdmalib/include/rdmalib/benchmarker.hpp index 2d90a74..af0941e 100644 --- a/rdmalib/include/rdmalib/benchmarker.hpp +++ b/rdmalib/include/rdmalib/benchmarker.hpp @@ -84,6 +84,41 @@ namespace rdmalib { }; +template + struct PerfBenchmarker { + std::vector> _measurements; + std::chrono::time_point _point; + + PerfBenchmarker(int measurements) + { + _measurements.reserve(measurements); + } + + inline void point(int col = 0) + { + if(col == 0) + _measurements.emplace_back(); + _measurements.back()[col] = std::chrono::high_resolution_clock::now().time_since_epoch().count(); + } + + void export_csv(std::string fname, const std::array & headers) + { + std::ofstream of(fname); + of << "id"; + for(int j = 0; j < Cols; ++j) + of << ',' << headers[j]; + of << '\n'; + + for(size_t i = 0; i < _measurements.size(); ++i) { + of << i; + for(int j = 0; j < Cols; ++j) + of << ',' << _measurements[i][j]; + of << '\n'; + } + } + + }; + } #endif diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index 
fc180ca..dcdc95b 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -13,6 +13,7 @@ #ifdef USE_LIBFABRIC #include #include +#include #else #include @@ -138,6 +139,25 @@ namespace rdmalib { ); int32_t post_cas(ScatterGatherElement && elems, const RemoteBuffer & buf, uint64_t compare, uint64_t swap); #ifdef USE_LIBFABRIC + template inline int32_t post_write(const Buffer & buf, const size_t size, const uint64_t offset, const RemoteBuffer & rbuf, const uint32_t immediate) { + int ret = fi_writedata(_qp, (void *)(buf.address() + offset), size, buf.lkey(), immediate + (size << 32), NULL, rbuf.addr, rbuf.rkey, (void *)(_req_count++)); + if(ret) { + spdlog::error("Post write unsuccessful, reason {} {}, buf size {}, id {}, remote addr {}, remote rkey {}, imm data {}, connection {}", + ret, strerror(ret), size, _req_count, rbuf.addr, rbuf.rkey, immediate + (size << 32), fmt::ptr(this) + ); + return -1; + } + if(size > 0) + SPDLOG_DEBUG( + "Post write succesfull id: {}, buf size: {}, lkey {}, remote addr {}, remote rkey {}, imm data {}, connection {}", + _req_count, buf.bytes(), fmt::ptr(buf.lkey()), rbuf.addr, rbuf.rkey, immediate + (size << 32), fmt::ptr(this) + ); + else + SPDLOG_DEBUG( + "Post write succesfull id: {}, remote addr {}, remote rkey {}, imm data {}, connection {}", _req_count, rbuf.addr, rbuf.rkey, immediate + (size << 32), fmt::ptr(this) + ); + return _req_count - 1; + } int32_t post_atomic_fadd(const Buffer & _accounting_buf, const RemoteBuffer & rbuf, uint64_t add); #else int32_t post_atomic_fadd(ScatterGatherElement && elems, const RemoteBuffer & rbuf, uint64_t add); diff --git a/rfaas/include/rfaas/executor.hpp b/rfaas/include/rfaas/executor.hpp index 16bb722..e85edb7 100644 --- a/rfaas/include/rfaas/executor.hpp +++ b/rfaas/include/rfaas/executor.hpp @@ -64,6 +64,7 @@ namespace rfaas { std::vector _connections; std::unique_ptr _exec_manager; std::vector _func_names; + rdmalib::PerfBenchmarker<8> _perf; // 
manage async executions std::atomic _end_requested; @@ -113,6 +114,17 @@ namespace rfaas { func_idx, invoc_id, submission_id ); if(size != -1) { + #ifdef USE_LIBFABRIC + rdmalib::ScatterGatherElement sge; + sge.add(in, size, 0); + _connections[0].conn->post_write( + in, + size, + 0, + _connections[0].remote_input, + submission_id + ); + #else rdmalib::ScatterGatherElement sge; sge.add(in, size, 0); _connections[0].conn->post_write( @@ -122,7 +134,17 @@ namespace rfaas { size <= _max_inlined_msg, true ); + #endif } else { + #ifdef USE_LIBFABRIC + _connections[0].conn->post_write( + in, + in.bytes(), + 0, + _connections[0].remote_input, + submission_id + ); + #else _connections[0].conn->post_write( in, _connections[0].remote_input, @@ -130,6 +152,7 @@ namespace rfaas { in.bytes() <= _max_inlined_msg, true ); + #endif } _connections[0]._rcv_buffer.refill(); return std::get<1>(_futures[invoc_id]).get_future(); @@ -162,6 +185,15 @@ namespace rfaas { #endif SPDLOG_DEBUG("Invoke function {} with invocation id {}", func_idx, _invoc_id); + #ifdef USE_LIBFABRIC + _connections[i].conn->post_write( + in[i], + in[i].bytes(), + 0, + _connections[i].remote_input, + submission_id + ); + #else _connections[i].conn->post_write( in[i], _connections[i].remote_input, @@ -169,6 +201,7 @@ namespace rfaas { in[i].bytes() <= _max_inlined_msg, true ); + #endif } for(int i = 0; i < numcores; ++i) { @@ -208,6 +241,7 @@ namespace rfaas { template std::tuple execute(std::string fname, const rdmalib::Buffer & in, rdmalib::Buffer & out) { + _perf.point(); auto it = std::find(_func_names.begin(), _func_names.end(), fname); if(it == _func_names.end()) { spdlog::error("Function {} not found in the deployed library!", fname); @@ -230,14 +264,27 @@ namespace rfaas { "Invoke function {} with invocation id {}, submission id {}", func_idx, invoc_id, (invoc_id << 16) | func_idx ); + _perf.point(1); + #ifdef USE_LIBFABRIC + _connections[0].conn->post_write( + in, + in.bytes(), + 0, + 
_connections[0].remote_input, + (invoc_id << 16) | func_idx + ); + #else _connections[0].conn->post_write( in, _connections[0].remote_input, (invoc_id << 16) | func_idx, in.bytes() <= _max_inlined_msg ); + #endif _active_polling = true; + _perf.point(2); _connections[0]._rcv_buffer.refill(); + _perf.point(3); bool found_result = false; int return_value = 0; @@ -246,6 +293,7 @@ namespace rfaas { auto wc = _connections[0]._rcv_buffer.poll(true); for(int i = 0; i < std::get<1>(wc); ++i) { #ifdef USE_LIBFABRIC + _perf.point(4); uint64_t val = std::get<0>(wc)[i].data; int return_val = val & 0x0000FFFF; int finished_invoc_id = val >> 16 & 0x0000FFFF; @@ -275,6 +323,7 @@ namespace rfaas { } } if(found_result) { + _perf.point(5); _active_polling = false; auto wc = _connections[0]._rcv_buffer.poll(false); // Catch very unlikely interleaving @@ -296,9 +345,11 @@ namespace rfaas { if(!--std::get<0>(it->second)) std::get<1>(it->second).set_value(return_val); } + _perf.point(6); } } _connections[0].conn->poll_wc(rdmalib::QueueType::SEND, false); + _perf.point(7); if(return_value == 0) { SPDLOG_DEBUG("Finished invocation {} succesfully", invoc_id); return std::make_tuple(true, out_size); @@ -334,12 +385,22 @@ namespace rfaas { #endif SPDLOG_DEBUG("Invoke function {} with invocation id {}", func_idx, _invoc_id); + #ifdef USE_LIBFABRIC + _connections[i].conn->post_write( + in[i], + in[i].bytes(), + 0, + _connections[i].remote_input, + (_invoc_id++ << 16) | func_idx + ); + #else _connections[i].conn->post_write( in[i], _connections[i].remote_input, (_invoc_id++ << 16) | func_idx, in[i].bytes() <= _max_inlined_msg ); + #endif } for(int i = 0; i < numcores; ++i) { diff --git a/rfaas/lib/executor.cpp b/rfaas/lib/executor.cpp index d515963..3dcacca 100644 --- a/rfaas/lib/executor.cpp +++ b/rfaas/lib/executor.cpp @@ -51,7 +51,8 @@ namespace rfaas { #else _invoc_id(0), #endif - _max_inlined_msg(max_inlined_msg) + _max_inlined_msg(max_inlined_msg), + _perf(1000) { #ifdef USE_LIBFABRIC 
_execs_buf.register_memory(_state.pd(), FI_WRITE | FI_REMOTE_WRITE); @@ -70,6 +71,7 @@ namespace rfaas { executor::~executor() { this->deallocate(); + _perf.export_csv("client_perf.csv", {"start", "function parsed", "function post written", "buffer refilled", "received result", "parsed result", "catched unlikely case", "polled send"}); } rdmalib::Buffer executor::load_library(std::string path) @@ -383,9 +385,6 @@ namespace rfaas { #else int id = std::get<0>(wcs)[i].wr_id; #endif - for (int j = 0; j < _execs_buf.data_size(); j++) - std::cout << std::hex << ((char *)_execs_buf.data())[j]; - std::cout << std::endl; SPDLOG_DEBUG( "Received buffer details for thread, id {}, addr {}, rkey {}", id, _execs_buf.data()[id].r_addr, _execs_buf.data()[id].r_key diff --git a/server/executor/fast_executor.cpp b/server/executor/fast_executor.cpp index 2b11ab2..1b7a6fd 100644 --- a/server/executor/fast_executor.cpp +++ b/server/executor/fast_executor.cpp @@ -36,12 +36,22 @@ namespace server { ); auto start = std::chrono::high_resolution_clock::now(); // Data to ignore header passed in the buffer + _perf.point(2); uint32_t out_size = (*ptr)(rcv.data(), in_size, send.ptr()); SPDLOG_DEBUG("Thread {} finished work!", id); - + _perf.point(3); // Send back: the value of immediate write // first 16 bytes - invocation id // second 16 bytes - return value (0 on no error) + #ifdef USE_LIBFABRIC + conn->post_write( + send, + out_size, + 0, + {header->r_address, header->r_key}, + (invoc_id << 16) | 0 + ); + #else conn->post_write( send.sge(out_size, 0), {header->r_address, header->r_key}, @@ -49,9 +59,12 @@ namespace server { out_size <= max_inline_data, solicited ); + #endif + _perf.point(4); auto end = std::chrono::high_resolution_clock::now(); _accounting.update_execution_time(start, end); _accounting.send_updated_execution(_mgr_connection, _accounting_buf, _mgr_conn); + _perf.point(5); //int cpu = sched_getcpu(); //spdlog::info("Execution + sent took {} us on {} CPU", 
std::chrono::duration_cast(end-start).count(), cpu); return end; @@ -61,7 +74,6 @@ namespace server { { //rdmalib::Benchmarker<1> server_processing_times{max_repetitions}; SPDLOG_DEBUG("Thread {} Begins hot polling", id); - auto start = std::chrono::high_resolution_clock::now(); int i = 0; while(repetitions < max_repetitions) { @@ -70,7 +82,7 @@ namespace server { auto wcs = wc_buffer.poll(); if(std::get<1>(wcs)) { for(int i = 0; i < std::get<1>(wcs); ++i) { - + _perf.point(); //server_processing_times.start(); #ifdef USE_LIBFABRIC fi_cq_data_entry* wc = &std::get<0>(wcs)[i]; @@ -93,7 +105,7 @@ namespace server { "Thread {} Invoc id {} Execute func {} Repetition {}", id, invoc_id, func_id, repetitions ); - + _perf.point(1); // Measure hot polling time until we started execution auto now = std::chrono::high_resolution_clock::now(); #ifdef USE_LIBFABRIC @@ -108,12 +120,14 @@ namespace server { _accounting.update_polling_time(start, now); i = 0; start = func_end; - + _perf.point(6); //sum += server_processing_times.end(); conn->poll_wc(rdmalib::QueueType::SEND, true); repetitions += 1; + _perf.point(7); } wc_buffer.refill(); + _perf.point(8); } ++i; @@ -356,12 +370,14 @@ namespace server { thread.join(); SPDLOG_DEBUG("Finished wait on {} threads", _threads.size()); - for(auto & thread : _threads_data) + for(auto & thread : _threads_data) { + thread._perf.export_csv("executor_perf.csv", {"found request", "parsed request", "obtained the header and function", "finished executing", "results post written", "accounting updated", "polling accounting updated", "send queue polled", "buffer refilled"}); spdlog::info("Thread {} Repetitions {} Avg time {} ms", thread.id, thread.repetitions, static_cast(thread._accounting.total_execution_time) / thread.repetitions / 1000.0 ); + } _closing = true; } diff --git a/server/executor/fast_executor.hpp b/server/executor/fast_executor.hpp index 5a25e79..bb1b67c 100644 --- a/server/executor/fast_executor.hpp +++ 
b/server/executor/fast_executor.hpp @@ -2,6 +2,7 @@ #ifndef __SERVER_FASTEXECUTORS_HPP__ #define __SERVER_FASTEXECUTORS_HPP__ +#include "rdmalib/benchmarker.hpp" #include "rdmalib/rdmalib.hpp" #include #include @@ -125,6 +126,7 @@ namespace server { const executor::ManagerConnection & _mgr_conn; Accounting _accounting; rdmalib::Buffer _accounting_buf; + rdmalib::PerfBenchmarker<9> _perf; // FIXME: Adjust to billing granularity constexpr static int HOT_POLLING_VERIFICATION_PERIOD = 10000; PollingState _polling_state; @@ -147,7 +149,8 @@ namespace server { conn(nullptr), _mgr_conn(mgr_conn), _accounting({0,0,0,0}), - _accounting_buf(1) + _accounting_buf(1), + _perf(1000) { } From ff1b483af5861ccc6e02b1b26becf2d6b3fabf4d Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Thu, 31 Mar 2022 01:19:58 +0200 Subject: [PATCH 21/91] Performance improvements and adding support for more performant waits --- rdmalib/include/rdmalib/benchmarker copy.hpp | 90 ++++++++++++++++++++ rdmalib/include/rdmalib/connection.hpp | 5 +- rdmalib/lib/connection.cpp | 45 +++++----- rdmalib/lib/rdmalib.cpp | 8 +- rfaas/include/rfaas/executor.hpp | 28 +++++- rfaas/lib/executor.cpp | 19 +++-- server/executor/fast_executor.cpp | 17 +++- 7 files changed, 171 insertions(+), 41 deletions(-) create mode 100644 rdmalib/include/rdmalib/benchmarker copy.hpp diff --git a/rdmalib/include/rdmalib/benchmarker copy.hpp b/rdmalib/include/rdmalib/benchmarker copy.hpp new file mode 100644 index 0000000..2d90a74 --- /dev/null +++ b/rdmalib/include/rdmalib/benchmarker copy.hpp @@ -0,0 +1,90 @@ + +#ifndef __RDMALIB_BENCHMARKER_HPP__ +#define __RDMALIB_BENCHMARKER_HPP__ + +#include +#include +#include +#include +#include +#include + +//#include + +namespace rdmalib { + + template + struct Benchmarker { + std::vector> _measurements; + std::chrono::time_point _start, _end; + + Benchmarker(int measurements) + { + _measurements.reserve(measurements); + } + + inline void start() + { + _start = 
std::chrono::high_resolution_clock::now(); + } + + inline uint64_t end(int col = 0) + { + _end = std::chrono::high_resolution_clock::now(); + uint64_t duration = std::chrono::duration_cast(_end - _start).count(); + if(col == 0) + _measurements.emplace_back(); + _measurements.back()[col] = duration; + return duration; + } + + void remove_last() + { + _measurements.pop_back(); + } + + std::tuple summary(int idx = 0) + { + // FIXME: reenable + long sum = std::accumulate(_measurements.begin(), _measurements.end(), 0L, + [idx](long x, const std::array & y) { + return x + y[idx]; + } + ); + double avg = static_cast(sum) / _measurements.size(); + + //// compute median + //// let's just ignore the rule that for even size we should take an average of middle elements + int middle = _measurements.size() / 2; + std::nth_element(_measurements.begin(), _measurements.begin() + middle, _measurements.end(), + [idx](const std::array & x, const std::array & y) { + return x[idx] < y[idx]; + } + ); + int median = _measurements[middle][idx]; + + return std::make_tuple(static_cast(median) / 1000, avg / 1000); + } + + void export_csv(std::string fname, const std::array & headers) + { + std::ofstream of(fname); + of << "id"; + for(int j = 0; j < Cols; ++j) + of << ',' << headers[j]; + of << '\n'; + + for(size_t i = 0; i < _measurements.size(); ++i) { + of << i; + for(int j = 0; j < Cols; ++j) + of << ',' << _measurements[i][j]; + of << '\n'; + } + } + + }; + +} + +#endif + diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index dcdc95b..9aca752 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -58,7 +58,8 @@ namespace rdmalib { fid_ep* _qp; fid_cq* _rcv_channel; fid_cq* _trx_channel; - fid_wait* _wait_set; + fid_cntr* _write_counter; + uint64_t _counter; #else rdma_cm_id* _id; ibv_qp* _qp; @@ -165,7 +166,7 @@ namespace rdmalib { // Register to be notified about all events, including unsolicited 
ones #ifdef USE_LIBFABRIC - void wait_events(); + int wait_events(int timeout = -1); #else void notify_events(bool only_solicited = false); ibv_cq* wait_events(); diff --git a/rdmalib/lib/connection.cpp b/rdmalib/lib/connection.cpp index 77c891f..34a9af1 100644 --- a/rdmalib/lib/connection.cpp +++ b/rdmalib/lib/connection.cpp @@ -36,7 +36,7 @@ namespace rdmalib { #ifdef USE_LIBFABRIC _rcv_channel(nullptr), _trx_channel(nullptr), - _wait_set(nullptr), + _write_counter(nullptr), #else _id(nullptr), _channel(nullptr), @@ -79,7 +79,7 @@ namespace rdmalib { #ifdef USE_LIBFABRIC _rcv_channel(obj._rcv_channel), _trx_channel(obj._trx_channel), - _wait_set(nullptr), + _write_counter(nullptr), #else _id(obj._id), _channel(obj._channel), @@ -126,25 +126,27 @@ namespace rdmalib { // Create the endpoint and set its flags up so that we get completions on RDM impl::expect_zero(fi_endpoint(pd, info, &_qp, reinterpret_cast(this))); - // Open the waitset - fi_wait_attr wait_attr; - wait_attr.wait_obj = FI_WAIT_UNSPEC; - wait_attr.flags = 0; - fi_wait_open(fabric, &wait_attr, &_wait_set); + // Open the counter for write operations + fi_cntr_attr cntr_attr; + cntr_attr.events = FI_CNTR_EVENTS_COMP; + cntr_attr.wait_obj = FI_WAIT_UNSPEC; + cntr_attr.wait_set = nullptr; + cntr_attr.flags = 0; + impl::expect_zero(fi_cntr_open(pd, &cntr_attr, &_write_counter, nullptr)); + impl::expect_zero(fi_cntr_set(_write_counter, 0)); + impl::expect_zero(fi_ep_bind(_qp, &_write_counter->fid, FI_REMOTE_WRITE)); // Bind with the completion queues and the event queue impl::expect_zero(fi_ep_bind(_qp, &ec->fid, 0)); fi_cq_attr cq_attr; memset(&cq_attr, 0, sizeof(cq_attr)); cq_attr.format = FI_CQ_FORMAT_DATA; - cq_attr.wait_obj = FI_WAIT_UNSPEC; + cq_attr.wait_obj = FI_WAIT_NONE; cq_attr.wait_cond = FI_CQ_COND_NONE; cq_attr.wait_set = nullptr; + cq_attr.size = info->rx_attr->size; impl::expect_zero(fi_cq_open(pd, &cq_attr, &_trx_channel, nullptr)); impl::expect_zero(fi_ep_bind(_qp, &_trx_channel->fid, 
FI_TRANSMIT)); - - // Connect the wait set to the receive queue - cq_attr.wait_set = _wait_set; impl::expect_zero(fi_cq_open(pd, &cq_attr, &_rcv_channel, nullptr)); impl::expect_zero(fi_ep_bind(_qp, &_rcv_channel->fid, FI_RECV)); @@ -186,9 +188,9 @@ namespace rdmalib { impl::expect_zero(fi_close(&_trx_channel->fid)); _trx_channel = nullptr; } - if (_wait_set) { - impl::expect_zero(fi_close(&_wait_set->fid)); - _wait_set = nullptr; + if (_write_counter) { + impl::expect_zero(fi_close(&_write_counter->fid)); + _write_counter = nullptr; } if (_qp) { impl::expect_zero(fi_shutdown(_qp, 0)); @@ -245,13 +247,6 @@ namespace rdmalib { } #endif - #ifdef USE_LIBFABRIC - fid_wait* Connection::wait_set() const - { - return this->_wait_set; - } - #endif - #ifdef USE_LIBFABRIC fid_cq* Connection::receive_completion_channel() const { @@ -696,10 +691,12 @@ namespace rdmalib { spdlog::error("Failure of polling events from: {} queue connection {}! Return value {} message {} errno {}", type == QueueType::RECV ? "recv" : "send", fmt::ptr(this), ret, fi_strerror(std::abs(ret)), errno); return std::make_tuple(nullptr, -1); } - if(ret > 0) + if(ret > 0) { + _counter += ret; for(int i = 0; i < ret; ++i) { SPDLOG_DEBUG("Connection {} Queue {} Ret {}/{} WC {}", fmt::ptr(this), type == QueueType::RECV ? "recv" : "send", i + 1, ret, reinterpret_cast(wcs[i].op_context)); } + } return std::make_tuple(wcs, ret == -EAGAIN ? 
0 : ret); } #else @@ -744,9 +741,9 @@ namespace rdmalib { #endif #ifdef USE_LIBFABRIC - void Connection::wait_events() + int Connection::wait_events(int timeout) { - impl::expect_zero(fi_wait(_wait_set, -1)); + return fi_cntr_wait(_write_counter, _counter+1, timeout); } #else ibv_cq* Connection::wait_events() diff --git a/rdmalib/lib/rdmalib.cpp b/rdmalib/lib/rdmalib.cpp index 5c88311..82f5b94 100644 --- a/rdmalib/lib/rdmalib.cpp +++ b/rdmalib/lib/rdmalib.cpp @@ -40,9 +40,12 @@ namespace rdmalib { // Set the hints to have ability to conduct MSG, Atomic and RMA operations hints->caps |= FI_MSG | FI_RMA | FI_ATOMIC | FI_RMA_EVENT; // Set the hints to indicate that we will register the local buffers - hints->domain_attr->mr_mode = FI_MR_BASIC; // FI_MR_LOCAL | FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY; + hints->domain_attr->mr_mode = FI_MR_LOCAL | FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY; hints->ep_attr->type = FI_EP_MSG; hints->fabric_attr->prov_name = strdup("GNI"); + hints->domain_attr->threading = FI_THREAD_DOMAIN; + hints->domain_attr->resource_mgmt = FI_RM_ENABLED; + hints->tx_attr->tclass = FI_TC_LOW_LATENCY; impl::expect_zero(fi_getinfo(FI_VERSION(1, 9), ip.c_str(), std::to_string(port).c_str(), passive ? 
FI_SOURCE : 0, hints, &addrinfo)); fi_freeinfo(hints); impl::expect_zero(fi_fabric(addrinfo->fabric_attr, &fabric, nullptr)); @@ -180,8 +183,7 @@ namespace rdmalib { // Enable the event queue fi_eq_attr eq_attr; memset(&eq_attr, 0, sizeof(eq_attr)); - eq_attr.size = 42; - eq_attr.wait_obj = FI_WAIT_UNSPEC; + eq_attr.wait_obj = FI_WAIT_NONE; impl::expect_zero(fi_eq_open(_addr.fabric, &eq_attr, &_ec, NULL)); // Create and enable the endpoint together with all the accompanying queues _conn->initialize(_addr.fabric, _pd, _addr.addrinfo, _ec); diff --git a/rfaas/include/rfaas/executor.hpp b/rfaas/include/rfaas/executor.hpp index e85edb7..afadaa8 100644 --- a/rfaas/include/rfaas/executor.hpp +++ b/rfaas/include/rfaas/executor.hpp @@ -154,7 +154,9 @@ namespace rfaas { ); #endif } + #ifndef USE_LIBFABRIC _connections[0]._rcv_buffer.refill(); + #endif return std::get<1>(_futures[invoc_id]).get_future(); } @@ -204,9 +206,11 @@ namespace rfaas { #endif } + #ifndef USE_LIBFABRIC for(int i = 0; i < numcores; ++i) { _connections[i]._rcv_buffer.refill(); } + #endif return std::get<1>(_futures[invoc_id]).get_future(); } @@ -214,9 +218,13 @@ namespace rfaas { { _connections[0].conn->poll_wc(rdmalib::QueueType::SEND, true); + #ifdef USE_LIBFABRIC + auto wc = _connections[0].conn->poll_wc(rdmalib::QueueType::RECV, true); + #else auto wc = _connections[0]._rcv_buffer.poll(true); + #endif #ifdef USE_LIBFABRIC - uint32_t val = std::get<0>(wc)[0].data >> 32; + uint32_t val = std::get<0>(wc)[0].data; #else uint32_t val = ntohl(std::get<0>(wc)[0].imm_data); #endif @@ -283,14 +291,20 @@ namespace rfaas { #endif _active_polling = true; _perf.point(2); + #ifndef USE_LIBFABRIC _connections[0]._rcv_buffer.refill(); + #endif _perf.point(3); bool found_result = false; int return_value = 0; int out_size = 0; while(!found_result) { + #ifdef USE_LIBFABRIC + auto wc = _connections[0].conn->poll_wc(rdmalib::QueueType::RECV, true); + #else auto wc = _connections[0]._rcv_buffer.poll(true); + #endif 
for(int i = 0; i < std::get<1>(wc); ++i) { #ifdef USE_LIBFABRIC _perf.point(4); @@ -325,6 +339,7 @@ namespace rfaas { if(found_result) { _perf.point(5); _active_polling = false; + #ifndef USE_LIBFABRIC auto wc = _connections[0]._rcv_buffer.poll(false); // Catch very unlikely interleaving // Event arrives after we poll while the background thread is skipping @@ -332,7 +347,7 @@ namespace rfaas { // Thus, we later unset the variable since we're done for(int i = 0; i < std::get<1>(wc); ++i) { #ifdef USE_LIBFABRIC - uint32_t val = std::get<0>(wc)[i].data >> 32; + uint32_t val = std::get<0>(wc)[i].data; #else uint32_t val = ntohl(std::get<0>(wc)[i].imm_data); #endif @@ -345,6 +360,7 @@ namespace rfaas { if(!--std::get<0>(it->second)) std::get<1>(it->second).set_value(return_val); } + #endif _perf.point(6); } } @@ -403,9 +419,11 @@ namespace rfaas { #endif } + #ifndef USE_LIBFABRIC for(int i = 0; i < numcores; ++i) { _connections[i]._rcv_buffer.refill(); } + #endif int expected = numcores; while(expected) { auto wc = _connections[0].conn->poll_wc(rdmalib::QueueType::SEND, true); @@ -416,12 +434,16 @@ namespace rfaas { bool correct = true; _active_polling = true; while(expected) { + #ifdef USE_LIBFABRIC + auto wc = _connections[0].conn->poll_wc(rdmalib::QueueType::RECV, true); + #else auto wc = _connections[0]._rcv_buffer.poll(true); + #endif SPDLOG_DEBUG("Found data"); expected -= std::get<1>(wc); for(int i = 0; i < std::get<1>(wc); ++i) { #ifdef USE_LIBFABRIC - uint32_t val = std::get<0>(wc)[i].data >> 32; + uint32_t val = std::get<0>(wc)[i].data; #else uint32_t val = ntohl(std::get<0>(wc)[i].imm_data); #endif diff --git a/rfaas/lib/executor.cpp b/rfaas/lib/executor.cpp index 3dcacca..dc2a601 100644 --- a/rfaas/lib/executor.cpp +++ b/rfaas/lib/executor.cpp @@ -1,5 +1,6 @@ #include "rdmalib/rdmalib.hpp" +#include #include #include @@ -17,6 +18,7 @@ #include #include #include +#include namespace rfaas { @@ -177,9 +179,11 @@ namespace rfaas { while(!_end_requested && 
_connections.size()) { #ifdef USE_LIBFABRIC - fi_cq_data_entry entry; do { - rc = fi_wait(_connections[0].conn->wait_set(), 100); + if(_active_polling) + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + else + rc = _connections[0].conn->wait_events(100); if(_end_requested) { spdlog::info("Background thread stops waiting for events"); return; @@ -202,16 +206,20 @@ namespace rfaas { fprintf(stderr, "poll failed\n"); return; } - if(!_end_requested) { + if(!_end_requested && !_active_polling) { #ifndef USE_LIBFABRIC auto cq = _connections[0].conn->wait_events(); _connections[0].conn->notify_events(true); _connections[0].conn->ack_events(cq, 1); #endif + #ifdef USE_LIBFABRIC + auto wc = _connections[0].conn->poll_wc(rdmalib::QueueType::RECV, false); + #else auto wc = _connections[0]._rcv_buffer.poll(false); + #endif for(int i = 0; i < std::get<1>(wc); ++i) { #ifdef USE_LIBFABRIC - uint32_t val = std::get<0>(wc)[i].data >> 32; + uint32_t val = std::get<0>(wc)[i].data; #else uint32_t val = ntohl(std::get<0>(wc)[i].imm_data); #endif @@ -345,13 +353,12 @@ namespace rfaas { ); #ifdef USE_LIBFABRIC this->_connections.back().conn->post_recv(_execs_buf.sge(obj_size, requested*obj_size), requested); - this->_connections.back().conn->initialize_batched_recv(_execs_buf, 0); #else this->_connections.back().conn->post_recv(_execs_buf.sge(obj_size, requested*obj_size), requested); - #endif // FIXME: this should be in a function // FIXME: here it won't work if rcv_bufer_size < numcores this->_connections.back()._rcv_buffer.connect(this->_connections.back().conn.get()); + #endif _state.accept(this->_connections.back().conn.get()); ++requested; } else if(conn_status == rdmalib::ConnectionStatus::ESTABLISHED) { diff --git a/server/executor/fast_executor.cpp b/server/executor/fast_executor.cpp index 1b7a6fd..7482f58 100644 --- a/server/executor/fast_executor.cpp +++ b/server/executor/fast_executor.cpp @@ -79,7 +79,11 @@ namespace server { while(repetitions < max_repetitions) 
{ // if we block, we never handle the interruption + #ifdef USE_LIBFABRIC + auto wcs = conn->poll_wc(rdmalib::QueueType::RECV, false); + #else auto wcs = wc_buffer.poll(); + #endif if(std::get<1>(wcs)) { for(int i = 0; i < std::get<1>(wcs); ++i) { _perf.point(); @@ -126,7 +130,9 @@ namespace server { repetitions += 1; _perf.point(7); } + #ifndef USE_LIBFABRIC wc_buffer.refill(); + #endif _perf.point(8); } ++i; @@ -161,7 +167,11 @@ namespace server { while(repetitions < max_repetitions) { // if we block, we never handle the interruption + #ifdef USE_LIBFABRIC + auto wcs = conn->poll_wc(rdmalib::QueueType::RECV, false); + #else auto wcs = wc_buffer.poll(); + #endif if(std::get<1>(wcs)) { for(int i = 0; i < std::get<1>(wcs); ++i) { @@ -198,7 +208,9 @@ namespace server { conn->poll_wc(rdmalib::QueueType::SEND, true); repetitions += 1; } + #ifndef USE_LIBFABRIC wc_buffer.refill(); + #endif if(_polling_state != PollingState::WARM_ALWAYS) { SPDLOG_DEBUG("Switching to hot polling after invocation!"); _polling_state = PollingState::HOT; @@ -210,7 +222,7 @@ namespace server { // arrived before we called notify_events if(repetitions < max_repetitions) { #ifdef USE_LIBFABRIC - conn->wait_events(); + rdmalib::impl::expect_zero(conn->wait_events()); #else auto cq = conn->wait_events(); conn->ack_events(cq, 1); @@ -271,12 +283,11 @@ namespace server { #ifdef USE_LIBFABRIC send.register_memory(active.pd(), FI_WRITE | FI_READ); rcv.register_memory(active.pd(), FI_WRITE | FI_REMOTE_WRITE); - conn->initialize_batched_recv(rcv, 0); #else send.register_memory(active.pd(), IBV_ACCESS_LOCAL_WRITE); rcv.register_memory(active.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); - #endif this->wc_buffer.connect(this->conn); + #endif spdlog::info("Thread {} Established connection to client!", id); // Send to the client information about thread buffer From 13fe1ea2cc504c8db1cdeef84b9fb85a1e43c04f Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Thu, 31 Mar 2022 15:36:33 +0200 Subject: 
[PATCH 22/91] Fix some bugs --- rdmalib/include/rdmalib/connection.hpp | 2 +- rdmalib/lib/connection.cpp | 7 ++++--- rfaas/include/rfaas/executor.hpp | 22 +++++++++++----------- rfaas/lib/executor.cpp | 4 ++-- server/executor/fast_executor.cpp | 22 +++++++++++----------- 5 files changed, 29 insertions(+), 28 deletions(-) diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index 9aca752..7fff729 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -122,7 +122,7 @@ namespace rdmalib { // Blocking, no timeout #ifdef USE_LIBFABRIC - std::tuple poll_wc(QueueType, bool blocking = true, int count = -1); + std::tuple poll_wc(QueueType, bool blocking = true, int count = -1, bool update = false); #else std::tuple poll_wc(QueueType, bool blocking = true, int count = -1); #endif diff --git a/rdmalib/lib/connection.cpp b/rdmalib/lib/connection.cpp index 34a9af1..7a6c456 100644 --- a/rdmalib/lib/connection.cpp +++ b/rdmalib/lib/connection.cpp @@ -661,7 +661,7 @@ namespace rdmalib { #ifdef USE_LIBFABRIC - std::tuple Connection::poll_wc(QueueType type, bool blocking, int count) + std::tuple Connection::poll_wc(QueueType type, bool blocking, int count, bool update) { int ret = 0; fi_cq_data_entry* wcs = (type == QueueType::RECV ? _rwc.data() : _swc.data()); @@ -685,14 +685,15 @@ namespace rdmalib { fi_strerror(_ewc.err) ); } - } while(blocking && (ret == 0 || ret == -EAGAIN)); + } while(blocking && (ret == -EAGAIN || ret == 0)); if(ret < 0 && ret != -EAGAIN) { spdlog::error("Failure of polling events from: {} queue connection {}! Return value {} message {} errno {}", type == QueueType::RECV ? "recv" : "send", fmt::ptr(this), ret, fi_strerror(std::abs(ret)), errno); return std::make_tuple(nullptr, -1); } if(ret > 0) { - _counter += ret; + if (update) + _counter += ret; for(int i = 0; i < ret; ++i) { SPDLOG_DEBUG("Connection {} Queue {} Ret {}/{} WC {}", fmt::ptr(this), type == QueueType::RECV ? 
"recv" : "send", i + 1, ret, reinterpret_cast(wcs[i].op_context)); } diff --git a/rfaas/include/rfaas/executor.hpp b/rfaas/include/rfaas/executor.hpp index afadaa8..7a00e87 100644 --- a/rfaas/include/rfaas/executor.hpp +++ b/rfaas/include/rfaas/executor.hpp @@ -219,7 +219,7 @@ namespace rfaas { _connections[0].conn->poll_wc(rdmalib::QueueType::SEND, true); #ifdef USE_LIBFABRIC - auto wc = _connections[0].conn->poll_wc(rdmalib::QueueType::RECV, true); + auto wc = _connections[0].conn->poll_wc(rdmalib::QueueType::RECV, true, -1, true); #else auto wc = _connections[0]._rcv_buffer.poll(true); #endif @@ -249,7 +249,7 @@ namespace rfaas { template std::tuple execute(std::string fname, const rdmalib::Buffer & in, rdmalib::Buffer & out) { - _perf.point(); + //_perf.point(); auto it = std::find(_func_names.begin(), _func_names.end(), fname); if(it == _func_names.end()) { spdlog::error("Function {} not found in the deployed library!", fname); @@ -272,7 +272,7 @@ namespace rfaas { "Invoke function {} with invocation id {}, submission id {}", func_idx, invoc_id, (invoc_id << 16) | func_idx ); - _perf.point(1); + //_perf.point(1); #ifdef USE_LIBFABRIC _connections[0].conn->post_write( in, @@ -290,24 +290,24 @@ namespace rfaas { ); #endif _active_polling = true; - _perf.point(2); + //_perf.point(2); #ifndef USE_LIBFABRIC _connections[0]._rcv_buffer.refill(); #endif - _perf.point(3); + //_perf.point(3); bool found_result = false; int return_value = 0; int out_size = 0; while(!found_result) { #ifdef USE_LIBFABRIC - auto wc = _connections[0].conn->poll_wc(rdmalib::QueueType::RECV, true); + auto wc = _connections[0].conn->poll_wc(rdmalib::QueueType::RECV, true, -1, true); #else auto wc = _connections[0]._rcv_buffer.poll(true); #endif for(int i = 0; i < std::get<1>(wc); ++i) { #ifdef USE_LIBFABRIC - _perf.point(4); + //_perf.point(4); uint64_t val = std::get<0>(wc)[i].data; int return_val = val & 0x0000FFFF; int finished_invoc_id = val >> 16 & 0x0000FFFF; @@ -337,7 +337,7 @@ 
namespace rfaas { } } if(found_result) { - _perf.point(5); + //_perf.point(5); _active_polling = false; #ifndef USE_LIBFABRIC auto wc = _connections[0]._rcv_buffer.poll(false); @@ -361,11 +361,11 @@ namespace rfaas { std::get<1>(it->second).set_value(return_val); } #endif - _perf.point(6); + //_perf.point(6); } } _connections[0].conn->poll_wc(rdmalib::QueueType::SEND, false); - _perf.point(7); + //_perf.point(7); if(return_value == 0) { SPDLOG_DEBUG("Finished invocation {} succesfully", invoc_id); return std::make_tuple(true, out_size); @@ -435,7 +435,7 @@ namespace rfaas { _active_polling = true; while(expected) { #ifdef USE_LIBFABRIC - auto wc = _connections[0].conn->poll_wc(rdmalib::QueueType::RECV, true); + auto wc = _connections[0].conn->poll_wc(rdmalib::QueueType::RECV, true, -1, true); #else auto wc = _connections[0]._rcv_buffer.poll(true); #endif diff --git a/rfaas/lib/executor.cpp b/rfaas/lib/executor.cpp index dc2a601..02c48d9 100644 --- a/rfaas/lib/executor.cpp +++ b/rfaas/lib/executor.cpp @@ -166,7 +166,7 @@ namespace rfaas { // FIXME: hide the details in rdmalib spdlog::info("Background thread starts waiting for events"); #ifdef USE_LIBFABRIC - int rc; + int rc = 1; #else _connections[0].conn->notify_events(true); int flags = fcntl(_connections[0].conn->completion_channel()->fd, F_GETFL); @@ -213,7 +213,7 @@ namespace rfaas { _connections[0].conn->ack_events(cq, 1); #endif #ifdef USE_LIBFABRIC - auto wc = _connections[0].conn->poll_wc(rdmalib::QueueType::RECV, false); + auto wc = _connections[0].conn->poll_wc(rdmalib::QueueType::RECV, false, -1, true); #else auto wc = _connections[0]._rcv_buffer.poll(false); #endif diff --git a/server/executor/fast_executor.cpp b/server/executor/fast_executor.cpp index 7482f58..f005798 100644 --- a/server/executor/fast_executor.cpp +++ b/server/executor/fast_executor.cpp @@ -36,10 +36,10 @@ namespace server { ); auto start = std::chrono::high_resolution_clock::now(); // Data to ignore header passed in the buffer - 
_perf.point(2); + //_perf.point(2); uint32_t out_size = (*ptr)(rcv.data(), in_size, send.ptr()); SPDLOG_DEBUG("Thread {} finished work!", id); - _perf.point(3); + //_perf.point(3); // Send back: the value of immediate write // first 16 bytes - invocation id // second 16 bytes - return value (0 on no error) @@ -60,11 +60,11 @@ namespace server { solicited ); #endif - _perf.point(4); + //_perf.point(4); auto end = std::chrono::high_resolution_clock::now(); _accounting.update_execution_time(start, end); _accounting.send_updated_execution(_mgr_connection, _accounting_buf, _mgr_conn); - _perf.point(5); + //_perf.point(5); //int cpu = sched_getcpu(); //spdlog::info("Execution + sent took {} us on {} CPU", std::chrono::duration_cast(end-start).count(), cpu); return end; @@ -80,13 +80,13 @@ namespace server { // if we block, we never handle the interruption #ifdef USE_LIBFABRIC - auto wcs = conn->poll_wc(rdmalib::QueueType::RECV, false); + auto wcs = conn->poll_wc(rdmalib::QueueType::RECV, false, -1, true); #else auto wcs = wc_buffer.poll(); #endif if(std::get<1>(wcs)) { for(int i = 0; i < std::get<1>(wcs); ++i) { - _perf.point(); + //_perf.point(); //server_processing_times.start(); #ifdef USE_LIBFABRIC fi_cq_data_entry* wc = &std::get<0>(wcs)[i]; @@ -109,7 +109,7 @@ namespace server { "Thread {} Invoc id {} Execute func {} Repetition {}", id, invoc_id, func_id, repetitions ); - _perf.point(1); + //_perf.point(1); // Measure hot polling time until we started execution auto now = std::chrono::high_resolution_clock::now(); #ifdef USE_LIBFABRIC @@ -124,16 +124,16 @@ namespace server { _accounting.update_polling_time(start, now); i = 0; start = func_end; - _perf.point(6); + //_perf.point(6); //sum += server_processing_times.end(); conn->poll_wc(rdmalib::QueueType::SEND, true); repetitions += 1; - _perf.point(7); + //_perf.point(7); } #ifndef USE_LIBFABRIC wc_buffer.refill(); #endif - _perf.point(8); + //_perf.point(8); } ++i; @@ -168,7 +168,7 @@ namespace server { // if we 
block, we never handle the interruption #ifdef USE_LIBFABRIC - auto wcs = conn->poll_wc(rdmalib::QueueType::RECV, false); + auto wcs = conn->poll_wc(rdmalib::QueueType::RECV, false, -1, true); #else auto wcs = wc_buffer.poll(); #endif From 8e45438cb0d33766a972b129ee29f0fffa5a5790 Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Fri, 1 Apr 2022 01:15:33 +0200 Subject: [PATCH 23/91] Add the newest version --- rdmalib/include/rdmalib/connection.hpp | 1 + rdmalib/include/rdmalib/rdmalib.hpp | 2 ++ rdmalib/lib/buffer.cpp | 2 +- rdmalib/lib/connection.cpp | 4 ++++ rdmalib/lib/rdmalib.cpp | 16 ++++++++++----- server/executor/cli.cpp | 4 +++- server/executor/fast_executor.cpp | 27 ++++++++++++++++++++++---- server/executor_manager/client.cpp | 2 +- 8 files changed, 46 insertions(+), 12 deletions(-) diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index 7fff729..17b0fe7 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -105,6 +105,7 @@ namespace rdmalib { #endif void close(); #ifdef USE_LIBFABRIC + fid_domain* _domain = nullptr; fid* id() const; fid_ep* qp() const; fid_wait* wait_set() const; diff --git a/rdmalib/include/rdmalib/rdmalib.hpp b/rdmalib/include/rdmalib/rdmalib.hpp index 4c4ba43..296f225 100644 --- a/rdmalib/include/rdmalib/rdmalib.hpp +++ b/rdmalib/include/rdmalib/rdmalib.hpp @@ -12,6 +12,7 @@ #ifdef USE_LIBFABRIC #include #include +// #include #else #include #endif @@ -79,6 +80,7 @@ namespace rdmalib { fid_eq* _ec = nullptr; fid_domain* _pd = nullptr; fid_pep* _pep = nullptr; + // fi_gni_ops_domain* _ops; #else rdma_event_channel * _ec; rdma_cm_id* _listen_id; diff --git a/rdmalib/lib/buffer.cpp b/rdmalib/lib/buffer.cpp index 2a2034b..1db48ed 100644 --- a/rdmalib/lib/buffer.cpp +++ b/rdmalib/lib/buffer.cpp @@ -96,7 +96,7 @@ namespace rdmalib { namespace impl { ); if(_mr) #ifdef USE_LIBFABRIC - fi_close(&_mr->fid); + impl::expect_zero(fi_close(&_mr->fid)); #else 
ibv_dereg_mr(_mr); #endif diff --git a/rdmalib/lib/connection.cpp b/rdmalib/lib/connection.cpp index 7a6c456..09c829d 100644 --- a/rdmalib/lib/connection.cpp +++ b/rdmalib/lib/connection.cpp @@ -197,6 +197,10 @@ namespace rdmalib { impl::expect_zero(fi_close(&_qp->fid)); _qp = nullptr; } + if (_domain) { + impl::expect_zero(fi_close(&_domain->fid)); + _domain = nullptr; + } _status = ConnectionStatus::DISCONNECTED; } #else diff --git a/rdmalib/lib/rdmalib.cpp b/rdmalib/lib/rdmalib.cpp index 82f5b94..d280736 100644 --- a/rdmalib/lib/rdmalib.cpp +++ b/rdmalib/lib/rdmalib.cpp @@ -397,12 +397,17 @@ namespace rdmalib { // Start listening fi_eq_attr eq_attr; memset(&eq_attr, 0, sizeof(eq_attr)); - eq_attr.size = 42; - eq_attr.wait_obj = FI_WAIT_UNSPEC; + eq_attr.size = 0; + eq_attr.wait_obj = FI_WAIT_NONE; impl::expect_zero(fi_eq_open(_addr.fabric, &eq_attr, &_ec, NULL)); impl::expect_zero(fi_passive_ep(_addr.fabric, _addr.addrinfo, &_pep, NULL)); impl::expect_zero(fi_pep_bind(_pep, &(_ec->fid), 0)); impl::expect_zero(fi_listen(_pep)); + // _ops = (fi_gni_ops_domain *)malloc(sizeof(fi_gni_ops_domain)); + // fi_open_ops(&_pd->fid, "FI_GNI_DOMAIN_OPS_1", 0, (void **)*_ops, nullptr); + // uint32_t val; + // _ops->get_val(&_pd->fid, GNI_CONN_TABLE_MAX_SIZE, &val); + // std::cout << "MAXIMUM VALUE: " << val << std::endl; #else // Start listening impl::expect_nonzero(this->_ec = rdma_create_event_channel()); @@ -510,11 +515,11 @@ namespace rdmalib { SPDLOG_DEBUG("[RDMAPassive] Connection request with no private data"); // Check if we have a domain open for the connection already - // if (!entry.info->domain_attr->domain) - // fi_domain(_addr.fabric, entry.info, &_pd, NULL); + if (!entry->info->domain_attr->domain) + fi_domain(_addr.fabric, entry->info, &connection->_domain, NULL); // Enable the endpoint - connection->initialize(_addr.fabric, _pd, entry->info, _ec); + connection->initialize(_addr.fabric, connection->_domain, entry->info, _ec); SPDLOG_DEBUG( "[RDMAPassive] 
Created connection fid {} qp {}", fmt::ptr(connection->id()), fmt::ptr(&connection->qp()->fid) @@ -548,6 +553,7 @@ namespace rdmalib { spdlog::error("[RDMAPassive] Not any interesting event"); break; } + free(entry); #else rdma_cm_event* event = nullptr; Connection* connection = nullptr; diff --git a/server/executor/cli.cpp b/server/executor/cli.cpp index 27fdc8b..730a0c1 100644 --- a/server/executor/cli.cpp +++ b/server/executor/cli.cpp @@ -17,7 +17,9 @@ int main(int argc, char ** argv) { - //server::SignalHandler sighandler; + // Register a SIGINT handler so that we can gracefully exit + server::SignalHandler sighandler; + auto opts = server::opts(argc, argv); if(opts.verbose) spdlog::set_level(spdlog::level::debug); diff --git a/server/executor/fast_executor.cpp b/server/executor/fast_executor.cpp index f005798..48ee0d2 100644 --- a/server/executor/fast_executor.cpp +++ b/server/executor/fast_executor.cpp @@ -5,6 +5,8 @@ #include #include #include +#include +#include #include #include @@ -25,6 +27,23 @@ namespace server { + bool SignalHandler::closing = false; + + SignalHandler::SignalHandler() + { + struct sigaction sigIntHandler; + sigIntHandler.sa_handler = &SignalHandler::handler; + sigemptyset(&sigIntHandler.sa_mask); + sigIntHandler.sa_flags = 0; + //FIXME: disable signals to avoid potential interrupts + sigaction(SIGINT, &sigIntHandler, nullptr); + } + + void SignalHandler::handler(int) + { + SignalHandler::closing = true; + } + Accounting::timepoint_t Thread::work(int invoc_id, int func_id, bool solicited, uint32_t in_size) { // FIXME: load func ptr @@ -76,7 +95,7 @@ namespace server { SPDLOG_DEBUG("Thread {} Begins hot polling", id); auto start = std::chrono::high_resolution_clock::now(); int i = 0; - while(repetitions < max_repetitions) { + while(repetitions < max_repetitions && !SignalHandler::closing) { // if we block, we never handle the interruption #ifdef USE_LIBFABRIC @@ -164,7 +183,7 @@ namespace server { // FIXME: this should be automatic 
SPDLOG_DEBUG("Thread {} Begins warm polling", id); - while(repetitions < max_repetitions) { + while(repetitions < max_repetitions && !SignalHandler::closing) { // if we block, we never handle the interruption #ifdef USE_LIBFABRIC @@ -220,7 +239,7 @@ namespace server { // Do waiting after a single polling - avoid missing an events that // arrived before we called notify_events - if(repetitions < max_repetitions) { + if(repetitions < max_repetitions && !SignalHandler::closing) { #ifdef USE_LIBFABRIC rdmalib::impl::expect_zero(conn->wait_events()); #else @@ -311,7 +330,7 @@ namespace server { spdlog::info("Thread {} begins work with timeout {}", id, timeout); // FIXME: catch interrupt handler here - while(repetitions < max_repetitions) { + while(repetitions < max_repetitions && !SignalHandler::closing) { if(_polling_state == PollingState::HOT || _polling_state == PollingState::HOT_ALWAYS) hot(timeout); else diff --git a/server/executor_manager/client.cpp b/server/executor_manager/client.cpp index 651e110..ede85cc 100644 --- a/server/executor_manager/client.cpp +++ b/server/executor_manager/client.cpp @@ -71,7 +71,7 @@ namespace rfaas::executor_manager { if(executor) { int status; auto b = std::chrono::high_resolution_clock::now(); - kill(executor->id(), SIGKILL); + kill(executor->id(), SIGKILL); // for executor need a SIGINT waitpid(executor->id(), &status, WUNTRACED); auto e = std::chrono::high_resolution_clock::now(); spdlog::info("Waited for child {} ms", std::chrono::duration_cast(e-b).count()); From 24907c3313a61f377870e366c6a840ad67f4801d Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Mon, 4 Apr 2022 10:32:22 +0200 Subject: [PATCH 24/91] Add support for a single client only executor manager --- rdmalib/lib/rdmalib.cpp | 55 +++++++++++++++++++---------- rfaas/include/rfaas/resources.hpp | 3 +- rfaas/lib/executor.cpp | 4 +++ rfaas/lib/resources.cpp | 6 +++- server/executor/cli.cpp | 2 +- server/executor/fast_executor.cpp | 28 +++++++-------- 
server/executor_manager/client.hpp | 5 +++ server/executor_manager/manager.cpp | 18 ++++++++++ server/executor_manager/manager.hpp | 5 +++ 9 files changed, 91 insertions(+), 35 deletions(-) diff --git a/rdmalib/lib/rdmalib.cpp b/rdmalib/lib/rdmalib.cpp index d280736..49600b6 100644 --- a/rdmalib/lib/rdmalib.cpp +++ b/rdmalib/lib/rdmalib.cpp @@ -183,6 +183,7 @@ namespace rdmalib { // Enable the event queue fi_eq_attr eq_attr; memset(&eq_attr, 0, sizeof(eq_attr)); + eq_attr.size = 0; eq_attr.wait_obj = FI_WAIT_NONE; impl::expect_zero(fi_eq_open(_addr.fabric, &eq_attr, &_ec, NULL)); // Create and enable the endpoint together with all the accompanying queues @@ -398,7 +399,7 @@ namespace rdmalib { fi_eq_attr eq_attr; memset(&eq_attr, 0, sizeof(eq_attr)); eq_attr.size = 0; - eq_attr.wait_obj = FI_WAIT_NONE; + eq_attr.wait_obj = FI_WAIT_UNSPEC; impl::expect_zero(fi_eq_open(_addr.fabric, &eq_attr, &_ec, NULL)); impl::expect_zero(fi_passive_ep(_addr.fabric, _addr.addrinfo, &_pep, NULL)); impl::expect_zero(fi_pep_bind(_pep, &(_ec->fid), 0)); @@ -457,7 +458,7 @@ namespace rdmalib { #ifdef USE_LIBFABRIC uint32_t event; fi_eq_entry entry; - int ret = fi_eq_read(_ec, &event, &entry, sizeof(entry), FI_PEEK); + int ret = fi_eq_sread(_ec, &event, &entry, sizeof(entry), timeout, FI_PEEK); if (ret < 0 && ret != -FI_EAGAIN && ret != -FI_EAVAIL) spdlog::error("RDMA event poll failed"); return ret > 0 || ret == -FI_EAVAIL; @@ -475,7 +476,11 @@ namespace rdmalib { #endif } + #ifdef USE_LIBFABRIC + std::tuple RDMAPassive::poll_events(bool share_cqs) + #else std::tuple RDMAPassive::poll_events(bool share_cqs) + #endif { #ifdef USE_LIBFABRIC uint32_t event; @@ -514,22 +519,36 @@ namespace rdmalib { else SPDLOG_DEBUG("[RDMAPassive] Connection request with no private data"); - // Check if we have a domain open for the connection already - if (!entry->info->domain_attr->domain) - fi_domain(_addr.fabric, entry->info, &connection->_domain, NULL); - - // Enable the endpoint - 
connection->initialize(_addr.fabric, connection->_domain, entry->info, _ec); - SPDLOG_DEBUG( - "[RDMAPassive] Created connection fid {} qp {}", - fmt::ptr(connection->id()), fmt::ptr(&connection->qp()->fid) - ); - - // Free the info - fi_freeinfo(entry->info); - - status = ConnectionStatus::REQUESTED; - _active_connections.insert(connection); + #ifdef USE_LIBFABRIC + // Used here as determinator of whether we have already established a connection + if (!share_cqs || ret == total_size) { + #endif + // Check if we have a domain open for the connection already + if (!entry->info->domain_attr->domain) + fi_domain(_addr.fabric, entry->info, &connection->_domain, NULL); + + // Enable the endpoint + connection->initialize(_addr.fabric, connection->_domain, entry->info, _ec); + SPDLOG_DEBUG( + "[RDMAPassive] Created connection fid {} qp {}", + fmt::ptr(connection->id()), fmt::ptr(&connection->qp()->fid) + ); + + // Free the info + fi_freeinfo(entry->info); + + status = ConnectionStatus::REQUESTED; + _active_connections.insert(connection); + #ifdef USE_LIBFABRIC + } else { + free(connection); + connection = nullptr; + impl::expect_zero(fi_reject(_pep, entry->info->handle, nullptr, 0)); + SPDLOG_DEBUG( + "[RDMAPassive] Rejected connection because we are already taken" + ); + } + #endif break; case FI_CONNECTED: SPDLOG_DEBUG( diff --git a/rfaas/include/rfaas/resources.hpp b/rfaas/include/rfaas/resources.hpp index fc7d160..d5d06c7 100644 --- a/rfaas/include/rfaas/resources.hpp +++ b/rfaas/include/rfaas/resources.hpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -42,7 +43,7 @@ namespace rfaas { { static std::unique_ptr _instance; std::vector _data; - + std::mt19937 _gen; servers(int positions = 0); server_data & server(int idx); diff --git a/rfaas/lib/executor.cpp b/rfaas/lib/executor.cpp index 02c48d9..5257a9a 100644 --- a/rfaas/lib/executor.cpp +++ b/rfaas/lib/executor.cpp @@ -341,7 +341,11 @@ namespace rfaas { while(established < numcores) { 
//while(conn_status != rdmalib::ConnectionStatus::REQUESTED) + #ifdef USE_LIBFABRIC + auto [conn, conn_status] = _state.poll_events(false); + #else auto [conn, conn_status] = _state.poll_events(true); + #endif if(conn_status == rdmalib::ConnectionStatus::REQUESTED) { SPDLOG_DEBUG( "[Executor] Requested connection from executor {}, connection {}", diff --git a/rfaas/lib/resources.cpp b/rfaas/lib/resources.cpp index 455e36c..30f6c5f 100644 --- a/rfaas/lib/resources.cpp +++ b/rfaas/lib/resources.cpp @@ -3,7 +3,9 @@ #include +#include #include +#include namespace rfaas { @@ -23,6 +25,7 @@ namespace rfaas { servers::servers(int positions) { + _gen = std::mt19937(getpid()); if(positions) _data.resize(positions); } @@ -36,7 +39,8 @@ namespace rfaas { { // FIXME: random walk // FIXME: take size of server in account - return {0}; + std::uniform_int_distribution dist(0, _data.size()-1); + return {dist(_gen)}; } servers & servers::instance() diff --git a/server/executor/cli.cpp b/server/executor/cli.cpp index 730a0c1..d53c328 100644 --- a/server/executor/cli.cpp +++ b/server/executor/cli.cpp @@ -18,7 +18,7 @@ int main(int argc, char ** argv) { // Register a SIGINT handler so that we can gracefully exit - server::SignalHandler sighandler; + //server::SignalHandler sighandler; auto opts = server::opts(argc, argv); if(opts.verbose) diff --git a/server/executor/fast_executor.cpp b/server/executor/fast_executor.cpp index 48ee0d2..7b7c17a 100644 --- a/server/executor/fast_executor.cpp +++ b/server/executor/fast_executor.cpp @@ -29,20 +29,20 @@ namespace server { bool SignalHandler::closing = false; - SignalHandler::SignalHandler() - { - struct sigaction sigIntHandler; - sigIntHandler.sa_handler = &SignalHandler::handler; - sigemptyset(&sigIntHandler.sa_mask); - sigIntHandler.sa_flags = 0; - //FIXME: disable signals to avoid potential interrupts - sigaction(SIGINT, &sigIntHandler, nullptr); - } - - void SignalHandler::handler(int) - { - SignalHandler::closing = true; - } + // 
SignalHandler::SignalHandler() + // { + // struct sigaction sigIntHandler; + // sigIntHandler.sa_handler = &SignalHandler::handler; + // sigemptyset(&sigIntHandler.sa_mask); + // sigIntHandler.sa_flags = 0; + // //FIXME: disable signals to avoid potential interrupts + // sigaction(SIGINT, &sigIntHandler, nullptr); + // } + + // void SignalHandler::handler(int) + // { + // SignalHandler::closing = true; + // } Accounting::timepoint_t Thread::work(int invoc_id, int func_id, bool solicited, uint32_t in_size) { diff --git a/server/executor_manager/client.hpp b/server/executor_manager/client.hpp index d8dd25f..1f74153 100644 --- a/server/executor_manager/client.hpp +++ b/server/executor_manager/client.hpp @@ -3,6 +3,7 @@ #define __SERVER_EXECUTOR_MANAGER_CLIENT_HPP__ #include +#include #include #include @@ -25,7 +26,11 @@ namespace rfaas::executor_manager { rdmalib::Connection* connection; rdmalib::Buffer allocation_requests; rdmalib::RecvBuffer rcv_buffer; + #ifdef USE_LIBFABRIC + std::unique_ptr executor = nullptr; + #else std::unique_ptr executor; + #endif rdmalib::Buffer accounting; uint32_t allocation_time; bool _active; diff --git a/server/executor_manager/manager.cpp b/server/executor_manager/manager.cpp index 76f0d3a..4fb2f8c 100644 --- a/server/executor_manager/manager.cpp +++ b/server/executor_manager/manager.cpp @@ -91,9 +91,15 @@ namespace rfaas::executor_manager { continue; spdlog::debug("[Manager-listen] Polled new rdmacm event"); + #ifdef USE_LIBFABRIC + auto [conn, conn_status] = _state.poll_events( + _established_connection + ); + #else auto [conn, conn_status] = _state.poll_events( false ); + #endif spdlog::debug( "[Manager-listen] New rdmacm connection event - connection {}, status {}", fmt::ptr(conn), conn_status @@ -105,6 +111,9 @@ namespace rfaas::executor_manager { if(conn_status == rdmalib::ConnectionStatus::DISCONNECTED) { // FIXME: handle disconnect spdlog::debug("[Manager-listen] Disconnection on connection {}", fmt::ptr(conn)); + #ifdef 
USE_LIBFABRIC + _established_connection = false; + #endif continue; } // When client connects, we need to fill the receive queue with work requests before @@ -121,6 +130,9 @@ namespace rfaas::executor_manager { SPDLOG_DEBUG("send to another thread\n"); atomic_thread_fence(std::memory_order_release); + #ifdef USE_LIBFABRIC + _established_connection = true; + #endif } else _state.accept(conn); continue; @@ -227,6 +239,9 @@ namespace rfaas::executor_manager { ); } else { spdlog::info("Client {} disconnects", i); + #ifdef USE_LIBFABRIC + _established_connection = false; + #endif if(client.executor) { auto now = std::chrono::high_resolution_clock::now(); client.allocation_time += @@ -248,6 +263,9 @@ namespace rfaas::executor_manager { if(client.executor) { auto status = client.executor->check(); if(std::get<0>(status) != ActiveExecutor::Status::RUNNING) { + #ifdef USE_LIBFABRIC + _established_connection = false; + #endif auto now = std::chrono::high_resolution_clock::now(); client.allocation_time += std::chrono::duration_cast( diff --git a/server/executor_manager/manager.hpp b/server/executor_manager/manager.hpp index b2fad51..a9a852c 100644 --- a/server/executor_manager/manager.hpp +++ b/server/executor_manager/manager.hpp @@ -44,6 +44,11 @@ namespace rfaas::executor_manager { moodycamel::ReaderWriterQueue> _q1; moodycamel::ReaderWriterQueue> _q2; + #ifdef USE_LIBFABRIC + bool _established_connection = false; + + #endif + std::mutex clients; std::map _clients; int _ids; From 9f8fa82208985c8a5ab93bcec2429bf1db7a0ed8 Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Wed, 27 Apr 2022 12:03:26 +0200 Subject: [PATCH 25/91] Add the boilerplate of the scalable experiment --- benchmarks/scalable_benchmark.cpp | 146 +++++++++++++++++++++++++ benchmarks/scalable_benchmark.hpp | 29 +++++ benchmarks/scalable_benchmark_opts.cpp | 46 ++++++++ cmake/benchmarks.cmake | 3 +- 4 files changed, 223 insertions(+), 1 deletion(-) create mode 100644 benchmarks/scalable_benchmark.cpp create 
mode 100644 benchmarks/scalable_benchmark.hpp create mode 100644 benchmarks/scalable_benchmark_opts.cpp diff --git a/benchmarks/scalable_benchmark.cpp b/benchmarks/scalable_benchmark.cpp new file mode 100644 index 0000000..053494e --- /dev/null +++ b/benchmarks/scalable_benchmark.cpp @@ -0,0 +1,146 @@ + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include "scalable_benchmark.hpp" +#include "settings.hpp" + + +int get_second() { + std::time_t t = std::time(0); + std::tm* now = std::localtime(&t); + return now->tm_sec; +} + +int main(int argc, char ** argv) +{ + auto opts = scalable_benchmarker::opts(argc, argv); + spdlog::set_pattern("[%H:%M:%S:%f] [T %t] [%l] %v "); + if(opts.verbose) + spdlog::set_level(spdlog::level::debug); + else + spdlog::set_level(spdlog::level::info); + spdlog::info("Executing serverless-rdma test scalable_benchmarker"); + + // Read device details + std::ifstream in_dev{opts.device_database}; + rfaas::devices::deserialize(in_dev); + in_dev.close(); + + // Read benchmark settings + std::ifstream benchmark_cfg{opts.json_config}; + rfaas::benchmark::Settings settings = rfaas::benchmark::Settings::deserialize(benchmark_cfg); + benchmark_cfg.close(); + + // Read connection details to the executors + if(opts.executors_database != "") { + std::ifstream in_cfg(opts.executors_database); + rfaas::servers::deserialize(in_cfg); + in_cfg.close(); + } else { + spdlog::error( + "Connection to resource manager is temporarily disabled, use executor database " + "option instead!" 
+ ); + return 1; + } + + rfaas::executor executor( + settings.device->ip_address, + settings.device->port, + settings.device->default_receive_buffer_size, + settings.device->max_inline_data + ); + std::vector> in; + std::vector> out; + for(int i = 0; i < opts.cores; ++i) { + in.emplace_back(opts.input_size, rdmalib::functions::Submission::DATA_HEADER_SIZE); + #ifdef USE_LIBFABRIC + in.back().register_memory(executor._state.pd(), FI_WRITE); + #else + in.back().register_memory(executor._state.pd(), IBV_ACCESS_LOCAL_WRITE); + #endif + memset(in.back().data(), 0, opts.input_size); + for(int i = 0; i < opts.input_size; ++i) { + ((char*)in.back().data())[i] = 1; + } + } + for(int i = 0; i < opts.cores; ++i) { + out.emplace_back(opts.input_size); + #ifdef USE_LIBFABRIC + out.back().register_memory(executor._state.pd(), FI_WRITE | FI_REMOTE_WRITE); + #else + out.back().register_memory(executor._state.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); + #endif + memset(out.back().data(), 0, opts.input_size); + } + + rdmalib::Benchmarker<5> benchmarker{settings.benchmark.repetitions}; + spdlog::info("Measurements begin"); + while (get_second() != 0) {} + auto start = std::chrono::high_resolution_clock::now(); + for(int i = 0; i < settings.benchmark.repetitions;++i) { + spdlog::info("Begin iteration {}", i); + while ((get_second() % 5) != 0) {} + int j; + for(j = 0; j < opts.fail; j++) { + if(executor.allocate( + opts.flib, opts.cores, opts.input_size, + settings.benchmark.hot_timeout, false, &benchmarker + )) { + // End of function execution + benchmarker.end(4); + executor.deallocate(); + break; + } else { + spdlog::error("Allocation not succesfull"); + } + } + if (j == opts.fail) + spdlog::info( + "No possible manager was found!" 
+ ); + } + auto end = std::chrono::high_resolution_clock::now(); + spdlog::info( + "Measurements end repetitions {} time {} ms", + benchmarker._measurements.size(), + std::chrono::duration_cast(end-start).count() / 1000.0 + ); + + auto [median, avg] = benchmarker.summary(); + spdlog::info( + "Executed {} repetitions, avg {} usec/iter, median {}", + settings.benchmark.repetitions, avg, median + ); + if(opts.output_stats != "") + benchmarker.export_csv( + opts.output_stats, + {"connect", "submit", "spawn_connect", "initialize", "execute"} + ); + + int i = 0; + for(rdmalib::Buffer & buf : out) { + printf("%d ", i++); + for(int i = 0; i < std::min(10, opts.input_size); ++i) + printf("%d ", ((char*)buf.data())[i]); + printf("\n"); + } + + return 0; +} diff --git a/benchmarks/scalable_benchmark.hpp b/benchmarks/scalable_benchmark.hpp new file mode 100644 index 0000000..e7d0771 --- /dev/null +++ b/benchmarks/scalable_benchmark.hpp @@ -0,0 +1,29 @@ + +#ifndef __TESTS__COLD_BENCHMARKER_HPP__ +#define __TESTS__COLD_BENCHMARKER_HPP__ + +#include + +namespace scalable_benchmarker { + + struct Options { + + std::string json_config; + std::string device_database; + std::string executors_database; + std::string output_stats; + bool verbose; + std::string fname; + std::string flib; + int input_size; + int cores; + int fail; + + }; + + Options opts(int argc, char ** argv); + +} + +#endif + diff --git a/benchmarks/scalable_benchmark_opts.cpp b/benchmarks/scalable_benchmark_opts.cpp new file mode 100644 index 0000000..c999f98 --- /dev/null +++ b/benchmarks/scalable_benchmark_opts.cpp @@ -0,0 +1,46 @@ + +#include + +#include "scalable_benchmark.hpp" + +namespace scalable_benchmarker { + + Options opts(int argc, char ** argv) + { + cxxopts::Options options("rfaas-cold-benchmarker", "Benchmark cold invocations"); + options.add_options() + ("c,config", "JSON input config.", cxxopts::value()) + ("device-database", "JSON configuration of devices.", cxxopts::value()) + 
("executors-database", "JSON configuration of executor servers.", cxxopts::value()->default_value("")) + ("output-stats", "Output file for benchmarking statistics.", cxxopts::value()->default_value("")) + ("v,verbose", "Verbose output", cxxopts::value()->default_value("false")) + ("name", "Function name", cxxopts::value()) + ("functions", "Functions library", cxxopts::value()) + ("s,size", "Packet size", cxxopts::value()->default_value("1")) + ("h,help", "Print usage", cxxopts::value()->default_value("false")) + ("cores", "Number of cores", cxxopts::value()->default_value("1")) + ("fail", "Number of iterations until the client stops polling possible managers", cxxopts::value()->default_value("100")) + ; + auto parsed_options = options.parse(argc, argv); + if(parsed_options.count("help")) + { + std::cout << options.help() << std::endl; + exit(0); + } + + Options result; + result.json_config = parsed_options["config"].as(); + result.device_database = parsed_options["device-database"].as(); + result.verbose = parsed_options["verbose"].as();; + result.fname = parsed_options["name"].as(); + result.flib = parsed_options["functions"].as(); + result.input_size = parsed_options["size"].as(); + result.output_stats = parsed_options["output-stats"].as(); + result.executors_database = parsed_options["executors-database"].as(); + result.cores = parsed_options["cores"].as(); + result.fail = parsed_options["fail"].as(); + + return result; + } + +} diff --git a/cmake/benchmarks.cmake b/cmake/benchmarks.cmake index fdf518a..0d6e5d9 100644 --- a/cmake/benchmarks.cmake +++ b/cmake/benchmarks.cmake @@ -8,7 +8,8 @@ add_executable(warm_benchmarker benchmarks/warm_benchmark.cpp benchmarks/warm_be add_executable(parallel_invocations benchmarks/parallel_invocations.cpp benchmarks/parallel_invocations_opts.cpp) add_executable(cold_benchmarker benchmarks/cold_benchmark.cpp benchmarks/cold_benchmark_opts.cpp) add_executable(cpp_interface benchmarks/cpp_interface.cpp 
benchmarks/cpp_interface_opts.cpp) -set(tests_targets "warm_benchmarker" "cold_benchmarker" "parallel_invocations" "cpp_interface") +add_executable(scalable_benchmarker benchmarks/scalable_benchmark.cpp benchmarks/scalable_benchmark_opts.cpp) +set(tests_targets "warm_benchmarker" "cold_benchmarker" "parallel_invocations" "cpp_interface" "scalable_benchmarker") foreach(target ${tests_targets}) add_dependencies(${target} cxxopts::cxxopts) add_dependencies(${target} rdmalib) From 4a350ebed9396001bca648a0d2fa495ce747800e Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Wed, 27 Apr 2022 12:52:50 +0200 Subject: [PATCH 26/91] Add initial version of the script setting up the scalable benchmark --- benchmarks/scalable_benchmark.py | 169 +++++++++++++++++++++++++++++++ 1 file changed, 169 insertions(+) create mode 100644 benchmarks/scalable_benchmark.py diff --git a/benchmarks/scalable_benchmark.py b/benchmarks/scalable_benchmark.py new file mode 100644 index 0000000..114dcec --- /dev/null +++ b/benchmarks/scalable_benchmark.py @@ -0,0 +1,169 @@ +import re +import subprocess +import copy +import os +import json +import time +import argparse + +parser = argparse.ArgumentParser(description="Allows for setting up the scalable benchmark", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument("-Pm", "--proc-manager", type=int, required=True, help="number of manager processes per node") +parser.add_argument("-Pc", "--proc-client", type=int, required=True, help="number of client processes per node") +parser.add_argument("-Nc", "--nodes-client", type=int, required=True, help="number of nodes assigned to the client") +parser.add_argument("-c", "--config", default="multiple_configs", help="location of the config files") +parser.add_argument("-r", "--results", default="multiple_results", help="where to save the results") +args = parser.parse_args() +config = vars(args) + +# Number of executor manager processes per node +P_manager = config["proc_manager"] + 
+# Number of client processes per node +P_client = config["proc_client"] + +# Number of client nodes (the rest are executor nodes) +N_client = config["nodes_client"] + +# Obtain the ips and hostnames +print("Running srun -l ip a") +process = subprocess.run('srun -l ip a', shell=True, stdout=subprocess.PIPE, universal_newlines=True) +ip = process.stdout +print("Running srun -l hostname") +process = subprocess.run('srun -l hostname', shell=True, stdout=subprocess.PIPE, universal_newlines=True) +hostname = process.stdout + +# Parse the hostnames and ips +pattern = "(\d+)(?::.*\d+:.*ipogif0.*[\n]*.*[\n]*.*inet.)(\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3})" +ips = dict(re.findall(pattern, ip)) +pattern = "(\d+)(?::.*)(nid\d+)" +hostnames = dict(re.findall(pattern, hostname)) + +# Parse executor and client nodes +devices = {} +for key in ips.keys(): + devices[key] = {"ip": ips[key], "hostname": hostnames[key]} + +# Check total number of nodes +N_total = len(ips.keys()) +N_manager = N_total - N_client +P_total = N_client*P_client + N_manager*P_manager +if N_client*P_client > int(P_total/2): + N_client = int(P_total/2/P_client) + N_manager = N_total - N_client + +# Print status +print(f"The following nodes were registered {devices} \n") +print(f"We are spawning {N_total} nodes, {N_client} for clients and {N_manager} for managers.") +print(f"We are spawning {P_total} processses, {P_client} per machine for clients and {P_manager} per machine for managers.") + +# Define templates +template_devices = { + "devices": [ + { + "name": "", + "ip_address": "", + "port": 0, + "max_inline_data": 0, + "default_receive_buffer_size": 20 + } + ] +} + +template_manager = { + "config": { + "rdma_device": "", + "rdma_device_port": 0, + "resource_manager_address": "127.0.0.1", + "resource_manager_port": 0, + "resource_manager_secret": 0 + }, + "executor": { + "use_docker": False, + "repetitions": 10000, + "warmup_iters": 0, + "pin_threads": False + } +} + +executors = { + "executors": [] +} + +# Create 
device lists and configurations for managers +manager_node_list = [] +for i in range(N_manager): + for j in range(P_manager): + device = copy.deepcopy(template_devices) + manager = copy.deepcopy(template_manager) + label = list(devices.keys())[i] + manager_node_list.append(devices[label]["hostname"]) + device["devices"][0]["ip_address"] = devices[label]["ip"] + device["devices"][0]["port"] = 50000 + j + manager["config"]["rdma_device_port"] = 50000 + j + json_string = json.dumps(device) + with open(f'{os.path.join(config["config"], f"devices_manager_{i*P_manager+j}.json")}', 'w') as outfile: + outfile.write(json_string) + json_string = json.dumps(manager) + with open(f'{os.path.join(config["config"], f"executor_manager_{i*P_manager+j}.json")}', 'w') as outfile: + outfile.write(json_string) + +print("Created device lists and configurations for managers") + +# Create the executors list +for i in range(N_manager): + for j in range(P_manager): + device = {} + label = list(devices.keys())[i] + device["address"] = devices[label]["ip"] + device["port"] = 50000 + j + device["cores"] = 1 + executors["executors"].append(device) +json_string = json.dumps(executors) +with open(f'{os.path.join(config["config"], "executors_database.json")}', 'w') as outfile: + outfile.write(json_string) + +print("Created executors database") + +# Create device lists for the clients +client_node_list = [] +for i in range(0, N_client): + for j in range(P_client): + device = copy.deepcopy(template_devices) + label = list(devices.keys())[i+N_manager] + client_node_list.append(devices[label]["hostname"]) + device["devices"][0]["ip_address"] = devices[label]["ip"] + device["devices"][0]["port"] = 50000 + j + json_string = json.dumps(device) + with open(f'{os.path.join(config["config"], f"devices_client_{i*P_client+j}.json")}', 'w') as outfile: + outfile.write(json_string) + +print("Created device lists for clients") + +# Run the managers +node_list = ",".join(manager_node_list) +command = f"""srun -l 
-t 00:02:00 --ntasks-per-node {P_manager} -n {N_manager*P_manager} -N {N_manager} \ + --cpu-bind=cores,verbose --oversubscribe -o managers_%t.o -e managers_%t.e --nodelist={node_list} \ + PATH=/users/mchrapek/rFaaS_old_test/rFaaS/bin:$PATH bin/executor_manager -c {os.path.join(config["config"], "executor_manager_%t.json")} \ + --device-database {os.path.join(config["config"], "devices_manager_%t.json")} --skip-resource-manager > managers_%t.o""" +print("Running the managers with command\n") +print(f"{command}\n") +manager_process = subprocess.Popen(command, shell=True) + +# Sleep for 10s to allow managers to start listening +time.sleep(10) +print("Waited 10s for managers") + +# Run the clients +node_list = ",".join(client_node_list) +command = f"""srun -l -t 00:02:00 --ntasks-per-node {P_client} -n {N_client*P_client} -N {N_client} --oversubscribe -o clients_%t.o -e clients_%t.e \ + --nodelist={node_list} benchmarks/scalable_benchmarker --config {os.path.join(config["config"], "benchmark.json")} \ + --device-database {os.path.join(config["config"], "devices_client_%t.json")} --name empty --functions examples/libfunctions.so \ + --executors-database {os.path.join(config["config"], "executors_database.json")} -s 100 \ + --output-stats {os.path.join(config["results"], "devices_client_%t.csv")} > clients_%t.o""" +print("Running the clients with command\n") +print(f"{command}\n") +client_process = subprocess.Popen(command, shell=True) +srun_process.wait() +manager_process.kill() + From e00d6678026300ac6fb66acb7b30ad1550d26dac Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Wed, 27 Apr 2022 12:54:13 +0200 Subject: [PATCH 27/91] Add a small bug fix of the scalable benchmark script --- benchmarks/scalable_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/scalable_benchmark.py b/benchmarks/scalable_benchmark.py index 114dcec..9a7d699 100644 --- a/benchmarks/scalable_benchmark.py +++ b/benchmarks/scalable_benchmark.py @@ -164,6 
+164,6 @@ print("Running the clients with command\n") print(f"{command}\n") client_process = subprocess.Popen(command, shell=True) -srun_process.wait() +client_process.wait() manager_process.kill() From 9fb85db67ff378d5059a68cd7f4288ac3666d9d2 Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Wed, 27 Apr 2022 15:06:00 +0200 Subject: [PATCH 28/91] Remove the support for single client executor --- rdmalib/lib/rdmalib.cpp | 50 +++++++++-------------------- rfaas/lib/executor.cpp | 4 --- server/executor_manager/client.hpp | 4 --- server/executor_manager/manager.cpp | 18 ----------- server/executor_manager/manager.hpp | 11 ++++--- 5 files changed, 22 insertions(+), 65 deletions(-) diff --git a/rdmalib/lib/rdmalib.cpp b/rdmalib/lib/rdmalib.cpp index 49600b6..85093c5 100644 --- a/rdmalib/lib/rdmalib.cpp +++ b/rdmalib/lib/rdmalib.cpp @@ -476,11 +476,7 @@ namespace rdmalib { #endif } - #ifdef USE_LIBFABRIC - std::tuple RDMAPassive::poll_events(bool share_cqs) - #else std::tuple RDMAPassive::poll_events(bool share_cqs) - #endif { #ifdef USE_LIBFABRIC uint32_t event; @@ -519,36 +515,22 @@ namespace rdmalib { else SPDLOG_DEBUG("[RDMAPassive] Connection request with no private data"); - #ifdef USE_LIBFABRIC - // Used here as determinator of whether we have already established a connection - if (!share_cqs || ret == total_size) { - #endif - // Check if we have a domain open for the connection already - if (!entry->info->domain_attr->domain) - fi_domain(_addr.fabric, entry->info, &connection->_domain, NULL); - - // Enable the endpoint - connection->initialize(_addr.fabric, connection->_domain, entry->info, _ec); - SPDLOG_DEBUG( - "[RDMAPassive] Created connection fid {} qp {}", - fmt::ptr(connection->id()), fmt::ptr(&connection->qp()->fid) - ); - - // Free the info - fi_freeinfo(entry->info); - - status = ConnectionStatus::REQUESTED; - _active_connections.insert(connection); - #ifdef USE_LIBFABRIC - } else { - free(connection); - connection = nullptr; - 
impl::expect_zero(fi_reject(_pep, entry->info->handle, nullptr, 0)); - SPDLOG_DEBUG( - "[RDMAPassive] Rejected connection because we are already taken" - ); - } - #endif + // Check if we have a domain open for the connection already + // if (!entry->info->domain_attr->domain) + // fi_domain(_addr.fabric, entry->info, &connection->_domain, NULL); + + // Enable the endpoint + connection->initialize(_addr.fabric, _pd, entry->info, _ec); + SPDLOG_DEBUG( + "[RDMAPassive] Created connection fid {} qp {}", + fmt::ptr(connection->id()), fmt::ptr(&connection->qp()->fid) + ); + + // Free the info + fi_freeinfo(entry->info); + + status = ConnectionStatus::REQUESTED; + _active_connections.insert(connection); break; case FI_CONNECTED: SPDLOG_DEBUG( diff --git a/rfaas/lib/executor.cpp b/rfaas/lib/executor.cpp index 5257a9a..02c48d9 100644 --- a/rfaas/lib/executor.cpp +++ b/rfaas/lib/executor.cpp @@ -341,11 +341,7 @@ namespace rfaas { while(established < numcores) { //while(conn_status != rdmalib::ConnectionStatus::REQUESTED) - #ifdef USE_LIBFABRIC - auto [conn, conn_status] = _state.poll_events(false); - #else auto [conn, conn_status] = _state.poll_events(true); - #endif if(conn_status == rdmalib::ConnectionStatus::REQUESTED) { SPDLOG_DEBUG( "[Executor] Requested connection from executor {}, connection {}", diff --git a/server/executor_manager/client.hpp b/server/executor_manager/client.hpp index 1f74153..5fd5c67 100644 --- a/server/executor_manager/client.hpp +++ b/server/executor_manager/client.hpp @@ -26,11 +26,7 @@ namespace rfaas::executor_manager { rdmalib::Connection* connection; rdmalib::Buffer allocation_requests; rdmalib::RecvBuffer rcv_buffer; - #ifdef USE_LIBFABRIC - std::unique_ptr executor = nullptr; - #else std::unique_ptr executor; - #endif rdmalib::Buffer accounting; uint32_t allocation_time; bool _active; diff --git a/server/executor_manager/manager.cpp b/server/executor_manager/manager.cpp index 4fb2f8c..76f0d3a 100644 --- a/server/executor_manager/manager.cpp 
+++ b/server/executor_manager/manager.cpp @@ -91,15 +91,9 @@ namespace rfaas::executor_manager { continue; spdlog::debug("[Manager-listen] Polled new rdmacm event"); - #ifdef USE_LIBFABRIC - auto [conn, conn_status] = _state.poll_events( - _established_connection - ); - #else auto [conn, conn_status] = _state.poll_events( false ); - #endif spdlog::debug( "[Manager-listen] New rdmacm connection event - connection {}, status {}", fmt::ptr(conn), conn_status @@ -111,9 +105,6 @@ namespace rfaas::executor_manager { if(conn_status == rdmalib::ConnectionStatus::DISCONNECTED) { // FIXME: handle disconnect spdlog::debug("[Manager-listen] Disconnection on connection {}", fmt::ptr(conn)); - #ifdef USE_LIBFABRIC - _established_connection = false; - #endif continue; } // When client connects, we need to fill the receive queue with work requests before @@ -130,9 +121,6 @@ namespace rfaas::executor_manager { SPDLOG_DEBUG("send to another thread\n"); atomic_thread_fence(std::memory_order_release); - #ifdef USE_LIBFABRIC - _established_connection = true; - #endif } else _state.accept(conn); continue; @@ -239,9 +227,6 @@ namespace rfaas::executor_manager { ); } else { spdlog::info("Client {} disconnects", i); - #ifdef USE_LIBFABRIC - _established_connection = false; - #endif if(client.executor) { auto now = std::chrono::high_resolution_clock::now(); client.allocation_time += @@ -263,9 +248,6 @@ namespace rfaas::executor_manager { if(client.executor) { auto status = client.executor->check(); if(std::get<0>(status) != ActiveExecutor::Status::RUNNING) { - #ifdef USE_LIBFABRIC - _established_connection = false; - #endif auto now = std::chrono::high_resolution_clock::now(); client.allocation_time += std::chrono::duration_cast( diff --git a/server/executor_manager/manager.hpp b/server/executor_manager/manager.hpp index a9a852c..d7708ea 100644 --- a/server/executor_manager/manager.hpp +++ b/server/executor_manager/manager.hpp @@ -4,6 +4,7 @@ #include #include +#include #include #include 
#include @@ -16,12 +17,17 @@ #include #include "client.hpp" +#include "executor_manager/executor_process.hpp" +#include "rdmalib/allocation.hpp" #include "settings.hpp" #include "../common.hpp" #include "../common/readerwriterqueue.h" namespace rdmalib { struct AllocationRequest; + #ifdef USE_LIBFABRIC + struct ClientAddress; + #endif } namespace rfaas::executor_manager { @@ -43,11 +49,6 @@ namespace rfaas::executor_manager { static constexpr int POLLING_TIMEOUT_MS = 100; moodycamel::ReaderWriterQueue> _q1; moodycamel::ReaderWriterQueue> _q2; - - #ifdef USE_LIBFABRIC - bool _established_connection = false; - - #endif std::mutex clients; std::map _clients; From 512faf2f7963b581fd2f1b3691eca3012e7d5f7a Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Wed, 27 Apr 2022 15:14:57 +0200 Subject: [PATCH 29/91] Remove the remainder of the warm cold code --- server/executor_manager/manager.hpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/server/executor_manager/manager.hpp b/server/executor_manager/manager.hpp index d7708ea..2975b80 100644 --- a/server/executor_manager/manager.hpp +++ b/server/executor_manager/manager.hpp @@ -17,17 +17,12 @@ #include #include "client.hpp" -#include "executor_manager/executor_process.hpp" -#include "rdmalib/allocation.hpp" #include "settings.hpp" #include "../common.hpp" #include "../common/readerwriterqueue.h" namespace rdmalib { struct AllocationRequest; - #ifdef USE_LIBFABRIC - struct ClientAddress; - #endif } namespace rfaas::executor_manager { From 06a2b8101d853a606c72139a64f58c1887a60ead Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Wed, 27 Apr 2022 15:44:28 +0200 Subject: [PATCH 30/91] Remove strained copy --- rdmalib/include/rdmalib/benchmarker copy.hpp | 90 -------------------- 1 file changed, 90 deletions(-) delete mode 100644 rdmalib/include/rdmalib/benchmarker copy.hpp diff --git a/rdmalib/include/rdmalib/benchmarker copy.hpp b/rdmalib/include/rdmalib/benchmarker copy.hpp deleted file mode 100644 index 
2d90a74..0000000 --- a/rdmalib/include/rdmalib/benchmarker copy.hpp +++ /dev/null @@ -1,90 +0,0 @@ - -#ifndef __RDMALIB_BENCHMARKER_HPP__ -#define __RDMALIB_BENCHMARKER_HPP__ - -#include -#include -#include -#include -#include -#include - -//#include - -namespace rdmalib { - - template - struct Benchmarker { - std::vector> _measurements; - std::chrono::time_point _start, _end; - - Benchmarker(int measurements) - { - _measurements.reserve(measurements); - } - - inline void start() - { - _start = std::chrono::high_resolution_clock::now(); - } - - inline uint64_t end(int col = 0) - { - _end = std::chrono::high_resolution_clock::now(); - uint64_t duration = std::chrono::duration_cast(_end - _start).count(); - if(col == 0) - _measurements.emplace_back(); - _measurements.back()[col] = duration; - return duration; - } - - void remove_last() - { - _measurements.pop_back(); - } - - std::tuple summary(int idx = 0) - { - // FIXME: reenable - long sum = std::accumulate(_measurements.begin(), _measurements.end(), 0L, - [idx](long x, const std::array & y) { - return x + y[idx]; - } - ); - double avg = static_cast(sum) / _measurements.size(); - - //// compute median - //// let's just ignore the rule that for even size we should take an average of middle elements - int middle = _measurements.size() / 2; - std::nth_element(_measurements.begin(), _measurements.begin() + middle, _measurements.end(), - [idx](const std::array & x, const std::array & y) { - return x[idx] < y[idx]; - } - ); - int median = _measurements[middle][idx]; - - return std::make_tuple(static_cast(median) / 1000, avg / 1000); - } - - void export_csv(std::string fname, const std::array & headers) - { - std::ofstream of(fname); - of << "id"; - for(int j = 0; j < Cols; ++j) - of << ',' << headers[j]; - of << '\n'; - - for(size_t i = 0; i < _measurements.size(); ++i) { - of << i; - for(int j = 0; j < Cols; ++j) - of << ',' << _measurements[i][j]; - of << '\n'; - } - } - - }; - -} - -#endif - From 
e533a6c0b83711684b535d1e4caf54ad9a263821 Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Sat, 7 May 2022 13:04:00 +0200 Subject: [PATCH 31/91] Add support for the credentials --- CMakeLists.txt | 16 +++++--- benchmarks/credentials.cpp | 56 ++++++++++++++++++++++++++ cmake/benchmarks.cmake | 11 +++++ rdmalib/include/rdmalib/allocation.hpp | 36 ----------------- rdmalib/include/rdmalib/rdmalib.hpp | 13 ++++++ rdmalib/lib/rdmalib.cpp | 48 ++++++++++++++++++++++ 6 files changed, 138 insertions(+), 42 deletions(-) create mode 100644 benchmarks/credentials.cpp delete mode 100644 rdmalib/include/rdmalib/allocation.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 9fc9e82..ac063b2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -76,6 +76,16 @@ if(${WITH_GNI_AUTH}) add_definitions(-DUSE_GNI_AUTH) endif() +### +# Select whether we should compile the MPI scalability experiment +### +option(WITH_SCALABILITY "Compile the scalability experiment" Off) +if(${WITH_SCALABILITY}) + message(STATUS "Enabling the compilation of the scalability experiment") + add_definitions(-DWITH_SCALABILITY) + find_package(MPI REQUIRED) +endif() + ### # Optional: use existing installations ### @@ -184,9 +194,6 @@ target_include_directories(rfaaslib PUBLIC "rfaas/include") target_include_directories(rfaaslib PRIVATE $) target_include_directories(rfaaslib SYSTEM PUBLIC $) target_include_directories(rfaaslib SYSTEM PUBLIC $) -#if (${WITH_LIBFABRIC} AND ${WITH_GNI_AUTH}) -#target_include_directories(rfaaslib SYSTEM PUBLIC $) -#endif() set_target_properties(rfaaslib PROPERTIES POSITION_INDEPENDENT_CODE On) set_target_properties(rfaaslib PROPERTIES LIBRARY_OUTPUT_DIRECTORY lib) target_link_libraries(rfaaslib PUBLIC rdmalib) @@ -194,9 +201,6 @@ target_link_libraries(rfaaslib PUBLIC spdlog::spdlog) target_link_libraries(rfaaslib PRIVATE cereal) target_link_libraries(rfaaslib PUBLIC dl) target_link_libraries(rfaaslib PUBLIC Threads::Threads) -#if (${WITH_LIBFABRIC} AND ${WITH_GNI_AUTH}) 
-#target_link_libraries(rfaaslib PUBLIC drc) -#endif() ### # Server diff --git a/benchmarks/credentials.cpp b/benchmarks/credentials.cpp new file mode 100644 index 0000000..8c4f9c3 --- /dev/null +++ b/benchmarks/credentials.cpp @@ -0,0 +1,56 @@ +#include + +extern "C" { +#include "rdmacred.h" +} + +void assert_z(const std::string &text, const int x) { + if (x != 0) { + std::cout << "[ERROR] " << text << " failed with code " << x << "\n" << std::endl; + exit(1); + } +} + +void assert_nEOF(const std::string &text, const int x) { + if (x == EOF) { + std::cout << "[ERROR] " << text << " returned EOF\n" << std::endl; + exit(1); + } +} + +void assert_nNULL(const std::string &text, const void *x) { + if (x == NULL) { + std::cout << "[ERROR] " << text << " returned NULL\n" << std::endl; + exit(1); + } +} + +int main() { + // Acquire, grant access and save the credential + uint32_t credential; + int ret = drc_acquire(&credential, 0); + if (ret == 0) { + char buffer[11]; + FILE *file; + drc_grant(credential, 28487, DRC_FLAGS_TARGET_UID); + snprintf(buffer, 11, "%u", credential); + assert_nNULL("fopen", file = fopen("credential.txt", "w")); + assert_nEOF("fputs", fputs(buffer, file)); + assert_nEOF("fclose", fclose(file)); + printf("Saved credential %s\n", buffer); + } else { + std::cout << "[ERROR] Cannot acquire the credential, failed with code " << ret << std::endl; + exit(1); + } + + // Access the credential and print cookies + uint32_t cookie1; + uint32_t cookie2; + drc_info_handle_t info; + uint8_t ptag; + assert_z("drc_access", drc_access(credential, 0, &info)); + cookie1 = drc_get_first_cookie(info); + cookie2 = drc_get_second_cookie(info); + GNI_GetPtag(0, cookie1, &ptag); + std::cout << "[INFO] Got cookies " << cookie1 << " and " << cookie2 << " with ptag " << static_cast<unsigned>(ptag) << std::endl; +} diff --git a/cmake/benchmarks.cmake b/cmake/benchmarks.cmake index 0d6e5d9..3dc4f17 100644 --- a/cmake/benchmarks.cmake +++ b/cmake/benchmarks.cmake @@ -9,7 +9,11 @@ 
add_executable(parallel_invocations benchmarks/parallel_invocations.cpp benchmar add_executable(cold_benchmarker benchmarks/cold_benchmark.cpp benchmarks/cold_benchmark_opts.cpp) add_executable(cpp_interface benchmarks/cpp_interface.cpp benchmarks/cpp_interface_opts.cpp) add_executable(scalable_benchmarker benchmarks/scalable_benchmark.cpp benchmarks/scalable_benchmark_opts.cpp) +if ( ${WITH_SCALABILITY} ) set(tests_targets "warm_benchmarker" "cold_benchmarker" "parallel_invocations" "cpp_interface" "scalable_benchmarker") +else() +set(tests_targets "warm_benchmarker" "cold_benchmarker" "parallel_invocations" "cpp_interface") +endif() foreach(target ${tests_targets}) add_dependencies(${target} cxxopts::cxxopts) add_dependencies(${target} rdmalib) @@ -23,3 +27,10 @@ foreach(target ${tests_targets}) target_link_libraries(${target} PRIVATE benchmarks) set_target_properties(${target} PROPERTIES RUNTIME_OUTPUT_DIRECTORY benchmarks) endforeach() + +if( ${WITH_GNI_AUTH} ) +add_executable(credentials benchmarks/credentials.cpp) +target_include_directories(credentials SYSTEM PUBLIC $) +target_link_libraries(credentials PUBLIC PkgConfig::drc) +set_target_properties(credentials PROPERTIES RUNTIME_OUTPUT_DIRECTORY benchmarks) +endif() diff --git a/rdmalib/include/rdmalib/allocation.hpp b/rdmalib/include/rdmalib/allocation.hpp deleted file mode 100644 index fc1f460..0000000 --- a/rdmalib/include/rdmalib/allocation.hpp +++ /dev/null @@ -1,36 +0,0 @@ - -#ifndef __RDMALIB_EXECUTOR_MANAGER__ -#define __RDMALIB_EXECUTOR_MANAGER__ - -#include - -namespace rdmalib { - - struct AllocationRequest - { - int16_t hot_timeout; - int16_t timeout; - // > 0: Number of cores to be allocated - // < 0: client_id with negative sign, deallocation & disconnect request - int16_t cores; - int16_t input_buf_count; - int32_t input_buf_size; - uint32_t func_buf_size; - int32_t listen_port; - char listen_address[16]; - }; - - struct BufferInformation - { - uint64_t r_addr; - #ifdef USE_LIBFABRIC - 
uint64_t r_key; - #else - uint32_t r_key; - #endif - }; - -} - -#endif - diff --git a/rdmalib/include/rdmalib/rdmalib.hpp b/rdmalib/include/rdmalib/rdmalib.hpp index 296f225..afbea7d 100644 --- a/rdmalib/include/rdmalib/rdmalib.hpp +++ b/rdmalib/include/rdmalib/rdmalib.hpp @@ -13,6 +13,12 @@ #include #include // #include +#ifdef USE_GNI_AUTH +#include +extern "C" { +#include "rdmacred.h" +} +#endif #else #include #endif @@ -28,6 +34,13 @@ namespace rdmalib { fi_info* addrinfo = nullptr; fi_info* hints = nullptr; fid_fabric* fabric = nullptr; + #ifdef USE_GNI_AUTH + std::once_flag access_flag; + std::once_flag release_flag; + void obtain_cookies(); + static drc_info_handle_t credential_info; + static uint64_t cookie; + #endif std::string _ip; #else rdma_addrinfo *addrinfo; diff --git a/rdmalib/lib/rdmalib.cpp b/rdmalib/lib/rdmalib.cpp index 85093c5..2b70c47 100644 --- a/rdmalib/lib/rdmalib.cpp +++ b/rdmalib/lib/rdmalib.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include // poll on file descriptors @@ -19,6 +20,11 @@ #include #include #include +#ifdef USE_GNI_AUTH +extern "C" { +#include "rdmacred.h" +} +#endif #endif #include #include @@ -49,6 +55,19 @@ namespace rdmalib { impl::expect_zero(fi_getinfo(FI_VERSION(1, 9), ip.c_str(), std::to_string(port).c_str(), passive ? 
FI_SOURCE : 0, hints, &addrinfo)); fi_freeinfo(hints); impl::expect_zero(fi_fabric(addrinfo->fabric_attr, &fabric, nullptr)); + #ifdef USE_GNI_AUTH + // Obtain the cookies once per process + std::call_once(access_flag, &Address::obtain_cookies, this); + + // Set the hints to have the cookies + addrinfo->domain_attr->auth_key = (uint8_t *)malloc(sizeof(cookie)); + memcpy(addrinfo->domain_attr->auth_key, &cookie, sizeof(cookie)); + addrinfo->domain_attr->auth_key_size = sizeof(cookie); + addrinfo->ep_attr->auth_key = (uint8_t *)malloc(sizeof(cookie)); + memcpy(addrinfo->ep_attr->auth_key, &cookie, sizeof(cookie)); + addrinfo->ep_attr->auth_key_size = sizeof(cookie); + spdlog::info("Saved cookie {}", cookie); + #endif #else memset(&hints, 0, sizeof hints); hints.ai_port_space = RDMA_PS_TCP; @@ -61,6 +80,24 @@ namespace rdmalib { this->_ip = ip; } + #ifdef USE_GNI_AUTH + void Address::obtain_cookies() { + // Access the credentials and obtain the credential cookie + char buffer[11]; + FILE *file; + impl::expect_nonnull(file = fopen("credential.txt", "r")); + impl::expect_nonnull(fgets(buffer, 11, file)); + fclose(file); + uint32_t credential = (uint32_t)atoi(buffer); + impl::expect_zero(drc_access(credential, 0, &credential_info)); + + // Obtain the cookie + cookie = (uint64_t)drc_get_first_cookie(credential_info)<<32; + } + drc_info_handle_t Address::credential_info; + uint64_t Address::cookie; + #endif + Address::Address(const std::string & sip, const std::string & dip, int port) { struct sockaddr_in server_in, local_in; @@ -120,6 +157,9 @@ namespace rdmalib { impl::expect_zero(fi_close(&fabric->fid)); if (addrinfo) fi_freeinfo(addrinfo); + #ifdef USE_GNI_AUTH + std::call_once(release_flag, drc_release_local, &credential_info); + #endif #else rdma_freeaddrinfo(addrinfo); #endif @@ -520,6 +560,14 @@ namespace rdmalib { // fi_domain(_addr.fabric, entry->info, &connection->_domain, NULL); // Enable the endpoint + #ifdef USE_GNI_AUTH + entry->info->domain_attr->auth_key = 
(uint8_t *)malloc(sizeof(_addr.cookie)); + memcpy(entry->info->domain_attr->auth_key, &_addr.cookie, sizeof(_addr.cookie)); + entry->info->domain_attr->auth_key_size = sizeof(_addr.cookie); + entry->info->ep_attr->auth_key = (uint8_t *)malloc(sizeof(_addr.cookie)); + memcpy(entry->info->ep_attr->auth_key, &_addr.cookie, sizeof(_addr.cookie)); + entry->info->ep_attr->auth_key_size = sizeof(_addr.cookie); + #endif connection->initialize(_addr.fabric, _pd, entry->info, _ec); SPDLOG_DEBUG( "[RDMAPassive] Created connection fid {} qp {}", From a561fee16cc9c5b68a2732fc2bf4f48d9d9eb197 Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Sat, 7 May 2022 13:07:44 +0200 Subject: [PATCH 32/91] Add the MPI scalability experiment and cleanup --- benchmarks/scalable_benchmark.cpp | 12 ++++++++- rdmalib/include/rdmalib/allocation.hpp | 36 ++++++++++++++++++++++++++ server/executor/functions.cpp | 2 +- 3 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 rdmalib/include/rdmalib/allocation.hpp diff --git a/benchmarks/scalable_benchmark.cpp b/benchmarks/scalable_benchmark.cpp index 053494e..3de687e 100644 --- a/benchmarks/scalable_benchmark.cpp +++ b/benchmarks/scalable_benchmark.cpp @@ -19,7 +19,7 @@ #include "scalable_benchmark.hpp" #include "settings.hpp" - +#include int get_second() { std::time_t t = std::time(0); @@ -60,6 +60,14 @@ int main(int argc, char ** argv) return 1; } + // Initialize the MPI and the comm world + MPI_Init(nullptr, nullptr); + int world_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); + int world_size; + MPI_Comm_size(MPI_COMM_WORLD, &world_size); + spdlog::info("Running as rank {} within the world of size {}", world_rank, world_size); + rfaas::executor executor( settings.device->ip_address, settings.device->port, @@ -99,6 +107,7 @@ int main(int argc, char ** argv) while ((get_second() % 5) != 0) {} int j; for(j = 0; j < opts.fail; j++) { + MPI_Barrier(MPI_COMM_WORLD); if(executor.allocate( opts.flib, opts.cores, opts.input_size, 
settings.benchmark.hot_timeout, false, &benchmarker @@ -122,6 +131,7 @@ int main(int argc, char ** argv) benchmarker._measurements.size(), std::chrono::duration_cast(end-start).count() / 1000.0 ); + MPI_Finalize(); auto [median, avg] = benchmarker.summary(); spdlog::info( diff --git a/rdmalib/include/rdmalib/allocation.hpp b/rdmalib/include/rdmalib/allocation.hpp new file mode 100644 index 0000000..fc1f460 --- /dev/null +++ b/rdmalib/include/rdmalib/allocation.hpp @@ -0,0 +1,36 @@ + +#ifndef __RDMALIB_EXECUTOR_MANAGER__ +#define __RDMALIB_EXECUTOR_MANAGER__ + +#include + +namespace rdmalib { + + struct AllocationRequest + { + int16_t hot_timeout; + int16_t timeout; + // > 0: Number of cores to be allocated + // < 0: client_id with negative sign, deallocation & disconnect request + int16_t cores; + int16_t input_buf_count; + int32_t input_buf_size; + uint32_t func_buf_size; + int32_t listen_port; + char listen_address[16]; + }; + + struct BufferInformation + { + uint64_t r_addr; + #ifdef USE_LIBFABRIC + uint64_t r_key; + #else + uint32_t r_key; + #endif + }; + +} + +#endif + diff --git a/server/executor/functions.cpp b/server/executor/functions.cpp index c4bb6d2..de7a8d5 100644 --- a/server/executor/functions.cpp +++ b/server/executor/functions.cpp @@ -61,7 +61,7 @@ namespace server { _library_handle(nullptr) { // FIXME: works only on Linux - #ifdef USE_LIBFABRIC#ifdef USE_LIBFABRIC + #ifdef USE_LIBFABRIC rdmalib::impl::expect_nonnegative(_fd = syscall(SYS_memfd_create, "test", MFD_CLOEXEC)); #else rdmalib::impl::expect_nonnegative(_fd = memfd_create("libfunction", 0)); From 83c964667560c977e8514cd89accbfb68feeed81 Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Fri, 13 May 2022 11:54:04 +0200 Subject: [PATCH 33/91] Add repairs of the bugs and the setup script for Daint --- rdmalib/lib/connection.cpp | 45 +++++++++++--------- rdmalib/lib/rdmalib.cpp | 11 +++-- scripts/setup.sh | 4 ++ server/executor_manager/executor_process.cpp | 2 +- 4 files changed, 36 
insertions(+), 26 deletions(-) create mode 100644 scripts/setup.sh diff --git a/rdmalib/lib/connection.cpp b/rdmalib/lib/connection.cpp index 09c829d..1fac1d9 100644 --- a/rdmalib/lib/connection.cpp +++ b/rdmalib/lib/connection.cpp @@ -180,27 +180,30 @@ namespace rdmalib { SPDLOG_DEBUG("Connection close called for {} with qp fid {}", fmt::ptr(this), fmt::ptr(&this->_qp->fid)); // We need to close the transmit and receive channels and the endpoint if (_status != ConnectionStatus::DISCONNECTED) { - if (_rcv_channel) { - impl::expect_zero(fi_close(&_rcv_channel->fid)); - _rcv_channel = nullptr; - } - if (_trx_channel) { - impl::expect_zero(fi_close(&_trx_channel->fid)); - _trx_channel = nullptr; - } - if (_write_counter) { - impl::expect_zero(fi_close(&_write_counter->fid)); - _write_counter = nullptr; - } - if (_qp) { - impl::expect_zero(fi_shutdown(_qp, 0)); - impl::expect_zero(fi_close(&_qp->fid)); - _qp = nullptr; - } - if (_domain) { - impl::expect_zero(fi_close(&_domain->fid)); - _domain = nullptr; - } + // TODO Check how to free those and if it's necessary at all. + // When closing the endpoint we obtain a corrupted double-linked list problem + // within gnix. 
+ // if (_rcv_channel) { + // impl::expect_zero(fi_close(&_rcv_channel->fid)); + // _rcv_channel = nullptr; + // } + // if (_trx_channel) { + // impl::expect_zero(fi_close(&_trx_channel->fid)); + // _trx_channel = nullptr; + // } + // if (_write_counter) { + // impl::expect_zero(fi_close(&_write_counter->fid)); + // _write_counter = nullptr; + // } + // if (_qp) { + // impl::expect_zero(fi_shutdown(_qp, 0)); + // impl::expect_zero(fi_close(&_qp->fid)); + // _qp = nullptr; + // } + // if (_domain) { + // impl::expect_zero(fi_close(&_domain->fid)); + // _domain = nullptr; + // } _status = ConnectionStatus::DISCONNECTED; } #else diff --git a/rdmalib/lib/rdmalib.cpp b/rdmalib/lib/rdmalib.cpp index 2b70c47..1654c2c 100644 --- a/rdmalib/lib/rdmalib.cpp +++ b/rdmalib/lib/rdmalib.cpp @@ -153,10 +153,13 @@ namespace rdmalib { Address::~Address() { #ifdef USE_LIBFABRIC - if (fabric) - impl::expect_zero(fi_close(&fabric->fid)); - if (addrinfo) - fi_freeinfo(addrinfo); + // TODO Check how to free those and if it's necessary at all. + // When closing the addrinfo we obtain a double free or corruption problem. + // It seems that the problem is coming from the ep_attr. 
+ // if (fabric) + // impl::expect_zero(fi_close(&fabric->fid)); + // if (addrinfo) + // fi_freeinfo(addrinfo); #ifdef USE_GNI_AUTH std::call_once(release_flag, drc_release_local, &credential_info); #endif diff --git a/scripts/setup.sh b/scripts/setup.sh new file mode 100644 index 0000000..606535d --- /dev/null +++ b/scripts/setup.sh @@ -0,0 +1,4 @@ +module unload PrgEnv-cray +module load PrgEnv-gnu +module load rdma-credentials +srun -l -N1 -n1 benchmarks/credentials \ No newline at end of file diff --git a/server/executor_manager/executor_process.cpp b/server/executor_manager/executor_process.cpp index afdbf2d..4aaecdf 100644 --- a/server/executor_manager/executor_process.cpp +++ b/server/executor_manager/executor_process.cpp @@ -88,7 +88,7 @@ namespace rfaas::executor_manager { std::string mgr_buf_addr = std::to_string(conn.r_addr); std::string mgr_buf_rkey = std::to_string(conn.r_key); - int mypid = fork(); + int mypid = vfork(); if(mypid < 0) { spdlog::error("Fork failed! {}", mypid); } From 4e09124f5c3d2ec4fcf65091d3fff42201055c32 Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Fri, 13 May 2022 13:51:24 +0200 Subject: [PATCH 34/91] Erase unnecessary flag --- rdmalib/lib/rdmalib.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rdmalib/lib/rdmalib.cpp b/rdmalib/lib/rdmalib.cpp index 1654c2c..7580c85 100644 --- a/rdmalib/lib/rdmalib.cpp +++ b/rdmalib/lib/rdmalib.cpp @@ -46,7 +46,7 @@ namespace rdmalib { // Set the hints to have ability to conduct MSG, Atomic and RMA operations hints->caps |= FI_MSG | FI_RMA | FI_ATOMIC | FI_RMA_EVENT; // Set the hints to indicate that we will register the local buffers - hints->domain_attr->mr_mode = FI_MR_LOCAL | FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY; + hints->domain_attr->mr_mode = FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY; hints->ep_attr->type = FI_EP_MSG; hints->fabric_attr->prov_name = strdup("GNI"); hints->domain_attr->threading = FI_THREAD_DOMAIN; From 
958fca85a278ffd29dea955d6dbf5e59d26a1bf8 Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Fri, 13 May 2022 19:37:44 +0200 Subject: [PATCH 35/91] Make scalability compilation conditional --- cmake/benchmarks.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/benchmarks.cmake b/cmake/benchmarks.cmake index 3dc4f17..13c5827 100644 --- a/cmake/benchmarks.cmake +++ b/cmake/benchmarks.cmake @@ -8,8 +8,8 @@ add_executable(warm_benchmarker benchmarks/warm_benchmark.cpp benchmarks/warm_be add_executable(parallel_invocations benchmarks/parallel_invocations.cpp benchmarks/parallel_invocations_opts.cpp) add_executable(cold_benchmarker benchmarks/cold_benchmark.cpp benchmarks/cold_benchmark_opts.cpp) add_executable(cpp_interface benchmarks/cpp_interface.cpp benchmarks/cpp_interface_opts.cpp) -add_executable(scalable_benchmarker benchmarks/scalable_benchmark.cpp benchmarks/scalable_benchmark_opts.cpp) if ( ${WITH_SCALABILITY} ) +add_executable(scalable_benchmarker benchmarks/scalable_benchmark.cpp benchmarks/scalable_benchmark_opts.cpp) set(tests_targets "warm_benchmarker" "cold_benchmarker" "parallel_invocations" "cpp_interface" "scalable_benchmarker") else() set(tests_targets "warm_benchmarker" "cold_benchmarker" "parallel_invocations" "cpp_interface") From 3b6ca9927e88b68592a1206a7a49ab5bc36bda9d Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Fri, 13 May 2022 19:38:37 +0200 Subject: [PATCH 36/91] Include the missing header --- benchmarks/credentials.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/benchmarks/credentials.cpp b/benchmarks/credentials.cpp index 8c4f9c3..ab8b04a 100644 --- a/benchmarks/credentials.cpp +++ b/benchmarks/credentials.cpp @@ -1,4 +1,5 @@ #include +#include "gni_pub.h" extern "C" { #include "rdmacred.h" From 584ffcb171acf6ba0b36e0903ea4bb7c494f02bc Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Sat, 14 May 2022 02:26:50 +0200 Subject: [PATCH 37/91] [exec-mgr] Add support for running Sarus containers --- 
config/executor_manager.json | 2 +- server/executor_manager/executor_process.cpp | 70 +++++++++++++++----- server/executor_manager/settings.cpp | 20 ++++++ server/executor_manager/settings.hpp | 17 ++++- 4 files changed, 88 insertions(+), 21 deletions(-) diff --git a/config/executor_manager.json b/config/executor_manager.json index 3f5378f..dc6a619 100644 --- a/config/executor_manager.json +++ b/config/executor_manager.json @@ -7,7 +7,7 @@ "resource_manager_secret": 0 }, "executor": { - "use_docker": false, + "sandbox-type": "process", "repetitions": 100, "warmup_iters": 0, "pin_threads": false diff --git a/server/executor_manager/executor_process.cpp b/server/executor_manager/executor_process.cpp index 4aaecdf..f6696e5 100644 --- a/server/executor_manager/executor_process.cpp +++ b/server/executor_manager/executor_process.cpp @@ -81,7 +81,7 @@ namespace rfaas::executor_manager { executor_pin_threads = std::to_string(0);//counter++); else executor_pin_threads = std::to_string(exec.pin_threads); - bool use_docker = exec.use_docker; + auto sandbox_type = exec.sandbox_type; std::string mgr_port = std::to_string(conn.port); std::string mgr_secret = std::to_string(conn.secret); @@ -96,12 +96,14 @@ namespace rfaas::executor_manager { mypid = getpid(); auto out_file = ("executor_" + std::to_string(mypid)); - spdlog::info("Child fork begins work on PID {}, using Docker? 
{}", mypid, use_docker); + spdlog::info("Child fork begins work on PID {}, using sandbox {}", mypid, sandbox_serialize(sandbox_type)); int fd = open(out_file.c_str(), O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); dup2(fd, 1); dup2(fd, 2); - if(!use_docker) { - const char * argv[] = { + + std::vector argv; + if(sandbox_type == SandboxType::PROCESS) { + argv = { "executor", "-a", client_addr.c_str(), "-p", client_port.c_str(), @@ -122,13 +124,44 @@ namespace rfaas::executor_manager { "--mgr-buf-rkey", mgr_buf_rkey.c_str(), nullptr }; - int ret = execvp(argv[0], const_cast(&argv[0])); - if(ret == -1) { - spdlog::error("Executor process failed {}, reason {}", errno, strerror(errno)); - close(fd); - exit(1); - } - } else { + } else if(sandbox_type == SandboxType::SARUS) { + argv = { + "sarus", "run", + "--device=/dev/kgni0", + "--device=/dev/kdreg", + "--mount=type=bind,source=/opt/cray,destination=/opt/cray", + "--mount=type=bind,source=/tmp/drcc.sock,destination=/tmp/drcc.sock", + "--mount=type=bind,source=/etc/opt/cray/rdma-credentials,destination=/etc/opt/cray/rdma-credentials", + "--mount=type=bind,source=/scratch/snx3000/mcopik,destination=/scratch/snx3000/mcopik", + "--mount=type=bind,source=/etc/alternatives/cray-ugni,destination=/etc/alternatives/cray-ugni", + "--mount=type=bind,source=/etc/alternatives/cray-xpmem,destination=/etc/alternatives/cray-xpmem", + "--mount=type=bind,source=/etc/alternatives/cray-alps,destination=/etc/alternatives/cray-alps", + "--mount=type=bind,source=/etc/alternatives/cray-udreg,destination=/etc/alternatives/cray-udreg", + "--mount=type=bind,source=/etc/alternatives/cray-wlm_detect,destination=/etc/alternatives/cray-wlm_detect", + "-e", "LD_LIBRARY_PATH=/opt/cray/xpmem/default/lib64/;/opt/cray/udreg/default/lib64;/opt/cray/alps/default/lib64;/opt/cray/wlm_detect/default/lib64/", + "-e", "CREDENTIAL=7029", + "spcleth/hpc-disagg:rfaas-executor-daint", + 
"/scratch/snx3000/mcopik/serverless_hpc/artifact/software/rfaas_libfabric/build_rfaas_debug/bin/executor", + "-a", client_addr.c_str(), + "-p", client_port.c_str(), + "--polling-mgr", "thread", + "-r", executor_repetitions.c_str(), + "-x", executor_recv_buf.c_str(), + "-s", client_in_size.c_str(), + "--pin-threads", executor_pin_threads.c_str(), + "--fast", client_cores.c_str(), + "--warmup-iters", executor_warmups.c_str(), + "--max-inline-data", executor_max_inline.c_str(), + "--func-size", client_func_size.c_str(), + "--timeout", client_timeout.c_str(), + "--mgr-address", conn.addr.c_str(), + "--mgr-port", mgr_port.c_str(), + "--mgr-secret", mgr_secret.c_str(), + "--mgr-buf-addr", mgr_buf_addr.c_str(), + "--mgr-buf-rkey", mgr_buf_rkey.c_str(), + nullptr + }; + } else if(sandbox_type == SandboxType::DOCKER) { //const char * argv[] = { // "docker_rdma_sriov", "run", // "--rm", @@ -159,7 +192,7 @@ namespace rfaas::executor_manager { // "--mgr-buf-rkey", mgr_buf_rkey.c_str(), // nullptr //}; - const char * argv[] = { + argv = { "docker_rdma_sriov", "run", "--rm", "--net=mynet", "-i", //"-it", @@ -189,14 +222,15 @@ namespace rfaas::executor_manager { "--mgr-buf-rkey", mgr_buf_rkey.c_str(), nullptr }; - int ret = execvp(argv[0], const_cast(&argv[0])); - if(ret == -1) { - spdlog::error("Executor process failed {}, reason {}", errno, strerror(errno)); - close(fd); - exit(1); - } + } + int ret = execvp(argv.data()[0], const_cast(&argv.data()[0])); + if(ret == -1) { + spdlog::error("Executor process failed {}, reason {}", errno, strerror(errno)); + close(fd); + exit(1); } + //close(fd); exit(0); } diff --git a/server/executor_manager/settings.cpp b/server/executor_manager/settings.cpp index a5fcc14..33e0c6d 100644 --- a/server/executor_manager/settings.cpp +++ b/server/executor_manager/settings.cpp @@ -5,6 +5,26 @@ namespace rfaas::executor_manager { + SandboxType sandbox_deserialize(std::string type) + { + static std::map sandboxes = { + {"process", SandboxType::PROCESS}, 
+ {"docker", SandboxType::DOCKER}, + {"sarus", SandboxType::SARUS} + }; + return sandboxes.at(type); + } + + std::string sandbox_serialize(SandboxType type) + { + static std::map sandboxes = { + {SandboxType::PROCESS, "process"}, + {SandboxType::DOCKER, "docker"}, + {SandboxType::SARUS, "sarus"} + }; + return sandboxes.at(type); + } + Settings Settings::deserialize(std::istream & in) { Settings settings{}; diff --git a/server/executor_manager/settings.hpp b/server/executor_manager/settings.hpp index 5347c07..7a4f0db 100644 --- a/server/executor_manager/settings.hpp +++ b/server/executor_manager/settings.hpp @@ -3,6 +3,7 @@ #ifndef __RFAAS_EXECUTOR_MANAGER_SETTINGS_HPP__ #define __RFAAS_EXECUTOR_MANAGER_SETTINGS_HPP__ +#include #include #include @@ -11,9 +12,19 @@ namespace rfaas::executor_manager { + enum class SandboxType { + PROCESS = 0, + DOCKER = 1, + SARUS = 2 + }; + + SandboxType sandbox_deserialize(std::string type); + + std::string sandbox_serialize(SandboxType type); + struct ExecutorSettings { - bool use_docker; + SandboxType sandbox_type; int repetitions; int warmup_iters; int recv_buffer_size; @@ -23,10 +34,12 @@ namespace rfaas::executor_manager { template void load(Archive & ar ) { + std::string sandbox_type; ar( - CEREAL_NVP(use_docker), CEREAL_NVP(repetitions), + CEREAL_NVP(sandbox_type), CEREAL_NVP(repetitions), CEREAL_NVP(warmup_iters), CEREAL_NVP(pin_threads) ); + this->sandbox_type = sandbox_deserialize(sandbox_type); } }; From a09ecb4716916484a925b2a5c34a08fc27828348 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Sat, 14 May 2022 02:54:54 +0200 Subject: [PATCH 38/91] [exec-mgr] [rdmalib] Configure CRAY cookies from environment variables --- CMakeLists.txt | 4 ++++ rdmalib/lib/rdmalib.cpp | 14 +++++++------- scripts/setup.sh | 4 ---- scripts/setup.sh.in | 6 ++++++ server/executor_manager/executor_process.cpp | 1 - 5 files changed, 17 insertions(+), 12 deletions(-) delete mode 100644 scripts/setup.sh create mode 100644 scripts/setup.sh.in diff 
--git a/CMakeLists.txt b/CMakeLists.txt index ac063b2..804279e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -269,3 +269,7 @@ if( ${RFAAS_WITH_TESTING} ) include(testing) endif() +if( ${WITH_LIBFABRIC} AND ${WITH_GNI_AUTH}) + configure_file(scripts/setup.sh.in scripts/setup.sh) +endif() + diff --git a/rdmalib/lib/rdmalib.cpp b/rdmalib/lib/rdmalib.cpp index 7580c85..461c5b8 100644 --- a/rdmalib/lib/rdmalib.cpp +++ b/rdmalib/lib/rdmalib.cpp @@ -66,7 +66,7 @@ namespace rdmalib { addrinfo->ep_attr->auth_key = (uint8_t *)malloc(sizeof(cookie)); memcpy(addrinfo->ep_attr->auth_key, &cookie, sizeof(cookie)); addrinfo->ep_attr->auth_key_size = sizeof(cookie); - spdlog::info("Saved cookie {}", cookie); + spdlog::info("Saved Cray credentials cookie {}", cookie); #endif #else memset(&hints, 0, sizeof hints); @@ -81,18 +81,18 @@ namespace rdmalib { } #ifdef USE_GNI_AUTH - void Address::obtain_cookies() { + void Address::obtain_cookies() + { + // Access the credentials and obtain the credential cookie - char buffer[11]; - FILE *file; - impl::expect_nonnull(file = fopen("credential.txt", "r")); - impl::expect_nonnull(fgets(buffer, 11, file)); - fclose(file); + // FIXME: this should be passed explicitly as a paramter + char* buffer = getenv("CRAY_CREDENTIALS"); uint32_t credential = (uint32_t)atoi(buffer); impl::expect_zero(drc_access(credential, 0, &credential_info)); // Obtain the cookie cookie = (uint64_t)drc_get_first_cookie(credential_info)<<32; + } drc_info_handle_t Address::credential_info; uint64_t Address::cookie; diff --git a/scripts/setup.sh b/scripts/setup.sh deleted file mode 100644 index 606535d..0000000 --- a/scripts/setup.sh +++ /dev/null @@ -1,4 +0,0 @@ -module unload PrgEnv-cray -module load PrgEnv-gnu -module load rdma-credentials -srun -l -N1 -n1 benchmarks/credentials \ No newline at end of file diff --git a/scripts/setup.sh.in b/scripts/setup.sh.in new file mode 100644 index 0000000..993eac6 --- /dev/null +++ b/scripts/setup.sh.in @@ -0,0 +1,6 @@ + 
+module load rdma-credentials +srun -l -N1 -n1 ${CMAKE_CURRENT_BINARY_DIR}/benchmarks/credentials +COOKIE=$(cat "credential.txt") +export CRAY_CREDENTIALS=$COOKIE + diff --git a/server/executor_manager/executor_process.cpp b/server/executor_manager/executor_process.cpp index f6696e5..38d430b 100644 --- a/server/executor_manager/executor_process.cpp +++ b/server/executor_manager/executor_process.cpp @@ -139,7 +139,6 @@ namespace rfaas::executor_manager { "--mount=type=bind,source=/etc/alternatives/cray-udreg,destination=/etc/alternatives/cray-udreg", "--mount=type=bind,source=/etc/alternatives/cray-wlm_detect,destination=/etc/alternatives/cray-wlm_detect", "-e", "LD_LIBRARY_PATH=/opt/cray/xpmem/default/lib64/;/opt/cray/udreg/default/lib64;/opt/cray/alps/default/lib64;/opt/cray/wlm_detect/default/lib64/", - "-e", "CREDENTIAL=7029", "spcleth/hpc-disagg:rfaas-executor-daint", "/scratch/snx3000/mcopik/serverless_hpc/artifact/software/rfaas_libfabric/build_rfaas_debug/bin/executor", "-a", client_addr.c_str(), From baf220eadcdc9faed605cb72cb6480624e48c8f8 Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Wed, 18 May 2022 20:33:27 +0200 Subject: [PATCH 39/91] Add a better random walk implementation --- rfaas/include/rfaas/resources.hpp | 2 ++ rfaas/lib/resources.cpp | 12 +++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/rfaas/include/rfaas/resources.hpp b/rfaas/include/rfaas/resources.hpp index d5d06c7..733988d 100644 --- a/rfaas/include/rfaas/resources.hpp +++ b/rfaas/include/rfaas/resources.hpp @@ -2,6 +2,7 @@ #ifndef __RFAAS_RESOURCES_HPP__ #define __RFAAS_RESOURCES_HPP__ +#include #include #include #include @@ -42,6 +43,7 @@ namespace rfaas { struct servers { static std::unique_ptr _instance; + static int current_index; std::vector _data; std::mt19937 _gen; servers(int positions = 0); diff --git a/rfaas/lib/resources.cpp b/rfaas/lib/resources.cpp index 30f6c5f..2635026 100644 --- a/rfaas/lib/resources.cpp +++ b/rfaas/lib/resources.cpp @@ 
-25,7 +25,11 @@ namespace rfaas { servers::servers(int positions) { - _gen = std::mt19937(getpid()); + #ifdef WITH_SCALABILITY + _gen = std::mt19937(0 + std::getenv("SLURM_PROCID")); + #else + _gen = std::mt19937(0); + #endif if(positions) _data.resize(positions); } @@ -39,12 +43,14 @@ namespace rfaas { { // FIXME: random walk // FIXME: take size of server in account - std::uniform_int_distribution dist(0, _data.size()-1); - return {dist(_gen)}; + current_index %= _data.size(); + return { current_index++ }; } servers & servers::instance() { + // Shuffle the servers + std::shuffle(_instance.get()->_data.begin(), _instance.get()->_data.end(), _instance.get()->_gen); return *_instance.get(); } From 8f68023a798231d0c10371316caed089ccc803b4 Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Sat, 21 May 2022 03:39:24 +0200 Subject: [PATCH 40/91] Add small bug fixes and shared queues --- rdmalib/include/rdmalib/connection.hpp | 2 +- rdmalib/include/rdmalib/rdmalib.hpp | 4 ++++ rdmalib/lib/connection.cpp | 17 +++++------------ rdmalib/lib/rdmalib.cpp | 26 ++++++++++++++++++++++++-- rfaas/lib/resources.cpp | 3 ++- 5 files changed, 36 insertions(+), 16 deletions(-) diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index 17b0fe7..fd3b472 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -99,7 +99,7 @@ namespace rdmalib { void inlining(bool enable); #endif #ifdef USE_LIBFABRIC - void initialize(fid_fabric* fabric, fid_domain* pd, fi_info* info, fid_eq* ec); + void initialize(fid_fabric* fabric, fid_domain* pd, fi_info* info, fid_eq* ec, fid_cq* rx_channel, fid_cq* tx_channel); #else void initialize(rdma_cm_id* id); #endif diff --git a/rdmalib/include/rdmalib/rdmalib.hpp b/rdmalib/include/rdmalib/rdmalib.hpp index afbea7d..3cb0a80 100644 --- a/rdmalib/include/rdmalib/rdmalib.hpp +++ b/rdmalib/include/rdmalib/rdmalib.hpp @@ -64,6 +64,8 @@ namespace rdmalib { #ifdef USE_LIBFABRIC fid_eq* 
_ec = nullptr; fid_domain* _pd = nullptr; + fid_cq* _rcv_channel = nullptr; + fid_cq* _trx_channel = nullptr; #else rdma_event_channel * _ec; ibv_pd* _pd; @@ -93,6 +95,8 @@ namespace rdmalib { fid_eq* _ec = nullptr; fid_domain* _pd = nullptr; fid_pep* _pep = nullptr; + fid_cq* _rcv_channel; + fid_cq* _trx_channel; // fi_gni_ops_domain* _ops; #else rdma_event_channel * _ec; diff --git a/rdmalib/lib/connection.cpp b/rdmalib/lib/connection.cpp index 1fac1d9..f32e6d8 100644 --- a/rdmalib/lib/connection.cpp +++ b/rdmalib/lib/connection.cpp @@ -121,7 +121,7 @@ namespace rdmalib { } #ifdef USE_LIBFABRIC - void Connection::initialize(fid_fabric* fabric, fid_domain* pd, fi_info* info, fid_eq* ec) + void Connection::initialize(fid_fabric* fabric, fid_domain* pd, fi_info* info, fid_eq* ec, fid_cq* rx_channel, fid_cq* tx_channel) { // Create the endpoint and set its flags up so that we get completions on RDM impl::expect_zero(fi_endpoint(pd, info, &_qp, reinterpret_cast(this))); @@ -138,16 +138,9 @@ namespace rdmalib { // Bind with the completion queues and the event queue impl::expect_zero(fi_ep_bind(_qp, &ec->fid, 0)); - fi_cq_attr cq_attr; - memset(&cq_attr, 0, sizeof(cq_attr)); - cq_attr.format = FI_CQ_FORMAT_DATA; - cq_attr.wait_obj = FI_WAIT_NONE; - cq_attr.wait_cond = FI_CQ_COND_NONE; - cq_attr.wait_set = nullptr; - cq_attr.size = info->rx_attr->size; - impl::expect_zero(fi_cq_open(pd, &cq_attr, &_trx_channel, nullptr)); + _trx_channel = tx_channel; + _rcv_channel = rx_channel; impl::expect_zero(fi_ep_bind(_qp, &_trx_channel->fid, FI_TRANSMIT)); - impl::expect_zero(fi_cq_open(pd, &cq_attr, &_rcv_channel, nullptr)); impl::expect_zero(fi_ep_bind(_qp, &_rcv_channel->fid, FI_RECV)); // Enable the endpoint @@ -433,7 +426,7 @@ namespace rdmalib { id = id == -1 ? 
_req_count++ : id; SPDLOG_DEBUG("post recv to local Local QPN fid {} connection {}", fmt::ptr(&_qp->fid), fmt::ptr(this)); - int ret; + int ret = 1; for(int i = 0; i < count; ++i) { ret = fi_recvv(_qp, elem.array(), elem.lkeys(), count, temp, reinterpret_cast((uint64_t)id)); if(ret) @@ -462,7 +455,7 @@ namespace rdmalib { wr.num_sge = elem.size(); SPDLOG_DEBUG("post recv to local Local QPN {}",_qp->qp_num); - int ret; + int ret = 1; for(int i = 0; i < count; ++i) { ret = ibv_post_recv(_qp, &wr, &bad); if(ret) diff --git a/rdmalib/lib/rdmalib.cpp b/rdmalib/lib/rdmalib.cpp index 7580c85..4fd22e1 100644 --- a/rdmalib/lib/rdmalib.cpp +++ b/rdmalib/lib/rdmalib.cpp @@ -177,6 +177,17 @@ namespace rdmalib { #ifdef USE_LIBFABRIC // Create a domain (need to do that now so that we can register memory for the domain) impl::expect_zero(fi_domain(_addr.fabric, _addr.addrinfo, &_pd, nullptr)); + + // Create the completion queues + fi_cq_attr cq_attr; + memset(&cq_attr, 0, sizeof(cq_attr)); + cq_attr.format = FI_CQ_FORMAT_DATA; + cq_attr.wait_obj = FI_WAIT_NONE; + cq_attr.wait_cond = FI_CQ_COND_NONE; + cq_attr.wait_set = nullptr; + cq_attr.size = _addr.addrinfo->rx_attr->size; + impl::expect_zero(fi_cq_open(_pd, &cq_attr, &_trx_channel, nullptr)); + impl::expect_zero(fi_cq_open(_pd, &cq_attr, &_rcv_channel, nullptr)); #else // Size of Queue Pair // Maximum requests in send queue @@ -230,7 +241,7 @@ namespace rdmalib { eq_attr.wait_obj = FI_WAIT_NONE; impl::expect_zero(fi_eq_open(_addr.fabric, &eq_attr, &_ec, NULL)); // Create and enable the endpoint together with all the accompanying queues - _conn->initialize(_addr.fabric, _pd, _addr.addrinfo, _ec); + _conn->initialize(_addr.fabric, _pd, _addr.addrinfo, _ec, _rcv_channel, _trx_channel); #else rdma_cm_id* id; impl::expect_zero(rdma_create_ep(&id, _addr.addrinfo, nullptr, nullptr)); @@ -447,6 +458,17 @@ namespace rdmalib { impl::expect_zero(fi_passive_ep(_addr.fabric, _addr.addrinfo, &_pep, NULL)); 
impl::expect_zero(fi_pep_bind(_pep, &(_ec->fid), 0)); impl::expect_zero(fi_listen(_pep)); + + // Create the completion queues + fi_cq_attr cq_attr; + memset(&cq_attr, 0, sizeof(cq_attr)); + cq_attr.format = FI_CQ_FORMAT_DATA; + cq_attr.wait_obj = FI_WAIT_NONE; + cq_attr.wait_cond = FI_CQ_COND_NONE; + cq_attr.wait_set = nullptr; + cq_attr.size = _addr.addrinfo->rx_attr->size; + impl::expect_zero(fi_cq_open(_pd, &cq_attr, &_trx_channel, nullptr)); + impl::expect_zero(fi_cq_open(_pd, &cq_attr, &_rcv_channel, nullptr)); // _ops = (fi_gni_ops_domain *)malloc(sizeof(fi_gni_ops_domain)); // fi_open_ops(&_pd->fid, "FI_GNI_DOMAIN_OPS_1", 0, (void **)*_ops, nullptr); // uint32_t val; @@ -571,7 +593,7 @@ namespace rdmalib { memcpy(entry->info->ep_attr->auth_key, &_addr.cookie, sizeof(_addr.cookie)); entry->info->ep_attr->auth_key_size = sizeof(_addr.cookie); #endif - connection->initialize(_addr.fabric, _pd, entry->info, _ec); + connection->initialize(_addr.fabric, _pd, entry->info, _ec, _rcv_channel, _trx_channel); SPDLOG_DEBUG( "[RDMAPassive] Created connection fid {} qp {}", fmt::ptr(connection->id()), fmt::ptr(&connection->qp()->fid) diff --git a/rfaas/lib/resources.cpp b/rfaas/lib/resources.cpp index 2635026..caa0235 100644 --- a/rfaas/lib/resources.cpp +++ b/rfaas/lib/resources.cpp @@ -10,6 +10,7 @@ namespace rfaas { std::unique_ptr servers::_instance = nullptr; + int servers::current_index = 0; server_data::server_data(): port(-1), @@ -26,7 +27,7 @@ namespace rfaas { servers::servers(int positions) { #ifdef WITH_SCALABILITY - _gen = std::mt19937(0 + std::getenv("SLURM_PROCID")); + _gen = std::mt19937(0 + atoi(std::getenv("SLURM_PROCID"))); #else _gen = std::mt19937(0); #endif From 1c3cdab7643467fd6293c4025f1cbaebba3633dd Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Sat, 21 May 2022 03:43:33 +0200 Subject: [PATCH 41/91] Set the threading safety --- rdmalib/lib/rdmalib.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rdmalib/lib/rdmalib.cpp 
b/rdmalib/lib/rdmalib.cpp index 4fd22e1..9f3ff59 100644 --- a/rdmalib/lib/rdmalib.cpp +++ b/rdmalib/lib/rdmalib.cpp @@ -49,7 +49,7 @@ namespace rdmalib { hints->domain_attr->mr_mode = FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY; hints->ep_attr->type = FI_EP_MSG; hints->fabric_attr->prov_name = strdup("GNI"); - hints->domain_attr->threading = FI_THREAD_DOMAIN; + hints->domain_attr->threading = FI_THREAD_SAFE; hints->domain_attr->resource_mgmt = FI_RM_ENABLED; hints->tx_attr->tclass = FI_TC_LOW_LATENCY; impl::expect_zero(fi_getinfo(FI_VERSION(1, 9), ip.c_str(), std::to_string(port).c_str(), passive ? FI_SOURCE : 0, hints, &addrinfo)); From 1dcb060f21563482c601ace9b858d429b095546d Mon Sep 17 00:00:00 2001 From: Marcin Chrapek Date: Sat, 21 May 2022 04:29:35 +0200 Subject: [PATCH 42/91] Make the counter shared --- rdmalib/include/rdmalib/connection.hpp | 2 +- rdmalib/include/rdmalib/rdmalib.hpp | 2 ++ rdmalib/lib/connection.cpp | 12 +++--------- rdmalib/lib/rdmalib.cpp | 22 ++++++++++++++++++++-- 4 files changed, 26 insertions(+), 12 deletions(-) diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index fd3b472..bbb8464 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -99,7 +99,7 @@ namespace rdmalib { void inlining(bool enable); #endif #ifdef USE_LIBFABRIC - void initialize(fid_fabric* fabric, fid_domain* pd, fi_info* info, fid_eq* ec, fid_cq* rx_channel, fid_cq* tx_channel); + void initialize(fid_fabric* fabric, fid_domain* pd, fi_info* info, fid_eq* ec, fid_cntr* write_cntr, fid_cq* rx_channel, fid_cq* tx_channel); #else void initialize(rdma_cm_id* id); #endif diff --git a/rdmalib/include/rdmalib/rdmalib.hpp b/rdmalib/include/rdmalib/rdmalib.hpp index 3cb0a80..a83aee7 100644 --- a/rdmalib/include/rdmalib/rdmalib.hpp +++ b/rdmalib/include/rdmalib/rdmalib.hpp @@ -66,6 +66,7 @@ namespace rdmalib { fid_domain* _pd = nullptr; fid_cq* _rcv_channel = nullptr; fid_cq* 
_trx_channel = nullptr; + fid_cntr* _write_counter = nullptr; #else rdma_event_channel * _ec; ibv_pd* _pd; @@ -97,6 +98,7 @@ namespace rdmalib { fid_pep* _pep = nullptr; fid_cq* _rcv_channel; fid_cq* _trx_channel; + fid_cntr* _write_counter = nullptr; // fi_gni_ops_domain* _ops; #else rdma_event_channel * _ec; diff --git a/rdmalib/lib/connection.cpp b/rdmalib/lib/connection.cpp index f32e6d8..065722b 100644 --- a/rdmalib/lib/connection.cpp +++ b/rdmalib/lib/connection.cpp @@ -121,19 +121,13 @@ namespace rdmalib { } #ifdef USE_LIBFABRIC - void Connection::initialize(fid_fabric* fabric, fid_domain* pd, fi_info* info, fid_eq* ec, fid_cq* rx_channel, fid_cq* tx_channel) + void Connection::initialize(fid_fabric* fabric, fid_domain* pd, fi_info* info, fid_eq* ec, fid_cntr* write_cntr, fid_cq* rx_channel, fid_cq* tx_channel) { // Create the endpoint and set its flags up so that we get completions on RDM impl::expect_zero(fi_endpoint(pd, info, &_qp, reinterpret_cast(this))); - // Open the counter for write operations - fi_cntr_attr cntr_attr; - cntr_attr.events = FI_CNTR_EVENTS_COMP; - cntr_attr.wait_obj = FI_WAIT_UNSPEC; - cntr_attr.wait_set = nullptr; - cntr_attr.flags = 0; - impl::expect_zero(fi_cntr_open(pd, &cntr_attr, &_write_counter, nullptr)); - impl::expect_zero(fi_cntr_set(_write_counter, 0)); + // Bind the counter for write operations + _write_counter = write_cntr; impl::expect_zero(fi_ep_bind(_qp, &_write_counter->fid, FI_REMOTE_WRITE)); // Bind with the completion queues and the event queue diff --git a/rdmalib/lib/rdmalib.cpp b/rdmalib/lib/rdmalib.cpp index 9f3ff59..f83f6d2 100644 --- a/rdmalib/lib/rdmalib.cpp +++ b/rdmalib/lib/rdmalib.cpp @@ -178,6 +178,15 @@ namespace rdmalib { // Create a domain (need to do that now so that we can register memory for the domain) impl::expect_zero(fi_domain(_addr.fabric, _addr.addrinfo, &_pd, nullptr)); + // Create the counter + fi_cntr_attr cntr_attr; + cntr_attr.events = FI_CNTR_EVENTS_COMP; + cntr_attr.wait_obj = 
FI_WAIT_UNSPEC; + cntr_attr.wait_set = nullptr; + cntr_attr.flags = 0; + impl::expect_zero(fi_cntr_open(_pd, &cntr_attr, &_write_counter, nullptr)); + impl::expect_zero(fi_cntr_set(_write_counter, 0)); + // Create the completion queues fi_cq_attr cq_attr; memset(&cq_attr, 0, sizeof(cq_attr)); @@ -241,7 +250,7 @@ namespace rdmalib { eq_attr.wait_obj = FI_WAIT_NONE; impl::expect_zero(fi_eq_open(_addr.fabric, &eq_attr, &_ec, NULL)); // Create and enable the endpoint together with all the accompanying queues - _conn->initialize(_addr.fabric, _pd, _addr.addrinfo, _ec, _rcv_channel, _trx_channel); + _conn->initialize(_addr.fabric, _pd, _addr.addrinfo, _ec, _write_counter, _rcv_channel, _trx_channel); #else rdma_cm_id* id; impl::expect_zero(rdma_create_ep(&id, _addr.addrinfo, nullptr, nullptr)); @@ -459,6 +468,15 @@ namespace rdmalib { impl::expect_zero(fi_pep_bind(_pep, &(_ec->fid), 0)); impl::expect_zero(fi_listen(_pep)); + // Create the counter + fi_cntr_attr cntr_attr; + cntr_attr.events = FI_CNTR_EVENTS_COMP; + cntr_attr.wait_obj = FI_WAIT_UNSPEC; + cntr_attr.wait_set = nullptr; + cntr_attr.flags = 0; + impl::expect_zero(fi_cntr_open(_pd, &cntr_attr, &_write_counter, nullptr)); + impl::expect_zero(fi_cntr_set(_write_counter, 0)); + // Create the completion queues fi_cq_attr cq_attr; memset(&cq_attr, 0, sizeof(cq_attr)); @@ -593,7 +611,7 @@ namespace rdmalib { memcpy(entry->info->ep_attr->auth_key, &_addr.cookie, sizeof(_addr.cookie)); entry->info->ep_attr->auth_key_size = sizeof(_addr.cookie); #endif - connection->initialize(_addr.fabric, _pd, entry->info, _ec, _rcv_channel, _trx_channel); + connection->initialize(_addr.fabric, _pd, entry->info, _ec, _write_counter, _rcv_channel, _trx_channel); SPDLOG_DEBUG( "[RDMAPassive] Created connection fid {} qp {}", fmt::ptr(connection->id()), fmt::ptr(&connection->qp()->fid) From 6a77a1434ff7a8ccf37f4ca84187f83350394c58 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Tue, 16 Aug 2022 17:46:42 +0200 Subject: [PATCH 43/91] 
[benchmarks] Fix compilation issues with newer GCC versions --- benchmarks/cold_benchmark_opts.cpp | 2 ++ benchmarks/cpp_interface_opts.cpp | 2 ++ benchmarks/parallel_invocations_opts.cpp | 2 ++ benchmarks/warm_benchmark_opts.cpp | 2 ++ 4 files changed, 8 insertions(+) diff --git a/benchmarks/cold_benchmark_opts.cpp b/benchmarks/cold_benchmark_opts.cpp index 96fff52..4901de3 100644 --- a/benchmarks/cold_benchmark_opts.cpp +++ b/benchmarks/cold_benchmark_opts.cpp @@ -1,4 +1,6 @@ +#include + #include #include "cold_benchmark.hpp" diff --git a/benchmarks/cpp_interface_opts.cpp b/benchmarks/cpp_interface_opts.cpp index 2ab27c5..3bc6e2d 100644 --- a/benchmarks/cpp_interface_opts.cpp +++ b/benchmarks/cpp_interface_opts.cpp @@ -1,4 +1,6 @@ +#include + #include #include "cpp_interface.hpp" diff --git a/benchmarks/parallel_invocations_opts.cpp b/benchmarks/parallel_invocations_opts.cpp index 3a5f295..c3f14d5 100644 --- a/benchmarks/parallel_invocations_opts.cpp +++ b/benchmarks/parallel_invocations_opts.cpp @@ -1,4 +1,6 @@ +#include + #include #include "parallel_invocations.hpp" diff --git a/benchmarks/warm_benchmark_opts.cpp b/benchmarks/warm_benchmark_opts.cpp index 7d5205c..3818e79 100644 --- a/benchmarks/warm_benchmark_opts.cpp +++ b/benchmarks/warm_benchmark_opts.cpp @@ -1,4 +1,6 @@ +#include + #include #include "warm_benchmark.hpp" From 19156e7ba7d88e5a3bf20e0b729c6bc3f43500cc Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Mon, 26 Sep 2022 20:18:27 +0200 Subject: [PATCH 44/91] [benchmarks] Replace exporting Cray credential as env variable with storing in JSON --- CMakeLists.txt | 5 ++++- scripts/setup.sh.in | 8 +++++++- 2 files changed, 11 insertions(+), 2 deletions(-) mode change 100644 => 100755 CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt old mode 100644 new mode 100755 index 804279e..d450019 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -270,6 +270,9 @@ if( ${RFAAS_WITH_TESTING} ) endif() if( ${WITH_LIBFABRIC} AND ${WITH_GNI_AUTH}) - 
configure_file(scripts/setup.sh.in scripts/setup.sh) + configure_file( + scripts/setup.sh.in scripts/setup.sh + FILE_PERMISSIONS GROUP_READ GROUP_WRITE GROUP_EXECUTE OWNER_READ OWNER_WRITE OWNER_EXECUTE + ) endif() diff --git a/scripts/setup.sh.in b/scripts/setup.sh.in index 993eac6..e44263f 100644 --- a/scripts/setup.sh.in +++ b/scripts/setup.sh.in @@ -2,5 +2,11 @@ module load rdma-credentials srun -l -N1 -n1 ${CMAKE_CURRENT_BINARY_DIR}/benchmarks/credentials COOKIE=$(cat "credential.txt") -export CRAY_CREDENTIALS=$COOKIE + +echo "Setting credential ${COOKIE}" +for arg in "$@" +do + jq --arg cookie $COOKIE '.configuration = {"authentication_credential": $cookie |fromjson}' $arg > $arg.tmp + mv $arg.tmp $arg +done From abacf92d14c3c58a4ac5e0ee26a9374fde537159 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Mon, 26 Sep 2022 20:20:45 +0200 Subject: [PATCH 45/91] [rfaas] Add Cray credentials to library configuration --- config/devices.json | 5 ++++- rfaas/include/rfaas/devices.hpp | 27 ++++++++++++++++++++++++--- rfaas/lib/devices.cpp | 1 + 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/config/devices.json b/config/devices.json index 2bcbf76..b831c48 100644 --- a/config/devices.json +++ b/config/devices.json @@ -7,5 +7,8 @@ "max_inline_data": 0, "default_receive_buffer_size": 32 } - ] + ], + "configuration": { + "authentication_credential": 0 + } } diff --git a/rfaas/include/rfaas/devices.hpp b/rfaas/include/rfaas/devices.hpp index 9e93254..1bc2710 100644 --- a/rfaas/include/rfaas/devices.hpp +++ b/rfaas/include/rfaas/devices.hpp @@ -20,26 +20,47 @@ namespace rfaas { int port; int16_t max_inline_data; int16_t default_receive_buffer_size; + int32_t authentication_cookie; template void save(Archive & ar) const { ar( CEREAL_NVP(name), CEREAL_NVP(ip_address), CEREAL_NVP(port), - CEREAL_NVP(max_inline_data), CEREAL_NVP(default_receive_buffer_size)); + CEREAL_NVP(max_inline_data), CEREAL_NVP(default_receive_buffer_size) + ); } template void load(Archive & 
ar ) { ar( CEREAL_NVP(name), CEREAL_NVP(ip_address), CEREAL_NVP(port), - CEREAL_NVP(max_inline_data), CEREAL_NVP(default_receive_buffer_size)); + CEREAL_NVP(max_inline_data), CEREAL_NVP(default_receive_buffer_size) + ); + } + }; + + struct platform_configuration + { + int32_t authentication_credential; + + template + void save(Archive & ar) const + { + ar(CEREAL_NVP(authentication_credential)); + } + + template + void load(Archive & ar ) + { + ar(CEREAL_NVP(authentication_credential)); } }; struct devices { static std::unique_ptr _instance; - std::vector _data; + std::vector _data; + platform_configuration _configuration; device_data * device (std::string name) noexcept; static devices & instance(); diff --git a/rfaas/lib/devices.cpp b/rfaas/lib/devices.cpp index fabf5ad..5d93817 100644 --- a/rfaas/lib/devices.cpp +++ b/rfaas/lib/devices.cpp @@ -28,6 +28,7 @@ namespace rfaas { cereal::JSONInputArchive archive_in(in); //archive_in(cereal::make_nvp("devices", *devices::_instance.get())); archive_in(cereal::make_nvp("devices", devices::_instance.get()->_data)); + archive_in(cereal::make_nvp("configuration", devices::_instance.get()->_configuration)); } // void epilogue(cereal::JSONInputArchive& ar, const device_data&) { // std::cout << "test " << ar.getNodeName() << std::endl; From e9b19ab228f7ca64034ed0877d7a0c63677d0b9d Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Mon, 26 Sep 2022 20:21:59 +0200 Subject: [PATCH 46/91] [rdmalib] Change library configuration to accept Cray credentials as an additional parameter instead of environment variables --- rdmalib/include/rdmalib/rdmalib.hpp | 35 +++++++++--- rdmalib/lib/rdmalib.cpp | 83 +++++++++++++++++++++-------- 2 files changed, 90 insertions(+), 28 deletions(-) diff --git a/rdmalib/include/rdmalib/rdmalib.hpp b/rdmalib/include/rdmalib/rdmalib.hpp index a83aee7..c8ba193 100644 --- a/rdmalib/include/rdmalib/rdmalib.hpp +++ b/rdmalib/include/rdmalib/rdmalib.hpp @@ -28,20 +28,43 @@ extern "C" { namespace rdmalib { + 
struct Configuration { + + static Configuration& get_instance(); + + #ifdef USE_GNI_AUTH + void configure_cookie(uint32_t cray_cookie); + std::optional cookie() const; + std::optional credential() const; + bool is_configured() const; + #endif + + private: + + Configuration(); + ~Configuration(); + + std::once_flag _access_flag; + drc_info_handle_t _credential_info; + uint64_t _cookie; + uint32_t _credential; + bool _is_configured; + + static Configuration& _get_instance(); + static Configuration _instance; + + }; + // Implemented as IPV4 struct Address { #ifdef USE_LIBFABRIC fi_info* addrinfo = nullptr; fi_info* hints = nullptr; fid_fabric* fabric = nullptr; + std::string _ip; #ifdef USE_GNI_AUTH - std::once_flag access_flag; - std::once_flag release_flag; - void obtain_cookies(); - static drc_info_handle_t credential_info; - static uint64_t cookie; + uint64_t cookie; #endif - std::string _ip; #else rdma_addrinfo *addrinfo; rdma_addrinfo hints; diff --git a/rdmalib/lib/rdmalib.cpp b/rdmalib/lib/rdmalib.cpp index d81ff1e..7861be0 100644 --- a/rdmalib/lib/rdmalib.cpp +++ b/rdmalib/lib/rdmalib.cpp @@ -35,6 +35,61 @@ extern "C" { namespace rdmalib { + Configuration::Configuration(): + _is_configured(false) + {} + + Configuration::~Configuration() + { + #ifdef USE_GNI_AUTH + drc_release_local(&_credential_info); + #endif + } + + Configuration& Configuration::get_instance() + { + return _get_instance(); + } + + Configuration& Configuration::_get_instance() + { + static Configuration _instance; + return _instance; + } + + void Configuration::configure_cookie(uint32_t credential) + { + Configuration& inst = _get_instance(); + std::call_once(_access_flag, [&]() { + + impl::expect_zero(drc_access(credential, 0, &inst._credential_info)); + + _credential = credential; + // Obtain the cookie + _cookie = (uint64_t)drc_get_first_cookie(inst._credential_info) << 32; + + _is_configured = true; + + }); + } + + std::optional Configuration::cookie() const + { + return (_is_configured 
? _cookie : std::optional{}); + } + + std::optional Configuration::credential() const + { + return (_is_configured ? _credential: std::optional{}); + } + + bool Configuration::is_configured() const + { + return _is_configured; + } + + Configuration Configuration::_instance; + // FIXME: Add credential support Address::Address(const std::string & ip, int port, bool passive) { @@ -55,9 +110,14 @@ namespace rdmalib { impl::expect_zero(fi_getinfo(FI_VERSION(1, 9), ip.c_str(), std::to_string(port).c_str(), passive ? FI_SOURCE : 0, hints, &addrinfo)); fi_freeinfo(hints); impl::expect_zero(fi_fabric(addrinfo->fabric_attr, &fabric, nullptr)); + std::cerr << "True" << std::endl; #ifdef USE_GNI_AUTH + // Obtain the cookies once per process - std::call_once(access_flag, &Address::obtain_cookies, this); + auto c = Configuration::get_instance().cookie(); + // spdlog::error("GNI authentication cookie has not been configured!"); + impl::expect_true(c.has_value()); + cookie = c.value(); // Set the hints to have the cookies addrinfo->domain_attr->auth_key = (uint8_t *)malloc(sizeof(cookie)); @@ -80,24 +140,6 @@ namespace rdmalib { this->_ip = ip; } - #ifdef USE_GNI_AUTH - void Address::obtain_cookies() - { - - // Access the credentials and obtain the credential cookie - // FIXME: this should be passed explicitly as a paramter - char* buffer = getenv("CRAY_CREDENTIALS"); - uint32_t credential = (uint32_t)atoi(buffer); - impl::expect_zero(drc_access(credential, 0, &credential_info)); - - // Obtain the cookie - cookie = (uint64_t)drc_get_first_cookie(credential_info)<<32; - - } - drc_info_handle_t Address::credential_info; - uint64_t Address::cookie; - #endif - Address::Address(const std::string & sip, const std::string & dip, int port) { struct sockaddr_in server_in, local_in; @@ -160,9 +202,6 @@ namespace rdmalib { // impl::expect_zero(fi_close(&fabric->fid)); // if (addrinfo) // fi_freeinfo(addrinfo); - #ifdef USE_GNI_AUTH - std::call_once(release_flag, drc_release_local, 
&credential_info); - #endif #else rdma_freeaddrinfo(addrinfo); #endif From 0b924eb951250dee6336c091639209c73d1a5468 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Mon, 26 Sep 2022 20:30:34 +0200 Subject: [PATCH 47/91] [exec-mgr] [exec] Use the new of initializing Cray credentials --- benchmarks/warm_benchmark.cpp | 4 ++++ server/executor/cli.cpp | 9 +++++++-- server/executor/opts.cpp | 6 ++++++ server/executor/server.hpp | 3 +++ server/executor_manager/cli.cpp | 4 ++++ server/executor_manager/executor_process.cpp | 18 +++++++++++++++++- 6 files changed, 41 insertions(+), 3 deletions(-) diff --git a/benchmarks/warm_benchmark.cpp b/benchmarks/warm_benchmark.cpp index d6e0e06..e19bbd5 100644 --- a/benchmarks/warm_benchmark.cpp +++ b/benchmarks/warm_benchmark.cpp @@ -33,6 +33,10 @@ int main(int argc, char ** argv) rfaas::devices::deserialize(in_dev); in_dev.close(); + rdmalib::Configuration::get_instance().configure_cookie( + rfaas::devices::instance()._configuration.authentication_credential + ); + // Read benchmark settings std::ifstream benchmark_cfg{opts.json_config}; rfaas::benchmark::Settings settings = rfaas::benchmark::Settings::deserialize(benchmark_cfg); diff --git a/server/executor/cli.cpp b/server/executor/cli.cpp index d53c328..a5f9f9f 100644 --- a/server/executor/cli.cpp +++ b/server/executor/cli.cpp @@ -37,9 +37,14 @@ int main(int argc, char ** argv) opts.timeout ); spdlog::info( - "My manager runs at {}:{}, its secret is {}, the accounting buffer is at {} with rkey {}", + "My manager runs at {}:{}, its secret is {}, the accounting buffer is at {} with rkey {}, cookie {}", opts.mgr_address, opts.mgr_port, opts.mgr_secret, - opts.accounting_buffer_addr, opts.accounting_buffer_rkey + opts.accounting_buffer_addr, opts.accounting_buffer_rkey, + opts.authentication_cookie + ); + + rdmalib::Configuration::get_instance().configure_cookie( + opts.authentication_cookie ); executor::ManagerConnection mgr{ diff --git a/server/executor/opts.cpp 
b/server/executor/opts.cpp index 7c4797f..c8f9f65 100644 --- a/server/executor/opts.cpp +++ b/server/executor/opts.cpp @@ -34,6 +34,9 @@ namespace server { #else ("mgr-buf-rkey", "Use selected port", cxxopts::value()) #endif + #ifdef USE_GNI_AUTH + ("authentication-cookie", "Use selected port", cxxopts::value()) + #endif ; auto parsed_options = options.parse(argc, argv); @@ -61,6 +64,9 @@ namespace server { #else result.accounting_buffer_rkey = parsed_options["mgr-buf-rkey"].as(); #endif + #ifdef USE_GNI_AUTH + result.authentication_cookie = parsed_options["authentication-cookie"].as(); + #endif std::string polling_mgr = parsed_options["polling-mgr"].as(); if(polling_mgr == "server") { diff --git a/server/executor/server.hpp b/server/executor/server.hpp index dab5b6a..c566cc4 100644 --- a/server/executor/server.hpp +++ b/server/executor/server.hpp @@ -63,6 +63,9 @@ namespace server { #else uint32_t accounting_buffer_rkey; #endif + #ifdef USE_GNI_AUTH + uint32_t authentication_cookie; + #endif }; Options opts(int argc, char ** argv); diff --git a/server/executor_manager/cli.cpp b/server/executor_manager/cli.cpp index 69a21b6..d86073b 100644 --- a/server/executor_manager/cli.cpp +++ b/server/executor_manager/cli.cpp @@ -52,6 +52,10 @@ int main(int argc, char ** argv) std::ifstream in_dev{opts.device_database}; rfaas::devices::deserialize(in_dev); + rdmalib::Configuration::get_instance().configure_cookie( + rfaas::devices::instance()._configuration.authentication_credential + ); + // Read executor manager settings std::ifstream in_cfg{opts.json_config}; rfaas::executor_manager::Settings settings = rfaas::executor_manager::Settings::deserialize(in_cfg); diff --git a/server/executor_manager/executor_process.cpp b/server/executor_manager/executor_process.cpp index 38d430b..352e95c 100644 --- a/server/executor_manager/executor_process.cpp +++ b/server/executor_manager/executor_process.cpp @@ -8,6 +8,7 @@ #include #include +#include #include "executor_process.hpp" #include 
"settings.hpp" @@ -87,6 +88,9 @@ namespace rfaas::executor_manager { std::string mgr_secret = std::to_string(conn.secret); std::string mgr_buf_addr = std::to_string(conn.r_addr); std::string mgr_buf_rkey = std::to_string(conn.r_key); + #ifdef USE_GNI_AUTH + std::string authentication_cookie = std::to_string(rdmalib::Configuration::get_instance().credential().value()); + #endif int mypid = vfork(); if(mypid < 0) { @@ -122,6 +126,9 @@ namespace rfaas::executor_manager { "--mgr-secret", mgr_secret.c_str(), "--mgr-buf-addr", mgr_buf_addr.c_str(), "--mgr-buf-rkey", mgr_buf_rkey.c_str(), + #ifdef USE_GNI_AUTH + "--authentication-cookie", authentication_cookie.c_str(), + #endif nullptr }; } else if(sandbox_type == SandboxType::SARUS) { @@ -133,6 +140,7 @@ namespace rfaas::executor_manager { "--mount=type=bind,source=/tmp/drcc.sock,destination=/tmp/drcc.sock", "--mount=type=bind,source=/etc/opt/cray/rdma-credentials,destination=/etc/opt/cray/rdma-credentials", "--mount=type=bind,source=/scratch/snx3000/mcopik,destination=/scratch/snx3000/mcopik", + "--mount=type=bind,source=/project/g34/mcopik,destination=/project/g34/mcopik", "--mount=type=bind,source=/etc/alternatives/cray-ugni,destination=/etc/alternatives/cray-ugni", "--mount=type=bind,source=/etc/alternatives/cray-xpmem,destination=/etc/alternatives/cray-xpmem", "--mount=type=bind,source=/etc/alternatives/cray-alps,destination=/etc/alternatives/cray-alps", @@ -140,7 +148,8 @@ namespace rfaas::executor_manager { "--mount=type=bind,source=/etc/alternatives/cray-wlm_detect,destination=/etc/alternatives/cray-wlm_detect", "-e", "LD_LIBRARY_PATH=/opt/cray/xpmem/default/lib64/;/opt/cray/udreg/default/lib64;/opt/cray/alps/default/lib64;/opt/cray/wlm_detect/default/lib64/", "spcleth/hpc-disagg:rfaas-executor-daint", - "/scratch/snx3000/mcopik/serverless_hpc/artifact/software/rfaas_libfabric/build_rfaas_debug/bin/executor", + 
//"/scratch/snx3000/mcopik/serverless_hpc/artifact/software/rfaas_libfabric/build_rfaas_debug/bin/executor", + "/scratch/snx3000/mcopik/serverless_disaggregation/build_rfaas_libfabric/bin/executor", "-a", client_addr.c_str(), "-p", client_port.c_str(), "--polling-mgr", "thread", @@ -158,6 +167,9 @@ namespace rfaas::executor_manager { "--mgr-secret", mgr_secret.c_str(), "--mgr-buf-addr", mgr_buf_addr.c_str(), "--mgr-buf-rkey", mgr_buf_rkey.c_str(), + #ifdef USE_GNI_AUTH + "--authentication-cookie", authentication_cookie.c_str(), + #endif nullptr }; } else if(sandbox_type == SandboxType::DOCKER) { @@ -223,6 +235,10 @@ namespace rfaas::executor_manager { }; } + for(const char* str : argv) + std::cerr << str << std::endl; + + int ret = execvp(argv.data()[0], const_cast(&argv.data()[0])); if(ret == -1) { spdlog::error("Executor process failed {}, reason {}", errno, strerror(errno)); From 8cca2dda1bc4f9099e377e1400891566cc8c8374 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Tue, 27 Sep 2022 23:56:34 +0200 Subject: [PATCH 48/91] [rdmalib] Add string formatting function --- rdmalib/include/rdmalib/util.hpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/rdmalib/include/rdmalib/util.hpp b/rdmalib/include/rdmalib/util.hpp index f82c7fb..de295ea 100644 --- a/rdmalib/include/rdmalib/util.hpp +++ b/rdmalib/include/rdmalib/util.hpp @@ -78,6 +78,19 @@ namespace rdmalib { namespace impl { assert(ptr); } + // Code borrowed from StackOverflow https://stackoverflow.com/questions/2342162/stdstring-formatting-like-sprintf + template + std::string string_format( const std::string& format, Args ... args ) + { + int size_s = std::snprintf( nullptr, 0, format.c_str(), args ... 
) + 1; + if(size_s <= 0) + return ""; + auto size = static_cast(size_s); + std::unique_ptr buf(new char[size]); + std::snprintf(buf.get(), size, format.c_str(), args...); + return std::string(buf.get(), buf.get() + size - 1); + } + }} #endif From d5c120e812b358eef1e368fcace070d2b7a44074 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Wed, 28 Sep 2022 00:19:20 +0200 Subject: [PATCH 49/91] [rdmalib] Remove debug printout --- rdmalib/lib/rdmalib.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/rdmalib/lib/rdmalib.cpp b/rdmalib/lib/rdmalib.cpp index 7861be0..fe8864d 100644 --- a/rdmalib/lib/rdmalib.cpp +++ b/rdmalib/lib/rdmalib.cpp @@ -110,7 +110,6 @@ namespace rdmalib { impl::expect_zero(fi_getinfo(FI_VERSION(1, 9), ip.c_str(), std::to_string(port).c_str(), passive ? FI_SOURCE : 0, hints, &addrinfo)); fi_freeinfo(hints); impl::expect_zero(fi_fabric(addrinfo->fabric_attr, &fabric, nullptr)); - std::cerr << "True" << std::endl; #ifdef USE_GNI_AUTH // Obtain the cookies once per process From ee0632816823ba9a7deb767a5e0fd038fe939074 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Wed, 28 Sep 2022 02:18:53 +0200 Subject: [PATCH 50/91] [tools] Make the Cray credentials script properly executable --- scripts/setup.sh.in | 1 + 1 file changed, 1 insertion(+) mode change 100644 => 100755 scripts/setup.sh.in diff --git a/scripts/setup.sh.in b/scripts/setup.sh.in old mode 100644 new mode 100755 index e44263f..3c4cec4 --- a/scripts/setup.sh.in +++ b/scripts/setup.sh.in @@ -1,3 +1,4 @@ +#!/bin/bash module load rdma-credentials srun -l -N1 -n1 ${CMAKE_CURRENT_BINARY_DIR}/benchmarks/credentials From 453a1cdee202522a87e4d91d2ddb03f4f45dec47 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Wed, 28 Sep 2022 03:05:03 +0200 Subject: [PATCH 51/91] [util] Support strings in formatting --- rdmalib/include/rdmalib/util.hpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/rdmalib/include/rdmalib/util.hpp b/rdmalib/include/rdmalib/util.hpp index 
de295ea..ec68a6c 100644 --- a/rdmalib/include/rdmalib/util.hpp +++ b/rdmalib/include/rdmalib/util.hpp @@ -78,16 +78,22 @@ namespace rdmalib { namespace impl { assert(ptr); } + template + const char* to_cstr(const StrType & str) + { + return str.c_str(); + } + // Code borrowed from StackOverflow https://stackoverflow.com/questions/2342162/stdstring-formatting-like-sprintf template - std::string string_format( const std::string& format, Args ... args ) + std::string string_format( const std::string& format, Args... args ) { - int size_s = std::snprintf( nullptr, 0, format.c_str(), args ... ) + 1; + int size_s = std::snprintf( nullptr, 0, format.c_str(), to_cstr(args)... ) + 1; if(size_s <= 0) return ""; auto size = static_cast(size_s); std::unique_ptr buf(new char[size]); - std::snprintf(buf.get(), size, format.c_str(), args...); + std::snprintf(buf.get(), size, format.c_str(), to_cstr(args)...); return std::string(buf.get(), buf.get() + size - 1); } From 562f616e1ccb7cc8965e99ae726075666e1d4240 Mon Sep 17 00:00:00 2001 From: Marcin Copik Date: Wed, 28 Sep 2022 03:05:47 +0200 Subject: [PATCH 52/91] [executor-manager] Implement JSON-based additional configuration of container parameters --- config/executor_manager.json | 31 ++++++++++- server/executor_manager/executor_process.cpp | 48 +++++++++-------- server/executor_manager/opts.cpp | 1 + server/executor_manager/settings.cpp | 48 +++++++++++++++++ server/executor_manager/settings.hpp | 56 ++++++++++++++++++++ 5 files changed, 162 insertions(+), 22 deletions(-) diff --git a/config/executor_manager.json b/config/executor_manager.json index dc6a619..bbb1b86 100644 --- a/config/executor_manager.json +++ b/config/executor_manager.json @@ -11,6 +11,35 @@ "repetitions": 100, "warmup_iters": 0, "pin_threads": false - } + }, + "sandbox-configuration": [ + { + "key": "sarus", + "value": { + "devices": [ + "/dev/kgni0", "dev/kdreg" + ], + "mounts": [ + "/opt/cray", + "/tmp/drcc.sock", + "/etc/opt/cray/rdma-credentials", + 
"/etc/alternatives/cray-ugni", + "/etc/alternatives/cray-xpmem", + "/etc/alternatives/cray-alps", + "/etc/alternatives/cray-udreg", + "/etc/alternatives/cray-wlm_detect" + ], + "mount_filesystem": [ + "/scratch/snx3000/{user}" + ], + "env": [ + { + "key": "LD_LIBRARY_PATH", + "value": "/opt/cray/xpmem/default/lib64/;/opt/cray/udreg/default/lib64;/opt/cray/alps/default/lib64;/opt/cray/wlm_detect/default/lib64/" + } + ] + } + } + ] } diff --git a/server/executor_manager/executor_process.cpp b/server/executor_manager/executor_process.cpp index 352e95c..8e0bf57 100644 --- a/server/executor_manager/executor_process.cpp +++ b/server/executor_manager/executor_process.cpp @@ -1,4 +1,5 @@ +#include #include #include @@ -105,7 +106,8 @@ namespace rfaas::executor_manager { dup2(fd, 1); dup2(fd, 2); - std::vector argv; + std::vector argv; + std::vector additional_args; if(sandbox_type == SandboxType::PROCESS) { argv = { "executor", @@ -132,24 +134,18 @@ namespace rfaas::executor_manager { nullptr }; } else if(sandbox_type == SandboxType::SARUS) { + argv = { - "sarus", "run", - "--device=/dev/kgni0", - "--device=/dev/kdreg", - "--mount=type=bind,source=/opt/cray,destination=/opt/cray", - "--mount=type=bind,source=/tmp/drcc.sock,destination=/tmp/drcc.sock", - "--mount=type=bind,source=/etc/opt/cray/rdma-credentials,destination=/etc/opt/cray/rdma-credentials", - "--mount=type=bind,source=/scratch/snx3000/mcopik,destination=/scratch/snx3000/mcopik", - "--mount=type=bind,source=/project/g34/mcopik,destination=/project/g34/mcopik", - "--mount=type=bind,source=/etc/alternatives/cray-ugni,destination=/etc/alternatives/cray-ugni", - "--mount=type=bind,source=/etc/alternatives/cray-xpmem,destination=/etc/alternatives/cray-xpmem", - "--mount=type=bind,source=/etc/alternatives/cray-alps,destination=/etc/alternatives/cray-alps", - "--mount=type=bind,source=/etc/alternatives/cray-udreg,destination=/etc/alternatives/cray-udreg", - 
"--mount=type=bind,source=/etc/alternatives/cray-wlm_detect,destination=/etc/alternatives/cray-wlm_detect", - "-e", "LD_LIBRARY_PATH=/opt/cray/xpmem/default/lib64/;/opt/cray/udreg/default/lib64;/opt/cray/alps/default/lib64;/opt/cray/wlm_detect/default/lib64/", - "spcleth/hpc-disagg:rfaas-executor-daint", - //"/scratch/snx3000/mcopik/serverless_hpc/artifact/software/rfaas_libfabric/build_rfaas_debug/bin/executor", - "/scratch/snx3000/mcopik/serverless_disaggregation/build_rfaas_libfabric/bin/executor", + "sarus", "run" + }; + + exec.sandbox_config->generate_args(argv, exec.sandbox_user); + + argv.emplace_back(exec.sandbox_name); + + argv.emplace_back(exec.sandbox_config->get_executor_path()); + + additional_args = { "-a", client_addr.c_str(), "-p", client_port.c_str(), "--polling-mgr", "thread", @@ -172,6 +168,7 @@ namespace rfaas::executor_manager { #endif nullptr }; + } else if(sandbox_type == SandboxType::DOCKER) { //const char * argv[] = { // "docker_rdma_sriov", "run", @@ -235,11 +232,20 @@ namespace rfaas::executor_manager { }; } - for(const char* str : argv) - std::cerr << str << std::endl; + std::vector cstrings_argv; + std::transform(argv.begin(), argv.end(), std::back_inserter(cstrings_argv), + [](const std::string & input) -> const char* { + return input.c_str(); + } + ); + std::copy(additional_args.begin(), additional_args.end(), std::back_inserter(cstrings_argv)); + SPDLOG_DEBUG("Executor launch arguments"); + for(const char* str : cstrings_argv) + if(str) + SPDLOG_DEBUG(str); - int ret = execvp(argv.data()[0], const_cast(&argv.data()[0])); + int ret = execvp(cstrings_argv.data()[0], const_cast(&cstrings_argv.data()[0])); if(ret == -1) { spdlog::error("Executor process failed {}, reason {}", errno, strerror(errno)); close(fd); diff --git a/server/executor_manager/opts.cpp b/server/executor_manager/opts.cpp index c1712d0..cb90554 100644 --- a/server/executor_manager/opts.cpp +++ b/server/executor_manager/opts.cpp @@ -12,6 +12,7 @@ namespace 
rfaas::executor_manager { ("c,config", "JSON input config.", cxxopts::value()) ("device-database", "JSON configuration of devices.", cxxopts::value()) ("skip-resource-manager", "Ignore resource manager and don't connect to it.", cxxopts::value()->default_value("false")) + ("user", "Name of OS users to be used for launching containres", cxxopts::value()) ("v,verbose", "Verbose output", cxxopts::value()->default_value("false")) ; auto parsed_options = options.parse(argc, argv); diff --git a/server/executor_manager/settings.cpp b/server/executor_manager/settings.cpp index 33e0c6d..90c02ca 100644 --- a/server/executor_manager/settings.cpp +++ b/server/executor_manager/settings.cpp @@ -1,6 +1,11 @@ +#include +#include + #include +#include + #include "settings.hpp" namespace rfaas::executor_manager { @@ -31,6 +36,7 @@ namespace rfaas::executor_manager { cereal::JSONInputArchive archive_in(in); archive_in(cereal::make_nvp("config", settings)); archive_in(cereal::make_nvp("executor", settings.exec)); + archive_in(cereal::make_nvp("sandbox-configuration", settings.sandboxes)); // read RDMA device details rfaas::device_data * dev = rfaas::devices::instance().device(settings.rdma_device); @@ -44,8 +50,50 @@ namespace rfaas::executor_manager { settings.exec.max_inline_data = dev->max_inline_data; settings.exec.recv_buffer_size = dev->default_receive_buffer_size; + settings.exec.sandbox_config = &settings.sandboxes.at(settings.exec.sandbox_type); + // FIXME: should be sent with request + settings.exec.sandbox_user = "mcopik"; + settings.exec.sandbox_name = "spcleth/hpc-disagg:rfaas-executor-daint"; + for(auto & mount : settings.exec.sandbox_config->mounts) + std::cerr << mount << std::endl; + return settings; } + void SandboxConfiguration::generate_args(std::vector & args, const std::string & user) const + { + for(auto & dev : this->devices) + args.emplace_back(rdmalib::impl::string_format("--device=%s", dev)); + + for(auto & mount : this->mount_filesystem) { + std::string 
user_partition{mount}; + user_partition = std::regex_replace(user_partition, std::regex{R"(\{user\})"}, user); + args.emplace_back(rdmalib::impl::string_format("--mount=type=bind,source=%s,destination=%s", user_partition, user_partition)); + } + + for(auto & mount : this->mounts) + args.emplace_back(rdmalib::impl::string_format("--mount=type=bind,source=%s,destination=%s", mount, mount)); + + for(auto & [key, value] : this->env) { + args.emplace_back("-e"); + args.emplace_back(rdmalib::impl::string_format("%s=%s", key, value)); + } + + } + + std::string SandboxConfiguration::get_executor_path() const + { + // Horrible hack - we need to get the location of the executor. + // We assume that rFaaS is built on the shared filesystem that is mounted + // in the container. + // Furtheermore, since rFaaS is built in a single directory, we know + // that executor should be located in the same directory as + // executor_manager. + + // This works only on Linux! + auto path = std::filesystem::canonical("/proc/self/exe").parent_path(); + return path / "executor"; + } + } diff --git a/server/executor_manager/settings.hpp b/server/executor_manager/settings.hpp index 7a4f0db..72cbbac 100644 --- a/server/executor_manager/settings.hpp +++ b/server/executor_manager/settings.hpp @@ -9,6 +9,8 @@ #include #include +#include +#include namespace rfaas::executor_manager { @@ -22,9 +24,44 @@ namespace rfaas::executor_manager { std::string sandbox_serialize(SandboxType type); +} + +namespace rfaas::executor_manager { + + struct SandboxConfiguration + { + std::vector devices; + std::vector mounts; + std::vector mount_filesystem; + std::map env; + + template + void load(Archive & ar ) + { + ar( + CEREAL_NVP(devices), CEREAL_NVP(mounts), + CEREAL_NVP(mount_filesystem), CEREAL_NVP(env) + ); + } + + void generate_args(std::vector & args, const std::string & user) const; + + /** + * In the Sarus container, we cannot build the executor since we need to + * compile with Cray headers. 
+ * Thus, we have to mount the rFaaS build directory and access executor + * this way. + **/ + std::string get_executor_path() const; + }; + struct ExecutorSettings { SandboxType sandbox_type; + SandboxConfiguration* sandbox_config; + std::string sandbox_user; + std::string sandbox_name; + int repetitions; int warmup_iters; int recv_buffer_size; @@ -59,6 +96,8 @@ namespace rfaas::executor_manager { // Passed to the scheduled executor ExecutorSettings exec; + std::map sandboxes; + template void load(Archive & ar ) { @@ -74,4 +113,21 @@ namespace rfaas::executor_manager { } +namespace cereal +{ + + template inline + std::string save_minimal(Archive const &, rfaas::executor_manager::SandboxType const & t) + { + return rfaas::executor_manager::sandbox_serialize(t); + } + + template inline + void load_minimal( Archive const &, rfaas::executor_manager::SandboxType & t, std::string const & value) + { + t = rfaas::executor_manager::sandbox_deserialize(value); + } + +} + #endif From d1e334cff182a9e5a253548c608ae6abe0d15b5c Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Tue, 6 Jun 2023 12:19:23 -0400 Subject: [PATCH 53/91] Fix compiler errors and add gitignore --- .gitignore | 54 +++++++++++++++++++++++++++++ benchmarks/warm_benchmark.cpp | 2 ++ rdmalib/include/rdmalib/buffer.hpp | 9 ++++- rdmalib/include/rdmalib/rdmalib.hpp | 4 ++- rdmalib/lib/buffer.cpp | 6 ++++ rdmalib/lib/rdmalib.cpp | 4 +++ server/executor/cli.cpp | 10 +++++- server/executor_manager/cli.cpp | 2 ++ 8 files changed, 88 insertions(+), 3 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b39f2b9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,54 @@ +# Prerequisites +*.d + +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod +*.smod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app + 
+# CMake +CMakeLists.txt.user +CMakeCache.txt +CMakeFiles +CMakeScripts +Testing +Makefile +cmake_install.cmake +install_manifest.txt +compile_commands.json +CTestTestfile.cmake +_deps +bin/ +configuration/ +volumes/ +containers/config/htpasswd + +benchmarks/warm_benchmarker +benchmarks/parallel_invocations +benchmarks/cold_benchmarker +benchmarks/cpp_interface diff --git a/benchmarks/warm_benchmark.cpp b/benchmarks/warm_benchmark.cpp index e19bbd5..aeaa3b4 100644 --- a/benchmarks/warm_benchmark.cpp +++ b/benchmarks/warm_benchmark.cpp @@ -33,9 +33,11 @@ int main(int argc, char ** argv) rfaas::devices::deserialize(in_dev); in_dev.close(); + #ifdef USE_GNI_AUTH rdmalib::Configuration::get_instance().configure_cookie( rfaas::devices::instance()._configuration.authentication_credential ); + #endif // Read benchmark settings std::ifstream benchmark_cfg{opts.json_config}; diff --git a/rdmalib/include/rdmalib/buffer.hpp b/rdmalib/include/rdmalib/buffer.hpp index f821757..1d3a032 100644 --- a/rdmalib/include/rdmalib/buffer.hpp +++ b/rdmalib/include/rdmalib/buffer.hpp @@ -61,10 +61,11 @@ namespace rdmalib { #endif #ifdef USE_LIBFABRIC void *lkey() const; + uint64_t rkey() const; #else uint32_t lkey() const; + uint32_t rkey() const; #endif - uint64_t rkey() const; ScatterGatherElement sge(uint32_t size, uint32_t offset) const; }; @@ -79,6 +80,12 @@ namespace rdmalib { // When accessing the remote buffer, we might not need to know the size. 
RemoteBuffer(uintptr_t addr, uint64_t rkey, uint32_t size = 0); + #ifdef USE_LIBFABRIC + RemoteBuffer(uintptr_t addr, uint64_t rkey, uint32_t size); + #else + RemoteBuffer(uintptr_t addr, uint32_t rkey, uint32_t size); + #endif + template void serialize(Archive & ar) { diff --git a/rdmalib/include/rdmalib/rdmalib.hpp b/rdmalib/include/rdmalib/rdmalib.hpp index c8ba193..a305e3f 100644 --- a/rdmalib/include/rdmalib/rdmalib.hpp +++ b/rdmalib/include/rdmalib/rdmalib.hpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #ifdef USE_LIBFABRIC @@ -14,7 +15,6 @@ #include // #include #ifdef USE_GNI_AUTH -#include extern "C" { #include "rdmacred.h" } @@ -45,7 +45,9 @@ namespace rdmalib { ~Configuration(); std::once_flag _access_flag; + #ifdef USE_GNI_AUTH drc_info_handle_t _credential_info; + #endif uint64_t _cookie; uint32_t _credential; bool _is_configured; diff --git a/rdmalib/lib/buffer.cpp b/rdmalib/lib/buffer.cpp index 1db48ed..0bacdaf 100644 --- a/rdmalib/lib/buffer.cpp +++ b/rdmalib/lib/buffer.cpp @@ -247,6 +247,12 @@ namespace rdmalib { size(0) {} + RemoteBuffer::RemoteBuffer(uintptr_t addr, uint64_t rkey, uint32_t size): + addr(addr), + rkey(rkey), + size(size) + {} + #ifdef USE_LIBFABRIC RemoteBuffer::RemoteBuffer(uintptr_t addr, uint64_t rkey, uint32_t size): addr(addr), diff --git a/rdmalib/lib/rdmalib.cpp b/rdmalib/lib/rdmalib.cpp index fe8864d..822a24a 100644 --- a/rdmalib/lib/rdmalib.cpp +++ b/rdmalib/lib/rdmalib.cpp @@ -57,6 +57,7 @@ namespace rdmalib { return _instance; } + #ifdef USE_GNI_AUTH void Configuration::configure_cookie(uint32_t credential) { Configuration& inst = _get_instance(); @@ -87,6 +88,7 @@ namespace rdmalib { { return _is_configured; } + #endif Configuration Configuration::_instance; @@ -136,7 +138,9 @@ namespace rdmalib { impl::expect_zero(rdma_getaddrinfo(ip.c_str(), std::to_string(port).c_str(), &hints, &addrinfo)); #endif this->_port = port; + #ifdef USE_LIBFABRIC this->_ip = ip; + #endif } Address::Address(const 
std::string & sip, const std::string & dip, int port) diff --git a/server/executor/cli.cpp b/server/executor/cli.cpp index a5f9f9f..73ef522 100644 --- a/server/executor/cli.cpp +++ b/server/executor/cli.cpp @@ -36,16 +36,24 @@ int main(int argc, char ** argv) opts.func_size, opts.msg_size, opts.recv_buffer_size, opts.max_inline_data, opts.timeout ); + + #ifdef USE_GNI_AUTH spdlog::info( "My manager runs at {}:{}, its secret is {}, the accounting buffer is at {} with rkey {}, cookie {}", opts.mgr_address, opts.mgr_port, opts.mgr_secret, opts.accounting_buffer_addr, opts.accounting_buffer_rkey, opts.authentication_cookie ); - rdmalib::Configuration::get_instance().configure_cookie( opts.authentication_cookie ); + #else + spdlog::info( + "My manager runs at {}:{}, its secret is {}, the accounting buffer is at {} with rkey {}", + opts.mgr_address, opts.mgr_port, opts.mgr_secret, + opts.accounting_buffer_addr, opts.accounting_buffer_rkey + ); + #endif executor::ManagerConnection mgr{ opts.mgr_address, diff --git a/server/executor_manager/cli.cpp b/server/executor_manager/cli.cpp index d86073b..626c5a5 100644 --- a/server/executor_manager/cli.cpp +++ b/server/executor_manager/cli.cpp @@ -52,9 +52,11 @@ int main(int argc, char ** argv) std::ifstream in_dev{opts.device_database}; rfaas::devices::deserialize(in_dev); + #ifdef USE_GNI_AUTH rdmalib::Configuration::get_instance().configure_cookie( rfaas::devices::instance()._configuration.authentication_credential ); + #endif // Read executor manager settings std::ifstream in_cfg{opts.json_config}; From f8b6cf8fe172a1bb15f0b4510aeb2e804f01a3a4 Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Tue, 6 Jun 2023 17:04:13 -0400 Subject: [PATCH 54/91] Initial refactor of Buffer MemoryRegion template --- rdmalib/include/rdmalib/buffer.hpp | 68 ++++++++++++----------- rdmalib/lib/buffer.cpp | 87 +++++++++++++++++++----------- 2 files changed, 93 insertions(+), 62 deletions(-) diff --git a/rdmalib/include/rdmalib/buffer.hpp 
b/rdmalib/include/rdmalib/buffer.hpp index 1d3a032..03e8f4e 100644 --- a/rdmalib/include/rdmalib/buffer.hpp +++ b/rdmalib/include/rdmalib/buffer.hpp @@ -7,22 +7,27 @@ #include -#ifdef USE_LIBFABRIC +//#ifdef USE_LIBFABRIC #include #include -#else +//#else struct ibv_pd; struct ibv_mr; struct ibv_sge; -#endif +//#endif namespace rdmalib { + template struct ScatterGatherElement; namespace impl { - // move non-template methods from header + // mregion - Memory region type: fid_mr* for libfabric, ibv_mr* for ibverbs + // pdomain - Protected domain type: fid_domain* for libfabric, ibv_pd* for ibverbs + // lkey - lkey type: void* for libfabric, uint32_t for ibverbs + //template + template struct Buffer { protected: uint32_t _size; @@ -30,11 +35,7 @@ namespace rdmalib { uint32_t _bytes; uint32_t _byte_size; void* _ptr; - #ifdef USE_LIBFABRIC - fid_mr* _mr; - #else - ibv_mr* _mr; - #endif + MemoryRegion* _mr; bool _own_memory; Buffer(); @@ -46,11 +47,7 @@ namespace rdmalib { public: uintptr_t address() const; void* ptr() const; - #ifdef USE_LIBFABRIC - fid_mr* mr() const; - #else - ibv_mr* mr() const; - #endif + MemoryRegion* mr() const; uint32_t data_size() const; uint32_t size() const; uint32_t bytes() const; @@ -61,14 +58,22 @@ namespace rdmalib { #endif #ifdef USE_LIBFABRIC void *lkey() const; - uint64_t rkey() const; #else uint32_t lkey() const; - uint32_t rkey() const; #endif - ScatterGatherElement sge(uint32_t size, uint32_t offset) const; + + uint32_t rkey() const; + + ScatterGatherElement sge(uint32_t size, uint32_t offset) const; }; + struct FabricBuffer : Buffer { + void destroy_buffer(); + }; + + struct VerbsBuffer : Buffer { + void destroy_buffer(); + }; } struct RemoteBuffer { @@ -93,37 +98,39 @@ namespace rdmalib { } }; - template - struct Buffer : impl::Buffer{ + template + struct Buffer : impl::Buffer, MemoryRegion> { + + using ImplBuffer = impl::Buffer, MemoryRegion>; Buffer(): - impl::Buffer() + ImplBuffer() {} // Provide a buffer instance for existing 
memory pool // Does NOT free the associated resource Buffer(T * ptr, uint32_t size): - impl::Buffer(ptr, size, sizeof(T)) + ImplBuffer(ptr, size, sizeof(T)) {} // Provide a buffer instance for existing memory pool // Does NOT free the associated resource Buffer(void * ptr, uint32_t size): - impl::Buffer(ptr, size, sizeof(T)) + ImplBuffer(ptr, size, sizeof(T)) {} Buffer(size_t size, size_t header = 0): - impl::Buffer(size, sizeof(T), header) + ImplBuffer(size, sizeof(T), header) {} - Buffer & operator=(Buffer && obj) + Buffer & operator=(Buffer && obj) { - impl::Buffer::operator=(std::move(obj)); + ImplBuffer::operator=(std::move(obj)); return *this; } - Buffer(const Buffer & obj) = delete; - Buffer(Buffer && obj) = default; + Buffer(const Buffer & obj) = delete; + Buffer(Buffer && obj) = default; T* data() const { @@ -132,6 +139,7 @@ namespace rdmalib { } }; + template struct ScatterGatherElement { // smallvector in practice #ifdef USE_LIBFABRIC @@ -150,13 +158,13 @@ namespace rdmalib { #endif template - ScatterGatherElement(const Buffer & buf) + ScatterGatherElement(const Buffer & buf) { add(buf); } template - void add(const Buffer & buf) + void add(const Buffer & buf) { #ifdef USE_LIBFABRIC _sges.push_back({(void *)buf.address(), (size_t)buf.bytes()}); @@ -168,7 +176,7 @@ namespace rdmalib { } template - void add(const Buffer & buf, uint32_t size, size_t offset = 0) + void add(const Buffer & buf, uint32_t size, size_t offset = 0) { #ifdef USE_LIBFABRIC _sges.push_back({(void *)(buf.address() + offset), (size_t)size}); diff --git a/rdmalib/lib/buffer.cpp b/rdmalib/lib/buffer.cpp index 0bacdaf..b3828bb 100644 --- a/rdmalib/lib/buffer.cpp +++ b/rdmalib/lib/buffer.cpp @@ -14,7 +14,8 @@ namespace rdmalib { namespace impl { - Buffer::Buffer(): + template + Buffer::Buffer(): _size(0), _header(0), _bytes(0), @@ -24,7 +25,8 @@ namespace rdmalib { namespace impl { _own_memory(false) {} - Buffer::Buffer(Buffer && obj): + template + Buffer::Buffer(Buffer && obj): 
_size(obj._size), _header(obj._header), _bytes(obj._bytes), @@ -37,7 +39,8 @@ namespace rdmalib { namespace impl { obj._ptr = obj._mr = nullptr; } - Buffer & Buffer::operator=(Buffer && obj) + template + Buffer& Buffer::operator=(Buffer && obj) { _size = obj._size; _bytes = obj._bytes; @@ -52,7 +55,8 @@ namespace rdmalib { namespace impl { return *this; } - Buffer::Buffer(uint32_t size, uint32_t byte_size, uint32_t header): + template + Buffer::Buffer(uint32_t size, uint32_t byte_size, uint32_t header): _size(size), _header(header), _bytes(size * byte_size + header), @@ -73,7 +77,8 @@ namespace rdmalib { namespace impl { ); } - Buffer::Buffer(void* ptr, uint32_t size, uint32_t byte_size): + template + Buffer::Buffer(void* ptr, uint32_t size, uint32_t byte_size): _size(size), _header(0), _bytes(size * byte_size), @@ -88,23 +93,33 @@ namespace rdmalib { namespace impl { ); } - Buffer::~Buffer() + template + Buffer::~Buffer() { SPDLOG_DEBUG( "Deallocate {} bytes, mr {}, ptr {}", _bytes, fmt::ptr(_mr), fmt::ptr(_ptr) ); + static_cast(this)->destroy_buffer(); + } + + void FabricBuffer::destroy_buffer() + { if(_mr) - #ifdef USE_LIBFABRIC impl::expect_zero(fi_close(&_mr->fid)); - #else + if(_own_memory) + munmap(_ptr, _bytes); + } + + void VerbsBuffer::destroy_buffer() + { + if(_mr) ibv_dereg_mr(_mr); - #endif if(_own_memory) munmap(_ptr, _bytes); } - #ifdef USE_LIBFABRIC + #ifdef USE_LIBFABRIC // requires proc domain refactor (up next) void Buffer::register_memory(fid_domain *pd, int access) { int ret = fi_mr_reg(pd, _ptr, _bytes, access, 0, 0, 0, &_mr, nullptr); @@ -115,7 +130,9 @@ namespace rdmalib { namespace impl { ); } #else - void Buffer::register_memory(ibv_pd* pd, int access) + + template + void Buffer::register_memory(ibv_pd* pd, int access) { _mr = ibv_reg_mr(pd, _ptr, _bytes, access); impl::expect_nonnull(_mr); @@ -126,41 +143,39 @@ namespace rdmalib { namespace impl { } #endif - #ifdef USE_LIBFABRIC - fid_mr* Buffer::mr() const + template + MemoryRegion* 
Buffer::mr() const { return this->_mr; } - #else - ibv_mr* Buffer::mr() const - { - return this->_mr; - } - #endif - uint32_t Buffer::data_size() const + template + uint32_t Buffer::data_size() const { return this->_size; } - uint32_t Buffer::size() const + template + uint32_t Buffer::size() const { return this->_size + this->_header; } - uint32_t Buffer::bytes() const + template + uint32_t Buffer::bytes() const { return this->_bytes; } - #ifdef USE_LIBFABRIC + #ifdef USE_LIBFABRIC // requires lkey refactor (up next) void *Buffer::lkey() const { assert(this->_mr); return fi_mr_desc(this->_mr); } #else - uint32_t Buffer::lkey() const + template + uint32_t Buffer::lkey() const { assert(this->_mr); // Apparently it's not needed and better to skip that check. @@ -176,25 +191,29 @@ namespace rdmalib { namespace impl { return fi_mr_key(this->_mr); } #else - uint32_t Buffer::rkey() const + template + uint32_t Buffer::rkey() const { assert(this->_mr); return this->_mr->rkey; } #endif - uintptr_t Buffer::address() const + template + uintptr_t Buffer::address() const { assert(this->_mr); return reinterpret_cast(this->_ptr); } - void* Buffer::ptr() const + template + void* Buffer::ptr() const { return this->_ptr; } - ScatterGatherElement Buffer::sge(uint32_t size, uint32_t offset) const + template + ScatterGatherElement Buffer::sge(uint32_t size, uint32_t offset) const { return {address() + offset, size, lkey()}; } @@ -203,7 +222,8 @@ namespace rdmalib { namespace impl { namespace rdmalib { - ScatterGatherElement::ScatterGatherElement() + template + ScatterGatherElement::ScatterGatherElement() { } @@ -217,13 +237,15 @@ namespace rdmalib { return _lkeys.data(); } #else - ibv_sge * ScatterGatherElement::array() const + template + ibv_sge * ScatterGatherElement::array() const { return _sges.data(); } #endif - size_t ScatterGatherElement::size() const + template + size_t ScatterGatherElement::size() const { return _sges.size(); } @@ -235,7 +257,8 @@ namespace rdmalib { 
_lkeys.push_back(lkey); } #else - ScatterGatherElement::ScatterGatherElement(uint64_t addr, uint32_t bytes, uint32_t lkey) + template + ScatterGatherElement::ScatterGatherElement(uint64_t addr, uint32_t bytes, uint32_t lkey) { _sges.push_back({addr, bytes, lkey}); } From 287fc8e44bfb40e0caf2876175eedae2a5b80020 Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Wed, 7 Jun 2023 10:11:56 -0400 Subject: [PATCH 55/91] More refactoring of Buffer using templates --- rdmalib/include/rdmalib/buffer.hpp | 66 +++++++++++-------- rdmalib/lib/buffer.cpp | 101 +++++++++++++---------------- 2 files changed, 83 insertions(+), 84 deletions(-) diff --git a/rdmalib/include/rdmalib/buffer.hpp b/rdmalib/include/rdmalib/buffer.hpp index 03e8f4e..09d6ad8 100644 --- a/rdmalib/include/rdmalib/buffer.hpp +++ b/rdmalib/include/rdmalib/buffer.hpp @@ -18,7 +18,7 @@ struct ibv_sge; namespace rdmalib { - template + template struct ScatterGatherElement; namespace impl { @@ -26,8 +26,7 @@ namespace rdmalib { // mregion - Memory region type: fid_mr* for libfabric, ibv_mr* for ibverbs // pdomain - Protected domain type: fid_domain* for libfabric, ibv_pd* for ibverbs // lkey - lkey type: void* for libfabric, uint32_t for ibverbs - //template - template + template struct Buffer { protected: uint32_t _size; @@ -51,27 +50,37 @@ namespace rdmalib { uint32_t data_size() const; uint32_t size() const; uint32_t bytes() const; - #ifdef USE_LIBFABRIC - void register_memory(fid_domain *pd, int access); - #else - void register_memory(ibv_pd *pd, int access); - #endif - #ifdef USE_LIBFABRIC - void *lkey() const; - #else - uint32_t lkey() const; - #endif + void register_memory(Domain *pd, int access) + { + static_cast(this)->_register_memory(pd, access); + } + LKey lkey() const + { + return static_cast(this)->_lkey(); + } + uint32_t rkey() const // TODO: introduce another template parameter for ret type + { + return static_cast(this)->_rkey(); + } + + ScatterGatherElement sge(uint32_t size, uint32_t offset) const; + 
}; - uint32_t rkey() const; + struct FabricBuffer : Buffer { + void _register_memory(fid_domain *pd, int access); - ScatterGatherElement sge(uint32_t size, uint32_t offset) const; - }; + void *_lkey() const; + uint64_t _rkey() const; - struct FabricBuffer : Buffer { void destroy_buffer(); }; - struct VerbsBuffer : Buffer { + struct VerbsBuffer : Buffer { + void register_memory(ibv_pd* pd, int access); + + uint32_t _lkey() const; + uint32_t _rkey() const; + void destroy_buffer(); }; } @@ -98,10 +107,11 @@ namespace rdmalib { } }; - template - struct Buffer : impl::Buffer, MemoryRegion> { + template + struct Buffer : impl::Buffer, MemoryRegion, Domain, LKey> { - using ImplBuffer = impl::Buffer, MemoryRegion>; + using Self = Buffer; + using ImplBuffer = impl::Buffer; Buffer(): ImplBuffer() @@ -123,14 +133,14 @@ namespace rdmalib { ImplBuffer(size, sizeof(T), header) {} - Buffer & operator=(Buffer && obj) + Self & operator=(Self && obj) { ImplBuffer::operator=(std::move(obj)); return *this; } - Buffer(const Buffer & obj) = delete; - Buffer(Buffer && obj) = default; + Buffer(const Self & obj) = delete; + Buffer(Self && obj) = default; T* data() const { @@ -139,7 +149,7 @@ namespace rdmalib { } }; - template + template struct ScatterGatherElement { // smallvector in practice #ifdef USE_LIBFABRIC @@ -158,13 +168,13 @@ namespace rdmalib { #endif template - ScatterGatherElement(const Buffer & buf) + ScatterGatherElement(const Buffer & buf) { add(buf); } template - void add(const Buffer & buf) + void add(const Buffer & buf) { #ifdef USE_LIBFABRIC _sges.push_back({(void *)buf.address(), (size_t)buf.bytes()}); @@ -176,7 +186,7 @@ namespace rdmalib { } template - void add(const Buffer & buf, uint32_t size, size_t offset = 0) + void add(const Buffer & buf, uint32_t size, size_t offset = 0) { #ifdef USE_LIBFABRIC _sges.push_back({(void *)(buf.address() + offset), (size_t)size}); diff --git a/rdmalib/lib/buffer.cpp b/rdmalib/lib/buffer.cpp index b3828bb..e974490 100644 --- 
a/rdmalib/lib/buffer.cpp +++ b/rdmalib/lib/buffer.cpp @@ -2,20 +2,19 @@ // mmap #include -#ifdef USE_LIBFABRIC +//#ifdef USE_LIBFABRIC #include #include -#else +//#else #include -#endif +//#endif #include #include namespace rdmalib { namespace impl { - - template - Buffer::Buffer(): + template + Buffer::Buffer(): _size(0), _header(0), _bytes(0), @@ -25,8 +24,8 @@ namespace rdmalib { namespace impl { _own_memory(false) {} - template - Buffer::Buffer(Buffer && obj): + template + Buffer::Buffer(Buffer && obj): _size(obj._size), _header(obj._header), _bytes(obj._bytes), @@ -39,8 +38,8 @@ namespace rdmalib { namespace impl { obj._ptr = obj._mr = nullptr; } - template - Buffer& Buffer::operator=(Buffer && obj) + template + Buffer& Buffer::operator=(Buffer && obj) { _size = obj._size; _bytes = obj._bytes; @@ -55,8 +54,8 @@ namespace rdmalib { namespace impl { return *this; } - template - Buffer::Buffer(uint32_t size, uint32_t byte_size, uint32_t header): + template + Buffer::Buffer(uint32_t size, uint32_t byte_size, uint32_t header): _size(size), _header(header), _bytes(size * byte_size + header), @@ -77,8 +76,8 @@ namespace rdmalib { namespace impl { ); } - template - Buffer::Buffer(void* ptr, uint32_t size, uint32_t byte_size): + template + Buffer::Buffer(void* ptr, uint32_t size, uint32_t byte_size): _size(size), _header(0), _bytes(size * byte_size), @@ -93,8 +92,8 @@ namespace rdmalib { namespace impl { ); } - template - Buffer::~Buffer() + template + Buffer::~Buffer() { SPDLOG_DEBUG( "Deallocate {} bytes, mr {}, ptr {}", @@ -119,8 +118,7 @@ namespace rdmalib { namespace impl { munmap(_ptr, _bytes); } - #ifdef USE_LIBFABRIC // requires proc domain refactor (up next) - void Buffer::register_memory(fid_domain *pd, int access) + void FabricBuffer::_register_memory(fid_domain *pd, int access) { int ret = fi_mr_reg(pd, _ptr, _bytes, access, 0, 0, 0, &_mr, nullptr); impl::expect_zero(ret); @@ -129,10 +127,8 @@ namespace rdmalib { namespace impl { _bytes, fmt::ptr(_mr), 
fmt::ptr(_ptr), fmt::ptr(fi_mr_desc(_mr)), fi_mr_key(_mr) ); } - #else - template - void Buffer::register_memory(ibv_pd* pd, int access) + void VerbsBuffer::register_memory(ibv_pd* pd, int access) { _mr = ibv_reg_mr(pd, _ptr, _bytes, access); impl::expect_nonnull(_mr); @@ -141,79 +137,72 @@ namespace rdmalib { namespace impl { _bytes, fmt::ptr(_mr), fmt::ptr(_mr->addr), _mr->lkey, _mr->rkey ); } - #endif - template - MemoryRegion* Buffer::mr() const + template + MemoryRegion* Buffer::mr() const { return this->_mr; } - template - uint32_t Buffer::data_size() const + template + uint32_t Buffer::data_size() const { return this->_size; } - template - uint32_t Buffer::size() const + template + uint32_t Buffer::size() const { return this->_size + this->_header; } - template - uint32_t Buffer::bytes() const + template + uint32_t Buffer::bytes() const { return this->_bytes; } - #ifdef USE_LIBFABRIC // requires lkey refactor (up next) - void *Buffer::lkey() const + void *FabricBuffer::_lkey() const { assert(this->_mr); return fi_mr_desc(this->_mr); } - #else - template - uint32_t Buffer::lkey() const + + uint32_t VerbsBuffer::_lkey() const { assert(this->_mr); // Apparently it's not needed and better to skip that check. 
return this->_mr->lkey; //return 0; } - #endif - #ifdef USE_LIBFABRIC - uint64_t Buffer::rkey() const + uint64_t FabricBuffer::_rkey() const { assert(this->_mr); return fi_mr_key(this->_mr); } - #else - template - uint32_t Buffer::rkey() const + + uint32_t VerbsBuffer::_rkey() const { assert(this->_mr); return this->_mr->rkey; } - #endif - template - uintptr_t Buffer::address() const + template + uintptr_t Buffer::address() const { assert(this->_mr); return reinterpret_cast(this->_ptr); } - template - void* Buffer::ptr() const + template + void* Buffer::ptr() const { return this->_ptr; } - template - ScatterGatherElement Buffer::sge(uint32_t size, uint32_t offset) const + template + ScatterGatherElement Buffer::sge(uint32_t size, uint32_t offset) const { return {address() + offset, size, lkey()}; } @@ -222,8 +211,8 @@ namespace rdmalib { namespace impl { namespace rdmalib { - template - ScatterGatherElement::ScatterGatherElement() + template + ScatterGatherElement::ScatterGatherElement() { } @@ -237,15 +226,15 @@ namespace rdmalib { return _lkeys.data(); } #else - template - ibv_sge * ScatterGatherElement::array() const + template + ibv_sge * ScatterGatherElement::array() const { return _sges.data(); } #endif - template - size_t ScatterGatherElement::size() const + template + size_t ScatterGatherElement::size() const { return _sges.size(); } @@ -257,8 +246,8 @@ namespace rdmalib { _lkeys.push_back(lkey); } #else - template - ScatterGatherElement::ScatterGatherElement(uint64_t addr, uint32_t bytes, uint32_t lkey) + template + ScatterGatherElement::ScatterGatherElement(uint64_t addr, uint32_t bytes, uint32_t lkey) { _sges.push_back({addr, bytes, lkey}); } From 0d57935f6451e0559c824a7278367775ec4df25c Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Thu, 8 Jun 2023 19:02:41 -0400 Subject: [PATCH 56/91] Refactoring RemoteBuffer --- rdmalib/include/rdmalib/buffer.hpp | 10 +++------- rdmalib/lib/buffer.cpp | 20 ++++---------------- 2 files changed, 7 insertions(+), 23 
deletions(-) diff --git a/rdmalib/include/rdmalib/buffer.hpp b/rdmalib/include/rdmalib/buffer.hpp index 09d6ad8..10e330e 100644 --- a/rdmalib/include/rdmalib/buffer.hpp +++ b/rdmalib/include/rdmalib/buffer.hpp @@ -85,6 +85,8 @@ namespace rdmalib { }; } + // RKey - rkey type: u32 for ibverbs, u64 for libfabric + template struct RemoteBuffer { uintptr_t addr; uint64_t rkey; @@ -92,13 +94,7 @@ namespace rdmalib { RemoteBuffer(); // When accessing the remote buffer, we might not need to know the size. - RemoteBuffer(uintptr_t addr, uint64_t rkey, uint32_t size = 0); - - #ifdef USE_LIBFABRIC - RemoteBuffer(uintptr_t addr, uint64_t rkey, uint32_t size); - #else - RemoteBuffer(uintptr_t addr, uint32_t rkey, uint32_t size); - #endif + RemoteBuffer(uintptr_t addr, RKey rkey, uint32_t size = 0); template void serialize(Archive & ar) diff --git a/rdmalib/lib/buffer.cpp b/rdmalib/lib/buffer.cpp index e974490..6caff62 100644 --- a/rdmalib/lib/buffer.cpp +++ b/rdmalib/lib/buffer.cpp @@ -253,30 +253,18 @@ namespace rdmalib { } #endif - RemoteBuffer::RemoteBuffer(): + template + RemoteBuffer::RemoteBuffer(): addr(0), rkey(0), size(0) {} - RemoteBuffer::RemoteBuffer(uintptr_t addr, uint64_t rkey, uint32_t size): + template + RemoteBuffer::RemoteBuffer(uintptr_t addr, RKey rkey, uint32_t size): addr(addr), rkey(rkey), size(size) {} - #ifdef USE_LIBFABRIC - RemoteBuffer::RemoteBuffer(uintptr_t addr, uint64_t rkey, uint32_t size): - addr(addr), - rkey(rkey), - size(size) - {} - #else - RemoteBuffer::RemoteBuffer(uintptr_t addr, uint32_t rkey, uint32_t size): - addr(addr), - rkey(rkey), - size(size) - {} - #endif - } From e24973ac3faa3fcdc44ea6b35eca8578551f9f1f Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Fri, 9 Jun 2023 11:15:22 -0400 Subject: [PATCH 57/91] More refactoring to SGE, Buffer --- rdmalib/include/rdmalib/buffer.hpp | 98 ++++++++++++++++++++---------- rdmalib/lib/buffer.cpp | 33 +++++----- 2 files changed, 79 insertions(+), 52 deletions(-) diff --git 
a/rdmalib/include/rdmalib/buffer.hpp b/rdmalib/include/rdmalib/buffer.hpp index 10e330e..5a9428a 100644 --- a/rdmalib/include/rdmalib/buffer.hpp +++ b/rdmalib/include/rdmalib/buffer.hpp @@ -18,7 +18,7 @@ struct ibv_sge; namespace rdmalib { - template + template struct ScatterGatherElement; namespace impl { @@ -26,7 +26,8 @@ namespace rdmalib { // mregion - Memory region type: fid_mr* for libfabric, ibv_mr* for ibverbs // pdomain - Protected domain type: fid_domain* for libfabric, ibv_pd* for ibverbs // lkey - lkey type: void* for libfabric, uint32_t for ibverbs - template + // rkey - rkey type: u64 for libfabric, u32 for ibverbs + template struct Buffer { protected: uint32_t _size; @@ -58,15 +59,16 @@ namespace rdmalib { { return static_cast(this)->_lkey(); } - uint32_t rkey() const // TODO: introduce another template parameter for ret type + RKey rkey() const { return static_cast(this)->_rkey(); } - ScatterGatherElement sge(uint32_t size, uint32_t offset) const; + template + ScatterGatherElement sge(uint32_t size, uint32_t offset) const; }; - struct FabricBuffer : Buffer { + struct FabricBuffer : Buffer { void _register_memory(fid_domain *pd, int access); void *_lkey() const; @@ -145,23 +147,18 @@ namespace rdmalib { } }; - template + // SGE - the scatter gather element type: iovec for libfabric, ibv_sge for verbs + template struct ScatterGatherElement { // smallvector in practice - #ifdef USE_LIBFABRIC - mutable std::vector _sges; - mutable std::vector _lkeys; - #else - mutable std::vector _sges; - #endif + mutable std::vector _sges; ScatterGatherElement(); - #ifdef USE_LIBFABRIC - ScatterGatherElement(uint64_t addr, uint32_t bytes, void *lkey); - #else - ScatterGatherElement(uint64_t addr, uint32_t bytes, uint32_t lkey); - #endif + ScatterGatherElement(uint64_t addr, uint32_t bytes, LKey lkey) + { + static_cast(this); + } template ScatterGatherElement(const Buffer & buf) @@ -172,34 +169,69 @@ namespace rdmalib { template void add(const Buffer & buf) { - #ifdef 
USE_LIBFABRIC + static_cast(this)->_add(buf); + } + + template + void add(const Buffer & buf, uint32_t size, size_t offset = 0) + { + static_cast(this)->_add(buf, size, offset); + } + + SGE *array() const + { + return static_cast(this)->_array(); + } + + size_t size() const; + }; + + + struct FabricScatterGatherElement : ScatterGatherElement + { + + mutable std::vector _lkeys; + + FabricScatterGatherElement(uint64_t addr, uint32_t bytes, void *lkey); + + template + void _add(const Buffer & buf) + { _sges.push_back({(void *)buf.address(), (size_t)buf.bytes()}); _lkeys.push_back(buf.lkey()); - #else - //emplace_back for structs will be supported in C++20 - _sges.push_back({buf.address(), buf.bytes(), buf.lkey()}); - #endif } template - void add(const Buffer & buf, uint32_t size, size_t offset = 0) + void _add(const Buffer & buf, uint32_t size, size_t offset = 0) { - #ifdef USE_LIBFABRIC _sges.push_back({(void *)(buf.address() + offset), (size_t)size}); _lkeys.push_back(buf.lkey()); - #else + } + + iovec *_array() const; + void **lkeys() const; + }; + + struct VerbsScatterGatherElement : ScatterGatherElement + { + + VerbsScatterGatherElement(uint64_t addr, uint32_t bytes, uint32_t lkey); + + template + void _add(const Buffer & buf) + { + //emplace_back for structs will be supported in C++20 + _sges.push_back({buf.address(), buf.bytes(), buf.lkey()}); + } + + template + void _add(const Buffer & buf, uint32_t size, size_t offset = 0) + { //emplace_back for structs will be supported in C++20 _sges.push_back({buf.address() + offset, size, buf.lkey()}); - #endif } - #ifdef USE_LIBFABRIC - iovec *array() const; - void **lkeys() const; - #else - ibv_sge * array() const; - #endif - size_t size() const; + ibv_sge *_array() const; }; } diff --git a/rdmalib/lib/buffer.cpp b/rdmalib/lib/buffer.cpp index 6caff62..36a684e 100644 --- a/rdmalib/lib/buffer.cpp +++ b/rdmalib/lib/buffer.cpp @@ -201,8 +201,8 @@ namespace rdmalib { namespace impl { return this->_ptr; } - template - 
ScatterGatherElement Buffer::sge(uint32_t size, uint32_t offset) const + template + ScatterGatherElement Buffer::sge(uint32_t size, uint32_t offset) const { return {address() + offset, size, lkey()}; } @@ -211,47 +211,42 @@ namespace rdmalib { namespace impl { namespace rdmalib { - template - ScatterGatherElement::ScatterGatherElement() + template + ScatterGatherElement::ScatterGatherElement() { } - #ifdef USE_LIBFABRIC - iovec *ScatterGatherElement::array() const + iovec *FabricScatterGatherElement::_array() const { return _sges.data(); } - void **ScatterGatherElement::lkeys() const + + void **FabricScatterGatherElement::lkeys() const { return _lkeys.data(); } - #else - template - ibv_sge * ScatterGatherElement::array() const + + ibv_sge *VerbsScatterGatherElement::_array() const { return _sges.data(); } - #endif - template - size_t ScatterGatherElement::size() const + template + size_t ScatterGatherElement::size() const { return _sges.size(); } - #ifdef USE_LIBFABRIC - ScatterGatherElement::ScatterGatherElement(uint64_t addr, uint32_t bytes, void *lkey) + FabricScatterGatherElement::FabricScatterGatherElement(uint64_t addr, uint32_t bytes, void *lkey) { _sges.push_back({(void *)addr, bytes}); _lkeys.push_back(lkey); } - #else - template - ScatterGatherElement::ScatterGatherElement(uint64_t addr, uint32_t bytes, uint32_t lkey) + + VerbsScatterGatherElement::VerbsScatterGatherElement(uint64_t addr, uint32_t bytes, uint32_t lkey) { _sges.push_back({addr, bytes, lkey}); } - #endif template RemoteBuffer::RemoteBuffer(): From 955b788ecf4c1c8d04abfad2fccf738a9ef77267 Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Wed, 28 Jun 2023 13:45:12 +0000 Subject: [PATCH 58/91] Buffer compiles after refactor --- rdmalib/include/rdmalib/buffer.hpp | 44 ++++++++++++-------- rdmalib/lib/buffer.cpp | 64 ++++++++++++++++-------------- 2 files changed, 61 insertions(+), 47 deletions(-) diff --git a/rdmalib/include/rdmalib/buffer.hpp b/rdmalib/include/rdmalib/buffer.hpp index 
5a9428a..3ff0fa4 100644 --- a/rdmalib/include/rdmalib/buffer.hpp +++ b/rdmalib/include/rdmalib/buffer.hpp @@ -21,6 +21,9 @@ namespace rdmalib { template struct ScatterGatherElement; + struct FabricScatterGatherElement; + struct VerbsScatterGatherElement; + namespace impl { // mregion - Memory region type: fid_mr* for libfabric, ibv_mr* for ibverbs @@ -57,7 +60,7 @@ namespace rdmalib { } LKey lkey() const { - return static_cast(this)->_lkey(); + return static_cast(this)->_lkey(); } RKey rkey() const { @@ -65,7 +68,10 @@ namespace rdmalib { } template - ScatterGatherElement sge(uint32_t size, uint32_t offset) const; + ScatterGatherElement sge(uint32_t size, uint32_t offset) const + { + return static_cast(this)->_sge(size, offset); + } }; struct FabricBuffer : Buffer { @@ -74,15 +80,19 @@ namespace rdmalib { void *_lkey() const; uint64_t _rkey() const; + FabricScatterGatherElement _sge(uint32_t size, uint32_t offset) const; + void destroy_buffer(); }; - struct VerbsBuffer : Buffer { + struct VerbsBuffer : Buffer { void register_memory(ibv_pd* pd, int access); uint32_t _lkey() const; uint32_t _rkey() const; + VerbsScatterGatherElement _sge(uint32_t size, uint32_t offset) const; + void destroy_buffer(); }; } @@ -105,11 +115,11 @@ namespace rdmalib { } }; - template - struct Buffer : impl::Buffer, MemoryRegion, Domain, LKey> { + template + struct Buffer : impl::Buffer, MemoryRegion, Domain, LKey, RKey> { - using Self = Buffer; - using ImplBuffer = impl::Buffer; + using Self = Buffer; + using ImplBuffer = impl::Buffer; Buffer(): ImplBuffer() @@ -148,7 +158,7 @@ namespace rdmalib { }; // SGE - the scatter gather element type: iovec for libfabric, ibv_sge for verbs - template + template struct ScatterGatherElement { // smallvector in practice mutable std::vector _sges; @@ -161,19 +171,19 @@ namespace rdmalib { } template - ScatterGatherElement(const Buffer & buf) + ScatterGatherElement(const Buffer & buf) { add(buf); } template - void add(const Buffer & buf) + void 
add(const Buffer & buf) { static_cast(this)->_add(buf); } template - void add(const Buffer & buf, uint32_t size, size_t offset = 0) + void add(const Buffer & buf, uint32_t size, size_t offset = 0) { static_cast(this)->_add(buf, size, offset); } @@ -187,7 +197,7 @@ namespace rdmalib { }; - struct FabricScatterGatherElement : ScatterGatherElement + struct FabricScatterGatherElement : ScatterGatherElement { mutable std::vector _lkeys; @@ -195,14 +205,14 @@ namespace rdmalib { FabricScatterGatherElement(uint64_t addr, uint32_t bytes, void *lkey); template - void _add(const Buffer & buf) + void _add(const Buffer & buf) { _sges.push_back({(void *)buf.address(), (size_t)buf.bytes()}); _lkeys.push_back(buf.lkey()); } template - void _add(const Buffer & buf, uint32_t size, size_t offset = 0) + void _add(const Buffer & buf, uint32_t size, size_t offset = 0) { _sges.push_back({(void *)(buf.address() + offset), (size_t)size}); _lkeys.push_back(buf.lkey()); @@ -212,20 +222,20 @@ namespace rdmalib { void **lkeys() const; }; - struct VerbsScatterGatherElement : ScatterGatherElement + struct VerbsScatterGatherElement : ScatterGatherElement { VerbsScatterGatherElement(uint64_t addr, uint32_t bytes, uint32_t lkey); template - void _add(const Buffer & buf) + void _add(const Buffer & buf) { //emplace_back for structs will be supported in C++20 _sges.push_back({buf.address(), buf.bytes(), buf.lkey()}); } template - void _add(const Buffer & buf, uint32_t size, size_t offset = 0) + void _add(const Buffer & buf, uint32_t size, size_t offset = 0) { //emplace_back for structs will be supported in C++20 _sges.push_back({buf.address() + offset, size, buf.lkey()}); diff --git a/rdmalib/lib/buffer.cpp b/rdmalib/lib/buffer.cpp index 36a684e..df5dea1 100644 --- a/rdmalib/lib/buffer.cpp +++ b/rdmalib/lib/buffer.cpp @@ -13,8 +13,8 @@ #include namespace rdmalib { namespace impl { - template - Buffer::Buffer(): + template + Buffer::Buffer(): _size(0), _header(0), _bytes(0), @@ -24,8 +24,8 @@ 
namespace rdmalib { namespace impl { _own_memory(false) {} - template - Buffer::Buffer(Buffer && obj): + template + Buffer::Buffer(Buffer && obj): _size(obj._size), _header(obj._header), _bytes(obj._bytes), @@ -38,8 +38,8 @@ namespace rdmalib { namespace impl { obj._ptr = obj._mr = nullptr; } - template - Buffer& Buffer::operator=(Buffer && obj) + template + Buffer& Buffer::operator=(Buffer && obj) { _size = obj._size; _bytes = obj._bytes; @@ -54,8 +54,8 @@ namespace rdmalib { namespace impl { return *this; } - template - Buffer::Buffer(uint32_t size, uint32_t byte_size, uint32_t header): + template + Buffer::Buffer(uint32_t size, uint32_t byte_size, uint32_t header): _size(size), _header(header), _bytes(size * byte_size + header), @@ -76,8 +76,8 @@ namespace rdmalib { namespace impl { ); } - template - Buffer::Buffer(void* ptr, uint32_t size, uint32_t byte_size): + template + Buffer::Buffer(void* ptr, uint32_t size, uint32_t byte_size): _size(size), _header(0), _bytes(size * byte_size), @@ -92,8 +92,8 @@ namespace rdmalib { namespace impl { ); } - template - Buffer::~Buffer() + template + Buffer::~Buffer() { SPDLOG_DEBUG( "Deallocate {} bytes, mr {}, ptr {}", @@ -138,26 +138,26 @@ namespace rdmalib { namespace impl { ); } - template - MemoryRegion* Buffer::mr() const + template + MemoryRegion* Buffer::mr() const { return this->_mr; } - template - uint32_t Buffer::data_size() const + template + uint32_t Buffer::data_size() const { return this->_size; } - template - uint32_t Buffer::size() const + template + uint32_t Buffer::size() const { return this->_size + this->_header; } - template - uint32_t Buffer::bytes() const + template + uint32_t Buffer::bytes() const { return this->_bytes; } @@ -188,21 +188,25 @@ namespace rdmalib { namespace impl { return this->_mr->rkey; } - template - uintptr_t Buffer::address() const + template + uintptr_t Buffer::address() const { assert(this->_mr); return reinterpret_cast(this->_ptr); } - template - void* Buffer::ptr() const + 
template + void* Buffer::ptr() const { return this->_ptr; } - template - ScatterGatherElement Buffer::sge(uint32_t size, uint32_t offset) const + FabricScatterGatherElement FabricBuffer::_sge(uint32_t size, uint32_t offset) const + { + return {address() + offset, size, lkey()}; + } + + VerbsScatterGatherElement VerbsBuffer::_sge(uint32_t size, uint32_t offset) const { return {address() + offset, size, lkey()}; } @@ -211,8 +215,8 @@ namespace rdmalib { namespace impl { namespace rdmalib { - template - ScatterGatherElement::ScatterGatherElement() + template + ScatterGatherElement::ScatterGatherElement() { } @@ -231,8 +235,8 @@ namespace rdmalib { return _sges.data(); } - template - size_t ScatterGatherElement::size() const + template + size_t ScatterGatherElement::size() const { return _sges.size(); } From f465f8b09dbd366101b438a6f8fe604dbf4d0966 Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Mon, 3 Jul 2023 18:10:00 -0400 Subject: [PATCH 59/91] Add traits to impl::Buffer --- rdmalib/include/rdmalib/buffer.hpp | 189 ++++++++++++++++------------- 1 file changed, 105 insertions(+), 84 deletions(-) diff --git a/rdmalib/include/rdmalib/buffer.hpp b/rdmalib/include/rdmalib/buffer.hpp index 1d3a032..6161bb3 100644 --- a/rdmalib/include/rdmalib/buffer.hpp +++ b/rdmalib/include/rdmalib/buffer.hpp @@ -7,71 +7,91 @@ #include -#ifdef USE_LIBFABRIC +// #ifdef USE_LIBFABRIC #include #include -#else +// #else struct ibv_pd; struct ibv_mr; struct ibv_sge; -#endif +// #endif -namespace rdmalib { +namespace rdmalib +{ struct ScatterGatherElement; - namespace impl { + struct ibverbs; + struct libfabric; + + template + struct library_traits; + + template <> + struct library_traits + { + typedef ibv_mr *mr_t; + typedef ibv_pd *pd_t; + typedef uint32_t lkey_t; + typedef uint32_t rkey_t; + }; + + template <> + struct library_traits + { + using type = libfabric; + + typedef fid_mr *mr_t; + typedef fid_domain *pd_t; + typedef void *lkey_t; + typedef uint64_t rkey_t; + }; + + namespace impl 
+ { // move non-template methods from header - struct Buffer { + template + struct Buffer + { protected: + using mr_t = library_traits::mr_t; + using pd_t = library_traits::pd_t; + using lkey_t = library_traits::lkey_t; + using rkey_t = library_traits::rkey_t; + uint32_t _size; uint32_t _header; uint32_t _bytes; uint32_t _byte_size; - void* _ptr; - #ifdef USE_LIBFABRIC - fid_mr* _mr; - #else - ibv_mr* _mr; - #endif + void *_ptr; + mr_t _mr; bool _own_memory; Buffer(); - Buffer(void* ptr, uint32_t size, uint32_t byte_size); + Buffer(void *ptr, uint32_t size, uint32_t byte_size); Buffer(uint32_t size, uint32_t byte_size, uint32_t header); Buffer(Buffer &&); - Buffer & operator=(Buffer && obj); + Buffer &operator=(Buffer &&obj); ~Buffer(); + public: uintptr_t address() const; - void* ptr() const; - #ifdef USE_LIBFABRIC - fid_mr* mr() const; - #else - ibv_mr* mr() const; - #endif + void *ptr() const; + mr_t mr() const; uint32_t data_size() const; uint32_t size() const; uint32_t bytes() const; - #ifdef USE_LIBFABRIC - void register_memory(fid_domain *pd, int access); - #else - void register_memory(ibv_pd *pd, int access); - #endif - #ifdef USE_LIBFABRIC - void *lkey() const; - uint64_t rkey() const; - #else - uint32_t lkey() const; - uint32_t rkey() const; - #endif + void register_memory(pd_t pd, int access); + lkey_t lkey() const; + rkey_t rkey() const; ScatterGatherElement sge(uint32_t size, uint32_t offset) const; }; } - struct RemoteBuffer { + struct RemoteBuffer + { uintptr_t addr; uint64_t rkey; uint32_t size; @@ -80,114 +100,115 @@ namespace rdmalib { // When accessing the remote buffer, we might not need to know the size. 
RemoteBuffer(uintptr_t addr, uint64_t rkey, uint32_t size = 0); - #ifdef USE_LIBFABRIC +#ifdef USE_LIBFABRIC RemoteBuffer(uintptr_t addr, uint64_t rkey, uint32_t size); - #else +#else RemoteBuffer(uintptr_t addr, uint32_t rkey, uint32_t size); - #endif +#endif - template - void serialize(Archive & ar) + template + void serialize(Archive &ar) { ar(CEREAL_NVP(addr), CEREAL_NVP(rkey), CEREAL_NVP(size)); } }; - template - struct Buffer : impl::Buffer{ + template + struct Buffer : impl::Buffer + { - Buffer(): - impl::Buffer() - {} + Buffer() : impl::Buffer() + { + } // Provide a buffer instance for existing memory pool // Does NOT free the associated resource - Buffer(T * ptr, uint32_t size): - impl::Buffer(ptr, size, sizeof(T)) - {} + Buffer(T *ptr, uint32_t size) : impl::Buffer(ptr, size, sizeof(T)) + { + } // Provide a buffer instance for existing memory pool // Does NOT free the associated resource - Buffer(void * ptr, uint32_t size): - impl::Buffer(ptr, size, sizeof(T)) - {} + Buffer(void *ptr, uint32_t size) : impl::Buffer(ptr, size, sizeof(T)) + { + } - Buffer(size_t size, size_t header = 0): - impl::Buffer(size, sizeof(T), header) - {} + Buffer(size_t size, size_t header = 0) : impl::Buffer(size, sizeof(T), header) + { + } - Buffer & operator=(Buffer && obj) + Buffer &operator=(Buffer &&obj) { impl::Buffer::operator=(std::move(obj)); return *this; } - Buffer(const Buffer & obj) = delete; - Buffer(Buffer && obj) = default; + Buffer(const Buffer &obj) = delete; + Buffer(Buffer &&obj) = default; - T* data() const + T *data() const { // void pointer arithmetic is not allowed - return reinterpret_cast(static_cast(this->_ptr) + this->_header); + return reinterpret_cast(static_cast(this->_ptr) + this->_header); } }; - struct ScatterGatherElement { - // smallvector in practice - #ifdef USE_LIBFABRIC + struct ScatterGatherElement + { +// smallvector in practice +#ifdef USE_LIBFABRIC mutable std::vector _sges; mutable std::vector _lkeys; - #else +#else mutable std::vector 
_sges; - #endif +#endif ScatterGatherElement(); - #ifdef USE_LIBFABRIC +#ifdef USE_LIBFABRIC ScatterGatherElement(uint64_t addr, uint32_t bytes, void *lkey); - #else +#else ScatterGatherElement(uint64_t addr, uint32_t bytes, uint32_t lkey); - #endif +#endif - template - ScatterGatherElement(const Buffer & buf) + template + ScatterGatherElement(const Buffer &buf) { add(buf); } - template - void add(const Buffer & buf) + template + void add(const Buffer &buf) { - #ifdef USE_LIBFABRIC +#ifdef USE_LIBFABRIC _sges.push_back({(void *)buf.address(), (size_t)buf.bytes()}); _lkeys.push_back(buf.lkey()); - #else - //emplace_back for structs will be supported in C++20 +#else + // emplace_back for structs will be supported in C++20 _sges.push_back({buf.address(), buf.bytes(), buf.lkey()}); - #endif +#endif } - template - void add(const Buffer & buf, uint32_t size, size_t offset = 0) + template + void add(const Buffer &buf, uint32_t size, size_t offset = 0) { - #ifdef USE_LIBFABRIC +#ifdef USE_LIBFABRIC _sges.push_back({(void *)(buf.address() + offset), (size_t)size}); _lkeys.push_back(buf.lkey()); - #else - //emplace_back for structs will be supported in C++20 +#else + // emplace_back for structs will be supported in C++20 _sges.push_back({buf.address() + offset, size, buf.lkey()}); - #endif +#endif } - #ifdef USE_LIBFABRIC +#ifdef USE_LIBFABRIC iovec *array() const; void **lkeys() const; - #else - ibv_sge * array() const; - #endif +#else + ibv_sge *array() const; +#endif size_t size() const; }; } #endif - From 868e412453c6eb0a837713fe07b4e65f9dc5ef1e Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Mon, 3 Jul 2023 18:35:02 -0400 Subject: [PATCH 60/91] Finish types in buffer.hpp --- rdmalib/include/rdmalib/buffer.hpp | 72 +++++++++++++++--------------- 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/rdmalib/include/rdmalib/buffer.hpp b/rdmalib/include/rdmalib/buffer.hpp index 6161bb3..bc43e28 100644 --- a/rdmalib/include/rdmalib/buffer.hpp +++ 
b/rdmalib/include/rdmalib/buffer.hpp @@ -19,6 +19,7 @@ struct ibv_sge; namespace rdmalib { + template struct ScatterGatherElement; struct ibverbs; @@ -34,6 +35,7 @@ namespace rdmalib typedef ibv_pd *pd_t; typedef uint32_t lkey_t; typedef uint32_t rkey_t; + typedef iovec sge_t; }; template <> @@ -45,6 +47,7 @@ namespace rdmalib typedef fid_domain *pd_t; typedef void *lkey_t; typedef uint64_t rkey_t; + typedef ibv_sge sge_t; }; namespace impl @@ -55,10 +58,10 @@ namespace rdmalib struct Buffer { protected: - using mr_t = library_traits::mr_t; - using pd_t = library_traits::pd_t; - using lkey_t = library_traits::lkey_t; - using rkey_t = library_traits::rkey_t; + using mr_t = typename library_traits::mr_t; + using pd_t = typename library_traits::pd_t; + using lkey_t = typename library_traits::lkey_t; + using rkey_t = typename library_traits::rkey_t; uint32_t _size; uint32_t _header; @@ -85,13 +88,17 @@ namespace rdmalib void register_memory(pd_t pd, int access); lkey_t lkey() const; rkey_t rkey() const; - ScatterGatherElement sge(uint32_t size, uint32_t offset) const; + ScatterGatherElement sge(uint32_t size, uint32_t offset) const; }; } + template struct RemoteBuffer { + + using rkey_t = typename library_traits::rkey_t; + uintptr_t addr; uint64_t rkey; uint32_t size; @@ -100,11 +107,7 @@ namespace rdmalib // When accessing the remote buffer, we might not need to know the size. 
RemoteBuffer(uintptr_t addr, uint64_t rkey, uint32_t size = 0); -#ifdef USE_LIBFABRIC - RemoteBuffer(uintptr_t addr, uint64_t rkey, uint32_t size); -#else - RemoteBuffer(uintptr_t addr, uint32_t rkey, uint32_t size); -#endif + RemoteBuffer(uintptr_t addr, rkey_t rkey, uint32_t size); template void serialize(Archive &ar) @@ -113,38 +116,39 @@ namespace rdmalib } }; - template - struct Buffer : impl::Buffer + template + struct Buffer : impl::Buffer, Library> { + using ImplBuffer = impl::Buffer, Library>; - Buffer() : impl::Buffer() + Buffer() : ImplBuffer() { } // Provide a buffer instance for existing memory pool // Does NOT free the associated resource - Buffer(T *ptr, uint32_t size) : impl::Buffer(ptr, size, sizeof(T)) + Buffer(T *ptr, uint32_t size) : ImplBuffer(ptr, size, sizeof(T)) { } // Provide a buffer instance for existing memory pool // Does NOT free the associated resource - Buffer(void *ptr, uint32_t size) : impl::Buffer(ptr, size, sizeof(T)) + Buffer(void *ptr, uint32_t size) : ImplBuffer(ptr, size, sizeof(T)) { } - Buffer(size_t size, size_t header = 0) : impl::Buffer(size, sizeof(T), header) + Buffer(size_t size, size_t header = 0) : ImplBuffer(size, sizeof(T), header) { } - Buffer &operator=(Buffer &&obj) + Buffer &operator=(Buffer &&obj) { - impl::Buffer::operator=(std::move(obj)); + ImplBuffer::operator=(std::move(obj)); return *this; } - Buffer(const Buffer &obj) = delete; - Buffer(Buffer &&obj) = default; + Buffer(const Buffer &obj) = delete; + Buffer(Buffer &&obj) = default; T *data() const { @@ -153,32 +157,30 @@ namespace rdmalib } }; + template struct ScatterGatherElement { -// smallvector in practice + + using sge_t = typename library_traits::sge_t; + using lkey_t = typename library_traits::lkey_t; + + mutable std::vector _sges; #ifdef USE_LIBFABRIC - mutable std::vector _sges; mutable std::vector _lkeys; -#else - mutable std::vector _sges; #endif ScatterGatherElement(); -#ifdef USE_LIBFABRIC - ScatterGatherElement(uint64_t addr, uint32_t 
bytes, void *lkey); -#else - ScatterGatherElement(uint64_t addr, uint32_t bytes, uint32_t lkey); -#endif + ScatterGatherElement(uint64_t addr, uint32_t bytes, lkey_t lkey); template - ScatterGatherElement(const Buffer &buf) + ScatterGatherElement(const Buffer &buf) { add(buf); } template - void add(const Buffer &buf) + void add(const Buffer &buf) { #ifdef USE_LIBFABRIC _sges.push_back({(void *)buf.address(), (size_t)buf.bytes()}); @@ -190,7 +192,7 @@ namespace rdmalib } template - void add(const Buffer &buf, uint32_t size, size_t offset = 0) + void add(const Buffer &buf, uint32_t size, size_t offset = 0) { #ifdef USE_LIBFABRIC _sges.push_back({(void *)(buf.address() + offset), (size_t)size}); @@ -201,12 +203,12 @@ namespace rdmalib #endif } + sge_t *array() const; + #ifdef USE_LIBFABRIC - iovec *array() const; void **lkeys() const; -#else - ibv_sge *array() const; #endif + size_t size() const; }; } From f36dd2527259373103f5fb8bafe659cf7192e1a5 Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Mon, 3 Jul 2023 18:48:16 -0400 Subject: [PATCH 61/91] Move implementations to derived structs --- rdmalib/include/rdmalib/buffer.hpp | 108 ++++++++++++++++++++++------- rdmalib/lib/buffer.cpp | 88 ++++++++++++----------- 2 files changed, 130 insertions(+), 66 deletions(-) diff --git a/rdmalib/include/rdmalib/buffer.hpp b/rdmalib/include/rdmalib/buffer.hpp index bc43e28..ef3b2b9 100644 --- a/rdmalib/include/rdmalib/buffer.hpp +++ b/rdmalib/include/rdmalib/buffer.hpp @@ -31,6 +31,7 @@ namespace rdmalib template <> struct library_traits { + //using type = ibverbs; typedef ibv_mr *mr_t; typedef ibv_pd *pd_t; typedef uint32_t lkey_t; @@ -41,8 +42,7 @@ namespace rdmalib template <> struct library_traits { - using type = libfabric; - + //using type = libfabric; typedef fid_mr *mr_t; typedef fid_domain *pd_t; typedef void *lkey_t; @@ -54,15 +54,24 @@ namespace rdmalib { // move non-template methods from header - template + template struct Buffer { protected: + using Library = typename 
Derived::library; using mr_t = typename library_traits::mr_t; using pd_t = typename library_traits::pd_t; using lkey_t = typename library_traits::lkey_t; using rkey_t = typename library_traits::rkey_t; + /* + typedef typename Derived::library Library; + typedef typename library_traits::mr_t mr_t ; + typedef typename library_traits::pd_t pd_t ; + typedef typename library_traits::lkey_t lkey_t ; + typedef typename library_traits::rkey_t rkey_t ; + */ + uint32_t _size; uint32_t _header; uint32_t _bytes; @@ -76,7 +85,10 @@ namespace rdmalib Buffer(uint32_t size, uint32_t byte_size, uint32_t header); Buffer(Buffer &&); Buffer &operator=(Buffer &&obj); - ~Buffer(); + ~Buffer() + { + static_cast(this)->destroy(); + } public: uintptr_t address() const; @@ -85,10 +97,37 @@ namespace rdmalib uint32_t data_size() const; uint32_t size() const; uint32_t bytes() const; + void register_memory(pd_t pd, int access) + { + static_cast(this)->register_memory(pd, access); + } + lkey_t lkey() const + { + static_cast(this)->lkey(); + } + rkey_t rkey() const + { + static_cast(this)->rkey(); + } + ScatterGatherElement sge(uint32_t size, uint32_t offset) const; + }; + + struct LibfabricBuffer : Buffer + { + //using library = libfabric; void register_memory(pd_t pd, int access); lkey_t lkey() const; rkey_t rkey() const; - ScatterGatherElement sge(uint32_t size, uint32_t offset) const; + void destroy(); + }; + + struct VerbsBuffer : Buffer + { + //using library = ibverbs; + void register_memory(pd_t pd, int access); + lkey_t lkey() const; + rkey_t rkey() const; + void destroy(); }; } @@ -117,9 +156,9 @@ namespace rdmalib }; template - struct Buffer : impl::Buffer, Library> + struct Buffer : impl::Buffer> { - using ImplBuffer = impl::Buffer, Library>; + using ImplBuffer = impl::Buffer>; Buffer() : ImplBuffer() { @@ -157,17 +196,14 @@ namespace rdmalib } }; - template + template struct ScatterGatherElement { - + using Library = Derived::library; using sge_t = typename library_traits::sge_t; 
using lkey_t = typename library_traits::lkey_t; mutable std::vector _sges; -#ifdef USE_LIBFABRIC - mutable std::vector _lkeys; -#endif ScatterGatherElement(); @@ -179,36 +215,58 @@ namespace rdmalib add(buf); } + template + void add(const Buffer &buf); + + template + void add(const Buffer &buf, uint32_t size, size_t offset = 0); + + sge_t *array() const; + + size_t size() const; + }; + + struct VerbsScatterGatherElement : ScatterGatherElement + { template void add(const Buffer &buf) { -#ifdef USE_LIBFABRIC - _sges.push_back({(void *)buf.address(), (size_t)buf.bytes()}); - _lkeys.push_back(buf.lkey()); -#else // emplace_back for structs will be supported in C++20 _sges.push_back({buf.address(), buf.bytes(), buf.lkey()}); -#endif } template void add(const Buffer &buf, uint32_t size, size_t offset = 0) { -#ifdef USE_LIBFABRIC - _sges.push_back({(void *)(buf.address() + offset), (size_t)size}); - _lkeys.push_back(buf.lkey()); -#else // emplace_back for structs will be supported in C++20 _sges.push_back({buf.address() + offset, size, buf.lkey()}); -#endif } sge_t *array() const; + size_t size() const; -#ifdef USE_LIBFABRIC - void **lkeys() const; -#endif + }; + + struct LibfabricScatterGatherElement : ScatterGatherElement + { + mutable std::vector _lkeys; + + template + void add(const Buffer &buf) + { + _sges.push_back({(void *)buf.address(), (size_t)buf.bytes()}); + _lkeys.push_back(buf.lkey()); + } + template + void add(const Buffer &buf, uint32_t size, size_t offset = 0) + { + _sges.push_back({(void *)(buf.address() + offset), (size_t)size}); + _lkeys.push_back(buf.lkey()); + } + + sge_t *array() const; + lkey_t *lkeys() const; size_t size() const; }; } diff --git a/rdmalib/lib/buffer.cpp b/rdmalib/lib/buffer.cpp index 0bacdaf..337d387 100644 --- a/rdmalib/lib/buffer.cpp +++ b/rdmalib/lib/buffer.cpp @@ -2,19 +2,20 @@ // mmap #include -#ifdef USE_LIBFABRIC +// #ifdef USE_LIBFABRIC #include #include -#else +// #else #include -#endif +// #endif #include #include 
namespace rdmalib { namespace impl { - Buffer::Buffer(): + template + Buffer::Buffer(): _size(0), _header(0), _bytes(0), @@ -24,7 +25,8 @@ namespace rdmalib { namespace impl { _own_memory(false) {} - Buffer::Buffer(Buffer && obj): + template + Buffer::Buffer(Buffer && obj): _size(obj._size), _header(obj._header), _bytes(obj._bytes), @@ -37,7 +39,8 @@ namespace rdmalib { namespace impl { obj._ptr = obj._mr = nullptr; } - Buffer & Buffer::operator=(Buffer && obj) + template + Buffer & Buffer::operator=(Buffer && obj) { _size = obj._size; _bytes = obj._bytes; @@ -52,7 +55,8 @@ namespace rdmalib { namespace impl { return *this; } - Buffer::Buffer(uint32_t size, uint32_t byte_size, uint32_t header): + template + Buffer::Buffer(uint32_t size, uint32_t byte_size, uint32_t header): _size(size), _header(header), _bytes(size * byte_size + header), @@ -73,7 +77,8 @@ namespace rdmalib { namespace impl { ); } - Buffer::Buffer(void* ptr, uint32_t size, uint32_t byte_size): + template + Buffer::Buffer(void* ptr, uint32_t size, uint32_t byte_size): _size(size), _header(0), _bytes(size * byte_size), @@ -88,24 +93,31 @@ namespace rdmalib { namespace impl { ); } - Buffer::~Buffer() + void LibfabricBuffer::destroy() { SPDLOG_DEBUG( "Deallocate {} bytes, mr {}, ptr {}", _bytes, fmt::ptr(_mr), fmt::ptr(_ptr) ); if(_mr) - #ifdef USE_LIBFABRIC impl::expect_zero(fi_close(&_mr->fid)); - #else + if(_own_memory) + munmap(_ptr, _bytes); + } + + void VerbsBuffer::destroy() + { + SPDLOG_DEBUG( + "Deallocate {} bytes, mr {}, ptr {}", + _bytes, fmt::ptr(_mr), fmt::ptr(_ptr) + ); + if(_mr) ibv_dereg_mr(_mr); - #endif if(_own_memory) munmap(_ptr, _bytes); } - #ifdef USE_LIBFABRIC - void Buffer::register_memory(fid_domain *pd, int access) + void LibfabricBuffer::register_memory(LibfabricBuffer::pd_t pd, int access) { int ret = fi_mr_reg(pd, _ptr, _bytes, access, 0, 0, 0, &_mr, nullptr); impl::expect_zero(ret); @@ -114,8 +126,8 @@ namespace rdmalib { namespace impl { _bytes, fmt::ptr(_mr), 
fmt::ptr(_ptr), fmt::ptr(fi_mr_desc(_mr)), fi_mr_key(_mr) ); } - #else - void Buffer::register_memory(ibv_pd* pd, int access) + + void VerbsBuffer::register_memory(VerbsBuffer::pd_t pd, int access) { _mr = ibv_reg_mr(pd, _ptr, _bytes, access); impl::expect_nonnull(_mr); @@ -124,77 +136,71 @@ namespace rdmalib { namespace impl { _bytes, fmt::ptr(_mr), fmt::ptr(_mr->addr), _mr->lkey, _mr->rkey ); } - #endif - #ifdef USE_LIBFABRIC - fid_mr* Buffer::mr() const + template + Buffer::mr_t Buffer::mr() const { return this->_mr; } - #else - ibv_mr* Buffer::mr() const - { - return this->_mr; - } - #endif - uint32_t Buffer::data_size() const + template + uint32_t Buffer::data_size() const { return this->_size; } - uint32_t Buffer::size() const + template + uint32_t Buffer::size() const { return this->_size + this->_header; } - uint32_t Buffer::bytes() const + template + uint32_t Buffer::bytes() const { return this->_bytes; } - #ifdef USE_LIBFABRIC - void *Buffer::lkey() const + LibfabricBuffer::lkey_t LibfabricBuffer::lkey() const { assert(this->_mr); return fi_mr_desc(this->_mr); } - #else - uint32_t Buffer::lkey() const + + VerbsBuffer::lkey_t VerbsBuffer::lkey() const { assert(this->_mr); // Apparently it's not needed and better to skip that check. 
return this->_mr->lkey; //return 0; } - #endif - #ifdef USE_LIBFABRIC - uint64_t Buffer::rkey() const + LibfabricBuffer::rkey_t LibfabricBuffer::rkey() const { assert(this->_mr); return fi_mr_key(this->_mr); } - #else - uint32_t Buffer::rkey() const + VerbsBuffer::rkey_t VerbsBuffer::rkey() const { assert(this->_mr); return this->_mr->rkey; } - #endif - uintptr_t Buffer::address() const + template + uintptr_t Buffer::address() const { assert(this->_mr); return reinterpret_cast(this->_ptr); } - void* Buffer::ptr() const + template + void* Buffer::ptr() const { return this->_ptr; } - ScatterGatherElement Buffer::sge(uint32_t size, uint32_t offset) const + template + ScatterGatherElement Buffer::sge(uint32_t size, uint32_t offset) const { return {address() + offset, size, lkey()}; } From 424bbe6c70c70778d60850253ed79e78e8ea0300 Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Wed, 5 Jul 2023 11:01:57 -0400 Subject: [PATCH 62/91] Buffer compiles --- rdmalib/include/rdmalib/buffer.hpp | 97 +++++++++++++++++------- rdmalib/lib/buffer.cpp | 118 ++++++----------------------- 2 files changed, 92 insertions(+), 123 deletions(-) diff --git a/rdmalib/include/rdmalib/buffer.hpp b/rdmalib/include/rdmalib/buffer.hpp index ef3b2b9..003a1a7 100644 --- a/rdmalib/include/rdmalib/buffer.hpp +++ b/rdmalib/include/rdmalib/buffer.hpp @@ -19,7 +19,7 @@ struct ibv_sge; namespace rdmalib { - template + template struct ScatterGatherElement; struct ibverbs; @@ -36,7 +36,7 @@ namespace rdmalib typedef ibv_pd *pd_t; typedef uint32_t lkey_t; typedef uint32_t rkey_t; - typedef iovec sge_t; + typedef ibv_sge sge_t; }; template <> @@ -47,18 +47,17 @@ namespace rdmalib typedef fid_domain *pd_t; typedef void *lkey_t; typedef uint64_t rkey_t; - typedef ibv_sge sge_t; + typedef iovec sge_t; }; namespace impl { // move non-template methods from header - template + template struct Buffer { protected: - using Library = typename Derived::library; using mr_t = typename library_traits::mr_t; using pd_t = 
typename library_traits::pd_t; using lkey_t = typename library_traits::lkey_t; @@ -93,7 +92,10 @@ namespace rdmalib public: uintptr_t address() const; void *ptr() const; - mr_t mr() const; + mr_t mr() const + { + return this->_mr; + } uint32_t data_size() const; uint32_t size() const; uint32_t bytes() const; @@ -109,10 +111,10 @@ namespace rdmalib { static_cast(this)->rkey(); } - ScatterGatherElement sge(uint32_t size, uint32_t offset) const; + ScatterGatherElement sge(uint32_t size, uint32_t offset) const; }; - struct LibfabricBuffer : Buffer + struct LibfabricBuffer : Buffer { //using library = libfabric; void register_memory(pd_t pd, int access); @@ -121,7 +123,7 @@ namespace rdmalib void destroy(); }; - struct VerbsBuffer : Buffer + struct VerbsBuffer : Buffer { //using library = ibverbs; void register_memory(pd_t pd, int access); @@ -139,14 +141,20 @@ namespace rdmalib using rkey_t = typename library_traits::rkey_t; uintptr_t addr; - uint64_t rkey; + rkey_t rkey; uint32_t size; - RemoteBuffer(); + RemoteBuffer(): + addr(0), + rkey(0), + size(0) + {} // When accessing the remote buffer, we might not need to know the size. 
- RemoteBuffer(uintptr_t addr, uint64_t rkey, uint32_t size = 0); - - RemoteBuffer(uintptr_t addr, rkey_t rkey, uint32_t size); + RemoteBuffer(uintptr_t addr, rkey_t rkey, uint32_t size = 0): + addr(addr), + rkey(rkey), + size(size) + {} template void serialize(Archive &ar) @@ -156,9 +164,9 @@ namespace rdmalib }; template - struct Buffer : impl::Buffer> + struct Buffer : impl::Buffer, Library> { - using ImplBuffer = impl::Buffer>; + using ImplBuffer = impl::Buffer, Library>; Buffer() : ImplBuffer() { @@ -196,18 +204,22 @@ namespace rdmalib } }; - template + template struct ScatterGatherElement { - using Library = Derived::library; + //using Library = typename Derived::library; using sge_t = typename library_traits::sge_t; using lkey_t = typename library_traits::lkey_t; mutable std::vector _sges; - ScatterGatherElement(); + ScatterGatherElement() + {} - ScatterGatherElement(uint64_t addr, uint32_t bytes, lkey_t lkey); + ScatterGatherElement(uint64_t addr, uint32_t bytes, lkey_t lkey) + { + static_cast(this)->Derived(addr, bytes, lkey); + } template ScatterGatherElement(const Buffer &buf) @@ -216,18 +228,31 @@ namespace rdmalib } template - void add(const Buffer &buf); + void add(const Buffer &buf) + { + static_cast(this)->add(buf); + } template - void add(const Buffer &buf, uint32_t size, size_t offset = 0); + void add(const Buffer &buf, uint32_t size, size_t offset = 0) + { + static_cast(this)->add(buf, size, offset); + } - sge_t *array() const; + sge_t *array() const + { + return static_cast(this)->array(); + } size_t size() const; }; - struct VerbsScatterGatherElement : ScatterGatherElement + struct VerbsScatterGatherElement : ScatterGatherElement { + using Library = ibverbs; + + VerbsScatterGatherElement(uint64_t addr, uint32_t bytes, uint32_t lkey); + template void add(const Buffer &buf) { @@ -247,9 +272,16 @@ namespace rdmalib }; - struct LibfabricScatterGatherElement : ScatterGatherElement + struct LibfabricScatterGatherElement : ScatterGatherElement { 
mutable std::vector _lkeys; + using Library = libfabric; + + LibfabricScatterGatherElement(uint64_t addr, uint32_t bytes, lkey_t lkey) + { + _sges.push_back({(void *)addr, bytes}); + _lkeys.push_back(lkey); + } template void add(const Buffer &buf) @@ -265,9 +297,18 @@ namespace rdmalib _lkeys.push_back(buf.lkey()); } - sge_t *array() const; - lkey_t *lkeys() const; - size_t size() const; + sge_t *array() const + { + return _sges.data(); + } + lkey_t *lkeys() const + { + return _lkeys.data(); + } + size_t size() const + { + return _sges.size(); + } }; } diff --git a/rdmalib/lib/buffer.cpp b/rdmalib/lib/buffer.cpp index 337d387..d4f9b77 100644 --- a/rdmalib/lib/buffer.cpp +++ b/rdmalib/lib/buffer.cpp @@ -14,8 +14,8 @@ namespace rdmalib { namespace impl { - template - Buffer::Buffer(): + template + Buffer::Buffer(): _size(0), _header(0), _bytes(0), @@ -25,8 +25,8 @@ namespace rdmalib { namespace impl { _own_memory(false) {} - template - Buffer::Buffer(Buffer && obj): + template + Buffer::Buffer(Buffer && obj): _size(obj._size), _header(obj._header), _bytes(obj._bytes), @@ -39,8 +39,8 @@ namespace rdmalib { namespace impl { obj._ptr = obj._mr = nullptr; } - template - Buffer & Buffer::operator=(Buffer && obj) + template + Buffer & Buffer::operator=(Buffer && obj) { _size = obj._size; _bytes = obj._bytes; @@ -55,8 +55,8 @@ namespace rdmalib { namespace impl { return *this; } - template - Buffer::Buffer(uint32_t size, uint32_t byte_size, uint32_t header): + template + Buffer::Buffer(uint32_t size, uint32_t byte_size, uint32_t header): _size(size), _header(header), _bytes(size * byte_size + header), @@ -77,8 +77,8 @@ namespace rdmalib { namespace impl { ); } - template - Buffer::Buffer(void* ptr, uint32_t size, uint32_t byte_size): + template + Buffer::Buffer(void* ptr, uint32_t size, uint32_t byte_size): _size(size), _header(0), _bytes(size * byte_size), @@ -137,26 +137,20 @@ namespace rdmalib { namespace impl { ); } - template - Buffer::mr_t Buffer::mr() const - { - 
return this->_mr; - } - - template - uint32_t Buffer::data_size() const + template + uint32_t Buffer::data_size() const { return this->_size; } - template - uint32_t Buffer::size() const + template + uint32_t Buffer::size() const { return this->_size + this->_header; } - template - uint32_t Buffer::bytes() const + template + uint32_t Buffer::bytes() const { return this->_bytes; } @@ -186,91 +180,25 @@ namespace rdmalib { namespace impl { return this->_mr->rkey; } - template - uintptr_t Buffer::address() const + template + uintptr_t Buffer::address() const { assert(this->_mr); return reinterpret_cast(this->_ptr); } - template - void* Buffer::ptr() const + template + void* Buffer::ptr() const { return this->_ptr; } - template - ScatterGatherElement Buffer::sge(uint32_t size, uint32_t offset) const + template + ScatterGatherElement Buffer::sge(uint32_t size, uint32_t offset) const { return {address() + offset, size, lkey()}; } }} -namespace rdmalib { - - ScatterGatherElement::ScatterGatherElement() - { - } - - #ifdef USE_LIBFABRIC - iovec *ScatterGatherElement::array() const - { - return _sges.data(); - } - void **ScatterGatherElement::lkeys() const - { - return _lkeys.data(); - } - #else - ibv_sge * ScatterGatherElement::array() const - { - return _sges.data(); - } - #endif - - size_t ScatterGatherElement::size() const - { - return _sges.size(); - } - - #ifdef USE_LIBFABRIC - ScatterGatherElement::ScatterGatherElement(uint64_t addr, uint32_t bytes, void *lkey) - { - _sges.push_back({(void *)addr, bytes}); - _lkeys.push_back(lkey); - } - #else - ScatterGatherElement::ScatterGatherElement(uint64_t addr, uint32_t bytes, uint32_t lkey) - { - _sges.push_back({addr, bytes, lkey}); - } - #endif - - RemoteBuffer::RemoteBuffer(): - addr(0), - rkey(0), - size(0) - {} - - RemoteBuffer::RemoteBuffer(uintptr_t addr, uint64_t rkey, uint32_t size): - addr(addr), - rkey(rkey), - size(size) - {} - - #ifdef USE_LIBFABRIC - RemoteBuffer::RemoteBuffer(uintptr_t addr, uint64_t rkey, 
uint32_t size): - addr(addr), - rkey(rkey), - size(size) - {} - #else - RemoteBuffer::RemoteBuffer(uintptr_t addr, uint32_t rkey, uint32_t size): - addr(addr), - rkey(rkey), - size(size) - {} - #endif - -} +namespace rdmalib {} \ No newline at end of file From a30ced0b01e2797435ae09bdaf55074af0a0cfc8 Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Thu, 6 Jul 2023 13:34:42 -0400 Subject: [PATCH 63/91] Move traits to separate header --- rdmalib/include/rdmalib/buffer.hpp | 36 ++---------------------- rdmalib/include/rdmalib/connection.hpp | 7 +++-- rdmalib/include/rdmalib/libraries.hpp | 38 ++++++++++++++++++++++++++ rdmalib/lib/buffer.cpp | 1 + 4 files changed, 45 insertions(+), 37 deletions(-) create mode 100644 rdmalib/include/rdmalib/libraries.hpp diff --git a/rdmalib/include/rdmalib/buffer.hpp b/rdmalib/include/rdmalib/buffer.hpp index 003a1a7..c14e7f5 100644 --- a/rdmalib/include/rdmalib/buffer.hpp +++ b/rdmalib/include/rdmalib/buffer.hpp @@ -7,14 +7,10 @@ #include -// #ifdef USE_LIBFABRIC #include #include -// #else -struct ibv_pd; -struct ibv_mr; -struct ibv_sge; -// #endif + +#include namespace rdmalib { @@ -22,34 +18,6 @@ namespace rdmalib template struct ScatterGatherElement; - struct ibverbs; - struct libfabric; - - template - struct library_traits; - - template <> - struct library_traits - { - //using type = ibverbs; - typedef ibv_mr *mr_t; - typedef ibv_pd *pd_t; - typedef uint32_t lkey_t; - typedef uint32_t rkey_t; - typedef ibv_sge sge_t; - }; - - template <> - struct library_traits - { - //using type = libfabric; - typedef fid_mr *mr_t; - typedef fid_domain *pd_t; - typedef void *lkey_t; - typedef uint64_t rkey_t; - typedef iovec sge_t; - }; - namespace impl { diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index bbb8464..f9712c0 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -10,16 +10,17 @@ #include #include -#ifdef USE_LIBFABRIC +//#ifdef 
USE_LIBFABRIC #include #include #include -#else +//#else #include #include -#endif +//#endif #include +#include namespace rdmalib { diff --git a/rdmalib/include/rdmalib/libraries.hpp b/rdmalib/include/rdmalib/libraries.hpp new file mode 100644 index 0000000..9cd3738 --- /dev/null +++ b/rdmalib/include/rdmalib/libraries.hpp @@ -0,0 +1,38 @@ +#ifndef __RDMALIB_LIBRARIES_HPP__ +#define __RDMALIB_LIBRARIES_HPP__ + +#include + +struct ibv_pd; +struct ibv_mr; +struct ibv_sge; + +struct ibverbs; +struct libfabric; + +template +struct library_traits; + +template <> +struct library_traits +{ + using type = ibverbs; + typedef ibv_mr *mr_t; + typedef ibv_pd *pd_t; + typedef uint32_t lkey_t; + typedef uint32_t rkey_t; + typedef ibv_sge sge_t; +}; + +template <> +struct library_traits +{ + using type = libfabric; + typedef fid_mr *mr_t; + typedef fid_domain *pd_t; + typedef void *lkey_t; + typedef uint64_t rkey_t; + typedef iovec sge_t; +}; + +#endif \ No newline at end of file diff --git a/rdmalib/lib/buffer.cpp b/rdmalib/lib/buffer.cpp index d4f9b77..c09b7b1 100644 --- a/rdmalib/lib/buffer.cpp +++ b/rdmalib/lib/buffer.cpp @@ -9,6 +9,7 @@ #include // #endif +#include #include #include From a1a049c28ebf83ed4084421ea8f604cbe0c4c718 Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Fri, 7 Jul 2023 19:06:14 -0400 Subject: [PATCH 64/91] Begin refactoring Connection --- rdmalib/include/rdmalib/connection.hpp | 38 ++++++++++++++++++-------- rdmalib/include/rdmalib/libraries.hpp | 14 ++++++++-- 2 files changed, 39 insertions(+), 13 deletions(-) diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index f9712c0..e1f509d 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -53,37 +53,43 @@ namespace rdmalib { // State of a communication: // a) communication ID // b) Queue Pair + template struct Connection { private: + // Bring types into scope + using qp_t = library_traits::qp_t; + using channel_t = 
library_traits::channel_t; + using wc_t = library_traits::wc_t; + #ifdef USE_LIBFABRIC - fid_ep* _qp; - fid_cq* _rcv_channel; - fid_cq* _trx_channel; + channel_t _rcv_channel; + channel_t _trx_channel; fid_cntr* _write_counter; uint64_t _counter; #else rdma_cm_id* _id; - ibv_qp* _qp; - ibv_comp_channel* _channel; + channel_t* _channel; #endif + + qp_t _qp; + int32_t _req_count; int32_t _private_data; bool _passive; ConnectionStatus _status; static const int _wc_size = 32; // FIXME: associate this with RecvBuffer + std::array _swc; // fast fix for overlapping polling + std::array _rwc; #ifdef USE_LIBFABRIC - std::array _swc; // fast fix for overlapping polling - std::array _rwc; fi_cq_err_entry _ewc; - #else - std::array _swc; // fast fix for overlapping polling - std::array _rwc; #endif - std::array _rwc_sges; + + std::array, _wc_size> _rwc_sges; int _send_flags; static const int _rbatch = 32; // 32 for faster division in the code + #ifndef USE_LIBFABRIC struct ibv_recv_wr _batch_wrs[_rbatch]; // preallocated and prefilled batched recv. 
#endif @@ -181,6 +187,16 @@ namespace rdmalib { int32_t _post_write(ScatterGatherElement && elems, ibv_send_wr wr, bool force_inline, bool force_solicited); #endif }; + + struct LibfabricConnection : Connection + { + + }; + + struct LibfabricConnection : Connection + { + + }; } #endif diff --git a/rdmalib/include/rdmalib/libraries.hpp b/rdmalib/include/rdmalib/libraries.hpp index 9cd3738..ec1af88 100644 --- a/rdmalib/include/rdmalib/libraries.hpp +++ b/rdmalib/include/rdmalib/libraries.hpp @@ -16,23 +16,33 @@ struct library_traits; template <> struct library_traits { - using type = ibverbs; + typedef ibverbs type; + typedef ibv_mr *mr_t; typedef ibv_pd *pd_t; typedef uint32_t lkey_t; typedef uint32_t rkey_t; typedef ibv_sge sge_t; + + typedef ibv_qp *qp_t; + typedef ibv_comp_channel *channel_t; + typedef ibv_wc wc_t; }; template <> struct library_traits { - using type = libfabric; + typedef libfabric type; + typedef fid_mr *mr_t; typedef fid_domain *pd_t; typedef void *lkey_t; typedef uint64_t rkey_t; typedef iovec sge_t; + + typedef fid_ep *qp_t; + typedef fid_cq *channel_t; + typedef fi_cq_data_entry wc_t; }; #endif \ No newline at end of file From d28571211d6bed1e60e6c3f547bab58c3eeaf9ad Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Wed, 12 Jul 2023 18:10:30 +0000 Subject: [PATCH 65/91] Refactor Connection header --- rdmalib/include/rdmalib/connection.hpp | 158 ++++++++++++------------- rdmalib/include/rdmalib/libraries.hpp | 56 +++++---- 2 files changed, 111 insertions(+), 103 deletions(-) diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index e1f509d..ff51823 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -55,22 +55,15 @@ namespace rdmalib { // b) Queue Pair template struct Connection { - private: // Bring types into scope - using qp_t = library_traits::qp_t; - using channel_t = library_traits::channel_t; - using wc_t = library_traits::wc_t; - - #ifdef 
USE_LIBFABRIC - channel_t _rcv_channel; - channel_t _trx_channel; - fid_cntr* _write_counter; - uint64_t _counter; - #else - rdma_cm_id* _id; - channel_t* _channel; - #endif - + using qp_t = typename library_traits::qp_t; + using wc_t = typename library_traits::wc_t; + using id_t = typename library_traits::id_t; + using channel_t = typename library_traits::channel_t; + template + using SGE = ScatterGatherElement; + using RemoteBuffer_ = RemoteBuffer; + private: qp_t _qp; int32_t _req_count; @@ -81,74 +74,77 @@ namespace rdmalib { // FIXME: associate this with RecvBuffer std::array _swc; // fast fix for overlapping polling std::array _rwc; - #ifdef USE_LIBFABRIC - fi_cq_err_entry _ewc; - #endif - std::array, _wc_size> _rwc_sges; + std::array, _wc_size> _rwc_sges; int _send_flags; - static const int _rbatch = 32; // 32 for faster division in the code - - #ifndef USE_LIBFABRIC - struct ibv_recv_wr _batch_wrs[_rbatch]; // preallocated and prefilled batched recv. - #endif public: + static const int _rbatch = 32; // 32 for faster division in the code + Connection(bool passive = false); ~Connection(); Connection(const Connection&) = delete; Connection& operator=(const Connection&) = delete; Connection(Connection&&); - void initialize_batched_recv(const rdmalib::impl::Buffer & sge, size_t offset); - #ifndef USE_LIBFABRIC - void inlining(bool enable); - #endif - #ifdef USE_LIBFABRIC - void initialize(fid_fabric* fabric, fid_domain* pd, fi_info* info, fid_eq* ec, fid_cntr* write_cntr, fid_cq* rx_channel, fid_cq* tx_channel); - #else - void initialize(rdma_cm_id* id); - #endif + template + void initialize_batched_recv(const rdmalib::impl::Buffer & sge, size_t offset); void close(); - #ifdef USE_LIBFABRIC - fid_domain* _domain = nullptr; - fid* id() const; - fid_ep* qp() const; - fid_wait* wait_set() const; - fid_cq* receive_completion_channel() const; - fid_cq* transmit_completion_channel() const; - #else - rdma_cm_id* id() const; - ibv_qp* qp() const; - ibv_comp_channel* 
completion_channel() const; - #endif + + id_t* id() const; + qp_t* qp() const; + uint32_t private_data() const; ConnectionStatus status() const; void set_status(ConnectionStatus status); void set_private_data(uint32_t private_data); // Blocking, no timeout - #ifdef USE_LIBFABRIC - std::tuple poll_wc(QueueType, bool blocking = true, int count = -1, bool update = false); - #else - std::tuple poll_wc(QueueType, bool blocking = true, int count = -1); - #endif - int32_t post_send(const ScatterGatherElement & elem, int32_t id = -1, bool force_inline = false); - int32_t post_recv(ScatterGatherElement && elem, int32_t id = -1, int32_t count = 1); - int32_t post_batched_empty_recv(int32_t count = 1); + std::tuple poll_wc(QueueType, bool blocking = true, int count = -1, bool update = false); - int32_t post_write(ScatterGatherElement && elems, const RemoteBuffer & buf, bool force_inline = false); + template + int32_t post_send(const SGE & elem, int32_t id = -1, bool force_inline = false); + template + int32_t post_recv(SGE && elem, int32_t id = -1, int32_t count = 1); + + int32_t post_batched_empty_recv(int32_t count = 1); + template + int32_t post_write(SGE && elems, const RemoteBuffer_ & buf, bool force_inline = false); // Solicited makes sense only for RDMA write with immediate - int32_t post_write(ScatterGatherElement && elems, const RemoteBuffer & buf, + template + int32_t post_write(SGE && elems, const RemoteBuffer_ & buf, uint32_t immediate, bool force_inline = false, bool solicited = false ); - int32_t post_cas(ScatterGatherElement && elems, const RemoteBuffer & buf, uint64_t compare, uint64_t swap); - #ifdef USE_LIBFABRIC - template inline int32_t post_write(const Buffer & buf, const size_t size, const uint64_t offset, const RemoteBuffer & rbuf, const uint32_t immediate) { + template + int32_t post_cas(SGE && elems, const RemoteBuffer_ & buf, uint64_t compare, uint64_t swap); + }; + + struct LibfabricConnection : Connection + { + template + using Buffer_ = Buffer; + 
template // S = SGE's Derived class + using SGE = ScatterGatherElement; + + fid_cq *_rcv_channel; + fid_cq *_trx_channel; + fid_cntr* _write_counter; + uint64_t _counter; + fid_domain* _domain = nullptr; + + fi_cq_err_entry _ewc; + + void initialize(fid_fabric* fabric, fid_domain* pd, fi_info* info, fid_eq* ec, fid_cntr* write_cntr, fid_cq* rx_channel, fid_cq* tx_channel); + + fid_wait* wait_set() const; + channel_t receive_completion_channel() const; + channel_t transmit_completion_channel() const; + + template inline int32_t post_write(const Buffer_ & buf, const size_t size, const uint64_t offset, const RemoteBuffer_ & rbuf, const uint32_t immediate) { int ret = fi_writedata(_qp, (void *)(buf.address() + offset), size, buf.lkey(), immediate + (size << 32), NULL, rbuf.addr, rbuf.rkey, (void *)(_req_count++)); if(ret) { spdlog::error("Post write unsuccessful, reason {} {}, buf size {}, id {}, remote addr {}, remote rkey {}, imm data {}, connection {}", @@ -167,37 +163,41 @@ namespace rdmalib { ); return _req_count - 1; } - int32_t post_atomic_fadd(const Buffer & _accounting_buf, const RemoteBuffer & rbuf, uint64_t add); - #else - int32_t post_atomic_fadd(ScatterGatherElement && elems, const RemoteBuffer & rbuf, uint64_t add); - #endif + + int32_t post_atomic_fadd(const Buffer_ & _accounting_buf, const RemoteBuffer_& rbuf, uint64_t add); // Register to be notified about all events, including unsolicited ones - #ifdef USE_LIBFABRIC int wait_events(int timeout = -1); - #else - void notify_events(bool only_solicited = false); - ibv_cq* wait_events(); - void ack_events(ibv_cq* cq, int len); - #endif - private: - #ifdef USE_LIBFABRIC - int32_t _post_write(ScatterGatherElement && elems, const RemoteBuffer & rbuf, const uint32_t immediate = 0); - #else - int32_t _post_write(ScatterGatherElement && elems, ibv_send_wr wr, bool force_inline, bool force_solicited); - #endif + + template + int32_t _post_write(SGE && elems, const RemoteBuffer_ & rbuf, const uint32_t immediate = 
0); }; - struct LibfabricConnection : Connection + struct VerbsConnection : Connection { + template // S for SGE's Derived class + using SGE = ScatterGatherElement; - }; + id_t* _id; + channel_t *_channel; - struct LibfabricConnection : Connection - { + struct ibv_recv_wr _batch_wrs[_rbatch]; // preallocated and prefilled batched recv. + + void inlining(bool enable); + void initialize(rdma_cm_id* id); + ibv_comp_channel* completion_channel() const; + + template + int32_t post_atomic_fadd(SGE && elems, const RemoteBuffer_ & rbuf, uint64_t add); + + void notify_events(bool only_solicited = false); + ibv_cq* wait_events(); + void ack_events(ibv_cq* cq, int len); + + template + int32_t _post_write(SGE && elems, ibv_send_wr wr, bool force_inline, bool force_solicited); }; } #endif - diff --git a/rdmalib/include/rdmalib/libraries.hpp b/rdmalib/include/rdmalib/libraries.hpp index ec1af88..3dbe8f2 100644 --- a/rdmalib/include/rdmalib/libraries.hpp +++ b/rdmalib/include/rdmalib/libraries.hpp @@ -1,12 +1,18 @@ #ifndef __RDMALIB_LIBRARIES_HPP__ #define __RDMALIB_LIBRARIES_HPP__ +#include #include +// Forward declare ibverbs structs struct ibv_pd; struct ibv_mr; struct ibv_sge; +struct ibv_qp; +struct ibv_comp_channel; +struct ibv_wc; +// Library parameter definitions struct ibverbs; struct libfabric; @@ -14,35 +20,37 @@ template struct library_traits; template <> -struct library_traits +struct library_traits { - typedef ibverbs type; - - typedef ibv_mr *mr_t; - typedef ibv_pd *pd_t; - typedef uint32_t lkey_t; - typedef uint32_t rkey_t; - typedef ibv_sge sge_t; - - typedef ibv_qp *qp_t; - typedef ibv_comp_channel *channel_t; - typedef ibv_wc wc_t; + using type = libfabric; + + using mr_t = fid_mr *; + using pd_t = fid_domain *; + using lkey_t = void *; + using rkey_t = uint64_t; + using sge_t = iovec; + + using qp_t = fid_ep *; + using wc_t = fi_cq_data_entry; + using id_t = fid *; + using channel_t = fid_cq *; }; template <> -struct library_traits +struct library_traits { - 
typedef libfabric type; - - typedef fid_mr *mr_t; - typedef fid_domain *pd_t; - typedef void *lkey_t; - typedef uint64_t rkey_t; - typedef iovec sge_t; - - typedef fid_ep *qp_t; - typedef fid_cq *channel_t; - typedef fi_cq_data_entry wc_t; + using type = ibverbs; + + using mr_t = ibv_mr *; + using pd_t = ibv_pd *; + using lkey_t = uint32_t; + using rkey_t = uint32_t; + using sge_t = ibv_sge; + + using qp_t = ibv_qp *; + using wc_t = ibv_wc; + using id_t = rdma_cm_id *; + using channel_t = ibv_comp_channel *; }; #endif \ No newline at end of file From 80d973c8ea587172086c680a4f31aa4707b1efea Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Wed, 12 Jul 2023 20:45:52 +0000 Subject: [PATCH 66/91] Refactored constructors --- rdmalib/include/rdmalib/connection.hpp | 19 +++-- rdmalib/lib/connection.cpp | 99 ++++++++++++++------------ 2 files changed, 69 insertions(+), 49 deletions(-) diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index ff51823..36448f1 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -55,6 +55,7 @@ namespace rdmalib { // b) Queue Pair template struct Connection { + protected: // Bring types into scope using qp_t = typename library_traits::qp_t; using wc_t = typename library_traits::wc_t; @@ -63,7 +64,6 @@ namespace rdmalib { template using SGE = ScatterGatherElement; using RemoteBuffer_ = RemoteBuffer; - private: qp_t _qp; int32_t _req_count; @@ -78,12 +78,15 @@ namespace rdmalib { std::array, _wc_size> _rwc_sges; int _send_flags; - public: static const int _rbatch = 32; // 32 for faster division in the code - Connection(bool passive = false); - ~Connection(); + Connection(bool passive = false) { + static_cast(this)->Derived(passive); + } + ~Connection() { + static_cast(this)->~Derived(); + } Connection(const Connection&) = delete; Connection& operator=(const Connection&) = delete; Connection(Connection&&); @@ -138,6 +141,10 @@ namespace rdmalib { 
fi_cq_err_entry _ewc; + LibfabricConnection(bool passive); + LibfabricConnection(LibfabricConnection&& obj); + ~LibfabricConnection(); + void initialize(fid_fabric* fabric, fid_domain* pd, fi_info* info, fid_eq* ec, fid_cntr* write_cntr, fid_cq* rx_channel, fid_cq* tx_channel); fid_wait* wait_set() const; @@ -183,6 +190,10 @@ namespace rdmalib { struct ibv_recv_wr _batch_wrs[_rbatch]; // preallocated and prefilled batched recv. + VerbsConnection(bool passive); + VerbsConnection(VerbsConnection&& obj); + ~VerbsConnection(); + void inlining(bool enable); void initialize(rdma_cm_id* id); ibv_comp_channel* completion_channel() const; diff --git a/rdmalib/lib/connection.cpp b/rdmalib/lib/connection.cpp index 065722b..75a3599 100644 --- a/rdmalib/lib/connection.cpp +++ b/rdmalib/lib/connection.cpp @@ -7,14 +7,14 @@ #include #include #include -#ifdef USE_LIBFABRIC +// #ifdef USE_LIBFABRIC #include #include #include #include "rdmalib/buffer.hpp" #include #include -#endif +// #endif #include #include @@ -31,28 +31,30 @@ namespace rdmalib { } #endif - Connection::Connection(bool passive): - _qp(nullptr), - #ifdef USE_LIBFABRIC - _rcv_channel(nullptr), - _trx_channel(nullptr), - _write_counter(nullptr), - #else - _id(nullptr), - _channel(nullptr), - #endif - _req_count(0), - _private_data(0), - _passive(passive), - _status(ConnectionStatus::UNKNOWN) + LibfabricConnection::LibfabricConnection(bool passive) { - #ifndef USE_LIBFABRIC + _qp = nullptr; + _rcv_channel = nullptr; + _trx_channel = nullptr; + _write_counter = nullptr; + _req_count = 0; + _private_data = 0; + _passive = passive; + _status= ConnectionStatus::UNKNOWN; + SPDLOG_DEBUG("Allocate a connection {}", fmt::ptr(this)); + } + + VerbsConnection::VerbsConnection(bool passive) + { + _qp = nullptr; + _id = nullptr; + _channel = nullptr; + _req_count = 0; + _private_data = 0; + _passive = passive; + _status = ConnectionStatus::UNKNOWN; inlining(false); - #endif - #ifdef USE_LIBFABRIC - SPDLOG_DEBUG("Allocate a 
connection {}", fmt::ptr(this)); - #else for(int i=0; i < _rbatch; i++){ _batch_wrs[i].wr_id = i; _batch_wrs[i].sg_list = 0; @@ -61,42 +63,50 @@ namespace rdmalib { } _batch_wrs[_rbatch-1].next = NULL; SPDLOG_DEBUG("Allocate a connection with id {}", fmt::ptr(_id)); - #endif } - Connection::~Connection() + LibfabricConnection::~LibfabricConnection() { - #ifdef USE_LIBFABRIC SPDLOG_DEBUG("Deallocate connection {} with qp fid {}", fmt::ptr(this), fmt::ptr(&_qp->fid)); - #else + close(); + } + + VerbsConnection::~VerbsConnection() + { SPDLOG_DEBUG("Deallocate a connection with id {}", fmt::ptr(_id)); - #endif close(); } - Connection::Connection(Connection&& obj): - _qp(obj._qp), - #ifdef USE_LIBFABRIC - _rcv_channel(obj._rcv_channel), - _trx_channel(obj._trx_channel), - _write_counter(nullptr), - #else - _id(obj._id), - _channel(obj._channel), - #endif - _req_count(obj._req_count), - _private_data(obj._private_data), - _passive(obj._passive), - _status(obj._status), - _send_flags(obj._send_flags) + LibfabricConnection::LibfabricConnection(LibfabricConnection&& obj) { - #ifndef USE_LIBFABRIC + _qp = obj._qp; + _rcv_channel = obj._rcv_channel; + _trx_channel = obj._trx_channel; + _write_counter = nullptr; + _req_count = obj._req_count; + _private_data = obj._private_data; + _passive = obj._passive; + _status = obj._status; + _send_flags = obj._send_flags; + + obj._qp = nullptr; + obj._req_count = 0; + } + + VerbsConnection::VerbsConnection(VerbsConnection&& obj) + { + _qp = obj._qp; + _id = obj._id; + _channel = obj._channel; + _req_count = obj._req_count; + _private_data = obj._private_data; + _passive = obj._passive; + _status = obj._status; + _send_flags = obj._send_flags; obj._id = nullptr; - #endif obj._qp = nullptr; obj._req_count = 0; - #ifndef USE_LIBFABRIC for(int i=0; i < _rbatch; i++){ _batch_wrs[i].wr_id = i; _batch_wrs[i].sg_list = 0; @@ -104,7 +114,6 @@ namespace rdmalib { _batch_wrs[i].next=&(_batch_wrs[i+1]); } _batch_wrs[_rbatch-1].next = NULL; - #endif 
} void Connection::initialize_batched_recv(const rdmalib::impl::Buffer & buf, size_t offset) From 4d686c51a2ef3e9bf0ac0e1fff0773ea2c602146 Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Thu, 13 Jul 2023 10:12:03 -0400 Subject: [PATCH 67/91] Refactor RecvBuffer --- rdmalib/include/rdmalib/connection.hpp | 7 +++- rdmalib/include/rdmalib/recv_buffer.hpp | 44 ++++++++++++++++--------- rdmalib/lib/connection.cpp | 19 +++++++++-- 3 files changed, 50 insertions(+), 20 deletions(-) diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index 36448f1..d4f375a 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -64,8 +64,8 @@ namespace rdmalib { template using SGE = ScatterGatherElement; using RemoteBuffer_ = RemoteBuffer; - qp_t _qp; + qp_t _qp; int32_t _req_count; int32_t _private_data; bool _passive; @@ -145,6 +145,9 @@ namespace rdmalib { LibfabricConnection(LibfabricConnection&& obj); ~LibfabricConnection(); + template + void initialize_batched_recv(const rdmalib::impl::Buffer & sge, size_t offset); + void initialize(fid_fabric* fabric, fid_domain* pd, fi_info* info, fid_eq* ec, fid_cntr* write_cntr, fid_cq* rx_channel, fid_cq* tx_channel); fid_wait* wait_set() const; @@ -195,6 +198,8 @@ namespace rdmalib { ~VerbsConnection(); void inlining(bool enable); + template + void initialize_batched_recv(const rdmalib::impl::Buffer & sge, size_t offset); void initialize(rdma_cm_id* id); ibv_comp_channel* completion_channel() const; diff --git a/rdmalib/include/rdmalib/recv_buffer.hpp b/rdmalib/include/rdmalib/recv_buffer.hpp index 1c5c91a..d2b04a7 100644 --- a/rdmalib/include/rdmalib/recv_buffer.hpp +++ b/rdmalib/include/rdmalib/recv_buffer.hpp @@ -3,6 +3,7 @@ #define __RDMALIB_RECV_BUFFER_HPP__ #include +#include #include @@ -10,7 +11,10 @@ namespace rdmalib { + template struct RecvBuffer { + using wc_t = library_traits::wc_t; + int _rcv_buf_size; int _refill_threshold; int _requests; @@ 
-31,8 +35,26 @@ namespace rdmalib { refill(); } - #ifdef USE_LIBFABRIC - inline std::tuple poll(bool blocking = false) + inline std::tuple poll(bool blocking = false) + { + return static_cast(this)->poll(blocking); + } + + inline bool refill() + { + if(_requests < _refill_threshold) { + SPDLOG_DEBUG("Post {} requests to buffer at QP {}", _rcv_buf_size - _requests, fmt::ptr(_conn->qp())); + this->_conn->post_batched_empty_recv(_rcv_buf_size - _requests); + //this->_conn->post_recv({}, -1, _rcv_buf_size - _requests); + _requests = _rcv_buf_size; + return true; + } + return false; + } + }; + + struct LibfabricRecvBuffer : RecvBuffer { + inline std::tuple poll(bool blocking = false) { auto wc = this->_conn->poll_wc(rdmalib::QueueType::RECV, blocking); if(std::get<1>(wc)) @@ -40,8 +62,10 @@ namespace rdmalib { _requests -= std::get<1>(wc); return wc; } - #else - inline std::tuple poll(bool blocking = false) + }; + + struct VerbsRecvBuffer : RecvBuffer { + inline std::tuple poll(bool blocking = false) { auto wc = this->_conn->poll_wc(rdmalib::QueueType::RECV, blocking); if(std::get<1>(wc)) @@ -49,19 +73,7 @@ namespace rdmalib { _requests -= std::get<1>(wc); return wc; } - #endif - inline bool refill() - { - if(_requests < _refill_threshold) { - SPDLOG_DEBUG("Post {} requests to buffer at QP {}", _rcv_buf_size - _requests, fmt::ptr(_conn->qp())); - this->_conn->post_batched_empty_recv(_rcv_buf_size - _requests); - //this->_conn->post_recv({}, -1, _rcv_buf_size - _requests); - _requests = _rcv_buf_size; - return true; - } - return false; - } }; } diff --git a/rdmalib/lib/connection.cpp b/rdmalib/lib/connection.cpp index 75a3599..7553a70 100644 --- a/rdmalib/lib/connection.cpp +++ b/rdmalib/lib/connection.cpp @@ -31,6 +31,8 @@ namespace rdmalib { } #endif + + LibfabricConnection::LibfabricConnection(bool passive) { _qp = nullptr; @@ -53,6 +55,7 @@ namespace rdmalib { _private_data = 0; _passive = passive; _status = ConnectionStatus::UNKNOWN; + inlining(false); for(int i=0; 
i < _rbatch; i++){ @@ -116,16 +119,26 @@ namespace rdmalib { _batch_wrs[_rbatch-1].next = NULL; } - void Connection::initialize_batched_recv(const rdmalib::impl::Buffer & buf, size_t offset) + + template + void LibfabricConnection::initialize_batched_recv(const rdmalib::impl::Buffer & buf, size_t offset) + { + for(int i = 0; i < _rbatch; i++){ + _rwc_sges[i] = buf.sge(offset, i*offset); + //for(auto & sg : _rwc_sges[i]._sges) + //sg.addr += i*offset; + } + } + + template + void VerbsConnection::initialize_batched_recv(const rdmalib::impl::Buffer & buf, size_t offset) { for(int i = 0; i < _rbatch; i++){ _rwc_sges[i] = buf.sge(offset, i*offset); //for(auto & sg : _rwc_sges[i]._sges) //sg.addr += i*offset; - #ifndef USE_LIBFABRIC _batch_wrs[i].sg_list = _rwc_sges[i].array(); _batch_wrs[i].num_sge = _rwc_sges[i].size(); - #endif } } From 96d1b725d542fe1d89ef81d4ebd9556f452ee8f4 Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Fri, 14 Jul 2023 12:56:22 -0400 Subject: [PATCH 68/91] Refactor more of Connection --- rdmalib/include/rdmalib/connection.hpp | 28 ++++++++++++++----- rdmalib/lib/connection.cpp | 37 +++++++++----------------- 2 files changed, 35 insertions(+), 30 deletions(-) diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index d4f375a..2d2b5d2 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -93,10 +93,18 @@ namespace rdmalib { template void initialize_batched_recv(const rdmalib::impl::Buffer & sge, size_t offset); - void close(); - - id_t* id() const; - qp_t* qp() const; + void close() + { + static_cast(this)->close(); + } + id_t id() const + { + return static_cast(this)->id(); + } + qp_t qp() const + { + return static_cast(this)->qp(); + } uint32_t private_data() const; ConnectionStatus status() const; @@ -145,10 +153,14 @@ namespace rdmalib { LibfabricConnection(LibfabricConnection&& obj); ~LibfabricConnection(); + id_t id() const; + qp_t qp() const; + template 
void initialize_batched_recv(const rdmalib::impl::Buffer & sge, size_t offset); void initialize(fid_fabric* fabric, fid_domain* pd, fi_info* info, fid_eq* ec, fid_cntr* write_cntr, fid_cq* rx_channel, fid_cq* tx_channel); + void close(); fid_wait* wait_set() const; channel_t receive_completion_channel() const; @@ -188,14 +200,18 @@ namespace rdmalib { template // S for SGE's Derived class using SGE = ScatterGatherElement; - id_t* _id; - channel_t *_channel; + id_t _id; + channel_t _channel; struct ibv_recv_wr _batch_wrs[_rbatch]; // preallocated and prefilled batched recv. VerbsConnection(bool passive); VerbsConnection(VerbsConnection&& obj); ~VerbsConnection(); + void close(); + + id_t id() const; + qp_t qp() const; void inlining(bool enable); template diff --git a/rdmalib/lib/connection.cpp b/rdmalib/lib/connection.cpp index 7553a70..1c9f979 100644 --- a/rdmalib/lib/connection.cpp +++ b/rdmalib/lib/connection.cpp @@ -31,8 +31,6 @@ namespace rdmalib { } #endif - - LibfabricConnection::LibfabricConnection(bool passive) { _qp = nullptr; @@ -142,8 +140,7 @@ namespace rdmalib { } } - #ifdef USE_LIBFABRIC - void Connection::initialize(fid_fabric* fabric, fid_domain* pd, fi_info* info, fid_eq* ec, fid_cntr* write_cntr, fid_cq* rx_channel, fid_cq* tx_channel) + void LibfabricConnection::initialize(fid_fabric* fabric, fid_domain* pd, fi_info* info, fid_eq* ec, fid_cntr* write_cntr, fid_cq* rx_channel, fid_cq* tx_channel) { // Create the endpoint and set its flags up so that we get completions on RDM impl::expect_zero(fi_endpoint(pd, info, &_qp, reinterpret_cast(this))); @@ -163,29 +160,25 @@ namespace rdmalib { impl::expect_zero(fi_enable(_qp)); SPDLOG_DEBUG("Initialize connection {}", fmt::ptr(this)); } - #else - void Connection::initialize(rdma_cm_id* id) + + void VerbsConnection::initialize(rdma_cm_id* id) { this->_id = id; this->_channel = _id->recv_cq_channel; this->_qp = this->_id->qp; SPDLOG_DEBUG("Initialize a connection with id {}", fmt::ptr(_id)); } - #endif - 
#ifndef USE_LIBFABRIC - void Connection::inlining(bool enable) + void VerbsConnection::inlining(bool enable) { if(enable) _send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE; else _send_flags = IBV_SEND_SIGNALED; } - #endif - void Connection::close() + void LibfabricConnection::close() { - #ifdef USE_LIBFABRIC SPDLOG_DEBUG("Connection close called for {} with qp fid {}", fmt::ptr(this), fmt::ptr(&this->_qp->fid)); // We need to close the transmit and receive channels and the endpoint if (_status != ConnectionStatus::DISCONNECTED) { @@ -215,7 +208,10 @@ namespace rdmalib { // } _status = ConnectionStatus::DISCONNECTED; } - #else + } + + void VerbsConnection::close() + { SPDLOG_DEBUG("Connection close called for {} id {}", fmt::ptr(this), fmt::ptr(this->_id)); if(_id) { // When the connection is allocated on active side @@ -236,32 +232,25 @@ namespace rdmalib { _id = nullptr; _status = ConnectionStatus::DISCONNECTED; } - #endif } - #ifdef USE_LIBFABRIC - fid* Connection::id() const + id_t LibfabricConnection::id() const { return &this->_qp->fid; } - #else - rdma_cm_id* Connection::id() const + id_t VerbsConnection::id() const { return this->_id; } - #endif - #ifdef USE_LIBFABRIC - fid_ep* Connection::qp() const + qp_t Connection::qp() const { return this->_qp; } - #else - ibv_qp* Connection::qp() const + qp_t Connection::qp() const { return this->_qp; } - #endif #ifdef USE_LIBFABRIC fid_cq* Connection::receive_completion_channel() const From ac96bef5cf19619fc6609abe160e74d15ace29d2 Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Mon, 17 Jul 2023 10:01:03 -0400 Subject: [PATCH 69/91] Connection refactor almost done. 
Introducing more traits for sub-types --- rdmalib/include/rdmalib/buffer.hpp | 51 ++-- rdmalib/include/rdmalib/connection.hpp | 2 + rdmalib/include/rdmalib/libraries.hpp | 14 +- rdmalib/include/rdmalib/recv_buffer.hpp | 40 +-- rdmalib/lib/buffer.cpp | 378 ++++++++++++------------ rdmalib/lib/connection.cpp | 129 ++++---- 6 files changed, 296 insertions(+), 318 deletions(-) diff --git a/rdmalib/include/rdmalib/buffer.hpp b/rdmalib/include/rdmalib/buffer.hpp index c14e7f5..d6b42cb 100644 --- a/rdmalib/include/rdmalib/buffer.hpp +++ b/rdmalib/include/rdmalib/buffer.hpp @@ -26,18 +26,11 @@ namespace rdmalib struct Buffer { protected: - using mr_t = typename library_traits::mr_t; - using pd_t = typename library_traits::pd_t; + using mr_t = typename library_traits::mr_t; + using pd_t = typename library_traits::pd_t; using lkey_t = typename library_traits::lkey_t; using rkey_t = typename library_traits::rkey_t; - - /* - typedef typename Derived::library Library; - typedef typename library_traits::mr_t mr_t ; - typedef typename library_traits::pd_t pd_t ; - typedef typename library_traits::lkey_t lkey_t ; - typedef typename library_traits::rkey_t rkey_t ; - */ + //using SGE = library_traits::LibSGE; uint32_t _size; uint32_t _header; @@ -69,22 +62,21 @@ namespace rdmalib uint32_t bytes() const; void register_memory(pd_t pd, int access) { - static_cast(this)->register_memory(pd, access); + static_cast(this)->register_memory(pd, access); } lkey_t lkey() const { - static_cast(this)->lkey(); + static_cast(this)->lkey(); } rkey_t rkey() const { - static_cast(this)->rkey(); + static_cast(this)->rkey(); } - ScatterGatherElement sge(uint32_t size, uint32_t offset) const; + SGE sge(uint32_t size, uint32_t offset) const; }; struct LibfabricBuffer : Buffer { - //using library = libfabric; void register_memory(pd_t pd, int access); lkey_t lkey() const; rkey_t rkey() const; @@ -93,7 +85,6 @@ namespace rdmalib struct VerbsBuffer : Buffer { - //using library = ibverbs; void 
register_memory(pd_t pd, int access); lkey_t lkey() const; rkey_t rkey() const; @@ -112,17 +103,19 @@ namespace rdmalib rkey_t rkey; uint32_t size; - RemoteBuffer(): + RemoteBuffer() : addr(0), rkey(0), size(0) - {} + { + } // When accessing the remote buffer, we might not need to know the size. - RemoteBuffer(uintptr_t addr, rkey_t rkey, uint32_t size = 0): + RemoteBuffer(uintptr_t addr, rkey_t rkey, uint32_t size = 0) : addr(addr), rkey(rkey), size(size) - {} + { + } template void serialize(Archive &ar) @@ -144,6 +137,7 @@ namespace rdmalib // Does NOT free the associated resource Buffer(T *ptr, uint32_t size) : ImplBuffer(ptr, size, sizeof(T)) { + } // Provide a buffer instance for existing memory pool @@ -175,18 +169,18 @@ namespace rdmalib template struct ScatterGatherElement { - //using Library = typename Derived::library; - using sge_t = typename library_traits::sge_t; + using sge_t = typename library_traits::sge_t; using lkey_t = typename library_traits::lkey_t; mutable std::vector _sges; ScatterGatherElement() - {} + { + } ScatterGatherElement(uint64_t addr, uint32_t bytes, lkey_t lkey) { - static_cast(this)->Derived(addr, bytes, lkey); + static_cast(this)->Derived(addr, bytes, lkey); } template @@ -198,18 +192,18 @@ namespace rdmalib template void add(const Buffer &buf) { - static_cast(this)->add(buf); + static_cast(this)->add(buf); } template void add(const Buffer &buf, uint32_t size, size_t offset = 0) { - static_cast(this)->add(buf, size, offset); + static_cast(this)->add(buf, size, offset); } sge_t *array() const { - return static_cast(this)->array(); + return static_cast(this)->array(); } size_t size() const; @@ -217,7 +211,6 @@ namespace rdmalib struct VerbsScatterGatherElement : ScatterGatherElement { - using Library = ibverbs; VerbsScatterGatherElement(uint64_t addr, uint32_t bytes, uint32_t lkey); @@ -237,13 +230,11 @@ namespace rdmalib sge_t *array() const; size_t size() const; - }; struct LibfabricScatterGatherElement : ScatterGatherElement { 
mutable std::vector _lkeys; - using Library = libfabric; LibfabricScatterGatherElement(uint64_t addr, uint32_t bytes, lkey_t lkey) { diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index 2d2b5d2..9e447ce 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -229,6 +229,8 @@ namespace rdmalib { template int32_t _post_write(SGE && elems, ibv_send_wr wr, bool force_inline, bool force_solicited); + std::tuple poll_wc(QueueType type, bool blocking, int count); + }; } diff --git a/rdmalib/include/rdmalib/libraries.hpp b/rdmalib/include/rdmalib/libraries.hpp index 3dbe8f2..d1fdc87 100644 --- a/rdmalib/include/rdmalib/libraries.hpp +++ b/rdmalib/include/rdmalib/libraries.hpp @@ -3,6 +3,7 @@ #include #include +//#include "rdmalib/rdmalib.hpp" // Forward declare ibverbs structs struct ibv_pd; @@ -34,6 +35,11 @@ struct library_traits using wc_t = fi_cq_data_entry; using id_t = fid *; using channel_t = fid_cq *; + + // template + // using LibBuffer = rdmalib::Buffer; + // using LibSGE = rdmalib::LibfabricScatterGatherElement; + // using LibConnection = rdmalib::LibfabricConnection; }; template <> @@ -49,8 +55,14 @@ struct library_traits using qp_t = ibv_qp *; using wc_t = ibv_wc; - using id_t = rdma_cm_id *; + using id_t = rdma_cm_id *; using channel_t = ibv_comp_channel *; + + // These are going to need to go elsewhere + // template + // using LibBuffer = rdmalib::Buffer; + // using LibSGE = rdmalib::VerbsScatterGatherElement; + // using LibConnection = rdmalib::VerbsConnection; }; #endif \ No newline at end of file diff --git a/rdmalib/include/rdmalib/recv_buffer.hpp b/rdmalib/include/rdmalib/recv_buffer.hpp index d2b04a7..21d30b4 100644 --- a/rdmalib/include/rdmalib/recv_buffer.hpp +++ b/rdmalib/include/rdmalib/recv_buffer.hpp @@ -9,26 +9,29 @@ #include -namespace rdmalib { +namespace rdmalib +{ template - struct RecvBuffer { + struct RecvBuffer + { using wc_t = library_traits::wc_t; 
+ using LibConnection = library_traits::LibConnection; int _rcv_buf_size; int _refill_threshold; int _requests; constexpr static int DEFAULT_REFILL_THRESHOLD = 8; - rdmalib::Connection * _conn; + LibConnection *_conn; - RecvBuffer(int rcv_buf_size): - _rcv_buf_size(rcv_buf_size), + RecvBuffer(int rcv_buf_size) : _rcv_buf_size(rcv_buf_size), _refill_threshold(std::min(_rcv_buf_size, DEFAULT_REFILL_THRESHOLD)), _requests(0), _conn(nullptr) - {} + { + } - inline void connect(rdmalib::Connection * conn) + inline void connect(LibConnection *conn) { this->_conn = conn; _requests = 0; @@ -37,15 +40,16 @@ namespace rdmalib { inline std::tuple poll(bool blocking = false) { - return static_cast(this)->poll(blocking); + return static_cast(this)->poll(blocking); } inline bool refill() { - if(_requests < _refill_threshold) { + if (_requests < _refill_threshold) + { SPDLOG_DEBUG("Post {} requests to buffer at QP {}", _rcv_buf_size - _requests, fmt::ptr(_conn->qp())); this->_conn->post_batched_empty_recv(_rcv_buf_size - _requests); - //this->_conn->post_recv({}, -1, _rcv_buf_size - _requests); + // this->_conn->post_recv({}, -1, _rcv_buf_size - _requests); _requests = _rcv_buf_size; return true; } @@ -53,29 +57,29 @@ namespace rdmalib { } }; - struct LibfabricRecvBuffer : RecvBuffer { - inline std::tuple poll(bool blocking = false) + struct LibfabricRecvBuffer : RecvBuffer + { + inline std::tuple poll(bool blocking = false) { auto wc = this->_conn->poll_wc(rdmalib::QueueType::RECV, blocking); - if(std::get<1>(wc)) + if (std::get<1>(wc)) SPDLOG_DEBUG("Polled reqs {}, left {}", std::get<1>(wc), _requests); _requests -= std::get<1>(wc); return wc; } }; - struct VerbsRecvBuffer : RecvBuffer { - inline std::tuple poll(bool blocking = false) + struct VerbsRecvBuffer : RecvBuffer + { + inline std::tuple poll(bool blocking = false) { auto wc = this->_conn->poll_wc(rdmalib::QueueType::RECV, blocking); - if(std::get<1>(wc)) + if (std::get<1>(wc)) SPDLOG_DEBUG("Polled reqs {}, left {}", 
std::get<1>(wc), _requests); _requests -= std::get<1>(wc); return wc; } - }; } #endif - diff --git a/rdmalib/lib/buffer.cpp b/rdmalib/lib/buffer.cpp index c09b7b1..5b0c7ad 100644 --- a/rdmalib/lib/buffer.cpp +++ b/rdmalib/lib/buffer.cpp @@ -13,193 +13,191 @@ #include #include -namespace rdmalib { namespace impl { - - template - Buffer::Buffer(): - _size(0), - _header(0), - _bytes(0), - _byte_size(0), - _ptr(nullptr), - _mr(nullptr), - _own_memory(false) - {} - - template - Buffer::Buffer(Buffer && obj): - _size(obj._size), - _header(obj._header), - _bytes(obj._bytes), - _byte_size(obj._byte_size), - _ptr(obj._ptr), - _mr(obj._mr), - _own_memory(obj._own_memory) - { - obj._size = obj._bytes = obj._header = 0; - obj._ptr = obj._mr = nullptr; - } - - template - Buffer & Buffer::operator=(Buffer && obj) - { - _size = obj._size; - _bytes = obj._bytes; - _bytes = obj._byte_size; - _header = obj._header; - _ptr = obj._ptr; - _mr = obj._mr; - _own_memory = obj._own_memory; - - obj._size = obj._bytes = 0; - obj._ptr = obj._mr = nullptr; - return *this; - } - - template - Buffer::Buffer(uint32_t size, uint32_t byte_size, uint32_t header): - _size(size), - _header(header), - _bytes(size * byte_size + header), - _byte_size(byte_size), - _mr(nullptr), - _own_memory(true) - { - //size_t alloc = _bytes; - //if(alloc < 4096) { - // alloc = 4096; - // spdlog::warn("Page too small, allocating {} bytes", alloc); - //} - // page-aligned address for maximum performance - _ptr = mmap(nullptr, _bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); - SPDLOG_DEBUG( - "Allocated {} bytes, address {}", - _bytes, fmt::ptr(_ptr) - ); - } - - template - Buffer::Buffer(void* ptr, uint32_t size, uint32_t byte_size): - _size(size), - _header(0), - _bytes(size * byte_size), - _byte_size(byte_size), - _ptr(ptr), - _mr(nullptr), - _own_memory(false) - { - SPDLOG_DEBUG( - "Allocated {} bytes, address {}", - _bytes, fmt::ptr(_ptr) - ); - } - - void LibfabricBuffer::destroy() - { - 
SPDLOG_DEBUG( - "Deallocate {} bytes, mr {}, ptr {}", - _bytes, fmt::ptr(_mr), fmt::ptr(_ptr) - ); - if(_mr) - impl::expect_zero(fi_close(&_mr->fid)); - if(_own_memory) - munmap(_ptr, _bytes); - } - - void VerbsBuffer::destroy() - { - SPDLOG_DEBUG( - "Deallocate {} bytes, mr {}, ptr {}", - _bytes, fmt::ptr(_mr), fmt::ptr(_ptr) - ); - if(_mr) - ibv_dereg_mr(_mr); - if(_own_memory) - munmap(_ptr, _bytes); - } - - void LibfabricBuffer::register_memory(LibfabricBuffer::pd_t pd, int access) - { - int ret = fi_mr_reg(pd, _ptr, _bytes, access, 0, 0, 0, &_mr, nullptr); - impl::expect_zero(ret); - SPDLOG_DEBUG( - "Registered {} bytes, mr {}, address {}, lkey {}, rkey {}", - _bytes, fmt::ptr(_mr), fmt::ptr(_ptr), fmt::ptr(fi_mr_desc(_mr)), fi_mr_key(_mr) - ); - } - - void VerbsBuffer::register_memory(VerbsBuffer::pd_t pd, int access) - { - _mr = ibv_reg_mr(pd, _ptr, _bytes, access); - impl::expect_nonnull(_mr); - SPDLOG_DEBUG( - "Registered {} bytes, mr {}, address {}, lkey {}, rkey {}", - _bytes, fmt::ptr(_mr), fmt::ptr(_mr->addr), _mr->lkey, _mr->rkey - ); - } - - template - uint32_t Buffer::data_size() const - { - return this->_size; - } - - template - uint32_t Buffer::size() const - { - return this->_size + this->_header; - } - - template - uint32_t Buffer::bytes() const - { - return this->_bytes; - } - - LibfabricBuffer::lkey_t LibfabricBuffer::lkey() const - { - assert(this->_mr); - return fi_mr_desc(this->_mr); - } - - VerbsBuffer::lkey_t VerbsBuffer::lkey() const - { - assert(this->_mr); - // Apparently it's not needed and better to skip that check. 
- return this->_mr->lkey; - //return 0; - } - - LibfabricBuffer::rkey_t LibfabricBuffer::rkey() const - { - assert(this->_mr); - return fi_mr_key(this->_mr); - } - VerbsBuffer::rkey_t VerbsBuffer::rkey() const - { - assert(this->_mr); - return this->_mr->rkey; - } - - template - uintptr_t Buffer::address() const - { - assert(this->_mr); - return reinterpret_cast(this->_ptr); - } - - template - void* Buffer::ptr() const - { - return this->_ptr; - } - - template - ScatterGatherElement Buffer::sge(uint32_t size, uint32_t offset) const - { - return {address() + offset, size, lkey()}; - } - -}} - -namespace rdmalib {} \ No newline at end of file +namespace rdmalib +{ + namespace impl + { + + template + Buffer::Buffer() : _size(0), + _header(0), + _bytes(0), + _byte_size(0), + _ptr(nullptr), + _mr(nullptr), + _own_memory(false) + { + } + + template + Buffer::Buffer(Buffer &&obj) : _size(obj._size), + _header(obj._header), + _bytes(obj._bytes), + _byte_size(obj._byte_size), + _ptr(obj._ptr), + _mr(obj._mr), + _own_memory(obj._own_memory) + { + obj._size = obj._bytes = obj._header = 0; + obj._ptr = obj._mr = nullptr; + } + + template + Buffer &Buffer::operator=(Buffer &&obj) + { + _size = obj._size; + _bytes = obj._bytes; + _bytes = obj._byte_size; + _header = obj._header; + _ptr = obj._ptr; + _mr = obj._mr; + _own_memory = obj._own_memory; + + obj._size = obj._bytes = 0; + obj._ptr = obj._mr = nullptr; + return *this; + } + + template + Buffer::Buffer(uint32_t size, uint32_t byte_size, uint32_t header) : _size(size), + _header(header), + _bytes(size * byte_size + header), + _byte_size(byte_size), + _mr(nullptr), + _own_memory(true) + { + // size_t alloc = _bytes; + // if(alloc < 4096) { + // alloc = 4096; + // spdlog::warn("Page too small, allocating {} bytes", alloc); + // } + // page-aligned address for maximum performance + _ptr = mmap(nullptr, _bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + SPDLOG_DEBUG( + "Allocated {} bytes, address {}", + 
_bytes, fmt::ptr(_ptr)); + } + + template + Buffer::Buffer(void *ptr, uint32_t size, uint32_t byte_size) : _size(size), + _header(0), + _bytes(size * byte_size), + _byte_size(byte_size), + _ptr(ptr), + _mr(nullptr), + _own_memory(false) + { + SPDLOG_DEBUG( + "Allocated {} bytes, address {}", + _bytes, fmt::ptr(_ptr)); + } + + void LibfabricBuffer::destroy() + { + SPDLOG_DEBUG( + "Deallocate {} bytes, mr {}, ptr {}", + _bytes, fmt::ptr(_mr), fmt::ptr(_ptr)); + if (_mr) + impl::expect_zero(fi_close(&_mr->fid)); + if (_own_memory) + munmap(_ptr, _bytes); + } + + void VerbsBuffer::destroy() + { + SPDLOG_DEBUG( + "Deallocate {} bytes, mr {}, ptr {}", + _bytes, fmt::ptr(_mr), fmt::ptr(_ptr)); + if (_mr) + ibv_dereg_mr(_mr); + if (_own_memory) + munmap(_ptr, _bytes); + } + + void LibfabricBuffer::register_memory(LibfabricBuffer::pd_t pd, int access) + { + int ret = fi_mr_reg(pd, _ptr, _bytes, access, 0, 0, 0, &_mr, nullptr); + impl::expect_zero(ret); + SPDLOG_DEBUG( + "Registered {} bytes, mr {}, address {}, lkey {}, rkey {}", + _bytes, fmt::ptr(_mr), fmt::ptr(_ptr), fmt::ptr(fi_mr_desc(_mr)), fi_mr_key(_mr)); + } + + void VerbsBuffer::register_memory(VerbsBuffer::pd_t pd, int access) + { + _mr = ibv_reg_mr(pd, _ptr, _bytes, access); + impl::expect_nonnull(_mr); + SPDLOG_DEBUG( + "Registered {} bytes, mr {}, address {}, lkey {}, rkey {}", + _bytes, fmt::ptr(_mr), fmt::ptr(_mr->addr), _mr->lkey, _mr->rkey); + } + + template + uint32_t Buffer::data_size() const + { + return this->_size; + } + + template + uint32_t Buffer::size() const + { + return this->_size + this->_header; + } + + template + uint32_t Buffer::bytes() const + { + return this->_bytes; + } + + LibfabricBuffer::lkey_t LibfabricBuffer::lkey() const + { + assert(this->_mr); + return fi_mr_desc(this->_mr); + } + + VerbsBuffer::lkey_t VerbsBuffer::lkey() const + { + assert(this->_mr); + // Apparently it's not needed and better to skip that check. 
+ return this->_mr->lkey; + // return 0; + } + + LibfabricBuffer::rkey_t LibfabricBuffer::rkey() const + { + assert(this->_mr); + return fi_mr_key(this->_mr); + } + VerbsBuffer::rkey_t VerbsBuffer::rkey() const + { + assert(this->_mr); + return this->_mr->rkey; + } + + template + uintptr_t Buffer::address() const + { + assert(this->_mr); + return reinterpret_cast(this->_ptr); + } + + template + void *Buffer::ptr() const + { + return this->_ptr; + } + + template + typename Buffer::SGE + Buffer::sge(uint32_t size, uint32_t offset) const + { + return {address() + offset, size, lkey()}; + } + + } +} + +namespace rdmalib +{ +} \ No newline at end of file diff --git a/rdmalib/lib/connection.cpp b/rdmalib/lib/connection.cpp index 1c9f979..76ecef8 100644 --- a/rdmalib/lib/connection.cpp +++ b/rdmalib/lib/connection.cpp @@ -243,54 +243,54 @@ namespace rdmalib { return this->_id; } - qp_t Connection::qp() const + qp_t LibfabricConnection::qp() const { return this->_qp; } - qp_t Connection::qp() const + qp_t VerbsConnection::qp() const { return this->_qp; } - #ifdef USE_LIBFABRIC - fid_cq* Connection::receive_completion_channel() const + fid_cq* LibfabricConnection::receive_completion_channel() const { return this->_rcv_channel; } - fid_cq* Connection::transmit_completion_channel() const + fid_cq* LibfabricConnection::transmit_completion_channel() const { return this->_trx_channel; } - #else - ibv_comp_channel* Connection::completion_channel() const + ibv_comp_channel* VerbsConnection::completion_channel() const { return this->_channel; } - #endif - uint32_t Connection::private_data() const + template + uint32_t Connection::private_data() const { return this->_private_data; } - ConnectionStatus Connection::status() const + template + ConnectionStatus Connection::status() const { return this->_status; } - void Connection::set_status(ConnectionStatus status) + template + void Connection::set_status(ConnectionStatus status) { this->_status = status; } - void 
Connection::set_private_data(uint32_t private_data) + template + void Connection::set_private_data(uint32_t private_data) { this->_private_data = private_data; } - int32_t Connection::post_send(const ScatterGatherElement & elems, int32_t id, bool force_inline) + int32_t LibfabricConnection::post_send(const ScatterGatherElement & elems, int32_t id, bool force_inline) { - #ifdef USE_LIBFABRIC // FIXME: extend with multiple sges id = id == -1 ? _req_count++ : id; SPDLOG_DEBUG("Post send to local Local QPN on connection {} fid {}", fmt::ptr(this), fmt::ptr(&_qp->fid)); @@ -306,7 +306,10 @@ namespace rdmalib { fmt::ptr(this), elems.size(), elems.array()[0].iov_base, elems.array()[0].iov_len, id ); return _req_count - 1; - #else + } + + int32_t VerbsConnection::post_send(const ScatterGatherElement & elems, int32_t id, bool force_inline) + { // FIXME: extend with multiple sges struct ibv_send_wr wr, *bad; wr.wr_id = id == -1 ? _req_count++ : id; @@ -328,12 +331,10 @@ namespace rdmalib { wr.num_sge, wr.sg_list[0].addr, wr.sg_list[0].length, wr.wr_id, wr.send_flags ); return _req_count - 1; - #endif } - int32_t Connection::post_batched_empty_recv(int count) + int32_t LibfabricConnection::post_batched_empty_recv(int count) { - #ifdef USE_LIBFABRIC int loops = count / _rbatch; int reminder = count % _rbatch; SPDLOG_DEBUG("Batch {} {} to local QPN on connection {} fid {}", loops, reminder, fmt::ptr(this), fmt::ptr(&_qp->fid)); @@ -378,7 +379,10 @@ namespace rdmalib { SPDLOG_DEBUG("Batched Post empty recv successfull on connection {}", fmt::ptr(this)); return count; - #else + } + + int32_t VerbsConnection::post_batched_empty_recv(int count) + { struct ibv_recv_wr* bad = nullptr; int loops = count / _rbatch; int reminder = count % _rbatch; @@ -421,12 +425,10 @@ namespace rdmalib { SPDLOG_DEBUG("Batched Post empty recv succesfull"); return count; - #endif } - int32_t Connection::post_recv(ScatterGatherElement && elem, int32_t id, int count) + int32_t 
LibfabricConnection::post_recv(ScatterGatherElement && elem, int32_t id, int count) { - #ifdef USE_LIBFABRIC fi_addr_t temp = 0; id = id == -1 ? _req_count++ : id; SPDLOG_DEBUG("post recv to local Local QPN fid {} connection {}", fmt::ptr(&_qp->fid), fmt::ptr(this)); @@ -450,9 +452,10 @@ namespace rdmalib { SPDLOG_DEBUG("Post recv successfull on connection {}", fmt::ptr(this)); return id; } - #else - // FIXME: extend with multiple sges + int32_t VerbsConnection::post_recv(ScatterGatherElement && elem, int32_t id, int count) + { + // FIXME: extend with multiple sges struct ibv_recv_wr wr, *bad; wr.wr_id = id == -1 ? _req_count++ : id; wr.next = nullptr; @@ -479,10 +482,8 @@ namespace rdmalib { SPDLOG_DEBUG("Post recv succesfull"); return wr.wr_id; } - #endif - #ifdef USE_LIBFABRIC - int32_t Connection::_post_write(ScatterGatherElement && elems, const RemoteBuffer & rbuf, const uint32_t immediate) + int32_t LibfabricConnection::_post_write(ScatterGatherElement && elems, const RemoteBuffer & rbuf, const uint32_t immediate) { fi_addr_t temp = 0; int32_t id = _req_count++; @@ -507,8 +508,8 @@ namespace rdmalib { return _req_count - 1; } - #else - int32_t Connection::_post_write(ScatterGatherElement && elems, ibv_send_wr wr, bool force_inline, bool force_solicited) + + int32_t VerbsConnection::_post_write(ScatterGatherElement && elems, ibv_send_wr wr, bool force_inline, bool force_solicited) { ibv_send_wr* bad; wr.wr_id = _req_count++; @@ -544,48 +545,28 @@ namespace rdmalib { return _req_count - 1; } - #endif - int32_t Connection::post_write(ScatterGatherElement && elems, const RemoteBuffer & rbuf, bool force_inline) + int32_t LibfabricConnection::post_write(ScatterGatherElement && elems, const RemoteBuffer & rbuf, bool force_inline) { - #ifdef USE_LIBFABRIC if (elems.size() > 1) { spdlog::error("Post write unsuccessful on connection {}, reason Function not implemented for multiple sges.", fmt::ptr(this)); return -1; } return _post_write(std::forward(elems), rbuf); - 
#else - ibv_send_wr wr; - memset(&wr, 0, sizeof(wr)); - wr.opcode = IBV_WR_RDMA_WRITE; - wr.wr.rdma.remote_addr = rbuf.addr; - wr.wr.rdma.rkey = rbuf.rkey; - return _post_write(std::forward(elems), wr, force_inline, false); - #endif } - int32_t Connection::post_write(ScatterGatherElement && elems, const RemoteBuffer & rbuf, uint32_t immediate, bool force_inline, bool force_solicited) + int32_t VerbsConnection::post_write(ScatterGatherElement && elems, const RemoteBuffer & rbuf, bool force_inline) { - #ifdef USE_LIBFABRIC - if (elems.size() > 1) { - spdlog::error("Post write unsuccessful on connection {}, reason Function not implemented for multiple sges.", fmt::ptr(this)); - return -1; - } - return _post_write(std::forward(elems), rbuf, immediate); - #else ibv_send_wr wr; memset(&wr, 0, sizeof(wr)); - wr.opcode = IBV_WR_RDMA_WRITE_WITH_IMM; - wr.imm_data = htonl(immediate); + wr.opcode = IBV_WR_RDMA_WRITE; wr.wr.rdma.remote_addr = rbuf.addr; wr.wr.rdma.rkey = rbuf.rkey; - return _post_write(std::forward(elems), wr, force_inline, force_solicited); - #endif + return _post_write(std::forward(elems), wr, force_inline, false); } - int32_t Connection::post_cas(ScatterGatherElement && elems, const RemoteBuffer & rbuf, uint64_t compare, uint64_t swap) + int32_t LibfabricConnection::post_cas(ScatterGatherElement && elems, const RemoteBuffer & rbuf, uint64_t compare, uint64_t swap) { - #ifdef USE_LIBFABRIC // TODO check if fi_addr_t temp = 0; int32_t id = _req_count++; @@ -598,7 +579,10 @@ namespace rdmalib { } SPDLOG_DEBUG("Post write id {} successful on connection", id, fmt::ptr(this)); return _req_count - 1; - #else + } + + int32_t VerbsConnection::post_cas(ScatterGatherElement && elems, const RemoteBuffer & rbuf, uint64_t compare, uint64_t swap) + { ibv_send_wr wr, *bad; memset(&wr, 0, sizeof(wr)); wr.wr_id = _req_count++; @@ -619,11 +603,9 @@ namespace rdmalib { } SPDLOG_DEBUG("Post write succesfull"); return _req_count - 1; - #endif } - #ifdef USE_LIBFABRIC - int32_t 
Connection::post_atomic_fadd(const Buffer & _accounting_buf, const RemoteBuffer & rbuf, uint64_t add) + int32_t LibfabricConnection::post_atomic_fadd(const Buffer & _accounting_buf, const RemoteBuffer & rbuf, uint64_t add) { int32_t id = _req_count++; memcpy(_accounting_buf.data(), &add, sizeof(add)); @@ -637,8 +619,8 @@ namespace rdmalib { ); return _req_count - 1; } - #else - int32_t Connection::post_atomic_fadd(ScatterGatherElement && elems, const RemoteBuffer & rbuf, uint64_t add) + + int32_t VerbsConnection::post_atomic_fadd(ScatterGatherElement && elems, const RemoteBuffer & rbuf, uint64_t add) { ibv_send_wr wr, *bad; memset(&wr, 0, sizeof(wr)); @@ -662,11 +644,8 @@ namespace rdmalib { ); return _req_count - 1; } - #endif - - #ifdef USE_LIBFABRIC - std::tuple Connection::poll_wc(QueueType type, bool blocking, int count, bool update) + std::tuple LibfabricConnection::poll_wc(QueueType type, bool blocking, int count, bool update) { int ret = 0; fi_cq_data_entry* wcs = (type == QueueType::RECV ? _rwc.data() : _swc.data()); @@ -705,8 +684,8 @@ namespace rdmalib { } return std::make_tuple(wcs, ret == -EAGAIN ? 0 : ret); } - #else - std::tuple Connection::poll_wc(QueueType type, bool blocking, int count) + + std::tuple VerbsConnection::poll_wc(QueueType type, bool blocking, int count) { int ret = 0; ibv_wc* wcs = (type == QueueType::RECV ? 
_rwc.data() : _swc.data()); @@ -737,35 +716,27 @@ namespace rdmalib { } return std::make_tuple(wcs, ret); } - #endif - #ifndef USE_LIBFABRIC - void Connection::notify_events(bool only_solicited) + void VerbsConnection::notify_events(bool only_solicited) { impl::expect_zero(ibv_req_notify_cq(_qp->recv_cq, only_solicited)); } - #endif - #ifdef USE_LIBFABRIC - int Connection::wait_events(int timeout) + int LibfabricConnection::wait_events(int timeout) { return fi_cntr_wait(_write_counter, _counter+1, timeout); } - #else - ibv_cq* Connection::wait_events() + + ibv_cq* VerbsConnection::wait_events() { ibv_cq* ev_cq = nullptr; void* ev_ctx = nullptr; impl::expect_zero(ibv_get_cq_event(_channel, &ev_cq, &ev_ctx)); return ev_cq; } - #endif - #ifndef USE_LIBFABRIC - void Connection::ack_events(ibv_cq* cq, int len) + void VerbsConnection::ack_events(ibv_cq* cq, int len) { ibv_ack_cq_events(cq, len); } - #endif - } From f540de4765065c4c40762ce3749f88ed27f9e77f Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Mon, 17 Jul 2023 12:02:56 -0400 Subject: [PATCH 70/91] Fixed errors in connection --- rdmalib/include/rdmalib/buffer.hpp | 4 +- rdmalib/include/rdmalib/connection.hpp | 60 +++++++++++-------- rdmalib/lib/connection.cpp | 80 ++++++++++---------------- 3 files changed, 71 insertions(+), 73 deletions(-) diff --git a/rdmalib/include/rdmalib/buffer.hpp b/rdmalib/include/rdmalib/buffer.hpp index d6b42cb..14ba4bb 100644 --- a/rdmalib/include/rdmalib/buffer.hpp +++ b/rdmalib/include/rdmalib/buffer.hpp @@ -31,6 +31,7 @@ namespace rdmalib using lkey_t = typename library_traits::lkey_t; using rkey_t = typename library_traits::rkey_t; //using SGE = library_traits::LibSGE; + // TODO: DEAL WITH THIS uint32_t _size; uint32_t _header; @@ -211,7 +212,7 @@ namespace rdmalib struct VerbsScatterGatherElement : ScatterGatherElement { - + using Library = ibverbs; VerbsScatterGatherElement(uint64_t addr, uint32_t bytes, uint32_t lkey); template @@ -234,6 +235,7 @@ namespace rdmalib struct 
LibfabricScatterGatherElement : ScatterGatherElement { + using Library = libfabric; mutable std::vector _lkeys; LibfabricScatterGatherElement(uint64_t addr, uint32_t bytes, lkey_t lkey) diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index 9e447ce..c398a94 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -61,9 +61,9 @@ namespace rdmalib { using wc_t = typename library_traits::wc_t; using id_t = typename library_traits::id_t; using channel_t = typename library_traits::channel_t; - template + template // TODO: remove this generic. should be a trait using SGE = ScatterGatherElement; - using RemoteBuffer_ = RemoteBuffer; + using RemoteBuffer = RemoteBuffer; qp_t _qp; int32_t _req_count; @@ -103,7 +103,7 @@ namespace rdmalib { } qp_t qp() const { - return static_cast(this)->qp(); + return this->_id; } uint32_t private_data() const; @@ -122,24 +122,24 @@ namespace rdmalib { int32_t post_batched_empty_recv(int32_t count = 1); template - int32_t post_write(SGE && elems, const RemoteBuffer_ & buf, bool force_inline = false); + int32_t post_write(SGE && elems, const RemoteBuffer & buf, bool force_inline = false); // Solicited makes sense only for RDMA write with immediate template - int32_t post_write(SGE && elems, const RemoteBuffer_ & buf, + int32_t post_write(SGE && elems, const RemoteBuffer & buf, uint32_t immediate, bool force_inline = false, bool solicited = false ); template - int32_t post_cas(SGE && elems, const RemoteBuffer_ & buf, uint64_t compare, uint64_t swap); + int32_t post_cas(SGE && elems, const RemoteBuffer & buf, uint64_t compare, uint64_t swap); }; struct LibfabricConnection : Connection { template - using Buffer_ = Buffer; - template // S = SGE's Derived class - using SGE = ScatterGatherElement; + using Buffer = Buffer; + //using RemoteBuffer = RemoteBuffer; + using SGE = LibfabricScatterGatherElement; fid_cq *_rcv_channel; fid_cq *_trx_channel; @@ -153,8 
+153,10 @@ namespace rdmalib { LibfabricConnection(LibfabricConnection&& obj); ~LibfabricConnection(); - id_t id() const; - qp_t qp() const; + id_t id() const + { + return &this->_qp->fid; + } template void initialize_batched_recv(const rdmalib::impl::Buffer & sge, size_t offset); @@ -166,7 +168,12 @@ namespace rdmalib { channel_t receive_completion_channel() const; channel_t transmit_completion_channel() const; - template inline int32_t post_write(const Buffer_ & buf, const size_t size, const uint64_t offset, const RemoteBuffer_ & rbuf, const uint32_t immediate) { + int32_t post_cas(SGE && elems, const RemoteBuffer & rbuf, uint64_t compare, uint64_t swap); + int32_t post_send(const SGE & elems, int32_t id, bool force_inline); + int32_t post_batched_empty_recv(int count); + int32_t post_recv(SGE && elem, int32_t id, int count); + + template inline int32_t post_write(const Buffer & buf, const size_t size, const uint64_t offset, const RemoteBuffer & rbuf, const uint32_t immediate) { int ret = fi_writedata(_qp, (void *)(buf.address() + offset), size, buf.lkey(), immediate + (size << 32), NULL, rbuf.addr, rbuf.rkey, (void *)(_req_count++)); if(ret) { spdlog::error("Post write unsuccessful, reason {} {}, buf size {}, id {}, remote addr {}, remote rkey {}, imm data {}, connection {}", @@ -186,19 +193,21 @@ namespace rdmalib { return _req_count - 1; } - int32_t post_atomic_fadd(const Buffer_ & _accounting_buf, const RemoteBuffer_& rbuf, uint64_t add); + int32_t post_atomic_fadd(const Buffer & _accounting_buf, const RemoteBuffer & rbuf, uint64_t add); // Register to be notified about all events, including unsolicited ones int wait_events(int timeout = -1); - template - int32_t _post_write(SGE && elems, const RemoteBuffer_ & rbuf, const uint32_t immediate = 0); + int32_t _post_write(SGE && elems, const RemoteBuffer & rbuf, const uint32_t immediate = 0); + int32_t post_write(SGE && elems, const RemoteBuffer & rbuf, bool force_inline); + + std::tuple poll_wc(QueueType type, 
bool blocking, int count, bool update); }; struct VerbsConnection : Connection { - template // S for SGE's Derived class - using SGE = ScatterGatherElement; + using SGE = ScatterGatherElement; + // using RemoteBuffer = RemoteBuffer; // handled in parent id_t _id; channel_t _channel; @@ -210,8 +219,10 @@ namespace rdmalib { ~VerbsConnection(); void close(); - id_t id() const; - qp_t qp() const; + id_t id() const + { + return this->_id; + } void inlining(bool enable); template @@ -219,15 +230,18 @@ namespace rdmalib { void initialize(rdma_cm_id* id); ibv_comp_channel* completion_channel() const; - template - int32_t post_atomic_fadd(SGE && elems, const RemoteBuffer_ & rbuf, uint64_t add); + int32_t post_send(const SGE & elems, int32_t id, bool force_inline); + int32_t post_batched_empty_recv(int count); + int32_t post_recv(SGE && elem, int32_t id, int count); + int32_t post_cas(SGE && elems, const RemoteBuffer & rbuf, uint64_t compare, uint64_t swap); + int32_t post_atomic_fadd(SGE && elems, const RemoteBuffer & rbuf, uint64_t add); void notify_events(bool only_solicited = false); ibv_cq* wait_events(); void ack_events(ibv_cq* cq, int len); - template - int32_t _post_write(SGE && elems, ibv_send_wr wr, bool force_inline, bool force_solicited); + int32_t _post_write(SGE && elems, ibv_send_wr wr, bool force_inline, bool force_solicited); + int32_t post_write(SGE && elems, const RemoteBuffer & rbuf, bool force_inline); std::tuple poll_wc(QueueType type, bool blocking, int count); diff --git a/rdmalib/lib/connection.cpp b/rdmalib/lib/connection.cpp index 76ecef8..ad1e0f9 100644 --- a/rdmalib/lib/connection.cpp +++ b/rdmalib/lib/connection.cpp @@ -234,24 +234,6 @@ namespace rdmalib { } } - id_t LibfabricConnection::id() const - { - return &this->_qp->fid; - } - id_t VerbsConnection::id() const - { - return this->_id; - } - - qp_t LibfabricConnection::qp() const - { - return this->_qp; - } - qp_t VerbsConnection::qp() const - { - return this->_qp; - } - fid_cq* 
LibfabricConnection::receive_completion_channel() const { return this->_rcv_channel; @@ -289,7 +271,7 @@ namespace rdmalib { this->_private_data = private_data; } - int32_t LibfabricConnection::post_send(const ScatterGatherElement & elems, int32_t id, bool force_inline) + int32_t LibfabricConnection::post_send(const SGE & elems, int32_t id, bool force_inline) { // FIXME: extend with multiple sges id = id == -1 ? _req_count++ : id; @@ -308,7 +290,7 @@ namespace rdmalib { return _req_count - 1; } - int32_t VerbsConnection::post_send(const ScatterGatherElement & elems, int32_t id, bool force_inline) + int32_t VerbsConnection::post_send(const SGE & elems, int32_t id, bool force_inline) { // FIXME: extend with multiple sges struct ibv_send_wr wr, *bad; @@ -427,7 +409,7 @@ namespace rdmalib { return count; } - int32_t LibfabricConnection::post_recv(ScatterGatherElement && elem, int32_t id, int count) + int32_t LibfabricConnection::post_recv(SGE && elem, int32_t id, int count) { fi_addr_t temp = 0; id = id == -1 ? 
_req_count++ : id; @@ -453,7 +435,7 @@ namespace rdmalib { return id; } - int32_t VerbsConnection::post_recv(ScatterGatherElement && elem, int32_t id, int count) + int32_t VerbsConnection::post_recv(SGE && elem, int32_t id, int count) { // FIXME: extend with multiple sges struct ibv_recv_wr wr, *bad; @@ -483,7 +465,7 @@ namespace rdmalib { return wr.wr_id; } - int32_t LibfabricConnection::_post_write(ScatterGatherElement && elems, const RemoteBuffer & rbuf, const uint32_t immediate) + int32_t LibfabricConnection::_post_write(SGE && elems, const RemoteBuffer & rbuf, const uint32_t immediate) { fi_addr_t temp = 0; int32_t id = _req_count++; @@ -509,7 +491,7 @@ namespace rdmalib { } - int32_t VerbsConnection::_post_write(ScatterGatherElement && elems, ibv_send_wr wr, bool force_inline, bool force_solicited) + int32_t VerbsConnection::_post_write(SGE && elems, ibv_send_wr wr, bool force_inline, bool force_solicited) { ibv_send_wr* bad; wr.wr_id = _req_count++; @@ -527,26 +509,26 @@ namespace rdmalib { spdlog::error("Post write unsuccesful, reason {} {}, sges_count {}, wr_id {}, remote addr {}, remote rkey {}, imm data {}", ret, strerror(ret), wr.num_sge, wr.wr_id, wr.wr.rdma.remote_addr, wr.wr.rdma.rkey, ntohl(wr.imm_data) ); - if(IBV_SEND_INLINE & wr.send_flags) - spdlog::error("The write of size {} was inlined, is it supported by the device?", - wr.sg_list[0].length - ); + if(IBV_SEND_INLINE & wr.send_flags) { + spdlog::error("The write of size {} was inlined, is it supported by the device?", wr.sg_list[0].length); + } return -1; } - if(wr.num_sge > 0) + + if(wr.num_sge > 0) { SPDLOG_DEBUG( - "Post write succesfull id: {}, sge size: {}, first lkey {} len {}, remote addr {}, remote rkey {}, imm data {}", + "Post write successful id: {}, sge size: {}, first lkey {} len {}, remote addr {}, remote rkey {}, imm data {}", wr.wr_id, wr.num_sge, wr.sg_list[0].lkey, wr.sg_list[0].length, wr.wr.rdma.remote_addr, wr.wr.rdma.rkey, ntohl(wr.imm_data) ); - else + } else { 
SPDLOG_DEBUG( - "Post write succesfull id: {}, remote addr {}, remote rkey {}, imm data {}", wr.wr_id, wr.wr.rdma.remote_addr, wr.wr.rdma.rkey, ntohl(wr.imm_data) + "Post write successful id: {}, remote addr {}, remote rkey {}, imm data {}", wr.wr_id, wr.wr.rdma.remote_addr, wr.wr.rdma.rkey, ntohl(wr.imm_data) ); + } return _req_count - 1; - } - int32_t LibfabricConnection::post_write(ScatterGatherElement && elems, const RemoteBuffer & rbuf, bool force_inline) + int32_t LibfabricConnection::post_write(SGE && elems, const RemoteBuffer & rbuf, bool force_inline) { if (elems.size() > 1) { spdlog::error("Post write unsuccessful on connection {}, reason Function not implemented for multiple sges.", fmt::ptr(this)); @@ -555,7 +537,7 @@ namespace rdmalib { return _post_write(std::forward(elems), rbuf); } - int32_t VerbsConnection::post_write(ScatterGatherElement && elems, const RemoteBuffer & rbuf, bool force_inline) + int32_t VerbsConnection::post_write(SGE && elems, const RemoteBuffer & rbuf, bool force_inline) { ibv_send_wr wr; memset(&wr, 0, sizeof(wr)); @@ -565,7 +547,7 @@ namespace rdmalib { return _post_write(std::forward(elems), wr, force_inline, false); } - int32_t LibfabricConnection::post_cas(ScatterGatherElement && elems, const RemoteBuffer & rbuf, uint64_t compare, uint64_t swap) + int32_t LibfabricConnection::post_cas(SGE && elems, const RemoteBuffer & rbuf, uint64_t compare, uint64_t swap) { // TODO check if fi_addr_t temp = 0; @@ -581,7 +563,7 @@ namespace rdmalib { return _req_count - 1; } - int32_t VerbsConnection::post_cas(ScatterGatherElement && elems, const RemoteBuffer & rbuf, uint64_t compare, uint64_t swap) + int32_t VerbsConnection::post_cas(SGE && elems, const RemoteBuffer & rbuf, uint64_t compare, uint64_t swap) { ibv_send_wr wr, *bad; memset(&wr, 0, sizeof(wr)); @@ -620,7 +602,7 @@ namespace rdmalib { return _req_count - 1; } - int32_t VerbsConnection::post_atomic_fadd(ScatterGatherElement && elems, const RemoteBuffer & rbuf, uint64_t add) + 
int32_t VerbsConnection::post_atomic_fadd(SGE && elems, const RemoteBuffer & rbuf, uint64_t add) { ibv_send_wr wr, *bad; memset(&wr, 0, sizeof(wr)); @@ -639,9 +621,7 @@ namespace rdmalib { spdlog::error("Post write unsuccesful, reason {} {}", errno, strerror(errno)); return -1; } - SPDLOG_DEBUG( - "Post atomic fadd succesfull id: {}, remote addr {}, remote rkey {}, val {}", wr.wr_id, wr.wr.rdma.remote_addr, wr.wr.rdma.rkey, wr.wr.atomic.compare_add - ); + SPDLOG_DEBUG("Post atomic fadd succesfull id: {}, remote addr {}, remote rkey {}, val {}", wr.wr_id, wr.wr.rdma.remote_addr, wr.wr.rdma.rkey, wr.wr.atomic.compare_add); return _req_count - 1; } @@ -659,15 +639,16 @@ namespace rdmalib { ); if (ret == -FI_EAVAIL) { ret = fi_cq_readerr(type == QueueType::RECV ? _rcv_channel : _trx_channel, &_ewc, 0); - if (ret != 1) + if (ret != 1) { ret = -1; - else + } else { spdlog::error( - "Queue {} connection {} WC {} finished with an error {}", - type == QueueType::RECV ? "recv" : "send", fmt::ptr(this), - reinterpret_cast(_ewc.op_context), - fi_strerror(_ewc.err) - ); + "Queue {} connection {} WC {} finished with an error {}", + type == QueueType::RECV ? "recv" : "send", fmt::ptr(this), + reinterpret_cast(_ewc.op_context), + fi_strerror(_ewc.err) + ); + } } } while(blocking && (ret == -EAGAIN || ret == 0)); @@ -703,7 +684,7 @@ namespace rdmalib { spdlog::error("Failure of polling events from: {} queue! Return value {}, errno {}", type == QueueType::RECV ? "recv" : "send", ret, errno); return std::make_tuple(nullptr, -1); } - if(ret) + if(ret) { for(int i = 0; i < ret; ++i) { if(wcs[i].status != IBV_WC_SUCCESS) { spdlog::error( @@ -714,6 +695,7 @@ namespace rdmalib { } SPDLOG_DEBUG("Queue {} Ret {}/{} WC {} Status {}", type == QueueType::RECV ? 
"recv" : "send", i + 1, ret, wcs[i].wr_id, ibv_wc_status_str(wcs[i].status)); } + } return std::make_tuple(wcs, ret); } From d2da053365054ae1ecc76d082f291bef2cde1908 Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Tue, 18 Jul 2023 14:15:32 -0400 Subject: [PATCH 71/91] Fix more errors --- rdmalib/include/rdmalib/buffer.hpp | 11 ++++++++-- rdmalib/include/rdmalib/connection.hpp | 28 +++++++++++++------------- rdmalib/lib/buffer.cpp | 7 ------- rdmalib/lib/connection.cpp | 18 ++++++++--------- 4 files changed, 32 insertions(+), 32 deletions(-) diff --git a/rdmalib/include/rdmalib/buffer.hpp b/rdmalib/include/rdmalib/buffer.hpp index 14ba4bb..711e278 100644 --- a/rdmalib/include/rdmalib/buffer.hpp +++ b/rdmalib/include/rdmalib/buffer.hpp @@ -30,8 +30,11 @@ namespace rdmalib using pd_t = typename library_traits::pd_t; using lkey_t = typename library_traits::lkey_t; using rkey_t = typename library_traits::rkey_t; - //using SGE = library_traits::LibSGE; + // TODO: DEAL WITH THIS + //using SGE = library_traits::LibSGE; + template + using SGE = ScatterGatherElement; uint32_t _size; uint32_t _header; @@ -73,7 +76,11 @@ namespace rdmalib { static_cast(this)->rkey(); } - SGE sge(uint32_t size, uint32_t offset) const; + template + SGE sge(uint32_t size, uint32_t offset) const + { + return {address() + offset, size, lkey()}; + } }; struct LibfabricBuffer : Buffer diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index c398a94..0400032 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -63,7 +63,7 @@ namespace rdmalib { using channel_t = typename library_traits::channel_t; template // TODO: remove this generic. 
should be a trait using SGE = ScatterGatherElement; - using RemoteBuffer = RemoteBuffer; + using RemoteBuffer_ = RemoteBuffer; qp_t _qp; int32_t _req_count; @@ -122,23 +122,23 @@ namespace rdmalib { int32_t post_batched_empty_recv(int32_t count = 1); template - int32_t post_write(SGE && elems, const RemoteBuffer & buf, bool force_inline = false); + int32_t post_write(SGE && elems, const RemoteBuffer_ & buf, bool force_inline = false); // Solicited makes sense only for RDMA write with immediate template - int32_t post_write(SGE && elems, const RemoteBuffer & buf, + int32_t post_write(SGE && elems, const RemoteBuffer_ & buf, uint32_t immediate, bool force_inline = false, bool solicited = false ); template - int32_t post_cas(SGE && elems, const RemoteBuffer & buf, uint64_t compare, uint64_t swap); + int32_t post_cas(SGE && elems, const RemoteBuffer_ & buf, uint64_t compare, uint64_t swap); }; struct LibfabricConnection : Connection { template using Buffer = Buffer; - //using RemoteBuffer = RemoteBuffer; + //using RemoteBuffer_ = RemoteBuffer_; using SGE = LibfabricScatterGatherElement; fid_cq *_rcv_channel; @@ -168,12 +168,12 @@ namespace rdmalib { channel_t receive_completion_channel() const; channel_t transmit_completion_channel() const; - int32_t post_cas(SGE && elems, const RemoteBuffer & rbuf, uint64_t compare, uint64_t swap); + int32_t post_cas(SGE && elems, const RemoteBuffer_ & rbuf, uint64_t compare, uint64_t swap); int32_t post_send(const SGE & elems, int32_t id, bool force_inline); int32_t post_batched_empty_recv(int count); int32_t post_recv(SGE && elem, int32_t id, int count); - template inline int32_t post_write(const Buffer & buf, const size_t size, const uint64_t offset, const RemoteBuffer & rbuf, const uint32_t immediate) { + template inline int32_t post_write(const Buffer & buf, const size_t size, const uint64_t offset, const RemoteBuffer_ & rbuf, const uint32_t immediate) { int ret = fi_writedata(_qp, (void *)(buf.address() + offset), size, 
buf.lkey(), immediate + (size << 32), NULL, rbuf.addr, rbuf.rkey, (void *)(_req_count++)); if(ret) { spdlog::error("Post write unsuccessful, reason {} {}, buf size {}, id {}, remote addr {}, remote rkey {}, imm data {}, connection {}", @@ -193,13 +193,13 @@ namespace rdmalib { return _req_count - 1; } - int32_t post_atomic_fadd(const Buffer & _accounting_buf, const RemoteBuffer & rbuf, uint64_t add); + int32_t post_atomic_fadd(const Buffer & _accounting_buf, const RemoteBuffer_ & rbuf, uint64_t add); // Register to be notified about all events, including unsolicited ones int wait_events(int timeout = -1); - int32_t _post_write(SGE && elems, const RemoteBuffer & rbuf, const uint32_t immediate = 0); - int32_t post_write(SGE && elems, const RemoteBuffer & rbuf, bool force_inline); + int32_t _post_write(SGE && elems, const RemoteBuffer_ & rbuf, const uint32_t immediate = 0); + int32_t post_write(SGE && elems, const RemoteBuffer_ & rbuf, bool force_inline); std::tuple poll_wc(QueueType type, bool blocking, int count, bool update); }; @@ -207,7 +207,7 @@ namespace rdmalib { struct VerbsConnection : Connection { using SGE = ScatterGatherElement; - // using RemoteBuffer = RemoteBuffer; // handled in parent + // using RemoteBuffer_ = RemoteBuffer_; // handled in parent id_t _id; channel_t _channel; @@ -233,15 +233,15 @@ namespace rdmalib { int32_t post_send(const SGE & elems, int32_t id, bool force_inline); int32_t post_batched_empty_recv(int count); int32_t post_recv(SGE && elem, int32_t id, int count); - int32_t post_cas(SGE && elems, const RemoteBuffer & rbuf, uint64_t compare, uint64_t swap); - int32_t post_atomic_fadd(SGE && elems, const RemoteBuffer & rbuf, uint64_t add); + int32_t post_cas(SGE && elems, const RemoteBuffer_ & rbuf, uint64_t compare, uint64_t swap); + int32_t post_atomic_fadd(SGE && elems, const RemoteBuffer_ & rbuf, uint64_t add); void notify_events(bool only_solicited = false); ibv_cq* wait_events(); void ack_events(ibv_cq* cq, int len); int32_t 
_post_write(SGE && elems, ibv_send_wr wr, bool force_inline, bool force_solicited); - int32_t post_write(SGE && elems, const RemoteBuffer & rbuf, bool force_inline); + int32_t post_write(SGE && elems, const RemoteBuffer_ & rbuf, bool force_inline); std::tuple poll_wc(QueueType type, bool blocking, int count); diff --git a/rdmalib/lib/buffer.cpp b/rdmalib/lib/buffer.cpp index 5b0c7ad..46e9eae 100644 --- a/rdmalib/lib/buffer.cpp +++ b/rdmalib/lib/buffer.cpp @@ -188,13 +188,6 @@ namespace rdmalib return this->_ptr; } - template - typename Buffer::SGE - Buffer::sge(uint32_t size, uint32_t offset) const - { - return {address() + offset, size, lkey()}; - } - } } diff --git a/rdmalib/lib/connection.cpp b/rdmalib/lib/connection.cpp index ad1e0f9..f4c6d46 100644 --- a/rdmalib/lib/connection.cpp +++ b/rdmalib/lib/connection.cpp @@ -465,7 +465,7 @@ namespace rdmalib { return wr.wr_id; } - int32_t LibfabricConnection::_post_write(SGE && elems, const RemoteBuffer & rbuf, const uint32_t immediate) + int32_t LibfabricConnection::_post_write(SGE && elems, const RemoteBuffer_ & rbuf, const uint32_t immediate) { fi_addr_t temp = 0; int32_t id = _req_count++; @@ -528,26 +528,26 @@ namespace rdmalib { return _req_count - 1; } - int32_t LibfabricConnection::post_write(SGE && elems, const RemoteBuffer & rbuf, bool force_inline) + int32_t LibfabricConnection::post_write(SGE && elems, const RemoteBuffer_ & rbuf, bool force_inline) { if (elems.size() > 1) { spdlog::error("Post write unsuccessful on connection {}, reason Function not implemented for multiple sges.", fmt::ptr(this)); return -1; } - return _post_write(std::forward(elems), rbuf); + return _post_write(std::forward(elems), rbuf); } - int32_t VerbsConnection::post_write(SGE && elems, const RemoteBuffer & rbuf, bool force_inline) + int32_t VerbsConnection::post_write(SGE && elems, const RemoteBuffer_ & rbuf, bool force_inline) { ibv_send_wr wr; memset(&wr, 0, sizeof(wr)); wr.opcode = IBV_WR_RDMA_WRITE; wr.wr.rdma.remote_addr = 
rbuf.addr; wr.wr.rdma.rkey = rbuf.rkey; - return _post_write(std::forward(elems), wr, force_inline, false); + return _post_write(std::forward(elems), wr, force_inline, false); } - int32_t LibfabricConnection::post_cas(SGE && elems, const RemoteBuffer & rbuf, uint64_t compare, uint64_t swap) + int32_t LibfabricConnection::post_cas(SGE && elems, const RemoteBuffer_ & rbuf, uint64_t compare, uint64_t swap) { // TODO check if fi_addr_t temp = 0; @@ -563,7 +563,7 @@ namespace rdmalib { return _req_count - 1; } - int32_t VerbsConnection::post_cas(SGE && elems, const RemoteBuffer & rbuf, uint64_t compare, uint64_t swap) + int32_t VerbsConnection::post_cas(SGE && elems, const RemoteBuffer_ & rbuf, uint64_t compare, uint64_t swap) { ibv_send_wr wr, *bad; memset(&wr, 0, sizeof(wr)); @@ -587,7 +587,7 @@ namespace rdmalib { return _req_count - 1; } - int32_t LibfabricConnection::post_atomic_fadd(const Buffer & _accounting_buf, const RemoteBuffer & rbuf, uint64_t add) + int32_t LibfabricConnection::post_atomic_fadd(const Buffer & _accounting_buf, const RemoteBuffer_ & rbuf, uint64_t add) { int32_t id = _req_count++; memcpy(_accounting_buf.data(), &add, sizeof(add)); @@ -602,7 +602,7 @@ namespace rdmalib { return _req_count - 1; } - int32_t VerbsConnection::post_atomic_fadd(SGE && elems, const RemoteBuffer & rbuf, uint64_t add) + int32_t VerbsConnection::post_atomic_fadd(SGE && elems, const RemoteBuffer_ & rbuf, uint64_t add) { ibv_send_wr wr, *bad; memset(&wr, 0, sizeof(wr)); From 07ac8b2214eee1ee3c60c3ecd15626d350a33e7a Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Tue, 18 Jul 2023 14:53:56 -0400 Subject: [PATCH 72/91] Refactor functions, util, server --- rdmalib/include/rdmalib/functions.hpp | 13 +++++++------ rdmalib/include/rdmalib/recv_buffer.hpp | 2 +- rdmalib/include/rdmalib/server.hpp | 5 +++-- rdmalib/lib/functions.cpp | 3 ++- rdmalib/lib/server.cpp | 12 ++++++++---- 5 files changed, 21 insertions(+), 14 deletions(-) diff --git a/rdmalib/include/rdmalib/functions.hpp 
b/rdmalib/include/rdmalib/functions.hpp index 9fd4d4f..2c67cf9 100644 --- a/rdmalib/include/rdmalib/functions.hpp +++ b/rdmalib/include/rdmalib/functions.hpp @@ -7,19 +7,20 @@ namespace rdmalib { namespace functions { - struct Submission { + struct LibfabricSubmission { uint64_t r_address; - #ifdef USE_LIBFABRIC uint64_t r_key; static constexpr int DATA_HEADER_SIZE = 16; - #else + }; + + struct VerbsSubmission { + uint64_t r_address; uint32_t r_key; static constexpr int DATA_HEADER_SIZE = 12; - #endif }; - constexpr int Submission::DATA_HEADER_SIZE; - + constexpr int LibfabricSubmission::DATA_HEADER_SIZE; + constexpr int VerbsSubmission::DATA_HEADER_SIZE; typedef void (*FuncType)(void*, void*); diff --git a/rdmalib/include/rdmalib/recv_buffer.hpp b/rdmalib/include/rdmalib/recv_buffer.hpp index 21d30b4..bc76f89 100644 --- a/rdmalib/include/rdmalib/recv_buffer.hpp +++ b/rdmalib/include/rdmalib/recv_buffer.hpp @@ -16,7 +16,7 @@ namespace rdmalib struct RecvBuffer { using wc_t = library_traits::wc_t; - using LibConnection = library_traits::LibConnection; + //using LibConnection = library_traits::LibConnection; // TODO int _rcv_buf_size; int _refill_threshold; diff --git a/rdmalib/include/rdmalib/server.hpp b/rdmalib/include/rdmalib/server.hpp index 9ead314..7b75dba 100644 --- a/rdmalib/include/rdmalib/server.hpp +++ b/rdmalib/include/rdmalib/server.hpp @@ -16,9 +16,10 @@ namespace rdmalib { namespace server { + template struct ServerStatus { - std::vector _buffers; - rdmalib::RemoteBuffer _threads_allocator; + std::vector> _buffers; + rdmalib::RemoteBuffer _threads_allocator; std::string _address; int _port; diff --git a/rdmalib/lib/functions.cpp b/rdmalib/lib/functions.cpp index 8dcb516..c8bd71c 100644 --- a/rdmalib/lib/functions.cpp +++ b/rdmalib/lib/functions.cpp @@ -5,7 +5,8 @@ namespace rdmalib { namespace functions { - constexpr int Submission::DATA_HEADER_SIZE; + constexpr int LibfabricSubmission::DATA_HEADER_SIZE; + constexpr int 
VerbsSubmission::DATA_HEADER_SIZE; void FunctionsDB::test_function(void* args, void* res) { diff --git a/rdmalib/lib/server.cpp b/rdmalib/lib/server.cpp index 34b4681..d37dd1d 100644 --- a/rdmalib/lib/server.cpp +++ b/rdmalib/lib/server.cpp @@ -5,17 +5,20 @@ namespace rdmalib { namespace server { - ServerStatus::ServerStatus(): + template + ServerStatus::ServerStatus(): _address(""), _port(0) {} - ServerStatus::ServerStatus(std::string address, int port): + template + ServerStatus::ServerStatus(std::string address, int port): _address(address), _port(port) {} - ServerStatus ServerStatus::deserialize(std::istream & in) + template + ServerStatus ServerStatus::deserialize(std::istream & in) { ServerStatus status; cereal::JSONInputArchive archive_in(in); @@ -23,7 +26,8 @@ namespace rdmalib { namespace server { return status; } - void ServerStatus::serialize(std::ostream & out) const + template + void ServerStatus::serialize(std::ostream & out) const { cereal::JSONOutputArchive archive_out(out); archive_out(*this); From 14e75d27a6cbbda5d1cc310ffa2bc24a0728e9e9 Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Tue, 18 Jul 2023 19:50:09 -0400 Subject: [PATCH 73/91] Connection compiles, refactor BufferInfo --- rdmalib/include/rdmalib/allocation.hpp | 8 +++----- rdmalib/include/rdmalib/buffer.hpp | 6 ++++-- rdmalib/include/rdmalib/connection.hpp | 10 ++++++---- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/rdmalib/include/rdmalib/allocation.hpp b/rdmalib/include/rdmalib/allocation.hpp index fc1f460..9f08b46 100644 --- a/rdmalib/include/rdmalib/allocation.hpp +++ b/rdmalib/include/rdmalib/allocation.hpp @@ -20,14 +20,12 @@ namespace rdmalib { char listen_address[16]; }; + template struct BufferInformation { + using rkey_t = library_traits::rkey_t; uint64_t r_addr; - #ifdef USE_LIBFABRIC - uint64_t r_key; - #else - uint32_t r_key; - #endif + rkey_t r_key; }; } diff --git a/rdmalib/include/rdmalib/buffer.hpp b/rdmalib/include/rdmalib/buffer.hpp index 
711e278..3a849f1 100644 --- a/rdmalib/include/rdmalib/buffer.hpp +++ b/rdmalib/include/rdmalib/buffer.hpp @@ -70,11 +70,11 @@ namespace rdmalib } lkey_t lkey() const { - static_cast(this)->lkey(); + return static_cast(this)->lkey(); } rkey_t rkey() const { - static_cast(this)->rkey(); + return static_cast(this)->rkey(); } template SGE sge(uint32_t size, uint32_t offset) const @@ -221,6 +221,7 @@ namespace rdmalib { using Library = ibverbs; VerbsScatterGatherElement(uint64_t addr, uint32_t bytes, uint32_t lkey); + VerbsScatterGatherElement(); template void add(const Buffer &buf) @@ -245,6 +246,7 @@ namespace rdmalib using Library = libfabric; mutable std::vector _lkeys; + LibfabricScatterGatherElement(); LibfabricScatterGatherElement(uint64_t addr, uint32_t bytes, lkey_t lkey) { _sges.push_back({(void *)addr, bytes}); diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index 0400032..0adbc60 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -75,17 +75,16 @@ namespace rdmalib { std::array _swc; // fast fix for overlapping polling std::array _rwc; - std::array, _wc_size> _rwc_sges; int _send_flags; public: static const int _rbatch = 32; // 32 for faster division in the code Connection(bool passive = false) { - static_cast(this)->Derived(passive); + } ~Connection() { - static_cast(this)->~Derived(); + } Connection(const Connection&) = delete; Connection& operator=(const Connection&) = delete; @@ -147,6 +146,8 @@ namespace rdmalib { uint64_t _counter; fid_domain* _domain = nullptr; + std::array _rwc_sges; + fi_cq_err_entry _ewc; LibfabricConnection(bool passive); @@ -206,13 +207,14 @@ namespace rdmalib { struct VerbsConnection : Connection { - using SGE = ScatterGatherElement; + using SGE = VerbsScatterGatherElement; // using RemoteBuffer_ = RemoteBuffer_; // handled in parent id_t _id; channel_t _channel; struct ibv_recv_wr _batch_wrs[_rbatch]; // preallocated and prefilled 
batched recv. + std::array _rwc_sges; VerbsConnection(bool passive); VerbsConnection(VerbsConnection&& obj); From d7ba7a99dca566db44c014960c42c00468b2fe09 Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Tue, 18 Jul 2023 20:01:15 -0400 Subject: [PATCH 74/91] Refactor rdmalib constructors --- rdmalib/include/rdmalib/rdmalib.hpp | 70 ++++++++++++++++++++++++----- rdmalib/lib/rdmalib.cpp | 56 ++++++++++++++++------- 2 files changed, 98 insertions(+), 28 deletions(-) diff --git a/rdmalib/include/rdmalib/rdmalib.hpp b/rdmalib/include/rdmalib/rdmalib.hpp index a305e3f..285f28b 100644 --- a/rdmalib/include/rdmalib/rdmalib.hpp +++ b/rdmalib/include/rdmalib/rdmalib.hpp @@ -1,4 +1,3 @@ - #ifndef __RDMALIB_RDMALIB_HPP__ #define __RDMALIB_RDMALIB_HPP__ @@ -10,7 +9,7 @@ #include #include -#ifdef USE_LIBFABRIC +// #ifdef USE_LIBFABRIC #include #include // #include @@ -18,13 +17,13 @@ extern "C" { #include "rdmacred.h" } -#endif #else #include #endif #include #include +#include namespace rdmalib { @@ -58,19 +57,11 @@ namespace rdmalib { }; // Implemented as IPV4 + template struct Address { - #ifdef USE_LIBFABRIC - fi_info* addrinfo = nullptr; - fi_info* hints = nullptr; - fid_fabric* fabric = nullptr; - std::string _ip; #ifdef USE_GNI_AUTH uint64_t cookie; #endif - #else - rdma_addrinfo *addrinfo; - rdma_addrinfo hints; - #endif uint16_t _port; Address(const std::string & ip, int port, bool passive); @@ -80,6 +71,31 @@ namespace rdmalib { ~Address(); }; + struct LibfabricAddress : Address { + fi_info* addrinfo = nullptr; + fi_info* hints = nullptr; + fid_fabric* fabric = nullptr; + std::string _ip; + + LibfabricAddress(const std::string & ip, int port, bool passive); + LibfabricAddress(const std::string & sip, const std::string & dip, int port); + LibfabricAddress() {} + + ~LibfabricAddress(); + }; + + struct VerbsAddress : Address { + rdma_addrinfo *addrinfo; + rdma_addrinfo hints; + + VerbsAddress(const std::string & ip, int port, bool passive); + VerbsAddress(const std::string 
& sip, const std::string & dip, int port); + VerbsAddress() {} + + ~VerbsAddress(); + }; + + template struct RDMAActive { #ifndef USE_LIBFABRIC ConnectionConfiguration _cfg; @@ -97,6 +113,36 @@ namespace rdmalib { ibv_pd* _pd; #endif + RDMAActive(const std::string & ip, int port, int recv_buf = 1, int max_inline_data = 0); + RDMAActive(); + ~RDMAActive(); + void allocate(); + bool connect(uint32_t secret = 0); + void disconnect(); + #ifdef USE_LIBFABRIC + fid_domain* pd() const; + #else + ibv_pd* pd() const; + #endif + Connection & connection(); + bool is_connected(); + };struct RDMAActive { + #ifndef USE_LIBFABRIC + ConnectionConfiguration _cfg; + #endif + std::unique_ptr _conn; + Address _addr; + #ifdef USE_LIBFABRIC + fid_eq* _ec = nullptr; + fid_domain* _pd = nullptr; + fid_cq* _rcv_channel = nullptr; + fid_cq* _trx_channel = nullptr; + fid_cntr* _write_counter = nullptr; + #else + rdma_event_channel * _ec; + ibv_pd* _pd; + #endif + RDMAActive(const std::string & ip, int port, int recv_buf = 1, int max_inline_data = 0); RDMAActive(); ~RDMAActive(); diff --git a/rdmalib/lib/rdmalib.cpp b/rdmalib/lib/rdmalib.cpp index 822a24a..6724755 100644 --- a/rdmalib/lib/rdmalib.cpp +++ b/rdmalib/lib/rdmalib.cpp @@ -14,7 +14,7 @@ #include #include -#ifdef USE_LIBFABRIC +// #ifdef USE_LIBFABRIC #include #include #include @@ -25,7 +25,7 @@ extern "C" { #include "rdmacred.h" } #endif -#endif +// #endif #include #include @@ -93,9 +93,8 @@ namespace rdmalib { Configuration Configuration::_instance; // FIXME: Add credential support - Address::Address(const std::string & ip, int port, bool passive) + LibfabricAddress::LibfabricAddress(const std::string & ip, int port, bool passive) { - #ifdef USE_LIBFABRIC // Set the hints and addrinfo to clear structures hints = fi_allocinfo(); addrinfo = fi_allocinfo(); @@ -129,21 +128,22 @@ namespace rdmalib { addrinfo->ep_attr->auth_key_size = sizeof(cookie); spdlog::info("Saved Cray credentials cookie {}", cookie); #endif - #else + + 
this->_port = port; + this->_ip = ip; + } + VerbsAddress::VerbsAddress(const std::string & ip, int port, bool passive) + { memset(&hints, 0, sizeof hints); hints.ai_port_space = RDMA_PS_TCP; if(passive) hints.ai_flags = RAI_PASSIVE; impl::expect_zero(rdma_getaddrinfo(ip.c_str(), std::to_string(port).c_str(), &hints, &addrinfo)); - #endif this->_port = port; - #ifdef USE_LIBFABRIC - this->_ip = ip; - #endif } - Address::Address(const std::string & sip, const std::string & dip, int port) + LibfabricAddress::LibfabricAddress(const std::string & sip, const std::string & dip, int port) { struct sockaddr_in server_in, local_in; memset(&server_in, 0, sizeof(server_in)); @@ -158,7 +158,6 @@ namespace rdmalib { local_in.sin_family = AF_INET; inet_pton(AF_INET, sip.c_str(), &local_in.sin_addr); - #ifdef USE_LIBFABRIC // Set the hints and addrinfo to clear structures hints = fi_allocinfo(); addrinfo = fi_allocinfo(); @@ -180,7 +179,23 @@ namespace rdmalib { impl::expect_zero(fi_getinfo(FI_VERSION(1, 13), nullptr, nullptr, 0, hints, &addrinfo)); fi_freeinfo(hints); fi_fabric(addrinfo->fabric_attr, &fabric, nullptr); - #else + this->_port = port; + } + VerbsAddress::VerbsAddress(const std::string & sip, const std::string & dip, int port) + { + struct sockaddr_in server_in, local_in; + memset(&server_in, 0, sizeof(server_in)); + memset(&local_in, 0, sizeof(local_in)); + + /*address of remote node*/ + server_in.sin_family = AF_INET; + server_in.sin_port = htons(port); + inet_pton(AF_INET, dip.c_str(), &server_in.sin_addr); + + /*address of local device*/ + local_in.sin_family = AF_INET; + inet_pton(AF_INET, sip.c_str(), &local_in.sin_addr); + memset(&hints, 0, sizeof hints); hints.ai_port_space = RDMA_PS_TCP; hints.ai_src_len = sizeof(local_in); @@ -189,13 +204,22 @@ namespace rdmalib { hints.ai_dst_addr = (struct sockaddr *)(&server_in); impl::expect_zero(rdma_getaddrinfo(NULL, NULL, &hints, &addrinfo)); - #endif this->_port = port; } - Address::Address() {} - - 
Address::~Address() + LibfabricAddress::~LibfabricAddress() + { + #ifdef USE_LIBFABRIC + // TODO Check how to free those and if it's necessary at all. + // When closing the addringo we obtain a double free or corruption problem. + // It seems that the problem is coming from the the ep_attr. + // if (fabric) + // impl::expect_zero(fi_close(&fabric->fid)); + // if (addrinfo) + // fi_freeinfo(addrinfo); + #endif + } + VerbsAddress::~VerbsAddress() { #ifdef USE_LIBFABRIC // TODO Check how to free those and if it's necessary at all. From 2442f72bbae208638def3960919ee6c9e122aace Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Tue, 18 Jul 2023 20:15:59 -0400 Subject: [PATCH 75/91] Refactor rdmalib.hpp entirely --- rdmalib/include/rdmalib/rdmalib.hpp | 137 +++++++++++++++++----------- 1 file changed, 84 insertions(+), 53 deletions(-) diff --git a/rdmalib/include/rdmalib/rdmalib.hpp b/rdmalib/include/rdmalib/rdmalib.hpp index 285f28b..47cc3ff 100644 --- a/rdmalib/include/rdmalib/rdmalib.hpp +++ b/rdmalib/include/rdmalib/rdmalib.hpp @@ -59,10 +59,11 @@ namespace rdmalib { // Implemented as IPV4 template struct Address { + uint16_t _port; + // TODO do usings here for addrinfo & hints #ifdef USE_GNI_AUTH uint64_t cookie; #endif - uint16_t _port; Address(const std::string & ip, int port, bool passive); Address(const std::string & sip, const std::string & dip, int port); @@ -97,21 +98,9 @@ namespace rdmalib { template struct RDMAActive { - #ifndef USE_LIBFABRIC - ConnectionConfiguration _cfg; - #endif + using pd_t = typename library_traits::pd_t; std::unique_ptr _conn; Address _addr; - #ifdef USE_LIBFABRIC - fid_eq* _ec = nullptr; - fid_domain* _pd = nullptr; - fid_cq* _rcv_channel = nullptr; - fid_cq* _trx_channel = nullptr; - fid_cntr* _write_counter = nullptr; - #else - rdma_event_channel * _ec; - ibv_pd* _pd; - #endif RDMAActive(const std::string & ip, int port, int recv_buf = 1, int max_inline_data = 0); RDMAActive(); @@ -119,51 +108,76 @@ namespace rdmalib { void 
allocate(); bool connect(uint32_t secret = 0); void disconnect(); - #ifdef USE_LIBFABRIC - fid_domain* pd() const; - #else - ibv_pd* pd() const; - #endif + pd_t* pd() const; Connection & connection(); bool is_connected(); - };struct RDMAActive { - #ifndef USE_LIBFABRIC - ConnectionConfiguration _cfg; - #endif - std::unique_ptr _conn; - Address _addr; - #ifdef USE_LIBFABRIC + }; + + struct LibfabricRDMAActive : RDMAActive { + std::unique_ptr _conn; + LibfabricAddress _addr; + fid_eq* _ec = nullptr; fid_domain* _pd = nullptr; fid_cq* _rcv_channel = nullptr; fid_cq* _trx_channel = nullptr; fid_cntr* _write_counter = nullptr; - #else + + LibfabricRDMAActive(const std::string & ip, int port, int recv_buf = 1, int max_inline_data = 0); + LibfabricRDMAActive(); + ~LibfabricRDMAActive(); + void allocate(); + bool connect(uint32_t secret = 0); + void disconnect(); + pd_t pd() const; + LibfabricConnection & connection(); + bool is_connected(); + }; + + struct VerbsRDMAActive : RDMAActive { + ConnectionConfiguration _cfg; + std::unique_ptr _conn; + VerbsAddress _addr; rdma_event_channel * _ec; ibv_pd* _pd; - #endif - RDMAActive(const std::string & ip, int port, int recv_buf = 1, int max_inline_data = 0); - RDMAActive(); - ~RDMAActive(); + VerbsRDMAActive(const std::string & ip, int port, int recv_buf = 1, int max_inline_data = 0); + VerbsRDMAActive(); + ~VerbsRDMAActive(); void allocate(); bool connect(uint32_t secret = 0); void disconnect(); - #ifdef USE_LIBFABRIC - fid_domain* pd() const; - #else - ibv_pd* pd() const; - #endif - Connection & connection(); + pd_t pd() const; + + VerbsConnection & connection(); bool is_connected(); }; + template struct RDMAPassive { - #ifndef USE_LIBFABRIC - ConnectionConfiguration _cfg; - #endif Address _addr; - #ifdef USE_LIBFABRIC + // Set of connections that have been + std::unordered_set _active_connections; + + RDMAPassive(const std::string & ip, int port, int recv_buf = 1, bool initialize = true, int max_inline_data = 0); + 
~RDMAPassive(); + void allocate(); + pd_t pd() const; + + // Blocking poll for new rdmacm events. + // Returns connection pointer and connection change status. + // When connection is REQUESTED and ESTABLISHED, the pointer points to a valid connection. + // When the status is DISCONNECTED, the pointer points to a closed connection. + // User should deallocate the closed connection. + // When the status is UNKNOWN, the pointer is null. + std::tuple poll_events(bool share_cqs = false); + bool nonblocking_poll_events(int timeout = 100); + void accept(Connection* connection); + void set_nonblocking_poll(); + }; + + struct LibfabricRDMAPassive : RDMAPassive { + LibfabricAddress _addr; fid_eq* _ec = nullptr; fid_domain* _pd = nullptr; fid_pep* _pep = nullptr; @@ -171,31 +185,48 @@ namespace rdmalib { fid_cq* _trx_channel; fid_cntr* _write_counter = nullptr; // fi_gni_ops_domain* _ops; - #else + // Set of connections that have been + std::unordered_set _active_connections; + + LibfabricRDMAPassive(const std::string & ip, int port, int recv_buf = 1, bool initialize = true, int max_inline_data = 0); + ~LibfabricRDMAPassive(); + void allocate(); + + // Blocking poll for new rdmacm events. + // Returns connection pointer and connection change status. + // When connection is REQUESTED and ESTABLISHED, the pointer points to a valid connection. + // When the status is DISCONNECTED, the pointer points to a closed connection. + // User should deallocate the closed connection. + // When the status is UNKNOWN, the pointer is null. 
+ std::tuple poll_events(bool share_cqs = false); + bool nonblocking_poll_events(int timeout = 100); + void accept(LibfabricConnection* connection); + void set_nonblocking_poll(); + }; + + struct VerbsRDMAPassive : RDMAPassive { + ConnectionConfiguration _cfg; + VerbsAddress _addr; rdma_event_channel * _ec; rdma_cm_id* _listen_id; ibv_pd* _pd; - #endif + // Set of connections that have been - std::unordered_set _active_connections; + std::unordered_set _active_connections; - RDMAPassive(const std::string & ip, int port, int recv_buf = 1, bool initialize = true, int max_inline_data = 0); - ~RDMAPassive(); + VerbsRDMAPassive(const std::string & ip, int port, int recv_buf = 1, bool initialize = true, int max_inline_data = 0); + ~VerbsRDMAPassive(); void allocate(); - #ifdef USE_LIBFABRIC - fid_domain* pd() const; - #else - ibv_pd* pd() const; - #endif + // Blocking poll for new rdmacm events. // Returns connection pointer and connection change status. // When connection is REQUESTED and ESTABLISHED, the pointer points to a valid connection. // When the status is DISCONNECTED, the pointer points to a closed connection. // User should deallocate the closed connection. // When the status is UNKNOWN, the pointer is null. 
- std::tuple poll_events(bool share_cqs = false); + std::tuple poll_events(bool share_cqs = false); bool nonblocking_poll_events(int timeout = 100); - void accept(Connection* connection); + void accept(VerbsConnection* connection); void set_nonblocking_poll(); }; } From 82b3b2defa7bb0d5f5f39f5560f8cfd520a7faef Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Wed, 19 Jul 2023 09:13:27 -0400 Subject: [PATCH 76/91] Refactor up to RDMAActive --- rdmalib/include/rdmalib/connection.hpp | 4 +- rdmalib/include/rdmalib/rdmalib.hpp | 67 ++++++++++++---- rdmalib/lib/rdmalib.cpp | 106 ++++++++++++------------- 3 files changed, 101 insertions(+), 76 deletions(-) diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index 0adbc60..8c57c6b 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -150,7 +150,7 @@ namespace rdmalib { fi_cq_err_entry _ewc; - LibfabricConnection(bool passive); + LibfabricConnection(bool passive=false); LibfabricConnection(LibfabricConnection&& obj); ~LibfabricConnection(); @@ -216,7 +216,7 @@ namespace rdmalib { struct ibv_recv_wr _batch_wrs[_rbatch]; // preallocated and prefilled batched recv. 
std::array _rwc_sges; - VerbsConnection(bool passive); + VerbsConnection(bool passive=false); VerbsConnection(VerbsConnection&& obj); ~VerbsConnection(); void close(); diff --git a/rdmalib/include/rdmalib/rdmalib.hpp b/rdmalib/include/rdmalib/rdmalib.hpp index 47cc3ff..5298cfe 100644 --- a/rdmalib/include/rdmalib/rdmalib.hpp +++ b/rdmalib/include/rdmalib/rdmalib.hpp @@ -25,6 +25,26 @@ extern "C" { #include #include +template +struct rdmalib_traits; + +namespace rdmalib { + struct LibfabricAddress; + struct VerbsAddress; +} + +template <> +struct rdmalib_traits { + using Connection = rdmalib::LibfabricConnection; + using Address = rdmalib::LibfabricAddress; +}; + +template <> +struct rdmalib_traits { + using Connection = rdmalib::VerbsConnection; + using Address = rdmalib::VerbsAddress; +}; + namespace rdmalib { struct Configuration { @@ -99,18 +119,24 @@ namespace rdmalib { template struct RDMAActive { using pd_t = typename library_traits::pd_t; - std::unique_ptr _conn; - Address _addr; + using Connection_t = typename rdmalib_traits::Connection; + using Address_t = typename rdmalib_traits::Address; + + std::unique_ptr _conn; + Address_t _addr; RDMAActive(const std::string & ip, int port, int recv_buf = 1, int max_inline_data = 0); - RDMAActive(); + RDMAActive() {} ~RDMAActive(); void allocate(); bool connect(uint32_t secret = 0); void disconnect(); - pd_t* pd() const; - Connection & connection(); - bool is_connected(); + pd_t* pd() const + { + return static_cast(this)->pd(); + } + Connection_t & connection() { return _conn; } + bool is_connected() { return conn_.get(); } }; struct LibfabricRDMAActive : RDMAActive { @@ -118,18 +144,19 @@ namespace rdmalib { LibfabricAddress _addr; fid_eq* _ec = nullptr; - fid_domain* _pd = nullptr; fid_cq* _rcv_channel = nullptr; fid_cq* _trx_channel = nullptr; fid_cntr* _write_counter = nullptr; + pd_t _pd; LibfabricRDMAActive(const std::string & ip, int port, int recv_buf = 1, int max_inline_data = 0); - LibfabricRDMAActive(); + 
LibfabricRDMAActive() {} ~LibfabricRDMAActive(); void allocate(); bool connect(uint32_t secret = 0); void disconnect(); - pd_t pd() const; + pd_t pd() const { return this->_pd; } + LibfabricConnection & connection(); bool is_connected(); }; @@ -139,15 +166,15 @@ namespace rdmalib { std::unique_ptr _conn; VerbsAddress _addr; rdma_event_channel * _ec; - ibv_pd* _pd; + pd_t _pd; VerbsRDMAActive(const std::string & ip, int port, int recv_buf = 1, int max_inline_data = 0); - VerbsRDMAActive(); + VerbsRDMAActive() {} ~VerbsRDMAActive(); void allocate(); bool connect(uint32_t secret = 0); void disconnect(); - pd_t pd() const; + pd_t pd() const { return this->_pd; } VerbsConnection & connection(); bool is_connected(); @@ -155,14 +182,18 @@ namespace rdmalib { template struct RDMAPassive { - Address _addr; + using Connection_t = typename rdmalib_traits::Connection; + using Address_t = typename rdmalib_traits::Address; + using pd_t = typename library_traits::pd_t; + + Address_t _addr; // Set of connections that have been - std::unordered_set _active_connections; + std::unordered_set _active_connections; RDMAPassive(const std::string & ip, int port, int recv_buf = 1, bool initialize = true, int max_inline_data = 0); ~RDMAPassive(); void allocate(); - pd_t pd() const; + pd_t pd() const { return static_cast(this)->pd(); } // Blocking poll for new rdmacm events. // Returns connection pointer and connection change status. @@ -170,9 +201,9 @@ namespace rdmalib { // When the status is DISCONNECTED, the pointer points to a closed connection. // User should deallocate the closed connection. // When the status is UNKNOWN, the pointer is null. 
- std::tuple poll_events(bool share_cqs = false); + std::tuple poll_events(bool share_cqs = false); bool nonblocking_poll_events(int timeout = 100); - void accept(Connection* connection); + void accept(Connection_t* connection); void set_nonblocking_poll(); }; @@ -191,6 +222,7 @@ namespace rdmalib { LibfabricRDMAPassive(const std::string & ip, int port, int recv_buf = 1, bool initialize = true, int max_inline_data = 0); ~LibfabricRDMAPassive(); void allocate(); + pd_t pd() const { return this->_pd; } // Blocking poll for new rdmacm events. // Returns connection pointer and connection change status. @@ -217,6 +249,7 @@ namespace rdmalib { VerbsRDMAPassive(const std::string & ip, int port, int recv_buf = 1, bool initialize = true, int max_inline_data = 0); ~VerbsRDMAPassive(); void allocate(); + pd_t pd() const { return this->_pd; } // Blocking poll for new rdmacm events. // Returns connection pointer and connection change status. diff --git a/rdmalib/lib/rdmalib.cpp b/rdmalib/lib/rdmalib.cpp index 6724755..6bd049f 100644 --- a/rdmalib/lib/rdmalib.cpp +++ b/rdmalib/lib/rdmalib.cpp @@ -234,13 +234,12 @@ namespace rdmalib { #endif } - RDMAActive::RDMAActive(const std::string & ip, int port, int recv_buf, int max_inline_data): + LibfabricRDMAActive::LibfabricRDMAActive(const std::string & ip, int port, int recv_buf, int max_inline_data): _conn(nullptr), _addr(ip, port, false), _ec(nullptr), _pd(nullptr) { - #ifdef USE_LIBFABRIC // Create a domain (need to do that now so that we can register memory for the domain) impl::expect_zero(fi_domain(_addr.fabric, _addr.addrinfo, &_pd, nullptr)); @@ -263,7 +262,14 @@ namespace rdmalib { cq_attr.size = _addr.addrinfo->rx_attr->size; impl::expect_zero(fi_cq_open(_pd, &cq_attr, &_trx_channel, nullptr)); impl::expect_zero(fi_cq_open(_pd, &cq_attr, &_rcv_channel, nullptr)); - #else + SPDLOG_DEBUG("Create LibfabricRDMAActive"); + } + VerbsRDMAActive::VerbsRDMAActive(const std::string & ip, int port, int recv_buf, int max_inline_data): 
+ _conn(nullptr), + _addr(ip, port, false), + _ec(nullptr), + _pd(nullptr) + { // Size of Queue Pair // Maximum requests in send queue // FIXME: configurable -> parallel workers @@ -285,30 +291,28 @@ namespace rdmalib { _cfg.conn_param.initiator_depth = 4; _cfg.conn_param.retry_count = 3; _cfg.conn_param.rnr_retry_count = 3; - #endif - SPDLOG_DEBUG("Create RDMAActive"); + SPDLOG_DEBUG("Create VerbsRDMAActive"); } - RDMAActive::RDMAActive() {} - - RDMAActive::~RDMAActive() + LibfabricRDMAActive::~LibfabricRDMAActive() { - #ifdef USE_LIBFABRIC if (_pd) impl::expect_zero(fi_close(&_pd->fid)); if (_ec) impl::expect_zero(fi_close(&_ec->fid)); - #else + SPDLOG_DEBUG("Destroy LibfabricRDMAActive"); + } + + VerbsRDMAActive::~VerbsRDMAActive() + { //ibv_dealloc_pd(this->_pd); - #endif - SPDLOG_DEBUG("Destroy RDMAActive"); + SPDLOG_DEBUG("Destroy VerbsRDMAActive"); } - void RDMAActive::allocate() + void LibfabricRDMAActive::allocate() { if(!_conn) { - _conn = std::unique_ptr(new Connection()); - #ifdef USE_LIBFABRIC + _conn = std::unique_ptr(new LibfabricConnection()); // Enable the event queue fi_eq_attr eq_attr; memset(&eq_attr, 0, sizeof(eq_attr)); @@ -317,13 +321,6 @@ namespace rdmalib { impl::expect_zero(fi_eq_open(_addr.fabric, &eq_attr, &_ec, NULL)); // Create and enable the endpoint together with all the accompanying queues _conn->initialize(_addr.fabric, _pd, _addr.addrinfo, _ec, _write_counter, _rcv_channel, _trx_channel); - #else - rdma_cm_id* id; - impl::expect_zero(rdma_create_ep(&id, _addr.addrinfo, nullptr, nullptr)); - impl::expect_zero(rdma_create_qp(id, _pd, &_cfg.attr)); - _conn->initialize(id); - _pd = _conn->id()->pd; - #endif //struct ibv_qp_attr attr; //struct ibv_qp_init_attr init_attr; @@ -375,10 +372,21 @@ namespace rdmalib { //spdlog::info("{} {} {} {} {} {}", ret, errno, conn != nullptr, conn->verbs != nullptr, conn->pd != nullptr, conn->qp != nullptr); } - bool RDMAActive::connect(uint32_t secret) + void VerbsRDMAActive::allocate() + { + 
if(!_conn) { + _conn = std::unique_ptr(new VerbsConnection()); + rdma_cm_id* id; + impl::expect_zero(rdma_create_ep(&id, _addr.addrinfo, nullptr, nullptr)); + impl::expect_zero(rdma_create_qp(id, _pd, &_cfg.attr)); + _conn->initialize(id); + _pd = _conn->id()->pd; + } + } + + bool LibfabricRDMAActive::connect(uint32_t secret) { allocate(); - #ifdef USE_LIBFABRIC uint32_t *param = nullptr; size_t paramlen = 0; if(secret) { @@ -409,7 +417,18 @@ namespace rdmalib { _pd = nullptr; return false; } - #else + + //struct ibv_qp_attr attr; + //struct ibv_qp_init_attr init_attr; + //impl::expect_zero(ibv_query_qp(_conn->_qp, &attr, IBV_QP_DEST_QPN, &init_attr )); + //SPDLOG_DEBUG("Local QPN {}, remote QPN {} ",_conn->_qp->qp_num, attr.dest_qp_num); + + return true; + } + + bool VerbsRDMAActive::connect(uint32_t secret) + { + allocate(); if(secret) { _cfg.conn_param.private_data = &secret; _cfg.conn_param.private_data_len = sizeof(uint32_t); @@ -426,52 +445,25 @@ namespace rdmalib { _addr._port, _addr._port, ibv_get_device_name(this->_conn->id()->verbs->device) ); } - #endif - - //struct ibv_qp_attr attr; - //struct ibv_qp_init_attr init_attr; - //impl::expect_zero(ibv_query_qp(_conn->_qp, &attr, IBV_QP_DEST_QPN, &init_attr )); - //SPDLOG_DEBUG("Local QPN {}, remote QPN {} ",_conn->_qp->qp_num, attr.dest_qp_num); return true; } - void RDMAActive::disconnect() + void LibfabricRDMAActive::disconnect() { - #ifdef USE_LIBFABRIC // TODO: Add the disconnectin id spdlog::debug("[RDMAActive] Disconnecting connection with id {}", fmt::ptr(&_conn->qp()->fid)); _conn->close(); _conn.reset(); _pd = nullptr; - #else + } + + void VerbsRDMAActive::disconnect() + { spdlog::debug("[RDMAActive] Disonnecting connection with id {}", fmt::ptr(_conn->id())); impl::expect_zero(rdma_disconnect(_conn->id())); _conn.reset(); _pd = nullptr; - #endif - } - - #ifdef USE_LIBFABRIC - fid_domain* RDMAActive::pd() const - { - return this->_pd; - } - #else - ibv_pd* RDMAActive::pd() const - { - return 
this->_pd; - } - #endif - - Connection & RDMAActive::connection() - { - return *this->_conn; - } - - bool RDMAActive::is_connected() - { - return this->_conn.get(); } RDMAPassive::RDMAPassive(const std::string & ip, int port, int recv_buf, bool initialize, int max_inline_data): From 35405aca69d638c0858a5a41f0e175eeb69ffbf0 Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Wed, 19 Jul 2023 09:30:15 -0400 Subject: [PATCH 77/91] Rdmalib compiles --- rdmalib/include/rdmalib/connection.hpp | 2 +- rdmalib/include/rdmalib/rdmalib.hpp | 11 +-- rdmalib/lib/rdmalib.cpp | 97 +++++++++++++------------- 3 files changed, 51 insertions(+), 59 deletions(-) diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index 8c57c6b..2778404 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -102,7 +102,7 @@ namespace rdmalib { } qp_t qp() const { - return this->_id; + return this->_qp; // TODO sure? } uint32_t private_data() const; diff --git a/rdmalib/include/rdmalib/rdmalib.hpp b/rdmalib/include/rdmalib/rdmalib.hpp index 5298cfe..d725e47 100644 --- a/rdmalib/include/rdmalib/rdmalib.hpp +++ b/rdmalib/include/rdmalib/rdmalib.hpp @@ -34,7 +34,7 @@ namespace rdmalib { } template <> -struct rdmalib_traits { +struct rdmalib_traits { // TODO make static? 
using Connection = rdmalib::LibfabricConnection; using Address = rdmalib::LibfabricAddress; }; @@ -136,7 +136,7 @@ namespace rdmalib { return static_cast(this)->pd(); } Connection_t & connection() { return _conn; } - bool is_connected() { return conn_.get(); } + bool is_connected() { return _conn.get(); } }; struct LibfabricRDMAActive : RDMAActive { @@ -156,9 +156,6 @@ namespace rdmalib { bool connect(uint32_t secret = 0); void disconnect(); pd_t pd() const { return this->_pd; } - - LibfabricConnection & connection(); - bool is_connected(); }; struct VerbsRDMAActive : RDMAActive { @@ -175,9 +172,6 @@ namespace rdmalib { bool connect(uint32_t secret = 0); void disconnect(); pd_t pd() const { return this->_pd; } - - VerbsConnection & connection(); - bool is_connected(); }; template @@ -191,6 +185,7 @@ namespace rdmalib { std::unordered_set _active_connections; RDMAPassive(const std::string & ip, int port, int recv_buf = 1, bool initialize = true, int max_inline_data = 0); + RDMAPassive(); ~RDMAPassive(); void allocate(); pd_t pd() const { return static_cast(this)->pd(); } diff --git a/rdmalib/lib/rdmalib.cpp b/rdmalib/lib/rdmalib.cpp index 6bd049f..22b319b 100644 --- a/rdmalib/lib/rdmalib.cpp +++ b/rdmalib/lib/rdmalib.cpp @@ -466,17 +466,22 @@ namespace rdmalib { _pd = nullptr; } - RDMAPassive::RDMAPassive(const std::string & ip, int port, int recv_buf, bool initialize, int max_inline_data): + LibfabricRDMAPassive::LibfabricRDMAPassive(const std::string & ip, int port, int recv_buf, bool initialize, int max_inline_data): _addr(ip, port, true), _ec(nullptr), - #ifndef USE_LIBFABRIC - _listen_id(nullptr), - #endif _pd(nullptr) { - #ifdef USE_LIBFABRIC impl::expect_zero(fi_domain(_addr.fabric, _addr.addrinfo, &_pd, nullptr)); - #else + if(initialize) + this->allocate(); + } + + VerbsRDMAPassive::VerbsRDMAPassive(const std::string & ip, int port, int recv_buf, bool initialize, int max_inline_data): + _addr(ip, port, true), + _ec(nullptr), + _listen_id(nullptr), + 
_pd(nullptr) + { // Size of Queue Pair // FIXME: configurable -> parallel workers _cfg.attr.cap.max_send_wr = 40; @@ -492,30 +497,29 @@ namespace rdmalib { _cfg.conn_param.initiator_depth = 4; _cfg.conn_param.retry_count = 3; _cfg.conn_param.rnr_retry_count = 3; - #endif if(initialize) this->allocate(); } - RDMAPassive::~RDMAPassive() + LibfabricRDMAPassive::~LibfabricRDMAPassive() { - #ifdef USE_LIBFABRIC if (_pd) impl::expect_zero(fi_close(&_pd->fid)); if (_pep) impl::expect_zero(fi_close(&_pep->fid)); if (_ec) impl::expect_zero(fi_close(&_ec->fid)); - #else + } + + VerbsRDMAPassive::~VerbsRDMAPassive() + { rdma_destroy_id(this->_listen_id); rdma_destroy_event_channel(this->_ec); - #endif } - void RDMAPassive::allocate() + void LibfabricRDMAPassive::allocate() { - #ifdef USE_LIBFABRIC // Start listening fi_eq_attr eq_attr; memset(&eq_attr, 0, sizeof(eq_attr)); @@ -550,7 +554,10 @@ namespace rdmalib { // uint32_t val; // _ops->get_val(&_pd->fid, GNI_CONN_TABLE_MAX_SIZE, &val); // std::cout << "MAXIMUM VALUE: " << val << std::endl; - #else + } + + void VerbsRDMAPassive::allocate() + { // Start listening impl::expect_nonzero(this->_ec = rdma_create_event_channel()); impl::expect_zero(rdma_create_id(this->_ec, &this->_listen_id, NULL, RDMA_PS_TCP)); @@ -566,23 +573,9 @@ namespace rdmalib { "[RDMAPassive]: listening id {}, protection domain {}", fmt::ptr(this->_listen_id), _pd->handle ); - #endif } - #ifdef USE_LIBFABRIC - fid_domain* RDMAPassive::pd() const - { - return this->_pd; - } - #else - ibv_pd* RDMAPassive::pd() const - { - return this->_pd; - } - #endif - - #ifndef USE_LIBFABRIC - void RDMAPassive::set_nonblocking_poll() + void VerbsRDMAPassive::set_nonblocking_poll() { int fd = this->_ec->fd; int flags = fcntl(fd, F_GETFL); @@ -592,18 +585,19 @@ namespace rdmalib { return; } } - #endif - bool RDMAPassive::nonblocking_poll_events(int timeout) + bool LibfabricRDMAPassive::nonblocking_poll_events(int timeout) { - #ifdef USE_LIBFABRIC uint32_t event; 
fi_eq_entry entry; int ret = fi_eq_sread(_ec, &event, &entry, sizeof(entry), timeout, FI_PEEK); if (ret < 0 && ret != -FI_EAGAIN && ret != -FI_EAVAIL) spdlog::error("RDMA event poll failed"); return ret > 0 || ret == -FI_EAVAIL; - #else + } + + bool VerbsRDMAPassive::nonblocking_poll_events(int timeout) + { pollfd my_pollfd; my_pollfd.fd = this->_ec->fd; my_pollfd.events = POLLIN; @@ -614,17 +608,15 @@ namespace rdmalib { return false; } return rc > 0; - #endif } - std::tuple RDMAPassive::poll_events(bool share_cqs) + std::tuple LibfabricRDMAPassive::poll_events(bool share_cqs) { - #ifdef USE_LIBFABRIC uint32_t event; // Need those additional bytes in fi_eq_cm_entry so that we can transfer the secret int total_size = sizeof(fi_eq_cm_entry) + sizeof(uint32_t); fi_eq_cm_entry *entry = (fi_eq_cm_entry *)malloc(total_size); - Connection* connection = nullptr; + LibfabricConnection* connection = nullptr; ConnectionStatus status = ConnectionStatus::UNKNOWN; // Poll rdma cm events. @@ -643,7 +635,7 @@ namespace rdmalib { switch (event) { case FI_CONNREQ: - connection = new Connection{true}; + connection = new LibfabricConnection{true}; SPDLOG_DEBUG("[RDMAPassive] Connection request with ret {}", ret); @@ -686,7 +678,7 @@ namespace rdmalib { "[RDMAPassive] Connection is established for id {}, and connection {}", fmt::ptr(entry->fid), fmt::ptr(entry->fid->context) ); - connection = reinterpret_cast(entry->fid->context); + connection = reinterpret_cast(entry->fid->context); status = ConnectionStatus::ESTABLISHED; break; case FI_SHUTDOWN: @@ -694,7 +686,7 @@ namespace rdmalib { "[RDMAPassive] Disconnect for id {}, and connection {}", fmt::ptr(entry->fid), fmt::ptr(entry->fid->context) ); - connection = reinterpret_cast(entry->fid->context); + connection = reinterpret_cast(entry->fid->context); //connection->close(); status = ConnectionStatus::DISCONNECTED; _active_connections.erase(connection); @@ -704,9 +696,14 @@ namespace rdmalib { break; } free(entry); - #else + + return 
std::make_tuple(connection, status); + } + + std::tuple VerbsRDMAPassive::poll_events(bool share_cqs) + { rdma_cm_event* event = nullptr; - Connection* connection = nullptr; + VerbsConnection* connection = nullptr; ConnectionStatus status = ConnectionStatus::UNKNOWN; // Poll rdma cm events. @@ -721,7 +718,7 @@ namespace rdmalib { switch (event->event) { case RDMA_CM_EVENT_CONNECT_REQUEST: - connection = new Connection{true}; + connection = new VerbsConnection{true}; if(event->param.conn.private_data_len != 0) { uint32_t data = *reinterpret_cast(event->param.conn.private_data); connection->set_private_data(data); @@ -762,7 +759,7 @@ namespace rdmalib { "[RDMAPassive] Connection is established for id {}, and connection {}", fmt::ptr(event->id), fmt::ptr(event->id->context) ); - connection = reinterpret_cast(event->id->context); + connection = reinterpret_cast(event->id->context); status = ConnectionStatus::ESTABLISHED; break; case RDMA_CM_EVENT_DISCONNECTED: @@ -770,7 +767,7 @@ namespace rdmalib { "[RDMAPassive] Disconnect for id {}, and connection {}", fmt::ptr(event->id), fmt::ptr(event->id->context) ); - connection = reinterpret_cast(event->id->context); + connection = reinterpret_cast(event->id->context); //connection->close(); status = ConnectionStatus::DISCONNECTED; _active_connections.erase(connection); @@ -795,23 +792,23 @@ namespace rdmalib { break; } rdma_ack_cm_event(event); - #endif return std::make_tuple(connection, status); } - void RDMAPassive::accept(Connection* connection) { - #ifdef USE_LIBFABRIC + void LibfabricRDMAPassive::accept(LibfabricConnection* connection) { if(fi_accept(connection->qp(), nullptr, 0)) { spdlog::error("Conection accept unsuccessful, reason {} {}", errno, strerror(errno)); connection = nullptr; } - #else + SPDLOG_DEBUG("[RDMAPassive] Connection accepted at QP {}", fmt::ptr(connection->qp())); + } + + void VerbsRDMAPassive::accept(VerbsConnection* connection) { if(rdma_accept(connection->id(), &_cfg.conn_param)) { 
spdlog::error("Conection accept unsuccesful, reason {} {}", errno, strerror(errno)); connection = nullptr; } - #endif SPDLOG_DEBUG("[RDMAPassive] Connection accepted at QP {}", fmt::ptr(connection->qp())); } } From 210bd7b52708efb7a081b701e4f4f758dc8ba348 Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Mon, 24 Jul 2023 06:31:02 -0400 Subject: [PATCH 78/91] Refactoring rfaas code and fixing errors --- rdmalib/include/rdmalib/allocation.hpp | 2 +- rdmalib/include/rdmalib/connection.hpp | 4 ++-- rdmalib/include/rdmalib/libraries.hpp | 15 +++------------ rdmalib/include/rdmalib/rdmalib.hpp | 10 ++++++++++ rdmalib/include/rdmalib/recv_buffer.hpp | 7 ++++--- rdmalib/include/rdmalib/server.hpp | 4 ++-- rfaas/include/rfaas/connection.hpp | 16 +++++++++++----- rfaas/lib/connection.cpp | 18 ++++++++++++------ 8 files changed, 45 insertions(+), 31 deletions(-) diff --git a/rdmalib/include/rdmalib/allocation.hpp b/rdmalib/include/rdmalib/allocation.hpp index 9f08b46..3b6cfbe 100644 --- a/rdmalib/include/rdmalib/allocation.hpp +++ b/rdmalib/include/rdmalib/allocation.hpp @@ -23,7 +23,7 @@ namespace rdmalib { template struct BufferInformation { - using rkey_t = library_traits::rkey_t; + using rkey_t = typename library_traits::rkey_t; uint64_t r_addr; rkey_t r_key; }; diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index 2778404..149dadf 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -202,7 +202,7 @@ namespace rdmalib { int32_t _post_write(SGE && elems, const RemoteBuffer_ & rbuf, const uint32_t immediate = 0); int32_t post_write(SGE && elems, const RemoteBuffer_ & rbuf, bool force_inline); - std::tuple poll_wc(QueueType type, bool blocking, int count, bool update); + std::tuple poll_wc(QueueType type, bool blocking=true, int count=-1, bool update=false); }; struct VerbsConnection : Connection @@ -245,7 +245,7 @@ namespace rdmalib { int32_t _post_write(SGE && elems, ibv_send_wr wr, 
bool force_inline, bool force_solicited); int32_t post_write(SGE && elems, const RemoteBuffer_ & rbuf, bool force_inline); - std::tuple poll_wc(QueueType type, bool blocking, int count); + std::tuple poll_wc(QueueType type, bool blocking=true, int count=-1); }; } diff --git a/rdmalib/include/rdmalib/libraries.hpp b/rdmalib/include/rdmalib/libraries.hpp index d1fdc87..14b32fb 100644 --- a/rdmalib/include/rdmalib/libraries.hpp +++ b/rdmalib/include/rdmalib/libraries.hpp @@ -3,7 +3,7 @@ #include #include -//#include "rdmalib/rdmalib.hpp" +#include // Forward declare ibverbs structs struct ibv_pd; @@ -17,9 +17,11 @@ struct ibv_wc; struct ibverbs; struct libfabric; +// Base trait template struct library_traits; +// Library-specialized traits template <> struct library_traits { @@ -35,11 +37,6 @@ struct library_traits using wc_t = fi_cq_data_entry; using id_t = fid *; using channel_t = fid_cq *; - - // template - // using LibBuffer = rdmalib::Buffer; - // using LibSGE = rdmalib::LibfabricScatterGatherElement; - // using LibConnection = rdmalib::LibfabricConnection; }; template <> @@ -57,12 +54,6 @@ struct library_traits using wc_t = ibv_wc; using id_t = rdma_cm_id *; using channel_t = ibv_comp_channel *; - - // These are going to need to go elsewhere - // template - // using LibBuffer = rdmalib::Buffer; - // using LibSGE = rdmalib::VerbsScatterGatherElement; - // using LibConnection = rdmalib::VerbsConnection; }; #endif \ No newline at end of file diff --git a/rdmalib/include/rdmalib/rdmalib.hpp b/rdmalib/include/rdmalib/rdmalib.hpp index d725e47..9f4a338 100644 --- a/rdmalib/include/rdmalib/rdmalib.hpp +++ b/rdmalib/include/rdmalib/rdmalib.hpp @@ -31,18 +31,28 @@ struct rdmalib_traits; namespace rdmalib { struct LibfabricAddress; struct VerbsAddress; + + struct LibfabricRDMAActive; + struct VerbsRDMAActive; + + struct LibfabricRecvBuffer; + struct VerbsRecvBuffer; } template <> struct rdmalib_traits { // TODO make static? 
using Connection = rdmalib::LibfabricConnection; using Address = rdmalib::LibfabricAddress; + using RDMAActive = rdmalib::LibfabricRDMAActive; + using RecvBuffer = rdmalib::LibfabricRecvBuffer; }; template <> struct rdmalib_traits { using Connection = rdmalib::VerbsConnection; using Address = rdmalib::VerbsAddress; + using RDMAActive = rdmalib::VerbsRDMAActive; + using RecvBuffer = rdmalib::VerbsRecvBuffer; }; namespace rdmalib { diff --git a/rdmalib/include/rdmalib/recv_buffer.hpp b/rdmalib/include/rdmalib/recv_buffer.hpp index bc76f89..4da4998 100644 --- a/rdmalib/include/rdmalib/recv_buffer.hpp +++ b/rdmalib/include/rdmalib/recv_buffer.hpp @@ -15,14 +15,15 @@ namespace rdmalib template struct RecvBuffer { - using wc_t = library_traits::wc_t; + using wc_t = typename library_traits::wc_t; + using Connection_t = typename rdmalib_traits::Connection; //using LibConnection = library_traits::LibConnection; // TODO int _rcv_buf_size; int _refill_threshold; int _requests; constexpr static int DEFAULT_REFILL_THRESHOLD = 8; - LibConnection *_conn; + Connection_t *_conn; RecvBuffer(int rcv_buf_size) : _rcv_buf_size(rcv_buf_size), _refill_threshold(std::min(_rcv_buf_size, DEFAULT_REFILL_THRESHOLD)), @@ -31,7 +32,7 @@ namespace rdmalib { } - inline void connect(LibConnection *conn) + inline void connect(Connection_t *conn) { this->_conn = conn; _requests = 0; diff --git a/rdmalib/include/rdmalib/server.hpp b/rdmalib/include/rdmalib/server.hpp index 7b75dba..04be6dc 100644 --- a/rdmalib/include/rdmalib/server.hpp +++ b/rdmalib/include/rdmalib/server.hpp @@ -27,12 +27,12 @@ namespace rdmalib { namespace server { ServerStatus(std::string address, int port); template - void add_buffer(const rdmalib::Buffer & mr) + void add_buffer(const rdmalib::Buffer & mr) { _buffers.push_back({mr.address(), mr.rkey(), mr.size()}); } - void set_thread_allocator(const rdmalib::Buffer & mr) + void set_thread_allocator(const rdmalib::Buffer & mr) { _threads_allocator = {mr.address(), mr.rkey(), 
mr.size()}; } diff --git a/rfaas/include/rfaas/connection.hpp b/rfaas/include/rfaas/connection.hpp index 448501b..611a0d0 100644 --- a/rfaas/include/rfaas/connection.hpp +++ b/rfaas/include/rfaas/connection.hpp @@ -13,18 +13,24 @@ namespace rfaas { + template struct manager_connection { + + using RDMAActive_t = typename rdmalib_traits::RDMAActive; + using RecvBuffer_t = typename rdmalib_traits::RecvBuffer; + using Connection_t = typename rdmalib_traits::Connection; + std::string _address; int _port; - rdmalib::Buffer _submit_buffer; - rdmalib::RDMAActive _active; - rdmalib::RecvBuffer _rcv_buffer; - rdmalib::Buffer _allocation_buffer; + rdmalib::Buffer _submit_buffer; + RDMAActive_t _active; + RecvBuffer_t _rcv_buffer; + rdmalib::Buffer _allocation_buffer; int _max_inline_data; manager_connection(std::string address, int port, int rcv_buf, int max_inline_data); - rdmalib::Connection & connection(); + Connection_t & connection(); rdmalib::AllocationRequest & request(); bool connect(); void disconnect(); diff --git a/rfaas/lib/connection.cpp b/rfaas/lib/connection.cpp index d9dd5c3..7177d66 100644 --- a/rfaas/lib/connection.cpp +++ b/rfaas/lib/connection.cpp @@ -11,7 +11,8 @@ namespace rfaas { - manager_connection::manager_connection(std::string address, int port, + template + manager_connection::manager_connection(std::string address, int port, int rcv_buf, int max_inline_data): _address(address), _port(port), @@ -23,7 +24,8 @@ namespace rfaas { _active.allocate(); } - bool manager_connection::connect() + template + bool manager_connection::connect() { SPDLOG_DEBUG("Connecting to manager at {}:{}", _address, _port); bool ret = _active.connect(); @@ -42,7 +44,8 @@ namespace rfaas { return ret; } - void manager_connection::disconnect() + template + void manager_connection::disconnect() { SPDLOG_DEBUG("Disconnecting from manager at {}:{}", _address, _port); // Send deallocation request only if we're connected @@ -57,17 +60,20 @@ namespace rfaas { } } - 
rdmalib::Connection & manager_connection::connection() + template + manager_connection::Connection_t & manager_connection::connection() { return _active.connection(); } - rdmalib::AllocationRequest & manager_connection::request() + template + rdmalib::AllocationRequest & manager_connection::request() { return *(_allocation_buffer.data() + _rcv_buffer._rcv_buf_size); } - bool manager_connection::submit() + template + bool manager_connection::submit() { rdmalib::ScatterGatherElement sge; size_t obj_size = sizeof(rdmalib::AllocationRequest); From 666c8a9daa256e6ac02db9578a7b5e481a5dc11a Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Wed, 20 Sep 2023 19:56:01 +0000 Subject: [PATCH 79/91] Fixing up rfaaslib --- rdmalib/include/rdmalib/buffer.hpp | 6 + rdmalib/include/rdmalib/connection.hpp | 26 +- rdmalib/include/rdmalib/rdmalib.hpp | 10 +- rdmalib/lib/connection.cpp | 14 +- rfaas/include/rfaas/connection.hpp | 1 + rfaas/include/rfaas/executor.hpp | 460 ++++++++++++++++++------- rfaas/lib/connection.cpp | 6 +- rfaas/lib/executor.cpp | 353 ++++++++++++++----- 8 files changed, 648 insertions(+), 228 deletions(-) diff --git a/rdmalib/include/rdmalib/buffer.hpp b/rdmalib/include/rdmalib/buffer.hpp index 3a849f1..eb28f03 100644 --- a/rdmalib/include/rdmalib/buffer.hpp +++ b/rdmalib/include/rdmalib/buffer.hpp @@ -132,6 +132,12 @@ namespace rdmalib } }; + struct LibfabricRemoteBuffer : RemoteBuffer + {}; + + struct VerbsRemoteBuffer : RemoteBuffer + {}; + template struct Buffer : impl::Buffer, Library> { diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index 149dadf..bdea0c2 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -63,7 +63,7 @@ namespace rdmalib { using channel_t = typename library_traits::channel_t; template // TODO: remove this generic. 
should be a trait using SGE = ScatterGatherElement; - using RemoteBuffer_ = RemoteBuffer; + using RemoteBuffer_t = RemoteBuffer; qp_t _qp; int32_t _req_count; @@ -121,23 +121,22 @@ namespace rdmalib { int32_t post_batched_empty_recv(int32_t count = 1); template - int32_t post_write(SGE && elems, const RemoteBuffer_ & buf, bool force_inline = false); + int32_t post_write(SGE && elems, const RemoteBuffer_t & buf, bool force_inline = false); // Solicited makes sense only for RDMA write with immediate template - int32_t post_write(SGE && elems, const RemoteBuffer_ & buf, + int32_t post_write(SGE && elems, const RemoteBuffer_t & buf, uint32_t immediate, bool force_inline = false, bool solicited = false ); template - int32_t post_cas(SGE && elems, const RemoteBuffer_ & buf, uint64_t compare, uint64_t swap); + int32_t post_cas(SGE && elems, const RemoteBuffer_t & buf, uint64_t compare, uint64_t swap); }; struct LibfabricConnection : Connection { template using Buffer = Buffer; - //using RemoteBuffer_ = RemoteBuffer_; using SGE = LibfabricScatterGatherElement; fid_cq *_rcv_channel; @@ -169,12 +168,12 @@ namespace rdmalib { channel_t receive_completion_channel() const; channel_t transmit_completion_channel() const; - int32_t post_cas(SGE && elems, const RemoteBuffer_ & rbuf, uint64_t compare, uint64_t swap); + int32_t post_cas(SGE && elems, const RemoteBuffer_t & rbuf, uint64_t compare, uint64_t swap); int32_t post_send(const SGE & elems, int32_t id, bool force_inline); int32_t post_batched_empty_recv(int count); int32_t post_recv(SGE && elem, int32_t id, int count); - template inline int32_t post_write(const Buffer & buf, const size_t size, const uint64_t offset, const RemoteBuffer_ & rbuf, const uint32_t immediate) { + template int32_t post_write(const Buffer & buf, const size_t size, const uint64_t offset, const RemoteBuffer_t & rbuf, const uint32_t immediate) { int ret = fi_writedata(_qp, (void *)(buf.address() + offset), size, buf.lkey(), immediate + (size << 32), 
NULL, rbuf.addr, rbuf.rkey, (void *)(_req_count++)); if(ret) { spdlog::error("Post write unsuccessful, reason {} {}, buf size {}, id {}, remote addr {}, remote rkey {}, imm data {}, connection {}", @@ -194,13 +193,13 @@ namespace rdmalib { return _req_count - 1; } - int32_t post_atomic_fadd(const Buffer & _accounting_buf, const RemoteBuffer_ & rbuf, uint64_t add); + int32_t post_atomic_fadd(const Buffer & _accounting_buf, const RemoteBuffer_t & rbuf, uint64_t add); // Register to be notified about all events, including unsolicited ones int wait_events(int timeout = -1); - int32_t _post_write(SGE && elems, const RemoteBuffer_ & rbuf, const uint32_t immediate = 0); - int32_t post_write(SGE && elems, const RemoteBuffer_ & rbuf, bool force_inline); + int32_t _post_write(SGE && elems, const RemoteBuffer_t & rbuf, const uint32_t immediate = 0); + int32_t post_write(SGE && elems, const RemoteBuffer_t & rbuf, bool force_inline); std::tuple poll_wc(QueueType type, bool blocking=true, int count=-1, bool update=false); }; @@ -208,7 +207,6 @@ namespace rdmalib { struct VerbsConnection : Connection { using SGE = VerbsScatterGatherElement; - // using RemoteBuffer_ = RemoteBuffer_; // handled in parent id_t _id; channel_t _channel; @@ -235,15 +233,15 @@ namespace rdmalib { int32_t post_send(const SGE & elems, int32_t id, bool force_inline); int32_t post_batched_empty_recv(int count); int32_t post_recv(SGE && elem, int32_t id, int count); - int32_t post_cas(SGE && elems, const RemoteBuffer_ & rbuf, uint64_t compare, uint64_t swap); - int32_t post_atomic_fadd(SGE && elems, const RemoteBuffer_ & rbuf, uint64_t add); + int32_t post_cas(SGE && elems, const RemoteBuffer_t & rbuf, uint64_t compare, uint64_t swap); + int32_t post_atomic_fadd(SGE && elems, const RemoteBuffer_t & rbuf, uint64_t add); void notify_events(bool only_solicited = false); ibv_cq* wait_events(); void ack_events(ibv_cq* cq, int len); int32_t _post_write(SGE && elems, ibv_send_wr wr, bool force_inline, bool 
force_solicited); - int32_t post_write(SGE && elems, const RemoteBuffer_ & rbuf, bool force_inline); + int32_t post_write(SGE && elems, const RemoteBuffer_t & rbuf, bool force_inline); std::tuple poll_wc(QueueType type, bool blocking=true, int count=-1); diff --git a/rdmalib/include/rdmalib/rdmalib.hpp b/rdmalib/include/rdmalib/rdmalib.hpp index 9f4a338..dbbd341 100644 --- a/rdmalib/include/rdmalib/rdmalib.hpp +++ b/rdmalib/include/rdmalib/rdmalib.hpp @@ -34,17 +34,22 @@ namespace rdmalib { struct LibfabricRDMAActive; struct VerbsRDMAActive; + struct LibfabricRDMAPassive; + struct VerbsRDMAPassive; struct LibfabricRecvBuffer; struct VerbsRecvBuffer; } template <> -struct rdmalib_traits { // TODO make static? +struct rdmalib_traits { using Connection = rdmalib::LibfabricConnection; using Address = rdmalib::LibfabricAddress; using RDMAActive = rdmalib::LibfabricRDMAActive; + using RDMAPassive = rdmalib::LibfabricRDMAPassive; using RecvBuffer = rdmalib::LibfabricRecvBuffer; + using ScatterGatherElement = rdmalib::LibfabricScatterGatherElement; + using RemoteBuffer = rdmalib::LibfabricRemoteBuffer; }; template <> @@ -52,7 +57,10 @@ struct rdmalib_traits { using Connection = rdmalib::VerbsConnection; using Address = rdmalib::VerbsAddress; using RDMAActive = rdmalib::VerbsRDMAActive; + using RDMAPassive = rdmalib::VerbsRDMAPassive; using RecvBuffer = rdmalib::VerbsRecvBuffer; + using ScatterGatherElement = rdmalib::VerbsScatterGatherElement; + using RemoteBuffer = rdmalib::VerbsRemoteBuffer; }; namespace rdmalib { diff --git a/rdmalib/lib/connection.cpp b/rdmalib/lib/connection.cpp index f4c6d46..b6eb80b 100644 --- a/rdmalib/lib/connection.cpp +++ b/rdmalib/lib/connection.cpp @@ -465,7 +465,7 @@ namespace rdmalib { return wr.wr_id; } - int32_t LibfabricConnection::_post_write(SGE && elems, const RemoteBuffer_ & rbuf, const uint32_t immediate) + int32_t LibfabricConnection::_post_write(SGE && elems, const RemoteBuffer_t & rbuf, const uint32_t immediate) { fi_addr_t temp = 
0; int32_t id = _req_count++; @@ -528,7 +528,7 @@ namespace rdmalib { return _req_count - 1; } - int32_t LibfabricConnection::post_write(SGE && elems, const RemoteBuffer_ & rbuf, bool force_inline) + int32_t LibfabricConnection::post_write(SGE && elems, const RemoteBuffer_t & rbuf, bool force_inline) { if (elems.size() > 1) { spdlog::error("Post write unsuccessful on connection {}, reason Function not implemented for multiple sges.", fmt::ptr(this)); @@ -537,7 +537,7 @@ namespace rdmalib { return _post_write(std::forward(elems), rbuf); } - int32_t VerbsConnection::post_write(SGE && elems, const RemoteBuffer_ & rbuf, bool force_inline) + int32_t VerbsConnection::post_write(SGE && elems, const RemoteBuffer_t & rbuf, bool force_inline) { ibv_send_wr wr; memset(&wr, 0, sizeof(wr)); @@ -547,7 +547,7 @@ namespace rdmalib { return _post_write(std::forward(elems), wr, force_inline, false); } - int32_t LibfabricConnection::post_cas(SGE && elems, const RemoteBuffer_ & rbuf, uint64_t compare, uint64_t swap) + int32_t LibfabricConnection::post_cas(SGE && elems, const RemoteBuffer_t & rbuf, uint64_t compare, uint64_t swap) { // TODO check if fi_addr_t temp = 0; @@ -563,7 +563,7 @@ namespace rdmalib { return _req_count - 1; } - int32_t VerbsConnection::post_cas(SGE && elems, const RemoteBuffer_ & rbuf, uint64_t compare, uint64_t swap) + int32_t VerbsConnection::post_cas(SGE && elems, const RemoteBuffer_t & rbuf, uint64_t compare, uint64_t swap) { ibv_send_wr wr, *bad; memset(&wr, 0, sizeof(wr)); @@ -587,7 +587,7 @@ namespace rdmalib { return _req_count - 1; } - int32_t LibfabricConnection::post_atomic_fadd(const Buffer & _accounting_buf, const RemoteBuffer_ & rbuf, uint64_t add) + int32_t LibfabricConnection::post_atomic_fadd(const Buffer & _accounting_buf, const RemoteBuffer_t & rbuf, uint64_t add) { int32_t id = _req_count++; memcpy(_accounting_buf.data(), &add, sizeof(add)); @@ -602,7 +602,7 @@ namespace rdmalib { return _req_count - 1; } - int32_t 
VerbsConnection::post_atomic_fadd(SGE && elems, const RemoteBuffer_ & rbuf, uint64_t add) + int32_t VerbsConnection::post_atomic_fadd(SGE && elems, const RemoteBuffer_t & rbuf, uint64_t add) { ibv_send_wr wr, *bad; memset(&wr, 0, sizeof(wr)); diff --git a/rfaas/include/rfaas/connection.hpp b/rfaas/include/rfaas/connection.hpp index 611a0d0..6641a9e 100644 --- a/rfaas/include/rfaas/connection.hpp +++ b/rfaas/include/rfaas/connection.hpp @@ -19,6 +19,7 @@ namespace rfaas { using RDMAActive_t = typename rdmalib_traits::RDMAActive; using RecvBuffer_t = typename rdmalib_traits::RecvBuffer; using Connection_t = typename rdmalib_traits::Connection; + using SGE_t = typename rdmalib_traits::ScatterGatherElement; std::string _address; int _port; diff --git a/rfaas/include/rfaas/executor.hpp b/rfaas/include/rfaas/executor.hpp index 7a00e87..9bfeda0 100644 --- a/rfaas/include/rfaas/executor.hpp +++ b/rfaas/include/rfaas/executor.hpp @@ -41,19 +41,32 @@ namespace rfaas { operator int() const; }; + template struct executor_state { - std::unique_ptr conn; - rdmalib::RemoteBuffer remote_input; - rdmalib::RecvBuffer _rcv_buffer; - executor_state(rdmalib::Connection*, int rcv_buf_size); + + using RemoteBuffer_t = typename rdmalib_traits::RemoteBuffer; + using RecvBuffer_t = typename rdmalib_traits::RecvBuffer; + using Connection_t = typename rdmalib_traits::Connection; + + std::unique_ptr conn; + RemoteBuffer_t remote_input; + RecvBuffer_t _rcv_buffer; + executor_state(Connection_t*, int rcv_buf_size); }; + template struct executor { + + using RDMAPassive_t = typename rdmalib_traits::RDMAPassive; + using RecvBuffer_t = typename rdmalib_traits::RecvBuffer; + using ScatterGatherElement_t = typename rdmalib_traits::ScatterGatherElement; + using rkey_t = typename library_traits::rkey_t; + static constexpr int MAX_REMOTE_WORKERS = 64; // FIXME: - rdmalib::RDMAPassive _state; - rdmalib::RecvBuffer _rcv_buffer; - rdmalib::Buffer _execs_buf; + RDMAPassive_t _state; + RecvBuffer_t 
_rcv_buffer; + rdmalib::Buffer, Library> _execs_buf; std::string _address; int _port; int _rcv_buf_size; @@ -61,8 +74,8 @@ namespace rfaas { int _invoc_id; // FIXME: global settings size_t _max_inlined_msg; - std::vector _connections; - std::unique_ptr _exec_manager; + std::vector> _connections; + std::unique_ptr> _exec_manager; std::vector _func_names; rdmalib::PerfBenchmarker<8> _perf; @@ -80,13 +93,53 @@ namespace rfaas { // Skipping managers is useful for benchmarking bool allocate(std::string functions_path, int numcores, int max_input_size, int hot_timeout, - bool skip_manager = false, rdmalib::Benchmarker<5> * benchmarker = nullptr); - void deallocate(); - rdmalib::Buffer load_library(std::string path); + bool skip_manager = false, rdmalib::Benchmarker<5> * benchmarker = nullptr) + { + static_cast(this)->allocate(functions_path, numcores, max_input_size, hot_timeout, skip_manager, benchmarker); + } + void deallocate() + { + static_cast(this)->deallocate(); + } + rdmalib::Buffer load_library(std::string path) + { + return static_cast(this)->load_library(path); + } + void poll_queue() + { + static_cast(this)->poll_queue(); + } + + template + std::future async(std::string fname, const rdmalib::Buffer & in, rdmalib::Buffer & out, int64_t size = -1); + + template + std::future async(std::string fname, const std::vector> & in, std::vector> & out); + bool block(); + + // FIXME: irange for cores + // FIXME: now only operates on buffers + //template + //void execute(int numcores, std::string fname, Args &&... 
args) + template + std::tuple execute(std::string fname, const rdmalib::Buffer & in, rdmalib::Buffer & out); + + template + bool execute(std::string fname, const std::vector> & in, std::vector> & out); + }; + + struct libfabric_executor : executor { + using Library = libfabric; + + rdmalib::Buffer load_library(std::string path); void poll_queue(); + bool allocate(std::string functions_path, int numcores, int max_input_size, + int hot_timeout, bool skip_manager, rdmalib::Benchmarker<5> * benchmarker); + void deallocate(); + template - std::future async(std::string fname, const rdmalib::Buffer & in, rdmalib::Buffer & out, int64_t size = -1) + std::future async(std::string fname, const rdmalib::Buffer & in, rdmalib::Buffer & out, int64_t size = -1) { auto it = std::find(_func_names.begin(), _func_names.end(), fname); if(it == _func_names.end()) { @@ -99,11 +152,7 @@ namespace rfaas { char* data = static_cast(in.ptr()); // TODO: we assume here uintptr_t is 8 bytes *reinterpret_cast(data) = out.address(); - #ifdef USE_LIBFABRIC *reinterpret_cast(data + 8) = out.rkey(); - #else - *reinterpret_cast(data + 8) = out.rkey(); - #endif int invoc_id = this->_invoc_id++; //_futures[invoc_id] = std::move(std::promise{}); @@ -114,8 +163,7 @@ namespace rfaas { func_idx, invoc_id, submission_id ); if(size != -1) { - #ifdef USE_LIBFABRIC - rdmalib::ScatterGatherElement sge; + ScatterGatherElement_t sge; sge.add(in, size, 0); _connections[0].conn->post_write( in, @@ -124,8 +172,273 @@ namespace rfaas { _connections[0].remote_input, submission_id ); - #else - rdmalib::ScatterGatherElement sge; + } else { + _connections[0].conn->post_write( + in, + in.bytes(), + 0, + _connections[0].remote_input, + submission_id + ); + } + return std::get<1>(_futures[invoc_id]).get_future(); + } + + template + std::future async(std::string fname, const std::vector> & in, std::vector> & out) + { + auto it = std::find(_func_names.begin(), _func_names.end(), fname); + if(it == _func_names.end()) { + 
spdlog::error("Function {} not found in the deployed library!", fname); + return std::future{}; + } + int func_idx = std::distance(_func_names.begin(), it); + + int invoc_id = this->_invoc_id++; + //_futures[invoc_id] = std::move(std::promise{}); + int numcores = _connections.size(); + _futures[invoc_id] = std::make_tuple(numcores, std::promise{}); + uint32_t submission_id = (invoc_id << 16) | (1 << 15) | func_idx; + for(int i = 0; i < numcores; ++i) { + // FIXME: here get a future for async + char* data = static_cast(in[i].ptr()); + // TODO: we assume here uintptr_t is 8 bytes + *reinterpret_cast(data) = out[i].address(); + *reinterpret_cast(data + 8) = out[i].rkey(); + + SPDLOG_DEBUG("Invoke function {} with invocation id {}", func_idx, _invoc_id); + _connections[i].conn->post_write( + in[i], + in[i].bytes(), + 0, + _connections[i].remote_input, + submission_id + ); + } + + return std::get<1>(_futures[invoc_id]).get_future(); + } + + bool block() + { + _connections[0].conn->poll_wc(rdmalib::QueueType::SEND, true); + + auto wc = _connections[0].conn->poll_wc(rdmalib::QueueType::RECV, true, -1, true); + uint32_t val = std::get<0>(wc)[0].data; + int return_val = val & 0x0000FFFF; + int finished_invoc_id = val >> 16; + if(return_val == 0) { + SPDLOG_DEBUG("Finished invocation {} succesfully", finished_invoc_id); + return true; + } else { + if(val == 1) + spdlog::error("Invocation: {}, Thread busy, cannot post work", finished_invoc_id); + else + spdlog::error("Invocation: {}, Unknown error {}", finished_invoc_id, val); + return false; + } + } + + // FIXME: irange for cores + // FIXME: now only operates on buffers + //template + //void execute(int numcores, std::string fname, Args &&... 
args) + template + std::tuple execute(std::string fname, const rdmalib::Buffer & in, rdmalib::Buffer & out) + { + //_perf.point(); + auto it = std::find(_func_names.begin(), _func_names.end(), fname); + if(it == _func_names.end()) { + spdlog::error("Function {} not found in the deployed library!", fname); + return std::make_tuple(false, 0); + } + int func_idx = std::distance(_func_names.begin(), it); + + // FIXME: here get a future for async + char* data = static_cast(in.ptr()); + // TODO: we assume here uintptr_t is 8 bytes + *reinterpret_cast(data) = out.address(); + *reinterpret_cast(data + 8) = out.rkey(); + + int invoc_id = this->_invoc_id++; + SPDLOG_DEBUG( + "Invoke function {} with invocation id {}, submission id {}", + func_idx, invoc_id, (invoc_id << 16) | func_idx + ); + //_perf.point(1); + _connections[0].conn->post_write( + in, + in.bytes(), + 0, + _connections[0].remote_input, + (invoc_id << 16) | func_idx + ); + _active_polling = true; + //_perf.point(2); + //_perf.point(3); + + bool found_result = false; + int return_value = 0; + int out_size = 0; + while(!found_result) { + auto wc = _connections[0].conn->poll_wc(rdmalib::QueueType::RECV, true, -1, true); + for(int i = 0; i < std::get<1>(wc); ++i) { + //_perf.point(4); + uint64_t val = std::get<0>(wc)[i].data; + int return_val = val & 0x0000FFFF; + int finished_invoc_id = val >> 16 & 0x0000FFFF; + int len = val >> 32; + + if(finished_invoc_id == invoc_id) { + found_result = true; + return_value = return_val; + out_size = len; + // spdlog::info("Result {} for id {}", return_val, finished_invoc_id); + } else { + auto it = _futures.find(finished_invoc_id); + //spdlog::info("Poll Future for id {}", finished_invoc_id); + // if it == end -> we have a bug, should never appear + //(*it).second.set_value(return_val); + if(!--std::get<0>(it->second)) + std::get<1>(it->second).set_value(return_val); + } + } + if(found_result) { + //_perf.point(5); + _active_polling = false; + auto wc = 
_connections[0]._rcv_buffer.poll(false); + // Catch very unlikely interleaving + // Event arrives after we poll while the background thread is skipping + // because we still hold the atomic + // Thus, we later unset the variable since we're done + for(int i = 0; i < std::get<1>(wc); ++i) { + uint32_t val = std::get<0>(wc)[i].data; + int return_val = val & 0x0000FFFF; + int finished_invoc_id = val >> 16; + auto it = _futures.find(finished_invoc_id); + //spdlog::info("Poll Future for id {}", finished_invoc_id); + // if it == end -> we have a bug, should never appear + //(*it).second.set_value(return_val); + if(!--std::get<0>(it->second)) + std::get<1>(it->second).set_value(return_val); + } + //_perf.point(6); + } + } + _connections[0].conn->poll_wc(rdmalib::QueueType::SEND, false); + //_perf.point(7); + if(return_value == 0) { + SPDLOG_DEBUG("Finished invocation {} succesfully", invoc_id); + return std::make_tuple(true, out_size); + } else { + if(return_value == 1) + spdlog::error("Invocation: {}, Thread busy, cannot post work", invoc_id); + else + spdlog::error("Invocation: {}, Unknown error {}", invoc_id, return_value); + return std::make_tuple(false, 0); + } + } + + template + bool execute(std::string fname, const std::vector> & in, std::vector> & out) + { + auto it = std::find(_func_names.begin(), _func_names.end(), fname); + if(it == _func_names.end()) { + spdlog::error("Function {} not found in the deployed library!", fname); + return false; + } + int func_idx = std::distance(_func_names.begin(), it); + + int numcores = _connections.size(); + for(int i = 0; i < numcores; ++i) { + // FIXME: here get a future for async + char* data = static_cast(in[i].ptr()); + // TODO: we assume here uintptr_t is 8 bytes + *reinterpret_cast(data) = out[i].address(); + *reinterpret_cast(data + 8) = out[i].rkey(); + + SPDLOG_DEBUG("Invoke function {} with invocation id {}", func_idx, _invoc_id); + _connections[i].conn->post_write( + in[i], + in[i].bytes(), + 0, + 
_connections[i].remote_input, + (_invoc_id++ << 16) | func_idx + ); + } + + int expected = numcores; + while(expected) { + auto wc = _connections[0].conn->poll_wc(rdmalib::QueueType::SEND, true); + expected -= std::get<1>(wc); + } + + expected = numcores; + bool correct = true; + _active_polling = true; + while(expected) { + auto wc = _connections[0].conn->poll_wc(rdmalib::QueueType::RECV, true, -1, true); + SPDLOG_DEBUG("Found data"); + expected -= std::get<1>(wc); + for(int i = 0; i < std::get<1>(wc); ++i) { + uint32_t val = std::get<0>(wc)[i].data; + int return_val = val & 0x0000FFFF; + int finished_invoc_id = val >> 16; + if(return_val == 0) { + SPDLOG_DEBUG("Finished invocation {} succesfully", finished_invoc_id); + } else { + if(val == 1) + spdlog::error("Invocation: {}, Thread busy, cannot post work", finished_invoc_id); + else + spdlog::error("Invocation: {}, Unknown error {}", finished_invoc_id, val); + } + correct &= return_val == 0; + } + } + _active_polling = false; + + _connections[0]._rcv_buffer._requests += numcores - 1; + for(int i = 1; i < numcores; ++i) + _connections[i]._rcv_buffer._requests--; + return correct; + } + }; + + struct verbs_executor : executor { + using Library = ibverbs; + + rdmalib::Buffer load_library(std::string path); + void poll_queue(); + bool allocate(std::string functions_path, int numcores, int max_input_size, + int hot_timeout, bool skip_manager, rdmalib::Benchmarker<5> * benchmarker); + void deallocate(); + + template + std::future async(std::string fname, const rdmalib::Buffer & in, rdmalib::Buffer & out, int64_t size = -1) + { + auto it = std::find(_func_names.begin(), _func_names.end(), fname); + if(it == _func_names.end()) { + spdlog::error("Function {} not found in the deployed library!", fname); + return std::future{}; + } + int func_idx = std::distance(_func_names.begin(), it); + + // FIXME: here get a future for async + char* data = static_cast(in.ptr()); + // TODO: we assume here uintptr_t is 8 bytes + 
*reinterpret_cast(data) = out.address(); + *reinterpret_cast(data + 8) = out.rkey(); + + int invoc_id = this->_invoc_id++; + //_futures[invoc_id] = std::move(std::promise{}); + _futures[invoc_id] = std::make_tuple(1, std::promise{}); + uint32_t submission_id = (invoc_id << 16) | (1 << 15) | func_idx; + SPDLOG_DEBUG( + "Invoke function {} with invocation id {}, submission id {}", + func_idx, invoc_id, submission_id + ); + if(size != -1) { + ScatterGatherElement_t sge; sge.add(in, size, 0); _connections[0].conn->post_write( std::move(sge), @@ -134,17 +447,7 @@ namespace rfaas { size <= _max_inlined_msg, true ); - #endif } else { - #ifdef USE_LIBFABRIC - _connections[0].conn->post_write( - in, - in.bytes(), - 0, - _connections[0].remote_input, - submission_id - ); - #else _connections[0].conn->post_write( in, _connections[0].remote_input, @@ -152,16 +455,13 @@ namespace rfaas { in.bytes() <= _max_inlined_msg, true ); - #endif } - #ifndef USE_LIBFABRIC _connections[0]._rcv_buffer.refill(); - #endif return std::get<1>(_futures[invoc_id]).get_future(); } template - std::future async(std::string fname, const std::vector> & in, std::vector> & out) + std::future async(std::string fname, const std::vector> & in, std::vector> & out) { auto it = std::find(_func_names.begin(), _func_names.end(), fname); if(it == _func_names.end()) { @@ -180,22 +480,9 @@ namespace rfaas { char* data = static_cast(in[i].ptr()); // TODO: we assume here uintptr_t is 8 bytes *reinterpret_cast(data) = out[i].address(); - #ifdef USE_LIBFABRIC - *reinterpret_cast(data + 8) = out[i].rkey(); - #else *reinterpret_cast(data + 8) = out[i].rkey(); - #endif SPDLOG_DEBUG("Invoke function {} with invocation id {}", func_idx, _invoc_id); - #ifdef USE_LIBFABRIC - _connections[i].conn->post_write( - in[i], - in[i].bytes(), - 0, - _connections[i].remote_input, - submission_id - ); - #else _connections[i].conn->post_write( in[i], _connections[i].remote_input, @@ -203,14 +490,11 @@ namespace rfaas { in[i].bytes() <= 
_max_inlined_msg, true ); - #endif } - #ifndef USE_LIBFABRIC for(int i = 0; i < numcores; ++i) { _connections[i]._rcv_buffer.refill(); } - #endif return std::get<1>(_futures[invoc_id]).get_future(); } @@ -218,16 +502,8 @@ namespace rfaas { { _connections[0].conn->poll_wc(rdmalib::QueueType::SEND, true); - #ifdef USE_LIBFABRIC - auto wc = _connections[0].conn->poll_wc(rdmalib::QueueType::RECV, true, -1, true); - #else auto wc = _connections[0]._rcv_buffer.poll(true); - #endif - #ifdef USE_LIBFABRIC - uint32_t val = std::get<0>(wc)[0].data; - #else uint32_t val = ntohl(std::get<0>(wc)[0].imm_data); - #endif int return_val = val & 0x0000FFFF; int finished_invoc_id = val >> 16; if(return_val == 0) { @@ -247,7 +523,7 @@ namespace rfaas { //template //void execute(int numcores, std::string fname, Args &&... args) template - std::tuple execute(std::string fname, const rdmalib::Buffer & in, rdmalib::Buffer & out) + std::tuple execute(std::string fname, const rdmalib::Buffer & in, rdmalib::Buffer & out) { //_perf.point(); auto it = std::find(_func_names.begin(), _func_names.end(), fname); @@ -261,11 +537,7 @@ namespace rfaas { char* data = static_cast(in.ptr()); // TODO: we assume here uintptr_t is 8 bytes *reinterpret_cast(data) = out.address(); - #ifdef USE_LIBFABRIC - *reinterpret_cast(data + 8) = out.rkey(); - #else *reinterpret_cast(data + 8) = out.rkey(); - #endif int invoc_id = this->_invoc_id++; SPDLOG_DEBUG( @@ -273,59 +545,31 @@ namespace rfaas { func_idx, invoc_id, (invoc_id << 16) | func_idx ); //_perf.point(1); - #ifdef USE_LIBFABRIC - _connections[0].conn->post_write( - in, - in.bytes(), - 0, - _connections[0].remote_input, - (invoc_id << 16) | func_idx - ); - #else _connections[0].conn->post_write( in, _connections[0].remote_input, (invoc_id << 16) | func_idx, in.bytes() <= _max_inlined_msg ); - #endif _active_polling = true; //_perf.point(2); - #ifndef USE_LIBFABRIC _connections[0]._rcv_buffer.refill(); - #endif //_perf.point(3); bool found_result = false; 
int return_value = 0; int out_size = 0; while(!found_result) { - #ifdef USE_LIBFABRIC - auto wc = _connections[0].conn->poll_wc(rdmalib::QueueType::RECV, true, -1, true); - #else auto wc = _connections[0]._rcv_buffer.poll(true); - #endif for(int i = 0; i < std::get<1>(wc); ++i) { - #ifdef USE_LIBFABRIC - //_perf.point(4); - uint64_t val = std::get<0>(wc)[i].data; - int return_val = val & 0x0000FFFF; - int finished_invoc_id = val >> 16 & 0x0000FFFF; - int len = val >> 32; - #else uint32_t val = ntohl(std::get<0>(wc)[i].imm_data); int return_val = val & 0x0000FFFF; int finished_invoc_id = val >> 16; - #endif if(finished_invoc_id == invoc_id) { found_result = true; return_value = return_val; - #ifdef USE_LIBFABRIC - out_size = len; - #else out_size = std::get<0>(wc)[i].byte_len; - #endif // spdlog::info("Result {} for id {}", return_val, finished_invoc_id); } else { auto it = _futures.find(finished_invoc_id); @@ -339,18 +583,13 @@ namespace rfaas { if(found_result) { //_perf.point(5); _active_polling = false; - #ifndef USE_LIBFABRIC auto wc = _connections[0]._rcv_buffer.poll(false); // Catch very unlikely interleaving // Event arrives after we poll while the background thread is skipping // because we still hold the atomic // Thus, we later unset the variable since we're done for(int i = 0; i < std::get<1>(wc); ++i) { - #ifdef USE_LIBFABRIC - uint32_t val = std::get<0>(wc)[i].data; - #else uint32_t val = ntohl(std::get<0>(wc)[i].imm_data); - #endif int return_val = val & 0x0000FFFF; int finished_invoc_id = val >> 16; auto it = _futures.find(finished_invoc_id); @@ -360,7 +599,6 @@ namespace rfaas { if(!--std::get<0>(it->second)) std::get<1>(it->second).set_value(return_val); } - #endif //_perf.point(6); } } @@ -379,7 +617,7 @@ namespace rfaas { } template - bool execute(std::string fname, const std::vector> & in, std::vector> & out) + bool execute(std::string fname, const std::vector> & in, std::vector> & out) { auto it = std::find(_func_names.begin(), 
_func_names.end(), fname); if(it == _func_names.end()) { @@ -394,36 +632,20 @@ namespace rfaas { char* data = static_cast(in[i].ptr()); // TODO: we assume here uintptr_t is 8 bytes *reinterpret_cast(data) = out[i].address(); - #ifdef USE_LIBFABRIC - *reinterpret_cast(data + 8) = out[i].rkey(); - #else *reinterpret_cast(data + 8) = out[i].rkey(); - #endif SPDLOG_DEBUG("Invoke function {} with invocation id {}", func_idx, _invoc_id); - #ifdef USE_LIBFABRIC - _connections[i].conn->post_write( - in[i], - in[i].bytes(), - 0, - _connections[i].remote_input, - (_invoc_id++ << 16) | func_idx - ); - #else _connections[i].conn->post_write( in[i], _connections[i].remote_input, (_invoc_id++ << 16) | func_idx, in[i].bytes() <= _max_inlined_msg ); - #endif } - #ifndef USE_LIBFABRIC for(int i = 0; i < numcores; ++i) { _connections[i]._rcv_buffer.refill(); } - #endif int expected = numcores; while(expected) { auto wc = _connections[0].conn->poll_wc(rdmalib::QueueType::SEND, true); @@ -434,19 +656,11 @@ namespace rfaas { bool correct = true; _active_polling = true; while(expected) { - #ifdef USE_LIBFABRIC - auto wc = _connections[0].conn->poll_wc(rdmalib::QueueType::RECV, true, -1, true); - #else auto wc = _connections[0]._rcv_buffer.poll(true); - #endif SPDLOG_DEBUG("Found data"); expected -= std::get<1>(wc); for(int i = 0; i < std::get<1>(wc); ++i) { - #ifdef USE_LIBFABRIC - uint32_t val = std::get<0>(wc)[i].data; - #else uint32_t val = ntohl(std::get<0>(wc)[i].imm_data); - #endif int return_val = val & 0x0000FFFF; int finished_invoc_id = val >> 16; if(return_val == 0) { diff --git a/rfaas/lib/connection.cpp b/rfaas/lib/connection.cpp index 7177d66..4aa6c68 100644 --- a/rfaas/lib/connection.cpp +++ b/rfaas/lib/connection.cpp @@ -51,7 +51,7 @@ namespace rfaas { // Send deallocation request only if we're connected if(_active.is_connected()) { request() = (rdmalib::AllocationRequest) {-1, 0, 0, 0, 0, 0, 0, ""}; - rdmalib::ScatterGatherElement sge; + SGE_t sge; size_t obj_size = 
sizeof(rdmalib::AllocationRequest); sge.add(_allocation_buffer, obj_size, obj_size*_rcv_buffer._rcv_buf_size); _active.connection().post_send(sge); @@ -61,7 +61,7 @@ namespace rfaas { } template - manager_connection::Connection_t & manager_connection::connection() + typename manager_connection::Connection_t & manager_connection::connection() { return _active.connection(); } @@ -75,7 +75,7 @@ namespace rfaas { template bool manager_connection::submit() { - rdmalib::ScatterGatherElement sge; + SGE_t sge; size_t obj_size = sizeof(rdmalib::AllocationRequest); sge.add(_allocation_buffer, obj_size, obj_size*_rcv_buffer._rcv_buf_size); _active.connection().post_send(sge); diff --git a/rfaas/lib/executor.cpp b/rfaas/lib/executor.cpp index 02c48d9..fba64d8 100644 --- a/rfaas/lib/executor.cpp +++ b/rfaas/lib/executor.cpp @@ -34,7 +34,8 @@ namespace rfaas { return _timeout; } - executor_state::executor_state(rdmalib::Connection* conn, int rcv_buf_size): + template + executor_state::executor_state(Connection_t* conn, int rcv_buf_size): conn(conn), _rcv_buffer(rcv_buf_size) { @@ -66,17 +67,19 @@ namespace rfaas { _end_requested = false; } - executor::executor(device_data & dev): + template + executor::executor(device_data & dev): executor(dev.ip_address, dev.port, dev.default_receive_buffer_size, dev.max_inline_data) {} - executor::~executor() + template + executor::~executor() { this->deallocate(); _perf.export_csv("client_perf.csv", {"start", "function parsed", "function post written", "buffer refilled", "received result", "parsed result", "catched unlikely case", "polled send"}); } - rdmalib::Buffer executor::load_library(std::string path) + rdmalib::Buffer libfabric_executor::load_library(std::string path) { _func_names.clear(); // Load the shared library with functions code @@ -84,13 +87,70 @@ namespace rfaas { fseek (file, 0 , SEEK_END); size_t len = ftell(file); rewind(file); - rdmalib::Buffer functions(len); + rdmalib::Buffer functions(len); 
rdmalib::impl::expect_true(fread(functions.data(), 1, len, file) == len); - #ifdef USE_LIBFABRIC functions.register_memory(_state.pd(), FI_WRITE); - #else + fclose(file); + + // FIXME: same function as in server/functions.cpp - merge? + // https://stackoverflow.com/questions/25270275/get-functions-names-in-a-shared-library-programmatically + void* library_handle; + rdmalib::impl::expect_nonnull( + library_handle = dlopen( + path.c_str(), + RTLD_NOW + ), + [](){ spdlog::error(dlerror()); } + ); + struct link_map * map = nullptr; + dlinfo(library_handle, RTLD_DI_LINKMAP, &map); + + Elf64_Sym * symtab = nullptr; + char * strtab = nullptr; + int symentries = 0; + for (auto section = map->l_ld; section->d_tag != DT_NULL; ++section) + { + if (section->d_tag == DT_SYMTAB) + { + symtab = (Elf64_Sym *)section->d_un.d_ptr; + } + if (section->d_tag == DT_STRTAB) + { + strtab = (char*)section->d_un.d_ptr; + } + if (section->d_tag == DT_SYMENT) + { + symentries = section->d_un.d_val; + } + } + int size = strtab - (char *)symtab; + for (int k = 0; k < size / symentries; ++k) + { + auto sym = &symtab[k]; + // If sym is function + if (ELF64_ST_TYPE(symtab[k].st_info) == STT_FUNC) + { + //str is name of each symbol + _func_names.emplace_back(&strtab[sym->st_name]); + } + } + std::sort(_func_names.begin(), _func_names.end()); + dlclose(library_handle); + + return functions; + } + + rdmalib::Buffer verbs_executor::load_library(std::string path) + { + _func_names.clear(); + // Load the shared library with functions code + FILE* file = fopen(path.c_str(), "rb"); + fseek (file, 0 , SEEK_END); + size_t len = ftell(file); + rewind(file); + rdmalib::Buffer functions(len); + rdmalib::impl::expect_true(fread(functions.data(), 1, len, file) == len); functions.register_memory(_state.pd(), IBV_ACCESS_LOCAL_WRITE); - #endif fclose(file); // FIXME: same function as in server/functions.cpp - merge? 
@@ -141,7 +201,24 @@ namespace rfaas { return functions; } - void executor::deallocate() + void libfabric_executor::deallocate() + { + if(_exec_manager) { + _end_requested = true; + // The background thread could be nullptr if we failed in the allocation process + if(_background_thread) { + _background_thread->join(); + _background_thread.reset(); + } + _exec_manager->disconnect(); + _exec_manager.reset(nullptr); + + // Clear up old connections + _connections.clear(); + } + } + + void verbs_executor::deallocate() { if(_exec_manager) { _end_requested = true; @@ -152,33 +229,20 @@ namespace rfaas { } _exec_manager->disconnect(); _exec_manager.reset(nullptr); - #ifndef USE_LIBFABRIC _state._cfg.attr.send_cq = _state._cfg.attr.recv_cq = 0; - #endif // Clear up old connections _connections.clear(); } } - void executor::poll_queue() + void libfabric_executor::poll_queue() { // FIXME: hide the details in rdmalib spdlog::info("Background thread starts waiting for events"); - #ifdef USE_LIBFABRIC int rc = 1; - #else - _connections[0].conn->notify_events(true); - int flags = fcntl(_connections[0].conn->completion_channel()->fd, F_GETFL); - int rc = fcntl(_connections[0].conn->completion_channel()->fd, F_SETFL, flags | O_NONBLOCK); - if (rc < 0) { - fprintf(stderr, "Failed to change file descriptor of completion event channel\n"); - return; - } - #endif while(!_end_requested && _connections.size()) { - #ifdef USE_LIBFABRIC do { if(_active_polling) std::this_thread::sleep_for(std::chrono::milliseconds(100)); @@ -189,7 +253,52 @@ namespace rfaas { return; } } while (rc != 0); - #else + if (rc < 0) { + fprintf(stderr, "poll failed\n"); + return; + } + if(!_end_requested && !_active_polling) { + auto wc = _connections[0].conn->poll_wc(rdmalib::QueueType::RECV, false, -1, true); + for(int i = 0; i < std::get<1>(wc); ++i) { + uint32_t val = std::get<0>(wc)[i].data; + int return_val = val & 0x0000FFFF; + int finished_invoc_id = val >> 16; + auto it = 
_futures.find(finished_invoc_id); + // if it == end -> we have a bug, should never appear + //spdlog::info("Future for id {}", finished_invoc_id); + //(*it).second.set_value(return_val); + // FIXME: handle error + if(!--std::get<0>(it->second)) { + std::get<1>(it->second).set_value(return_val); + // FIXME + // + _connections[0]._rcv_buffer._requests += _connections.size() - 1; + for(size_t i = 1; i < _connections.size(); ++i) + _connections[i]._rcv_buffer._requests--; + } + } + // Poll completions from past sends + for(auto & conn : _connections) + conn.conn->poll_wc(rdmalib::QueueType::SEND, false); + } + } + spdlog::info("Background thread stops waiting for events"); + + } + + void verbs_executor::poll_queue() + { + // FIXME: hide the details in rdmalib + spdlog::info("Background thread starts waiting for events"); + _connections[0].conn->notify_events(true); + int flags = fcntl(_connections[0].conn->completion_channel()->fd, F_GETFL); + int rc = fcntl(_connections[0].conn->completion_channel()->fd, F_SETFL, flags | O_NONBLOCK); + if (rc < 0) { + fprintf(stderr, "Failed to change file descriptor of completion event channel\n"); + return; + } + + while(!_end_requested && _connections.size()) { pollfd my_pollfd; my_pollfd.fd = _connections[0].conn->completion_channel()->fd; my_pollfd.events = POLLIN; @@ -201,28 +310,17 @@ namespace rfaas { return; } } while (rc == 0); - #endif if (rc < 0) { fprintf(stderr, "poll failed\n"); return; } if(!_end_requested && !_active_polling) { - #ifndef USE_LIBFABRIC auto cq = _connections[0].conn->wait_events(); _connections[0].conn->notify_events(true); _connections[0].conn->ack_events(cq, 1); - #endif - #ifdef USE_LIBFABRIC - auto wc = _connections[0].conn->poll_wc(rdmalib::QueueType::RECV, false, -1, true); - #else auto wc = _connections[0]._rcv_buffer.poll(false); - #endif for(int i = 0; i < std::get<1>(wc); ++i) { - #ifdef USE_LIBFABRIC - uint32_t val = std::get<0>(wc)[i].data; - #else uint32_t val = 
ntohl(std::get<0>(wc)[i].imm_data); - #endif int return_val = val & 0x0000FFFF; int finished_invoc_id = val >> 16; auto it = _futures.find(finished_invoc_id); @@ -245,46 +343,151 @@ namespace rfaas { } } spdlog::info("Background thread stops waiting for events"); + } - // Wait for event - // Ask for next events - // Check if no one is polling - // Check data - //while(!_end_requested && _connections.size()) { - //std::cout << ("Sleep \n"); - //auto cq = _connections[0].conn->wait_events(); - //std::cout << ("Wake up + " + std::to_string(_end_requested) + "\n"); - //if(_end_requested) - // break; - //if(_connections.size() > 0) { - // //std::cout << ("Check connections\n"); - // _connections[0].conn->notify_events(true); - // _connections[0].conn->ack_events(cq, 1); - // //spdlog::info("wake up! {}", _active_polling); - // if(!_active_polling) { - // auto wc = _connections[0]._rcv_buffer.poll(false); - // for(int i = 0; i < std::get<1>(wc); ++i) { - // uint32_t val = ntohl(std::get<0>(wc)[i].imm_data); - // int return_val = val & 0x0000FFFF; - // int finished_invoc_id = val >> 16; - // auto it = _futures.find(finished_invoc_id); - // // if it == end -> we have a bug, should never appear - // //spdlog::info("Future for id {}", finished_invoc_id); - // (*it).second.set_value(return_val); - // // FIXME: handle error - // } - // // Poll completions from past sends - // _connections[0].conn->poll_wc(rdmalib::QueueType::SEND, false); - // } - //} + bool libfabric_executor::allocate(std::string functions_path, int numcores, int max_input_size, + int hot_timeout, bool skip_manager, rdmalib::Benchmarker<5> * benchmarker) + { + rdmalib::Buffer functions = load_library(functions_path); + if(!skip_manager) { + // FIXME: handle more than one manager + servers & instance = servers::instance(); + auto selected_servers = instance.select(numcores); + + _exec_manager.reset( + new manager_connection( + instance.server(selected_servers[0]).address, + 
instance.server(selected_servers[0]).port, + _rcv_buf_size, + _max_inlined_msg + ) + ); + // Measure connection time + if(benchmarker) + benchmarker->start(); + bool ret = _exec_manager->connect(); + if(benchmarker) { + benchmarker->end(0); + benchmarker->start(); + } + if(!ret) + return false; + + _exec_manager->request() = (rdmalib::AllocationRequest) { + static_cast(hot_timeout), + // FIXME: timeout + 5, + static_cast(numcores), + // FIXME: variable number of inputs + 1, + max_input_size, + functions.data_size(), + _port, + "" + }; + strcpy(_exec_manager->request().listen_address, _address.c_str()); + _exec_manager->submit(); + // Measure submission time + if(benchmarker) { + benchmarker->end(1); + benchmarker->start(); + } + } + + SPDLOG_DEBUG("Allocating {} threads on a remote executor", numcores); + // Now receive the connections from executors + uint32_t obj_size = sizeof(rdmalib::BufferInformation); + + // Accept connect requests, fill receive buffers and accept them. + // When the connection is established, then send data. 
+ this->_connections.reserve(numcores); + int requested = 0, established = 0; + while(established < numcores) { + + //while(conn_status != rdmalib::ConnectionStatus::REQUESTED) + auto [conn, conn_status] = _state.poll_events(true); + if(conn_status == rdmalib::ConnectionStatus::REQUESTED) { + SPDLOG_DEBUG( + "[Executor] Requested connection from executor {}, connection {}", + requested + 1, fmt::ptr(conn) + ); + this->_connections.emplace_back( + conn, + _rcv_buf_size + ); + this->_connections.back().conn->post_recv(_execs_buf.sge(obj_size, requested*obj_size), requested); + _state.accept(this->_connections.back().conn.get()); + ++requested; + } else if(conn_status == rdmalib::ConnectionStatus::ESTABLISHED) { + SPDLOG_DEBUG( + "[Executor] Established connection to executor {}, connection {}", + established + 1, fmt::ptr(conn) + ); + conn->post_send(functions); + SPDLOG_DEBUG("Connected thread {}/{} and submitted function code.", established + 1, numcores); + ++established; + } + // FIXME: fix handling of disconnection + else { + spdlog::error("Unhandled connection event {} in executor allocation", conn_status); + } + } + + // Measure process spawn time + if(benchmarker) { + benchmarker->end(2); + benchmarker->start(); + } + + // Now receive buffer information + int received = 0; + while(received < numcores) { + auto wcs = this->_connections[0].conn->poll_wc(rdmalib::QueueType::RECV, true); + for(int i = 0; i < std::get<1>(wcs); ++i) { + int id = reinterpret_cast(std::get<0>(wcs)[i].op_context); + SPDLOG_DEBUG( + "Received buffer details for thread, id {}, addr {}, rkey {}", + id, _execs_buf.data()[id].r_addr, _execs_buf.data()[id].r_key + ); + _connections[id].remote_input = rdmalib::RemoteBuffer( + _execs_buf.data()[id].r_addr, + _execs_buf.data()[id].r_key + ); + } + received += std::get<1>(wcs); + } + + received = 0; + _active_polling = false; + // Ensure that we are able to process asynchronous replies + // before we start any submissionk. 
+ // FIXME: extend to multiple connections + _background_thread.reset( + new std::thread{ + &executor::poll_queue, + this + } + ); + while(received < numcores) { + auto wcs = this->_connections[0].conn->poll_wc(rdmalib::QueueType::SEND, true); + received += std::get<1>(wcs); + } + // Measure initial configuration submission + if(benchmarker) { + benchmarker->end(3); + benchmarker->start(); + } + //if(_background_thread) { + // _background_thread->detach(); //} - //spdlog::info("Background thread stops waiting for events"); + SPDLOG_DEBUG("Code submission for all threads is finished"); + return true; } - bool executor::allocate(std::string functions_path, int numcores, int max_input_size, + bool verbs_executor::allocate(std::string functions_path, int numcores, int max_input_size, int hot_timeout, bool skip_manager, rdmalib::Benchmarker<5> * benchmarker) { - rdmalib::Buffer functions = load_library(functions_path); + rdmalib::Buffer functions = load_library(functions_path); if(!skip_manager) { // FIXME: handle more than one manager servers & instance = servers::instance(); @@ -332,7 +535,7 @@ namespace rfaas { SPDLOG_DEBUG("Allocating {} threads on a remote executor", numcores); // Now receive the connections from executors - uint32_t obj_size = sizeof(rdmalib::BufferInformation); + uint32_t obj_size = sizeof(rdmalib::BufferInformation); // Accept connect requests, fill receive buffers and accept them. // When the connection is established, then send data. 
@@ -351,14 +554,10 @@ namespace rfaas { conn, _rcv_buf_size ); - #ifdef USE_LIBFABRIC - this->_connections.back().conn->post_recv(_execs_buf.sge(obj_size, requested*obj_size), requested); - #else this->_connections.back().conn->post_recv(_execs_buf.sge(obj_size, requested*obj_size), requested); // FIXME: this should be in a function // FIXME: here it won't work if rcv_bufer_size < numcores this->_connections.back()._rcv_buffer.connect(this->_connections.back().conn.get()); - #endif _state.accept(this->_connections.back().conn.get()); ++requested; } else if(conn_status == rdmalib::ConnectionStatus::ESTABLISHED) { @@ -387,11 +586,7 @@ namespace rfaas { while(received < numcores) { auto wcs = this->_connections[0].conn->poll_wc(rdmalib::QueueType::RECV, true); for(int i = 0; i < std::get<1>(wcs); ++i) { - #ifdef USE_LIBFABRIC - int id = reinterpret_cast(std::get<0>(wcs)[i].op_context); - #else int id = std::get<0>(wcs)[i].wr_id; - #endif SPDLOG_DEBUG( "Received buffer details for thread, id {}, addr {}, rkey {}", id, _execs_buf.data()[id].r_addr, _execs_buf.data()[id].r_key @@ -408,9 +603,7 @@ namespace rfaas { _active_polling = false; // Ensure that we are able to process asynchronous replies // before we start any submissionk. 
- #ifndef USE_LIBFABRIC _connections[0].conn->notify_events(true); - #endif // FIXME: extend to multiple connections _background_thread.reset( new std::thread{ From 5de685708c7de38d774bf78d23927f6e36f2c72f Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Fri, 22 Sep 2023 01:40:48 +0000 Subject: [PATCH 80/91] Standardized traits and SGE issue --- rdmalib/include/rdmalib/buffer.hpp | 15 ++---- rdmalib/include/rdmalib/connection.hpp | 50 ++++++++---------- rdmalib/include/rdmalib/libraries.hpp | 69 ++++++++++++++++++++++++- rdmalib/include/rdmalib/rdmalib.hpp | 38 -------------- rdmalib/include/rdmalib/recv_buffer.hpp | 4 ++ rdmalib/lib/connection.cpp | 26 +++++----- rfaas/include/rfaas/connection.hpp | 8 +-- rfaas/include/rfaas/executor.hpp | 26 ++++++---- rfaas/lib/connection.cpp | 4 +- rfaas/lib/executor.cpp | 33 +++++++----- 10 files changed, 151 insertions(+), 122 deletions(-) diff --git a/rdmalib/include/rdmalib/buffer.hpp b/rdmalib/include/rdmalib/buffer.hpp index eb28f03..c4f8c60 100644 --- a/rdmalib/include/rdmalib/buffer.hpp +++ b/rdmalib/include/rdmalib/buffer.hpp @@ -15,12 +15,8 @@ namespace rdmalib { - template - struct ScatterGatherElement; - namespace impl { - // move non-template methods from header template struct Buffer @@ -30,11 +26,7 @@ namespace rdmalib using pd_t = typename library_traits::pd_t; using lkey_t = typename library_traits::lkey_t; using rkey_t = typename library_traits::rkey_t; - - // TODO: DEAL WITH THIS - //using SGE = library_traits::LibSGE; - template - using SGE = ScatterGatherElement; + using ScatterGatherElement_t = typename ::rdmalib::rdmalib_traits::ScatterGatherElement; uint32_t _size; uint32_t _header; @@ -51,7 +43,7 @@ namespace rdmalib Buffer &operator=(Buffer &&obj); ~Buffer() { - static_cast(this)->destroy(); + static_cast(this)->destroy(); } public: @@ -76,8 +68,7 @@ namespace rdmalib { return static_cast(this)->rkey(); } - template - SGE sge(uint32_t size, uint32_t offset) const + ScatterGatherElement_t sge(uint32_t 
size, uint32_t offset) const { return {address() + offset, size, lkey()}; } diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index bdea0c2..0e624e3 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -61,8 +61,7 @@ namespace rdmalib { using wc_t = typename library_traits::wc_t; using id_t = typename library_traits::id_t; using channel_t = typename library_traits::channel_t; - template // TODO: remove this generic. should be a trait - using SGE = ScatterGatherElement; + using ScatterGatherElement_t = typename ::rdmalib::rdmalib_traits::ScatterGatherElement; using RemoteBuffer_t = RemoteBuffer; qp_t _qp; @@ -114,30 +113,24 @@ namespace rdmalib { std::tuple poll_wc(QueueType, bool blocking = true, int count = -1, bool update = false); - template - int32_t post_send(const SGE & elem, int32_t id = -1, bool force_inline = false); - template - int32_t post_recv(SGE && elem, int32_t id = -1, int32_t count = 1); + int32_t post_send(const ScatterGatherElement_t & elem, int32_t id = -1, bool force_inline = false); + int32_t post_recv(ScatterGatherElement_t && elem, int32_t id = -1, int32_t count = 1); int32_t post_batched_empty_recv(int32_t count = 1); - template - int32_t post_write(SGE && elems, const RemoteBuffer_t & buf, bool force_inline = false); + int32_t post_write(ScatterGatherElement_t && elems, const RemoteBuffer_t & buf, bool force_inline = false); // Solicited makes sense only for RDMA write with immediate - template - int32_t post_write(SGE && elems, const RemoteBuffer_t & buf, + int32_t post_write(ScatterGatherElement_t && elems, const RemoteBuffer_t & buf, uint32_t immediate, bool force_inline = false, bool solicited = false ); - template - int32_t post_cas(SGE && elems, const RemoteBuffer_t & buf, uint64_t compare, uint64_t swap); + int32_t post_cas(ScatterGatherElement_t && elems, const RemoteBuffer_t & buf, uint64_t compare, uint64_t swap); }; struct 
LibfabricConnection : Connection { template using Buffer = Buffer; - using SGE = LibfabricScatterGatherElement; fid_cq *_rcv_channel; fid_cq *_trx_channel; @@ -145,7 +138,7 @@ namespace rdmalib { uint64_t _counter; fid_domain* _domain = nullptr; - std::array _rwc_sges; + std::array _rwc_sges; fi_cq_err_entry _ewc; @@ -168,12 +161,13 @@ namespace rdmalib { channel_t receive_completion_channel() const; channel_t transmit_completion_channel() const; - int32_t post_cas(SGE && elems, const RemoteBuffer_t & rbuf, uint64_t compare, uint64_t swap); - int32_t post_send(const SGE & elems, int32_t id, bool force_inline); + int32_t post_cas(ScatterGatherElement_t && elems, const RemoteBuffer_t & rbuf, uint64_t compare, uint64_t swap); + int32_t post_send(const ScatterGatherElement_t & elems, int32_t id, bool force_inline); int32_t post_batched_empty_recv(int count); - int32_t post_recv(SGE && elem, int32_t id, int count); + int32_t post_recv(ScatterGatherElement_t && elem, int32_t id, int count); - template int32_t post_write(const Buffer & buf, const size_t size, const uint64_t offset, const RemoteBuffer_t & rbuf, const uint32_t immediate) { + template + int32_t post_write(const Buffer & buf, const size_t size, const uint64_t offset, const RemoteBuffer_t & rbuf, const uint32_t immediate) { int ret = fi_writedata(_qp, (void *)(buf.address() + offset), size, buf.lkey(), immediate + (size << 32), NULL, rbuf.addr, rbuf.rkey, (void *)(_req_count++)); if(ret) { spdlog::error("Post write unsuccessful, reason {} {}, buf size {}, id {}, remote addr {}, remote rkey {}, imm data {}, connection {}", @@ -198,21 +192,19 @@ namespace rdmalib { // Register to be notified about all events, including unsolicited ones int wait_events(int timeout = -1); - int32_t _post_write(SGE && elems, const RemoteBuffer_t & rbuf, const uint32_t immediate = 0); - int32_t post_write(SGE && elems, const RemoteBuffer_t & rbuf, bool force_inline); + int32_t _post_write(ScatterGatherElement_t && elems, const 
RemoteBuffer_t & rbuf, const uint32_t immediate = 0); + int32_t post_write(ScatterGatherElement_t && elems, const RemoteBuffer_t & rbuf, bool force_inline); std::tuple poll_wc(QueueType type, bool blocking=true, int count=-1, bool update=false); }; struct VerbsConnection : Connection { - using SGE = VerbsScatterGatherElement; - id_t _id; channel_t _channel; struct ibv_recv_wr _batch_wrs[_rbatch]; // preallocated and prefilled batched recv. - std::array _rwc_sges; + std::array _rwc_sges; VerbsConnection(bool passive=false); VerbsConnection(VerbsConnection&& obj); @@ -230,18 +222,18 @@ namespace rdmalib { void initialize(rdma_cm_id* id); ibv_comp_channel* completion_channel() const; - int32_t post_send(const SGE & elems, int32_t id, bool force_inline); + int32_t post_send(const ScatterGatherElement_t & elems, int32_t id, bool force_inline); int32_t post_batched_empty_recv(int count); - int32_t post_recv(SGE && elem, int32_t id, int count); - int32_t post_cas(SGE && elems, const RemoteBuffer_t & rbuf, uint64_t compare, uint64_t swap); - int32_t post_atomic_fadd(SGE && elems, const RemoteBuffer_t & rbuf, uint64_t add); + int32_t post_recv(ScatterGatherElement_t && elem, int32_t id, int count); + int32_t post_cas(ScatterGatherElement_t && elems, const RemoteBuffer_t & rbuf, uint64_t compare, uint64_t swap); + int32_t post_atomic_fadd(ScatterGatherElement_t && elems, const RemoteBuffer_t & rbuf, uint64_t add); void notify_events(bool only_solicited = false); ibv_cq* wait_events(); void ack_events(ibv_cq* cq, int len); - int32_t _post_write(SGE && elems, ibv_send_wr wr, bool force_inline, bool force_solicited); - int32_t post_write(SGE && elems, const RemoteBuffer_t & rbuf, bool force_inline); + int32_t _post_write(ScatterGatherElement_t && elems, ibv_send_wr wr, bool force_inline, bool force_solicited); + int32_t post_write(ScatterGatherElement_t && elems, const RemoteBuffer_t & rbuf, bool force_inline); std::tuple poll_wc(QueueType type, bool blocking=true, int 
count=-1); diff --git a/rdmalib/include/rdmalib/libraries.hpp b/rdmalib/include/rdmalib/libraries.hpp index 14b32fb..3b712de 100644 --- a/rdmalib/include/rdmalib/libraries.hpp +++ b/rdmalib/include/rdmalib/libraries.hpp @@ -56,4 +56,71 @@ struct library_traits using channel_t = ibv_comp_channel *; }; -#endif \ No newline at end of file +namespace rdmalib { + template + struct ScatterGatherElement; + + struct LibfabricScatterGatherElement; + struct VerbsScatterGatherElement; + + /* + template + struct sge_trait; + + template <> + struct sge_trait + { + using ScatterGatherElement = LibfabricScatterGatherElement; + }; + + template <> + struct sge_trait + { + using ScatterGatherElement = VerbsScatterGatherElement; + }; + */ + + template + struct rdmalib_traits; + + struct LibfabricConnection; + struct VerbsConnection; + + struct LibfabricRemoteBuffer; + struct VerbsRemoteBuffer; + + struct LibfabricAddress; + struct VerbsAddress; + + struct LibfabricRDMAActive; + struct VerbsRDMAActive; + struct LibfabricRDMAPassive; + struct VerbsRDMAPassive; + + struct LibfabricRecvBuffer; + struct VerbsRecvBuffer; + + template <> + struct rdmalib_traits { + using Connection = LibfabricConnection; + using Address = LibfabricAddress; + using RDMAActive = LibfabricRDMAActive; + using RDMAPassive = LibfabricRDMAPassive; + using RecvBuffer = LibfabricRecvBuffer; + using ScatterGatherElement = LibfabricScatterGatherElement; + using RemoteBuffer = LibfabricRemoteBuffer; + }; + + template <> + struct rdmalib_traits { + using Connection = VerbsConnection; + using Address = VerbsAddress; + using RDMAActive = VerbsRDMAActive; + using RDMAPassive = VerbsRDMAPassive; + using RecvBuffer = VerbsRecvBuffer; + using ScatterGatherElement = VerbsScatterGatherElement; + using RemoteBuffer = VerbsRemoteBuffer; + }; +} + +#endif diff --git a/rdmalib/include/rdmalib/rdmalib.hpp b/rdmalib/include/rdmalib/rdmalib.hpp index dbbd341..0c82cc1 100644 --- a/rdmalib/include/rdmalib/rdmalib.hpp +++ 
b/rdmalib/include/rdmalib/rdmalib.hpp @@ -25,44 +25,6 @@ extern "C" { #include #include -template -struct rdmalib_traits; - -namespace rdmalib { - struct LibfabricAddress; - struct VerbsAddress; - - struct LibfabricRDMAActive; - struct VerbsRDMAActive; - struct LibfabricRDMAPassive; - struct VerbsRDMAPassive; - - struct LibfabricRecvBuffer; - struct VerbsRecvBuffer; -} - -template <> -struct rdmalib_traits { - using Connection = rdmalib::LibfabricConnection; - using Address = rdmalib::LibfabricAddress; - using RDMAActive = rdmalib::LibfabricRDMAActive; - using RDMAPassive = rdmalib::LibfabricRDMAPassive; - using RecvBuffer = rdmalib::LibfabricRecvBuffer; - using ScatterGatherElement = rdmalib::LibfabricScatterGatherElement; - using RemoteBuffer = rdmalib::LibfabricRemoteBuffer; -}; - -template <> -struct rdmalib_traits { - using Connection = rdmalib::VerbsConnection; - using Address = rdmalib::VerbsAddress; - using RDMAActive = rdmalib::VerbsRDMAActive; - using RDMAPassive = rdmalib::VerbsRDMAPassive; - using RecvBuffer = rdmalib::VerbsRecvBuffer; - using ScatterGatherElement = rdmalib::VerbsScatterGatherElement; - using RemoteBuffer = rdmalib::VerbsRemoteBuffer; -}; - namespace rdmalib { struct Configuration { diff --git a/rdmalib/include/rdmalib/recv_buffer.hpp b/rdmalib/include/rdmalib/recv_buffer.hpp index 4da4998..9baf6df 100644 --- a/rdmalib/include/rdmalib/recv_buffer.hpp +++ b/rdmalib/include/rdmalib/recv_buffer.hpp @@ -60,6 +60,8 @@ namespace rdmalib struct LibfabricRecvBuffer : RecvBuffer { + LibfabricRecvBuffer(int rcv_buf_size) : RecvBuffer(rcv_buf_size) {}; + inline std::tuple poll(bool blocking = false) { auto wc = this->_conn->poll_wc(rdmalib::QueueType::RECV, blocking); @@ -72,6 +74,8 @@ namespace rdmalib struct VerbsRecvBuffer : RecvBuffer { + VerbsRecvBuffer(int rcv_buf_size) : RecvBuffer(rcv_buf_size) {}; + inline std::tuple poll(bool blocking = false) { auto wc = this->_conn->poll_wc(rdmalib::QueueType::RECV, blocking); diff --git 
a/rdmalib/lib/connection.cpp b/rdmalib/lib/connection.cpp index b6eb80b..ac1efce 100644 --- a/rdmalib/lib/connection.cpp +++ b/rdmalib/lib/connection.cpp @@ -271,7 +271,7 @@ namespace rdmalib { this->_private_data = private_data; } - int32_t LibfabricConnection::post_send(const SGE & elems, int32_t id, bool force_inline) + int32_t LibfabricConnection::post_send(const ScatterGatherElement_t & elems, int32_t id, bool force_inline) { // FIXME: extend with multiple sges id = id == -1 ? _req_count++ : id; @@ -290,7 +290,7 @@ namespace rdmalib { return _req_count - 1; } - int32_t VerbsConnection::post_send(const SGE & elems, int32_t id, bool force_inline) + int32_t VerbsConnection::post_send(const ScatterGatherElement_t & elems, int32_t id, bool force_inline) { // FIXME: extend with multiple sges struct ibv_send_wr wr, *bad; @@ -409,7 +409,7 @@ namespace rdmalib { return count; } - int32_t LibfabricConnection::post_recv(SGE && elem, int32_t id, int count) + int32_t LibfabricConnection::post_recv(ScatterGatherElement_t && elem, int32_t id, int count) { fi_addr_t temp = 0; id = id == -1 ? 
_req_count++ : id; @@ -435,7 +435,7 @@ namespace rdmalib { return id; } - int32_t VerbsConnection::post_recv(SGE && elem, int32_t id, int count) + int32_t VerbsConnection::post_recv(ScatterGatherElement_t && elem, int32_t id, int count) { // FIXME: extend with multiple sges struct ibv_recv_wr wr, *bad; @@ -465,7 +465,7 @@ namespace rdmalib { return wr.wr_id; } - int32_t LibfabricConnection::_post_write(SGE && elems, const RemoteBuffer_t & rbuf, const uint32_t immediate) + int32_t LibfabricConnection::_post_write(ScatterGatherElement_t && elems, const RemoteBuffer_t & rbuf, const uint32_t immediate) { fi_addr_t temp = 0; int32_t id = _req_count++; @@ -491,7 +491,7 @@ namespace rdmalib { } - int32_t VerbsConnection::_post_write(SGE && elems, ibv_send_wr wr, bool force_inline, bool force_solicited) + int32_t VerbsConnection::_post_write(ScatterGatherElement_t && elems, ibv_send_wr wr, bool force_inline, bool force_solicited) { ibv_send_wr* bad; wr.wr_id = _req_count++; @@ -528,26 +528,26 @@ namespace rdmalib { return _req_count - 1; } - int32_t LibfabricConnection::post_write(SGE && elems, const RemoteBuffer_t & rbuf, bool force_inline) + int32_t LibfabricConnection::post_write(ScatterGatherElement_t && elems, const RemoteBuffer_t & rbuf, bool force_inline) { if (elems.size() > 1) { spdlog::error("Post write unsuccessful on connection {}, reason Function not implemented for multiple sges.", fmt::ptr(this)); return -1; } - return _post_write(std::forward(elems), rbuf); + return _post_write(std::forward(elems), rbuf); } - int32_t VerbsConnection::post_write(SGE && elems, const RemoteBuffer_t & rbuf, bool force_inline) + int32_t VerbsConnection::post_write(ScatterGatherElement_t && elems, const RemoteBuffer_t & rbuf, bool force_inline) { ibv_send_wr wr; memset(&wr, 0, sizeof(wr)); wr.opcode = IBV_WR_RDMA_WRITE; wr.wr.rdma.remote_addr = rbuf.addr; wr.wr.rdma.rkey = rbuf.rkey; - return _post_write(std::forward(elems), wr, force_inline, false); + return 
_post_write(std::forward(elems), wr, force_inline, false); } - int32_t LibfabricConnection::post_cas(SGE && elems, const RemoteBuffer_t & rbuf, uint64_t compare, uint64_t swap) + int32_t LibfabricConnection::post_cas(ScatterGatherElement_t && elems, const RemoteBuffer_t & rbuf, uint64_t compare, uint64_t swap) { // TODO check if fi_addr_t temp = 0; @@ -563,7 +563,7 @@ namespace rdmalib { return _req_count - 1; } - int32_t VerbsConnection::post_cas(SGE && elems, const RemoteBuffer_t & rbuf, uint64_t compare, uint64_t swap) + int32_t VerbsConnection::post_cas(ScatterGatherElement_t && elems, const RemoteBuffer_t & rbuf, uint64_t compare, uint64_t swap) { ibv_send_wr wr, *bad; memset(&wr, 0, sizeof(wr)); @@ -602,7 +602,7 @@ namespace rdmalib { return _req_count - 1; } - int32_t VerbsConnection::post_atomic_fadd(SGE && elems, const RemoteBuffer_t & rbuf, uint64_t add) + int32_t VerbsConnection::post_atomic_fadd(ScatterGatherElement_t && elems, const RemoteBuffer_t & rbuf, uint64_t add) { ibv_send_wr wr, *bad; memset(&wr, 0, sizeof(wr)); diff --git a/rfaas/include/rfaas/connection.hpp b/rfaas/include/rfaas/connection.hpp index 6641a9e..758af91 100644 --- a/rfaas/include/rfaas/connection.hpp +++ b/rfaas/include/rfaas/connection.hpp @@ -16,10 +16,10 @@ namespace rfaas { template struct manager_connection { - using RDMAActive_t = typename rdmalib_traits::RDMAActive; - using RecvBuffer_t = typename rdmalib_traits::RecvBuffer; - using Connection_t = typename rdmalib_traits::Connection; - using SGE_t = typename rdmalib_traits::ScatterGatherElement; + using RDMAActive_t = typename rdmalib::rdmalib_traits::RDMAActive; + using RecvBuffer_t = typename rdmalib::rdmalib_traits::RecvBuffer; + using Connection_t = typename rdmalib::rdmalib_traits::Connection; + using ScatterGatherElement_t = typename rdmalib::rdmalib_traits::ScatterGatherElement; std::string _address; int _port; diff --git a/rfaas/include/rfaas/executor.hpp b/rfaas/include/rfaas/executor.hpp index 9bfeda0..fdbabb6 
100644 --- a/rfaas/include/rfaas/executor.hpp +++ b/rfaas/include/rfaas/executor.hpp @@ -44,9 +44,9 @@ namespace rfaas { template struct executor_state { - using RemoteBuffer_t = typename rdmalib_traits::RemoteBuffer; - using RecvBuffer_t = typename rdmalib_traits::RecvBuffer; - using Connection_t = typename rdmalib_traits::Connection; + using RemoteBuffer_t = typename rdmalib::rdmalib_traits::RemoteBuffer; + using RecvBuffer_t = typename rdmalib::rdmalib_traits::RecvBuffer; + using Connection_t = typename rdmalib::rdmalib_traits::Connection; std::unique_ptr conn; RemoteBuffer_t remote_input; @@ -57,9 +57,10 @@ namespace rfaas { template struct executor { - using RDMAPassive_t = typename rdmalib_traits::RDMAPassive; - using RecvBuffer_t = typename rdmalib_traits::RecvBuffer; - using ScatterGatherElement_t = typename rdmalib_traits::ScatterGatherElement; + using RDMAPassive_t = typename rdmalib::rdmalib_traits::RDMAPassive; + using RecvBuffer_t = typename rdmalib::rdmalib_traits::RecvBuffer; + using ScatterGatherElement_t = typename rdmalib::rdmalib_traits::ScatterGatherElement; + using RemoteBuffer_t = typename rdmalib::rdmalib_traits::RemoteBuffer; using rkey_t = typename library_traits::rkey_t; static constexpr int MAX_REMOTE_WORKERS = 64; @@ -95,7 +96,7 @@ namespace rfaas { bool allocate(std::string functions_path, int numcores, int max_input_size, int hot_timeout, bool skip_manager = false, rdmalib::Benchmarker<5> * benchmarker = nullptr) { - static_cast(this)->allocate(functions_path, numcores, max_input_size, hot_timeout, skip_manager, benchmarker); + return static_cast(this)->allocate(functions_path, numcores, max_input_size, hot_timeout, skip_manager, benchmarker); } void deallocate() { @@ -131,9 +132,11 @@ namespace rfaas { struct libfabric_executor : executor { using Library = libfabric; - rdmalib::Buffer load_library(std::string path); + rdmalib::Buffer load_library(std::string path); void poll_queue(); + libfabric_executor(std::string address, int port, 
int rcv_buf_size, int max_inlined_msg); + bool allocate(std::string functions_path, int numcores, int max_input_size, int hot_timeout, bool skip_manager, rdmalib::Benchmarker<5> * benchmarker); void deallocate(); @@ -204,7 +207,11 @@ namespace rfaas { char* data = static_cast(in[i].ptr()); // TODO: we assume here uintptr_t is 8 bytes *reinterpret_cast(data) = out[i].address(); + #ifdef USE_LIBFABRIC *reinterpret_cast(data + 8) = out[i].rkey(); + #else + *reinterpret_cast(data + 8) = out[i].rkey(); + #endif SPDLOG_DEBUG("Invoke function {} with invocation id {}", func_idx, _invoc_id); _connections[i].conn->post_write( @@ -407,10 +414,11 @@ namespace rfaas { struct verbs_executor : executor { using Library = ibverbs; - rdmalib::Buffer load_library(std::string path); + rdmalib::Buffer load_library(std::string path); void poll_queue(); bool allocate(std::string functions_path, int numcores, int max_input_size, int hot_timeout, bool skip_manager, rdmalib::Benchmarker<5> * benchmarker); + verbs_executor(std::string address, int port, int rcv_buf_size, int max_inlined_msg); void deallocate(); template diff --git a/rfaas/lib/connection.cpp b/rfaas/lib/connection.cpp index 4aa6c68..cd096c5 100644 --- a/rfaas/lib/connection.cpp +++ b/rfaas/lib/connection.cpp @@ -51,7 +51,7 @@ namespace rfaas { // Send deallocation request only if we're connected if(_active.is_connected()) { request() = (rdmalib::AllocationRequest) {-1, 0, 0, 0, 0, 0, 0, ""}; - SGE_t sge; + ScatterGatherElement_t sge; size_t obj_size = sizeof(rdmalib::AllocationRequest); sge.add(_allocation_buffer, obj_size, obj_size*_rcv_buffer._rcv_buf_size); _active.connection().post_send(sge); @@ -75,7 +75,7 @@ namespace rfaas { template bool manager_connection::submit() { - SGE_t sge; + ScatterGatherElement_t sge; size_t obj_size = sizeof(rdmalib::AllocationRequest); sge.add(_allocation_buffer, obj_size, obj_size*_rcv_buffer._rcv_buf_size); _active.connection().post_send(sge); diff --git a/rfaas/lib/executor.cpp 
b/rfaas/lib/executor.cpp index fba64d8..3939047 100644 --- a/rfaas/lib/executor.cpp +++ b/rfaas/lib/executor.cpp @@ -41,7 +41,8 @@ namespace rfaas { { } - executor::executor(std::string address, int port, int rcv_buf_size, int max_inlined_msg): + template + executor::executor(std::string address, int port, int rcv_buf_size, int max_inlined_msg): _state(address, port, rcv_buf_size + 1), _rcv_buffer(rcv_buf_size), _execs_buf(MAX_REMOTE_WORKERS), @@ -49,24 +50,28 @@ namespace rfaas { _port(port), _rcv_buf_size(rcv_buf_size), _executions(0), - #ifdef USE_LIBFABRIC - _invoc_id(1), - #else - _invoc_id(0), - #endif _max_inlined_msg(max_inlined_msg), _perf(1000) { - #ifdef USE_LIBFABRIC - _execs_buf.register_memory(_state.pd(), FI_WRITE | FI_REMOTE_WRITE); - #else - _execs_buf.register_memory(_state.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); - #endif events = 0; _active_polling = false; _end_requested = false; } + libfabric_executor::libfabric_executor(std::string address, int port, int rcv_buf_size, int max_inlined_msg): + executor(address, port, rcv_buf_size, max_inlined_msg) + { + _invoc_id = 1; + _execs_buf.register_memory(_state.pd(), FI_WRITE | FI_REMOTE_WRITE); + } + + verbs_executor::verbs_executor(std::string address, int port, int rcv_buf_size, int max_inlined_msg): + executor(address, port, rcv_buf_size, max_inlined_msg) + { + _invoc_id = 0; + _execs_buf.register_memory(_state.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); + } + template executor::executor(device_data & dev): executor(dev.ip_address, dev.port, dev.default_receive_buffer_size, dev.max_inline_data) @@ -449,7 +454,7 @@ namespace rfaas { "Received buffer details for thread, id {}, addr {}, rkey {}", id, _execs_buf.data()[id].r_addr, _execs_buf.data()[id].r_key ); - _connections[id].remote_input = rdmalib::RemoteBuffer( + _connections[id].remote_input = RemoteBuffer_t( _execs_buf.data()[id].r_addr, _execs_buf.data()[id].r_key ); @@ -494,7 +499,7 @@ namespace rfaas { auto 
selected_servers = instance.select(numcores); _exec_manager.reset( - new manager_connection( + new manager_connection( instance.server(selected_servers[0]).address, instance.server(selected_servers[0]).port, _rcv_buf_size, @@ -591,7 +596,7 @@ namespace rfaas { "Received buffer details for thread, id {}, addr {}, rkey {}", id, _execs_buf.data()[id].r_addr, _execs_buf.data()[id].r_key ); - _connections[id].remote_input = rdmalib::RemoteBuffer( + _connections[id].remote_input = RemoteBuffer_t( _execs_buf.data()[id].r_addr, _execs_buf.data()[id].r_key ); From 5a485b6f88128174f2c8f817517d045a0015b4b3 Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Fri, 22 Sep 2023 01:46:59 +0000 Subject: [PATCH 81/91] Fixed destructor error --- rdmalib/include/rdmalib/buffer.hpp | 9 +++------ rdmalib/lib/buffer.cpp | 4 ++-- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/rdmalib/include/rdmalib/buffer.hpp b/rdmalib/include/rdmalib/buffer.hpp index c4f8c60..7059436 100644 --- a/rdmalib/include/rdmalib/buffer.hpp +++ b/rdmalib/include/rdmalib/buffer.hpp @@ -41,10 +41,7 @@ namespace rdmalib Buffer(uint32_t size, uint32_t byte_size, uint32_t header); Buffer(Buffer &&); Buffer &operator=(Buffer &&obj); - ~Buffer() - { - static_cast(this)->destroy(); - } + ~Buffer() {} public: uintptr_t address() const; @@ -79,7 +76,7 @@ namespace rdmalib void register_memory(pd_t pd, int access); lkey_t lkey() const; rkey_t rkey() const; - void destroy(); + ~LibfabricBuffer(); }; struct VerbsBuffer : Buffer @@ -87,7 +84,7 @@ namespace rdmalib void register_memory(pd_t pd, int access); lkey_t lkey() const; rkey_t rkey() const; - void destroy(); + ~VerbsBuffer(); }; } diff --git a/rdmalib/lib/buffer.cpp b/rdmalib/lib/buffer.cpp index 46e9eae..1809d9a 100644 --- a/rdmalib/lib/buffer.cpp +++ b/rdmalib/lib/buffer.cpp @@ -92,7 +92,7 @@ namespace rdmalib _bytes, fmt::ptr(_ptr)); } - void LibfabricBuffer::destroy() + LibfabricBuffer::~LibfabricBuffer() { SPDLOG_DEBUG( "Deallocate {} bytes, mr {}, ptr 
{}", @@ -103,7 +103,7 @@ namespace rdmalib munmap(_ptr, _bytes); } - void VerbsBuffer::destroy() + VerbsBuffer::~VerbsBuffer() { SPDLOG_DEBUG( "Deallocate {} bytes, mr {}, ptr {}", From 0c1af12e6f5734b1e42b224462a69c652e150dda Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Fri, 22 Sep 2023 02:14:34 +0000 Subject: [PATCH 82/91] Fixed no declaration matches by adding default arguments --- rdmalib/include/rdmalib/connection.hpp | 5 +++-- rdmalib/lib/connection.cpp | 1 + rfaas/include/rfaas/executor.hpp | 7 +++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index 0e624e3..f549757 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -164,7 +164,7 @@ namespace rdmalib { int32_t post_cas(ScatterGatherElement_t && elems, const RemoteBuffer_t & rbuf, uint64_t compare, uint64_t swap); int32_t post_send(const ScatterGatherElement_t & elems, int32_t id, bool force_inline); int32_t post_batched_empty_recv(int count); - int32_t post_recv(ScatterGatherElement_t && elem, int32_t id, int count); + int32_t post_recv(ScatterGatherElement_t && elem, int32_t id, int count=1); template int32_t post_write(const Buffer & buf, const size_t size, const uint64_t offset, const RemoteBuffer_t & rbuf, const uint32_t immediate) { @@ -224,7 +224,7 @@ namespace rdmalib { int32_t post_send(const ScatterGatherElement_t & elems, int32_t id, bool force_inline); int32_t post_batched_empty_recv(int count); - int32_t post_recv(ScatterGatherElement_t && elem, int32_t id, int count); + int32_t post_recv(ScatterGatherElement_t && elem, int32_t id, int count=1); int32_t post_cas(ScatterGatherElement_t && elems, const RemoteBuffer_t & rbuf, uint64_t compare, uint64_t swap); int32_t post_atomic_fadd(ScatterGatherElement_t && elems, const RemoteBuffer_t & rbuf, uint64_t add); @@ -234,6 +234,7 @@ namespace rdmalib { int32_t _post_write(ScatterGatherElement_t && 
elems, ibv_send_wr wr, bool force_inline, bool force_solicited); int32_t post_write(ScatterGatherElement_t && elems, const RemoteBuffer_t & rbuf, bool force_inline); + //int32_t post_write(ScatterGatherElement_t && elems, const RemoteBuffer_t & rbuf, bool force_inline); // TODO experiment std::tuple poll_wc(QueueType type, bool blocking=true, int count=-1); diff --git a/rdmalib/lib/connection.cpp b/rdmalib/lib/connection.cpp index ac1efce..faa41cb 100644 --- a/rdmalib/lib/connection.cpp +++ b/rdmalib/lib/connection.cpp @@ -537,6 +537,7 @@ namespace rdmalib { return _post_write(std::forward(elems), rbuf); } + //int32_t VerbsConnection::post_write(ScatterGatherElement_t && elems, const RemoteBuffer_t & rbuf, bool force_inline) int32_t VerbsConnection::post_write(ScatterGatherElement_t && elems, const RemoteBuffer_t & rbuf, bool force_inline) { ibv_send_wr wr; diff --git a/rfaas/include/rfaas/executor.hpp b/rfaas/include/rfaas/executor.hpp index fdbabb6..4ff223d 100644 --- a/rfaas/include/rfaas/executor.hpp +++ b/rfaas/include/rfaas/executor.hpp @@ -448,6 +448,7 @@ namespace rfaas { if(size != -1) { ScatterGatherElement_t sge; sge.add(in, size, 0); + /* _connections[0].conn->post_write( std::move(sge), _connections[0].remote_input, @@ -455,6 +456,12 @@ namespace rfaas { size <= _max_inlined_msg, true ); + */ + _connections[0].conn->post_write( + std::move(sge), + _connections[0].remote_input, + size <= _max_inlined_msg + ); } else { _connections[0].conn->post_write( in, From 9d5b9771d5576637dc69ae45aa15e22a8000e1c1 Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Fri, 22 Sep 2023 04:01:13 +0000 Subject: [PATCH 83/91] Fixed more errors --- rdmalib/include/rdmalib/buffer.hpp | 14 +++++++++++--- rdmalib/include/rdmalib/connection.hpp | 7 ++++--- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/rdmalib/include/rdmalib/buffer.hpp b/rdmalib/include/rdmalib/buffer.hpp index 7059436..907965b 100644 --- a/rdmalib/include/rdmalib/buffer.hpp +++ 
b/rdmalib/include/rdmalib/buffer.hpp @@ -92,7 +92,6 @@ namespace rdmalib template struct RemoteBuffer { - using rkey_t = typename library_traits::rkey_t; uintptr_t addr; @@ -121,10 +120,19 @@ namespace rdmalib }; struct LibfabricRemoteBuffer : RemoteBuffer - {}; + { + LibfabricRemoteBuffer() : RemoteBuffer() {} + LibfabricRemoteBuffer(uintptr_t addr, rkey_t rkey, uint32_t size = 0) : + RemoteBuffer(addr, rkey, size) {} + + }; struct VerbsRemoteBuffer : RemoteBuffer - {}; + { + VerbsRemoteBuffer() : RemoteBuffer() {} + VerbsRemoteBuffer(uintptr_t addr, rkey_t rkey, uint32_t size = 0) : + RemoteBuffer(addr, rkey, size) {} + }; template struct Buffer : impl::Buffer, Library> diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index f549757..5a4cca3 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -62,7 +62,8 @@ namespace rdmalib { using id_t = typename library_traits::id_t; using channel_t = typename library_traits::channel_t; using ScatterGatherElement_t = typename ::rdmalib::rdmalib_traits::ScatterGatherElement; - using RemoteBuffer_t = RemoteBuffer; + //using RemoteBuffer_t = RemoteBuffer; + using RemoteBuffer_t = typename ::rdmalib::rdmalib_traits::RemoteBuffer; qp_t _qp; int32_t _req_count; @@ -162,7 +163,7 @@ namespace rdmalib { channel_t transmit_completion_channel() const; int32_t post_cas(ScatterGatherElement_t && elems, const RemoteBuffer_t & rbuf, uint64_t compare, uint64_t swap); - int32_t post_send(const ScatterGatherElement_t & elems, int32_t id, bool force_inline); + int32_t post_send(const ScatterGatherElement_t & elem, int32_t id = -1, bool force_inline = false); int32_t post_batched_empty_recv(int count); int32_t post_recv(ScatterGatherElement_t && elem, int32_t id, int count=1); @@ -222,7 +223,7 @@ namespace rdmalib { void initialize(rdma_cm_id* id); ibv_comp_channel* completion_channel() const; - int32_t post_send(const ScatterGatherElement_t & elems, 
int32_t id, bool force_inline); + int32_t post_send(const ScatterGatherElement_t & elem, int32_t id = -1, bool force_inline = false); int32_t post_batched_empty_recv(int count); int32_t post_recv(ScatterGatherElement_t && elem, int32_t id, int count=1); int32_t post_cas(ScatterGatherElement_t && elems, const RemoteBuffer_t & rbuf, uint64_t compare, uint64_t swap); From 0f9feebc255bc5679c4c9170649d1d35262745b5 Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Fri, 22 Sep 2023 05:21:40 +0000 Subject: [PATCH 84/91] Fixed rfaaslib and rdmalib --- errors | 203 +++++++++++++++++++++++++ rdmalib/include/rdmalib/connection.hpp | 17 ++- rfaas/lib/executor.cpp | 10 +- 3 files changed, 223 insertions(+), 7 deletions(-) create mode 100644 errors diff --git a/errors b/errors new file mode 100644 index 0000000..4b6aad8 --- /dev/null +++ b/errors @@ -0,0 +1,203 @@ +/home/ubuntu/rfaas-refactor/rfaas/lib/resources.cpp: In member function ‘std::vector rfaas::servers::select(int)’: +/home/ubuntu/rfaas-refactor/rfaas/lib/resources.cpp:43:40: warning: unused parameter ‘cores’ [-Wunused-parameter] + 43 | std::vector servers::select(int cores) + | ~~~~^~~~~ +In file included from /home/ubuntu/rfaas-refactor/server/executor/server.hpp:15, + from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: +/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:25:10: error: template argument required for ‘struct RecvBuffer’ + 25 | struct RecvBuffer; + | ^~~~~~~~~~ +/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:48:7: error: template placeholder type ‘Connection<...auto...>’ must be followed by a simple declarator-id + 48 | rdmalib::Connection* mgr_connection, rdmalib::Buffer & _accounting_buf, + | ^~~~~~~ +In file included from /home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/rdmalib.hpp:25, + from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:12: +/home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/connection.hpp:55:10: note: ‘template struct rdmalib::Connection’ 
declared here + 55 | struct Connection { + | ^~~~~~~~~~ +In file included from /home/ubuntu/rfaas-refactor/server/executor/server.hpp:15, + from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: +/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:48:42: error: expected ‘)’ before ‘,’ token + 48 | rdmalib::Connection* mgr_connection, rdmalib::Buffer & _accounting_buf, + | ^ + | ) +/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:47:39: note: to match this ‘(’ + 47 | inline void send_updated_execution( + | ^ +/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:48:68: error: wrong number of template arguments (1, should be 2) + 48 | rdmalib::Connection* mgr_connection, rdmalib::Buffer & _accounting_buf, + | ^ +In file included from /home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/rdmalib.hpp:24, + from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:12: +/home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/buffer.hpp:138:10: note: provided for ‘template struct rdmalib::Buffer’ + 138 | struct Buffer : impl::Buffer, Library> + | ^~~~~~ +In file included from /home/ubuntu/rfaas-refactor/server/executor/server.hpp:15, + from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: +/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:78:7: error: template placeholder type ‘Connection<...auto...>’ must be followed by a simple declarator-id + 78 | rdmalib::Connection* mgr_connection, rdmalib::Buffer & _accounting_buf, + | ^~~~~~~ +In file included from /home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/rdmalib.hpp:25, + from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:12: +/home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/connection.hpp:55:10: note: ‘template struct rdmalib::Connection’ declared here + 55 | struct Connection { + | ^~~~~~~~~~ +In file included from /home/ubuntu/rfaas-refactor/server/executor/server.hpp:15, + from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: 
+/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:78:42: error: expected ‘)’ before ‘,’ token + 78 | rdmalib::Connection* mgr_connection, rdmalib::Buffer & _accounting_buf, + | ^ + | ) +/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:77:37: note: to match this ‘(’ + 77 | inline void send_updated_polling( + | ^ +/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:78:68: error: wrong number of template arguments (1, should be 2) + 78 | rdmalib::Connection* mgr_connection, rdmalib::Buffer & _accounting_buf, + | ^ +In file included from /home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/rdmalib.hpp:24, + from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:12: +/home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/buffer.hpp:138:10: note: provided for ‘template struct rdmalib::Buffer’ + 138 | struct Buffer : impl::Buffer, Library> + | ^~~~~~ +In file included from /home/ubuntu/rfaas-refactor/server/executor/server.hpp:15, + from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: +/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:122:25: error: wrong number of template arguments (1, should be 2) + 122 | rdmalib::Buffer send, rcv; + | ^ +In file included from /home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/rdmalib.hpp:24, + from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:12: +/home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/buffer.hpp:138:10: note: provided for ‘template struct rdmalib::Buffer’ + 138 | struct Buffer : impl::Buffer, Library> + | ^~~~~~ +In file included from /home/ubuntu/rfaas-refactor/server/executor/server.hpp:15, + from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: +/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:123:5: error: invalid use of template-name ‘rdmalib::RecvBuffer’ without an argument list + 123 | rdmalib::RecvBuffer wc_buffer; + | ^~~~~~~ +/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:124:5: error: template placeholder type 
‘Connection<...auto...>’ must be followed by a simple declarator-id + 124 | rdmalib::Connection* conn; + | ^~~~~~~ +In file included from /home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/rdmalib.hpp:25, + from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:12: +/home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/connection.hpp:55:10: note: ‘template struct rdmalib::Connection’ declared here + 55 | struct Connection { + | ^~~~~~~~~~ +In file included from /home/ubuntu/rfaas-refactor/server/executor/server.hpp:15, + from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: +/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:125:5: error: template placeholder type ‘Connection<...auto...>’ must be followed by a simple declarator-id + 125 | rdmalib::Connection* _mgr_connection; + | ^~~~~~~ +In file included from /home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/rdmalib.hpp:25, + from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:12: +/home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/connection.hpp:55:10: note: ‘template struct rdmalib::Connection’ declared here + 55 | struct Connection { + | ^~~~~~~~~~ +In file included from /home/ubuntu/rfaas-refactor/server/executor/server.hpp:15, + from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: +/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:128:29: error: wrong number of template arguments (1, should be 2) + 128 | rdmalib::Buffer _accounting_buf; + | ^ +In file included from /home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/rdmalib.hpp:24, + from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:12: +/home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/buffer.hpp:138:10: note: provided for ‘template struct rdmalib::Buffer’ + 138 | struct Buffer : impl::Buffer, Library> + | ^~~~~~ +In file included from /home/ubuntu/rfaas-refactor/server/executor/server.hpp:15, + from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: 
+/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp: In constructor ‘server::Thread::Thread(std::string, int, int, int, int, int, int, const executor::ManagerConnection&)’: +/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:146:41: error: ‘rdmalib::functions::Submission’ has not been declared + 146 | rcv(buf_size, rdmalib::functions::Submission::DATA_HEADER_SIZE), + | ^~~~~~~~~~ +/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:149:7: error: class ‘server::Thread’ does not have any field named ‘conn’ + 149 | conn(nullptr), + | ^~~~ +/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:146:7: error: expression list treated as compound expression in mem-initializer [-fpermissive] + 146 | rcv(buf_size, rdmalib::functions::Submission::DATA_HEADER_SIZE), + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In file included from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: +/home/ubuntu/rfaas-refactor/server/executor/server.hpp: At global scope: +/home/ubuntu/rfaas-refactor/server/executor/server.hpp:119:5: error: invalid use of template-name ‘rdmalib::RDMAPassive’ without an argument list + 119 | rdmalib::RDMAPassive _state; + | ^~~~~~~ +/home/ubuntu/rfaas-refactor/server/executor/server.hpp:120:5: error: invalid use of template-name ‘rdmalib::server::ServerStatus’ without an argument list + 120 | rdmalib::server::ServerStatus _status; + | ^~~~~~~ +/home/ubuntu/rfaas-refactor/server/executor/server.hpp:124:5: error: template placeholder type ‘Connection<...auto...>’ must be followed by a simple declarator-id + 124 | rdmalib::Connection* _conn; + | ^~~~~~~ +In file included from /home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/rdmalib.hpp:25, + from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:12: +/home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/connection.hpp:55:10: note: ‘template struct rdmalib::Connection’ declared here + 55 | struct Connection { + | ^~~~~~~~~~ +In file included 
from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: +/home/ubuntu/rfaas-refactor/server/executor/server.hpp:125:5: error: invalid use of template-name ‘rdmalib::RecvBuffer’ without an argument list + 125 | rdmalib::RecvBuffer _wc_buffer; + | ^~~~~~~ +/home/ubuntu/rfaas-refactor/server/executor/server.hpp:141:43: error: wrong number of template arguments (1, should be 2) + 141 | void register_buffer(rdmalib::Buffer & buf, bool is_recv_buffer) + | ^ +In file included from /home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/rdmalib.hpp:24, + from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:12: +/home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/buffer.hpp:138:10: note: provided for ‘template struct rdmalib::Buffer’ + 138 | struct Buffer : impl::Buffer, Library> + | ^~~~~~ +In file included from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: +/home/ubuntu/rfaas-refactor/server/executor/server.hpp:161:23: error: template placeholder type ‘Connection<...auto...>’ must be followed by a simple declarator-id + 161 | void reload_queue(rdmalib::Connection & conn, int32_t idx); + | ^~~~~~~ +In file included from /home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/rdmalib.hpp:25, + from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:12: +/home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/connection.hpp:55:10: note: ‘template struct rdmalib::Connection’ declared here + 55 | struct Connection { + | ^~~~~~~~~~ +In file included from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: +/home/ubuntu/rfaas-refactor/server/executor/server.hpp:161:49: error: expected ‘)’ before ‘,’ token + 161 | void reload_queue(rdmalib::Connection & conn, int32_t idx); + | ~ ^ + | ) +/home/ubuntu/rfaas-refactor/server/executor/server.hpp:161:51: error: variable or field ‘int32_t’ declared void + 161 | void reload_queue(rdmalib::Connection & conn, int32_t idx); + | ^~~~~~~ +/home/ubuntu/rfaas-refactor/server/executor/server.hpp:161:51: error: expected ‘;’ at end of 
member declaration + 161 | void reload_queue(rdmalib::Connection & conn, int32_t idx); + | ^~~~~~~ + | ; +/home/ubuntu/rfaas-refactor/server/executor/server.hpp:161:59: error: ‘idx’ does not name a type + 161 | void reload_queue(rdmalib::Connection & conn, int32_t idx); + | ^~~ +/home/ubuntu/rfaas-refactor/server/executor/server.hpp:163:28: error: deduced class type ‘RDMAPassive’ in function return type + 163 | rdmalib::RDMAPassive & state(); + | ^~~~~ +In file included from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:12: +/home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/rdmalib.hpp:158:10: note: ‘template struct rdmalib::RDMAPassive’ declared here + 158 | struct RDMAPassive { + | ^~~~~~~~~~~ +In file included from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: +/home/ubuntu/rfaas-refactor/server/executor/server.hpp:164:26: error: deduced class type ‘Connection’ in function return type + 164 | rdmalib::Connection* poll_communication(); + | ^~~~~~~~~~~~~~~~~~ +In file included from /home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/rdmalib.hpp:25, + from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:12: +/home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/connection.hpp:55:10: note: ‘template struct rdmalib::Connection’ declared here + 55 | struct Connection { + | ^~~~~~~~~~ +In file included from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: +/home/ubuntu/rfaas-refactor/server/executor/server.hpp:165:43: error: deduced class type ‘ServerStatus’ in function return type + 165 | const rdmalib::server::ServerStatus & status() const; + | ^~~~~~ +In file included from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:13: +/home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/server.hpp:20:10: note: ‘template struct rdmalib::server::ServerStatus’ declared here + 20 | struct ServerStatus { + | ^~~~~~~~~~~~ +In file included from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: 
+/home/ubuntu/rfaas-refactor/server/executor/server.hpp: In member function ‘void server::Server::register_buffer(int&, bool)’: +/home/ubuntu/rfaas-refactor/server/executor/server.hpp:147:13: error: request for member ‘register_memory’ in ‘buf’, which is of non-class type ‘int’ + 147 | buf.register_memory(_state.pd(), IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE); + | ^~~~~~~~~~~~~~~ +/home/ubuntu/rfaas-refactor/server/executor/server.hpp:154:13: error: request for member ‘register_memory’ in ‘buf’, which is of non-class type ‘int’ + 154 | buf.register_memory(_state.pd(), IBV_ACCESS_LOCAL_WRITE); + | ^~~~~~~~~~~~~~~ +gmake[2]: *** [CMakeFiles/executor.dir/build.make:76: CMakeFiles/executor.dir/server/executor/cli.cpp.o] Error 1 +gmake[1]: *** [CMakeFiles/Makefile2:226: CMakeFiles/executor.dir/all] Error 2 +gmake: *** [Makefile:136: all] Error 2 diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index 5a4cca3..4881cca 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -29,7 +29,6 @@ namespace rdmalib { RECV }; - #ifndef USE_LIBFABRIC struct ConnectionConfiguration { // Configuration of QP ibv_qp_init_attr attr; @@ -37,7 +36,6 @@ namespace rdmalib { ConnectionConfiguration(); }; - #endif enum class ConnectionStatus { // The connection object does not bind to a defined RDMA connection. 
@@ -112,10 +110,19 @@ namespace rdmalib { // Blocking, no timeout - std::tuple poll_wc(QueueType, bool blocking = true, int count = -1, bool update = false); + std::tuple poll_wc(QueueType type, bool blocking = true, int count = -1, bool update = false) + { + return static_cast(this)->poll_wc(type, blocking, count, update); + } - int32_t post_send(const ScatterGatherElement_t & elem, int32_t id = -1, bool force_inline = false); - int32_t post_recv(ScatterGatherElement_t && elem, int32_t id = -1, int32_t count = 1); + int32_t post_send(const ScatterGatherElement_t & elem, int32_t id = -1, bool force_inline = false) + { + return static_cast(this)->post_send(elem, id, force_inline); + } + int32_t post_recv(ScatterGatherElement_t && elem, int32_t id = -1, int32_t count = 1) + { + return static_cast(this)->post_recv(elem, id, count); + } int32_t post_batched_empty_recv(int32_t count = 1); int32_t post_write(ScatterGatherElement_t && elems, const RemoteBuffer_t & buf, bool force_inline = false); diff --git a/rfaas/lib/executor.cpp b/rfaas/lib/executor.cpp index 3939047..3c83da0 100644 --- a/rfaas/lib/executor.cpp +++ b/rfaas/lib/executor.cpp @@ -428,7 +428,10 @@ namespace rfaas { "[Executor] Established connection to executor {}, connection {}", established + 1, fmt::ptr(conn) ); - conn->post_send(functions); + ScatterGatherElement_t sge; + sge.add(functions, functions.size(), 0); + conn->post_send(sge); + //conn->post_send(functions); SPDLOG_DEBUG("Connected thread {}/{} and submitted function code.", established + 1, numcores); ++established; } @@ -570,7 +573,10 @@ namespace rfaas { "[Executor] Established connection to executor {}, connection {}", established + 1, fmt::ptr(conn) ); - conn->post_send(functions); + ScatterGatherElement_t sge; + sge.add(functions, functions.size(), 0); + conn->post_send(sge); + //conn->post_send(functions); SPDLOG_DEBUG("Connected thread {}/{} and submitted function code.", established + 1, numcores); ++established; } From 
15937dbea4040782b581d136312b5f65327c50f0 Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Fri, 22 Sep 2023 05:42:47 +0000 Subject: [PATCH 85/91] Refactored fast executor --- rdmalib/include/rdmalib/libraries.hpp | 11 ++++--- server/common.hpp | 10 +++--- server/executor/fast_executor.hpp | 46 +++++++++++++++------------ 3 files changed, 37 insertions(+), 30 deletions(-) diff --git a/rdmalib/include/rdmalib/libraries.hpp b/rdmalib/include/rdmalib/libraries.hpp index 3b712de..df8c133 100644 --- a/rdmalib/include/rdmalib/libraries.hpp +++ b/rdmalib/include/rdmalib/libraries.hpp @@ -85,21 +85,22 @@ namespace rdmalib { struct LibfabricConnection; struct VerbsConnection; - struct LibfabricRemoteBuffer; struct VerbsRemoteBuffer; - struct LibfabricAddress; struct VerbsAddress; - struct LibfabricRDMAActive; struct VerbsRDMAActive; struct LibfabricRDMAPassive; struct VerbsRDMAPassive; - struct LibfabricRecvBuffer; struct VerbsRecvBuffer; + namespace functions { + struct LibfabricSubmission; + struct VerbsSubmission; + } + template <> struct rdmalib_traits { using Connection = LibfabricConnection; @@ -109,6 +110,7 @@ namespace rdmalib { using RecvBuffer = LibfabricRecvBuffer; using ScatterGatherElement = LibfabricScatterGatherElement; using RemoteBuffer = LibfabricRemoteBuffer; + using Submission = functions::LibfabricSubmission; }; template <> @@ -120,6 +122,7 @@ namespace rdmalib { using RecvBuffer = VerbsRecvBuffer; using ScatterGatherElement = VerbsScatterGatherElement; using RemoteBuffer = VerbsRemoteBuffer; + using Submission = functions::VerbsSubmission; }; } diff --git a/server/common.hpp b/server/common.hpp index 0279cab..324bc40 100644 --- a/server/common.hpp +++ b/server/common.hpp @@ -5,18 +5,18 @@ #include #include +#include + namespace executor { + template struct ManagerConnection { + using rkey_t = typename library_traits::rkey_t; std::string addr; int port; int secret; uint64_t r_addr; - #ifdef USE_LIBFABRIC - uint64_t r_key; - #else - uint32_t r_key; - #endif 
+ rkey_t r_key; }; } diff --git a/server/executor/fast_executor.hpp b/server/executor/fast_executor.hpp index bb1b67c..fbeac6a 100644 --- a/server/executor/fast_executor.hpp +++ b/server/executor/fast_executor.hpp @@ -21,13 +21,13 @@ using namespace std::chrono_literals; -namespace rdmalib { - struct RecvBuffer; -} - namespace server { + template struct Accounting { + using Connection_t = typename rdmalib::rdmalib_traits::Connection; + using RecvBuffer_t = typename rdmalib::rdmalib_traits::RecvBuffer; + typedef std::chrono::high_resolution_clock clock_t; typedef std::chrono::time_point timepoint_t; static constexpr long int BILLING_GRANULARITY = std::chrono::duration_cast(1s).count(); @@ -45,8 +45,8 @@ namespace server { } inline void send_updated_execution( - rdmalib::Connection* mgr_connection, rdmalib::Buffer & _accounting_buf, - const executor::ManagerConnection & _mgr_conn, + Connection_t* mgr_connection, rdmalib::Buffer & _accounting_buf, + const executor::ManagerConnection & _mgr_conn, bool force = false, bool wait = true ) @@ -75,8 +75,8 @@ namespace server { } inline void send_updated_polling( - rdmalib::Connection* mgr_connection, rdmalib::Buffer & _accounting_buf, - const executor::ManagerConnection & _mgr_conn, + Connection_t* mgr_connection, rdmalib::Buffer & _accounting_buf, + const executor::ManagerConnection & _mgr_conn, bool force = false, bool wait = true ) @@ -107,8 +107,11 @@ namespace server { }; // FIXME: is not movable or copyable at the moment + template struct Thread { - + using Connection_t = typename rdmalib::rdmalib_traits::Connection; + using RecvBuffer_t = typename rdmalib::rdmalib_traits::RecvBuffer; + using Submission_t = typename rdmalib::rdmalib_traits::Submission; constexpr static int invocation_mask = 0x00007FFF; constexpr static int solicited_mask = 0x00008000; @@ -119,13 +122,13 @@ namespace server { int id, repetitions; int max_repetitions; uint64_t sum; - rdmalib::Buffer send, rcv; - rdmalib::RecvBuffer wc_buffer; - 
rdmalib::Connection* conn; - rdmalib::Connection* _mgr_connection; - const executor::ManagerConnection & _mgr_conn; - Accounting _accounting; - rdmalib::Buffer _accounting_buf; + rdmalib::Buffer send, rcv; + RecvBuffer_t wc_buffer; + Connection_t * conn; + Connection_t * _mgr_connection; + const executor::ManagerConnection & _mgr_conn; + Accounting _accounting; + rdmalib::Buffer _accounting_buf; rdmalib::PerfBenchmarker<9> _perf; // FIXME: Adjust to billing granularity constexpr static int HOT_POLLING_VERIFICATION_PERIOD = 10000; @@ -133,7 +136,7 @@ namespace server { Thread(std::string addr_, int port_, int id_, int functions_size, int buf_size, int recv_buffer_size, int max_inline_data_, - const executor::ManagerConnection & mgr_conn): + const executor::ManagerConnection & mgr_conn): _functions(functions_size), addr(addr_), port(port_), @@ -143,7 +146,7 @@ namespace server { max_repetitions(0), sum(0), send(buf_size), - rcv(buf_size, rdmalib::functions::Submission::DATA_HEADER_SIZE), + rcv(buf_size, Submission_t::DATA_HEADER_SIZE), // +1 to handle batching of functions work completions + initial code submission wc_buffer(recv_buffer_size + 1), conn(nullptr), @@ -154,15 +157,16 @@ namespace server { { } - Accounting::timepoint_t work(int invoc_id, int func_id, bool solicited, uint32_t in_size); + typename Accounting::timepoint_t work(int invoc_id, int func_id, bool solicited, uint32_t in_size); void hot(uint32_t hot_timeout); void warm(); void thread_work(int timeout); }; + template struct FastExecutors { - std::vector _threads_data; + std::vector> _threads_data; std::vector _threads; bool _closing; int _numcores; @@ -179,7 +183,7 @@ namespace server { int recv_buf_size, int max_inline_data, int pin_threads, - const executor::ManagerConnection & mgr_conn + const executor::ManagerConnection & mgr_conn ); ~FastExecutors(); From 724ccab47b2faabcd1e0ff9e7a7de7e08601d31c Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Fri, 22 Sep 2023 06:13:57 +0000 Subject: [PATCH 
86/91] Refactoring server --- rdmalib/include/rdmalib/allocation.hpp | 5 +- server/executor/cli.cpp | 14 ++++- server/executor/opts.cpp | 35 ++++++------ server/executor/server.hpp | 78 ++++++++++++++++---------- 4 files changed, 78 insertions(+), 54 deletions(-) diff --git a/rdmalib/include/rdmalib/allocation.hpp b/rdmalib/include/rdmalib/allocation.hpp index 3b6cfbe..c6f7d4d 100644 --- a/rdmalib/include/rdmalib/allocation.hpp +++ b/rdmalib/include/rdmalib/allocation.hpp @@ -4,6 +4,8 @@ #include +#include "libraries.hpp" + namespace rdmalib { struct AllocationRequest @@ -23,9 +25,8 @@ namespace rdmalib { template struct BufferInformation { - using rkey_t = typename library_traits::rkey_t; uint64_t r_addr; - rkey_t r_key; + typename library_traits::rkey_t r_key; }; } diff --git a/server/executor/cli.cpp b/server/executor/cli.cpp index 73ef522..44f39c5 100644 --- a/server/executor/cli.cpp +++ b/server/executor/cli.cpp @@ -20,7 +20,15 @@ int main(int argc, char ** argv) // Register a SIGINT handler so that we can gracefully exit //server::SignalHandler sighandler; - auto opts = server::opts(argc, argv); + + #ifdef USE_LIBFABRIC + using Library = libfabric; + #else + using Library = ibverbs; + #endif + + auto opts = server::opts(argc, argv); + if(opts.verbose) spdlog::set_level(spdlog::level::debug); else @@ -55,14 +63,14 @@ int main(int argc, char ** argv) ); #endif - executor::ManagerConnection mgr{ + executor::ManagerConnection mgr{ opts.mgr_address, opts.mgr_port, opts.mgr_secret, opts.accounting_buffer_addr, opts.accounting_buffer_rkey }; - server::FastExecutors executor( + server::FastExecutors executor( opts.address, opts.port, opts.func_size, opts.fast_executors, diff --git a/server/executor/opts.cpp b/server/executor/opts.cpp index c8f9f65..5ea6c1c 100644 --- a/server/executor/opts.cpp +++ b/server/executor/opts.cpp @@ -4,8 +4,13 @@ #include "server.hpp" namespace server { + namespace types { + template + using rkey_t = typename library_traits::rkey_t; + } - 
Options opts(int argc, char ** argv) + template + Options opts(int argc, char ** argv) { cxxopts::Options options("serverless-rdma-server", "Handle functions invocations."); options.add_options() @@ -27,20 +32,16 @@ namespace server { ("v,verbose", "Verbose output", cxxopts::value()->default_value("false")) ("mgr-address", "Use selected address", cxxopts::value()) ("mgr-port", "Use selected port", cxxopts::value()) - ("mgr-secret", "Use selected port", cxxopts::value()) - ("mgr-buf-addr", "Use selected port", cxxopts::value()) - #ifdef USE_LIBFABRIC - ("mgr-buf-rkey", "Use selected port", cxxopts::value()) - #else - ("mgr-buf-rkey", "Use selected port", cxxopts::value()) - #endif + ("mgr-secret", "Manager secret", cxxopts::value()) + ("mgr-buf-addr", "Manager buffer address", cxxopts::value()) + ("mgr-buf-rkey", "Manager remote key", cxxopts::value>()) #ifdef USE_GNI_AUTH ("authentication-cookie", "Use selected port", cxxopts::value()) #endif ; auto parsed_options = options.parse(argc, argv); - Options result; + Options result; result.address = parsed_options["address"].as(); result.port = parsed_options["port"].as(); result.cheap_executors = parsed_options["cheap"].as(); @@ -59,31 +60,27 @@ namespace server { result.mgr_port = parsed_options["mgr-port"].as(); result.mgr_secret = parsed_options["mgr-secret"].as(); result.accounting_buffer_addr = parsed_options["mgr-buf-addr"].as(); - #ifdef USE_LIBFABRIC - result.accounting_buffer_rkey = parsed_options["mgr-buf-rkey"].as(); - #else - result.accounting_buffer_rkey = parsed_options["mgr-buf-rkey"].as(); - #endif + result.accounting_buffer_rkey = parsed_options["mgr-buf-rkey"].as>(); #ifdef USE_GNI_AUTH result.authentication_cookie = parsed_options["authentication-cookie"].as(); #endif std::string polling_mgr = parsed_options["polling-mgr"].as(); if(polling_mgr == "server") { - result.polling_manager = Options::PollingMgr::SERVER; + result.polling_manager = Options::PollingMgr::SERVER; } else if(polling_mgr == 
"server-notify") { - result.polling_manager = Options::PollingMgr::SERVER_NOTIFY; + result.polling_manager = Options::PollingMgr::SERVER_NOTIFY; } else if(polling_mgr == "thread") { - result.polling_manager = Options::PollingMgr::THREAD; + result.polling_manager = Options::PollingMgr::THREAD; } else { throw std::runtime_error("Unrecognized choice for polling-mgr option: " + polling_mgr); } std::string polling_type = parsed_options["polling-type"].as(); if(polling_type == "wc") { - result.polling_type = Options::PollingType::WC; + result.polling_type = Options::PollingType::WC; } else if(polling_type == "dram") { - result.polling_type = Options::PollingType::DRAM; + result.polling_type = Options::PollingType::DRAM; } else { throw std::runtime_error("Unrecognized choice for polling-type option: " + polling_type); } diff --git a/server/executor/server.hpp b/server/executor/server.hpp index c566cc4..0290776 100644 --- a/server/executor/server.hpp +++ b/server/executor/server.hpp @@ -16,6 +16,7 @@ namespace server { + template struct Server; struct SignalHandler { @@ -26,7 +27,9 @@ namespace server { static void handler(int); }; + template struct Options { + using rkey_t = typename library_traits::rkey_t; enum class PollingMgr { SERVER=0, @@ -58,17 +61,14 @@ namespace server { int mgr_port; int mgr_secret; uint64_t accounting_buffer_addr; - #ifdef USE_LIBFABRIC - uint64_t accounting_buffer_rkey; - #else - uint32_t accounting_buffer_rkey; - #endif + rkey_t accounting_buffer_rkey; #ifdef USE_GNI_AUTH uint32_t authentication_cookie; #endif }; - Options opts(int argc, char ** argv); + template + Options opts(int argc, char ** argv); //struct InvocationStatus { // rdmalib::Connection* connection; @@ -108,6 +108,7 @@ namespace server { + template struct Server { // FIXME: "cheap" invocation @@ -116,13 +117,17 @@ namespace server { //static const int QUEUE_MSG_SIZE = 100; //static const int QUEUE_MSG_SIZE = 4096; //std::array, QUEUE_SIZE> _queue; - rdmalib::RDMAPassive _state; 
- rdmalib::server::ServerStatus _status; + using RDMAPassive_t = typename rdmalib::rdmalib_traits::RDMAPassive; + using Connection_t = typename rdmalib::rdmalib_traits::Connection; + using RecvBuffer_t = typename rdmalib::rdmalib_traits::RecvBuffer; + + RDMAPassive_t _state; + rdmalib::server::ServerStatus _status; rdmalib::functions::FunctionsDB _db; //Executors _exec; - FastExecutors _fast_exec; - rdmalib::Connection* _conn; - rdmalib::RecvBuffer _wc_buffer; + FastExecutors _fast_exec; + Connection_t* _conn; + RecvBuffer_t _wc_buffer; bool _inline_data; Server( @@ -138,31 +143,18 @@ namespace server { ); template - void register_buffer(rdmalib::Buffer & buf, bool is_recv_buffer) + void register_buffer(rdmalib::Buffer & buf, bool is_recv_buffer) { - if(is_recv_buffer) { - #ifdef USE_LIBFABRIC - buf.register_memory(_state.pd(), FI_WRITE | FI_REMOTE_WRITE); - #else - buf.register_memory(_state.pd(), IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE); - #endif - _status.add_buffer(buf); - } else { - #ifdef USE_LIBFABRIC - buf.register_memory(_state.pd(), FI_WRITE); - #else - buf.register_memory(_state.pd(), IBV_ACCESS_LOCAL_WRITE); - #endif - } + static_cast(this)->register_buffer(buf, is_recv_buffer); } //void allocate_send_buffers(int numcores, int size); //void allocate_rcv_buffers(int numcores, int size); - void reload_queue(rdmalib::Connection & conn, int32_t idx); + void reload_queue(Connection_t & conn, int32_t idx); void listen(); - rdmalib::RDMAPassive & state(); - rdmalib::Connection* poll_communication(); - const rdmalib::server::ServerStatus & status() const; + RDMAPassive_t & state(); + Connection_t * poll_communication(); + const rdmalib::server::ServerStatus & status() const; std::tuple poll_server(int, int); std::tuple poll_threads(int, int); @@ -172,5 +164,31 @@ namespace server { //void poll_srq(); }; + struct LibfabricServer : Server { + template + void register_buffer(rdmalib::Buffer & buf, bool is_recv_buffer) + { + if(is_recv_buffer) { + 
buf.register_memory(_state.pd(), FI_WRITE | FI_REMOTE_WRITE); + _status.add_buffer(buf); + } else { + buf.register_memory(_state.pd(), FI_WRITE); + } + } + }; + + struct VerbsServer : Server { + template + void register_buffer(rdmalib::Buffer & buf, bool is_recv_buffer) + { + if(is_recv_buffer) { + buf.register_memory(_state.pd(), IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE); + _status.add_buffer(buf); + } else { + buf.register_memory(_state.pd(), IBV_ACCESS_LOCAL_WRITE); + } + } + }; + } From 27e908e497fc86e38bd2a8267379c683d360935a Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Fri, 22 Sep 2023 15:28:22 +0000 Subject: [PATCH 87/91] Fixing executor errors --- server/executor/fast_executor.cpp | 291 ++++++++++++++++++++++-------- server/executor/fast_executor.hpp | 49 ++++- server/structures.hpp | 25 ++- 3 files changed, 284 insertions(+), 81 deletions(-) diff --git a/server/executor/fast_executor.cpp b/server/executor/fast_executor.cpp index 7b7c17a..03e6134 100644 --- a/server/executor/fast_executor.cpp +++ b/server/executor/fast_executor.cpp @@ -44,10 +44,10 @@ namespace server { // SignalHandler::closing = true; // } - Accounting::timepoint_t Thread::work(int invoc_id, int func_id, bool solicited, uint32_t in_size) + typename Accounting::timepoint_t LibfabricThread::work(int invoc_id, int func_id, bool solicited, uint32_t in_size) { // FIXME: load func ptr - rdmalib::functions::Submission* header = reinterpret_cast(rcv.ptr()); + Submission_t* header = reinterpret_cast(rcv.ptr()); auto ptr = _functions.function(func_id); SPDLOG_DEBUG("Thread {} begins work! Executing function {} with size {}, invoc id {}, solicited reply? 
{}", @@ -62,7 +62,6 @@ namespace server { // Send back: the value of immediate write // first 16 bytes - invocation id // second 16 bytes - return value (0 on no error) - #ifdef USE_LIBFABRIC conn->post_write( send, out_size, @@ -70,7 +69,34 @@ namespace server { {header->r_address, header->r_key}, (invoc_id << 16) | 0 ); - #else + //_perf.point(4); + auto end = std::chrono::high_resolution_clock::now(); + _accounting.update_execution_time(start, end); + _accounting.send_updated_execution(_mgr_connection, _accounting_buf, _mgr_conn); + //_perf.point(5); + //int cpu = sched_getcpu(); + //spdlog::info("Execution + sent took {} us on {} CPU", std::chrono::duration_cast(end-start).count(), cpu); + return end; + } + + typename Accounting::timepoint_t VerbsThread::work(int invoc_id, int func_id, bool solicited, uint32_t in_size) + { + // FIXME: load func ptr + Submission_t* header = reinterpret_cast(rcv.ptr()); + auto ptr = _functions.function(func_id); + + SPDLOG_DEBUG("Thread {} begins work! Executing function {} with size {}, invoc id {}, solicited reply? 
{}", + id, _functions._names[func_id], in_size, invoc_id, solicited + ); + auto start = std::chrono::high_resolution_clock::now(); + // Data to ignore header passed in the buffer + //_perf.point(2); + uint32_t out_size = (*ptr)(rcv.data(), in_size, send.ptr()); + SPDLOG_DEBUG("Thread {} finished work!", id); + //_perf.point(3); + // Send back: the value of immediate write + // first 16 bytes - invocation id + // second 16 bytes - return value (0 on no error) conn->post_write( send.sge(out_size, 0), {header->r_address, header->r_key}, @@ -78,7 +104,6 @@ namespace server { out_size <= max_inline_data, solicited ); - #endif //_perf.point(4); auto end = std::chrono::high_resolution_clock::now(); _accounting.update_execution_time(start, end); @@ -89,7 +114,7 @@ namespace server { return end; } - void Thread::hot(uint32_t timeout) + void LibfabricThread::hot(int timeout) { //rdmalib::Benchmarker<1> server_processing_times{max_repetitions}; SPDLOG_DEBUG("Thread {} Begins hot polling", id); @@ -98,22 +123,70 @@ namespace server { while(repetitions < max_repetitions && !SignalHandler::closing) { // if we block, we never handle the interruption - #ifdef USE_LIBFABRIC auto wcs = conn->poll_wc(rdmalib::QueueType::RECV, false, -1, true); - #else - auto wcs = wc_buffer.poll(); - #endif if(std::get<1>(wcs)) { for(int i = 0; i < std::get<1>(wcs); ++i) { //_perf.point(); //server_processing_times.start(); - #ifdef USE_LIBFABRIC fi_cq_data_entry* wc = &std::get<0>(wcs)[i]; int func_id = wc->data & invocation_mask; int invoc_id = (wc->data >> 16) & 0x0000FFFF; bool solicited = wc->data & solicited_mask; int len = wc->data >> 32; - #else + SPDLOG_DEBUG( + "Thread {} Invoc id {} Execute func {} Repetition {}", + id, invoc_id, func_id, repetitions + ); + //_perf.point(1); + // Measure hot polling time until we started execution + auto now = std::chrono::high_resolution_clock::now(); + auto func_end = work(invoc_id, func_id, solicited, + len - Submission_t::DATA_HEADER_SIZE + ); + 
_accounting.update_polling_time(start, now); + i = 0; + start = func_end; + //_perf.point(6); + //sum += server_processing_times.end(); + conn->poll_wc(rdmalib::QueueType::SEND, true); + repetitions += 1; + //_perf.point(7); + } + //_perf.point(8); + } + ++i; + + // FIXME: adjust period to the timeout + if(i == HOT_POLLING_VERIFICATION_PERIOD) { + auto now = std::chrono::high_resolution_clock::now(); + auto time_passed = _accounting.update_polling_time(start, now); + _accounting.send_updated_polling(_mgr_connection, _accounting_buf, _mgr_conn); + start = now; + + if(_polling_state != PollingState::HOT_ALWAYS && time_passed >= timeout) { + _polling_state = PollingState::WARM; + // FIXME: can we miss an event here? + SPDLOG_DEBUG("Switching to warm polling after {} us with no invocations", time_passed); + } + i = 0; + } + } + } + + void VerbsThread::hot(int timeout) + { + //rdmalib::Benchmarker<1> server_processing_times{max_repetitions}; + SPDLOG_DEBUG("Thread {} Begins hot polling", id); + auto start = std::chrono::high_resolution_clock::now(); + int i = 0; + while(repetitions < max_repetitions && !SignalHandler::closing) { + + // if we block, we never handle the interruption + auto wcs = wc_buffer.poll(); + if(std::get<1>(wcs)) { + for(int i = 0; i < std::get<1>(wcs); ++i) { + //_perf.point(); + //server_processing_times.start(); ibv_wc* wc = &std::get<0>(wcs)[i]; if(wc->status) { spdlog::error("Failed work completion! 
Reason: {}", ibv_wc_status_str(wc->status)); @@ -123,7 +196,6 @@ namespace server { int func_id = info & invocation_mask; int invoc_id = info >> 16; bool solicited = info & solicited_mask; - #endif SPDLOG_DEBUG( "Thread {} Invoc id {} Execute func {} Repetition {}", id, invoc_id, func_id, repetitions @@ -131,15 +203,9 @@ namespace server { //_perf.point(1); // Measure hot polling time until we started execution auto now = std::chrono::high_resolution_clock::now(); - #ifdef USE_LIBFABRIC auto func_end = work(invoc_id, func_id, solicited, - len - rdmalib::functions::Submission::DATA_HEADER_SIZE + wc->byte_len - Submission_t::DATA_HEADER_SIZE ); - #else - auto func_end = work(invoc_id, func_id, solicited, - wc->byte_len - rdmalib::functions::Submission::DATA_HEADER_SIZE - ); - #endif _accounting.update_polling_time(start, now); i = 0; start = func_end; @@ -149,9 +215,7 @@ namespace server { repetitions += 1; //_perf.point(7); } - #ifndef USE_LIBFABRIC wc_buffer.refill(); - #endif //_perf.point(8); } ++i; @@ -166,9 +230,7 @@ namespace server { if(_polling_state != PollingState::HOT_ALWAYS && time_passed >= timeout) { _polling_state = PollingState::WARM; // FIXME: can we miss an event here? 
- #ifndef USE_LIBFABRIC conn->notify_events(); - #endif SPDLOG_DEBUG("Switching to warm polling after {} us with no invocations", time_passed); return; } @@ -177,7 +239,7 @@ namespace server { } } - void Thread::warm() + void LibfabricThread::warm() { //rdmalib::Benchmarker<1> server_processing_times{max_repetitions}; // FIXME: this should be automatic @@ -186,22 +248,57 @@ namespace server { while(repetitions < max_repetitions && !SignalHandler::closing) { // if we block, we never handle the interruption - #ifdef USE_LIBFABRIC auto wcs = conn->poll_wc(rdmalib::QueueType::RECV, false, -1, true); - #else - auto wcs = wc_buffer.poll(); - #endif if(std::get<1>(wcs)) { for(int i = 0; i < std::get<1>(wcs); ++i) { //server_processing_times.start(); - #ifdef USE_LIBFABRIC fi_cq_data_entry* wc = &std::get<0>(wcs)[i]; int func_id = wc->data & invocation_mask; int invoc_id = (wc->data >> 16) & 0x0000FFFF; bool solicited = wc->data & solicited_mask; int len = wc->data >> 32; - #else + SPDLOG_DEBUG( + "Thread {} Invoc id {} Execute func {} Repetition {}", + id, invoc_id, func_id, repetitions + ); + + work(invoc_id, func_id, solicited, len - Submission_t::DATA_HEADER_SIZE); + + //sum += server_processing_times.end(); + conn->poll_wc(rdmalib::QueueType::SEND, true); + repetitions += 1; + } + if(_polling_state != PollingState::WARM_ALWAYS) { + SPDLOG_DEBUG("Switching to hot polling after invocation!"); + _polling_state = PollingState::HOT; + return; + } + } + + // Do waiting after a single polling - avoid missing an events that + // arrived before we called notify_events + if(repetitions < max_repetitions && !SignalHandler::closing) { + rdmalib::impl::expect_zero(conn->wait_events()); + } + } + SPDLOG_DEBUG("Thread {} Stopped warm polling", id); + } + + void VerbsThread::warm() + { + //rdmalib::Benchmarker<1> server_processing_times{max_repetitions}; + // FIXME: this should be automatic + SPDLOG_DEBUG("Thread {} Begins warm polling", id); + + while(repetitions < max_repetitions 
&& !SignalHandler::closing) { + + // if we block, we never handle the interruption + auto wcs = wc_buffer.poll(); + if(std::get<1>(wcs)) { + for(int i = 0; i < std::get<1>(wcs); ++i) { + + //server_processing_times.start(); ibv_wc* wc = &std::get<0>(wcs)[i]; if(wc->status) { spdlog::error("Failed work completion! Reason: {}", ibv_wc_status_str(wc->status)); @@ -211,25 +308,18 @@ namespace server { int func_id = info & invocation_mask; bool solicited = info & solicited_mask; int invoc_id = info >> 16; - #endif SPDLOG_DEBUG( "Thread {} Invoc id {} Execute func {} Repetition {}", id, invoc_id, func_id, repetitions ); - #ifdef USE_LIBFABRIC - work(invoc_id, func_id, solicited, len - rdmalib::functions::Submission::DATA_HEADER_SIZE); - #else - work(invoc_id, func_id, solicited, wc->byte_len - rdmalib::functions::Submission::DATA_HEADER_SIZE); - #endif + work(invoc_id, func_id, solicited, wc->byte_len - Submission_t::DATA_HEADER_SIZE); //sum += server_processing_times.end(); conn->poll_wc(rdmalib::QueueType::SEND, true); repetitions += 1; } - #ifndef USE_LIBFABRIC wc_buffer.refill(); - #endif if(_polling_state != PollingState::WARM_ALWAYS) { SPDLOG_DEBUG("Switching to hot polling after invocation!"); _polling_state = PollingState::HOT; @@ -240,45 +330,107 @@ namespace server { // Do waiting after a single polling - avoid missing an events that // arrived before we called notify_events if(repetitions < max_repetitions && !SignalHandler::closing) { - #ifdef USE_LIBFABRIC - rdmalib::impl::expect_zero(conn->wait_events()); - #else auto cq = conn->wait_events(); conn->ack_events(cq, 1); conn->notify_events(); - #endif } } SPDLOG_DEBUG("Thread {} Stopped warm polling", id); } - void Thread::thread_work(int timeout) + void LibfabricThread::thread_work(int timeout) { - rdmalib::RDMAActive mgr_connection(_mgr_conn.addr, _mgr_conn.port, wc_buffer._rcv_buf_size, max_inline_data); + RDMAActive_t mgr_connection(_mgr_conn.addr, _mgr_conn.port, wc_buffer._rcv_buf_size, 
max_inline_data); mgr_connection.allocate(); this->_mgr_connection = &mgr_connection.connection(); - #ifdef USE_LIBFABRIC _accounting_buf.register_memory(mgr_connection.pd(), FI_READ | FI_WRITE | FI_REMOTE_WRITE); - #else - _accounting_buf.register_memory(mgr_connection.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_ATOMIC); - #endif if(!mgr_connection.connect(_mgr_conn.secret)) return; spdlog::info("Thread {} Established connection to the manager!", id); // FIXME: why rdmaactive needs rcv_buf_size? - rdmalib::RDMAActive active(addr, port, wc_buffer._rcv_buf_size, max_inline_data); - rdmalib::Buffer func_buffer(_functions.memory(), _functions.size()); + RDMAActive_t active(addr, port, wc_buffer._rcv_buf_size, max_inline_data); + rdmalib::Buffer func_buffer(_functions.memory(), _functions.size()); active.allocate(); this->conn = &active.connection(); // Receive function data from the client - this WC must be posted first // We do it before connection to ensure that client does not start sending before us - #ifdef USE_LIBFABRIC func_buffer.register_memory(active.pd(), FI_READ | FI_WRITE | FI_REMOTE_WRITE); - #else + this->conn->post_recv(func_buffer); + + // Request notification before connecting - avoid missing a WC! 
+ // Do it only when starting from a warm directly + if(timeout == -1) { + _polling_state = PollingState::HOT_ALWAYS; + } else if(timeout == 0) { + _polling_state = PollingState::WARM_ALWAYS; + } else { + _polling_state = PollingState::HOT; + } + + if(!active.connect()) + return; + + // Now generic receives for function invocations + send.register_memory(active.pd(), FI_WRITE | FI_READ); + rcv.register_memory(active.pd(), FI_WRITE | FI_REMOTE_WRITE); + spdlog::info("Thread {} Established connection to client!", id); + + // Send to the client information about thread buffer + rdmalib::Buffer, Library> buf(1); + buf.register_memory(active.pd(), FI_WRITE | FI_READ); + buf.data()[0].r_addr = rcv.address(); + buf.data()[0].r_key = rcv.rkey(); + SPDLOG_DEBUG("Thread {} Sends buffer details to client! Addr {} rkey {}", id, buf.data()[0].r_addr, buf.data()[0].r_key); + this->conn->post_send(buf, 0, buf.size() <= max_inline_data); + this->conn->poll_wc(rdmalib::QueueType::SEND, true, 1); + SPDLOG_DEBUG("Thread {} Sent buffer details to client!", id); + + // We should have received functions data - just one message + this->conn->poll_wc(rdmalib::QueueType::RECV, true, 1); + _functions.process_library(); + + spdlog::info("Thread {} begins work with timeout {}", id, timeout); + + // FIXME: catch interrupt handler here + while(repetitions < max_repetitions && !SignalHandler::closing) { + if(_polling_state == PollingState::HOT || _polling_state == PollingState::HOT_ALWAYS) + hot(timeout); + else + warm(); + } + + // Submit final accounting information + _accounting.send_updated_execution(_mgr_connection, _accounting_buf, _mgr_conn, true, false); + _accounting.send_updated_polling(_mgr_connection, _accounting_buf, _mgr_conn, true, false); + spdlog::info( + "Thread {} finished work, spent {} ns hot polling and {} ns computation, {} executions.", + id, _accounting.total_hot_polling_time , _accounting.total_execution_time, repetitions + ); + // FIXME: revert after manager starts to 
detect disconnection events + // mgr_connection.disconnect(); + } + + void VerbsThread::thread_work(int timeout) + { + RDMAActive_t mgr_connection(_mgr_conn.addr, _mgr_conn.port, wc_buffer._rcv_buf_size, max_inline_data); + mgr_connection.allocate(); + this->_mgr_connection = &mgr_connection.connection(); + _accounting_buf.register_memory(mgr_connection.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_ATOMIC); + if(!mgr_connection.connect(_mgr_conn.secret)) + return; + spdlog::info("Thread {} Established connection to the manager!", id); + + // FIXME: why rdmaactive needs rcv_buf_size? + RDMAActive_t active(addr, port, wc_buffer._rcv_buf_size, max_inline_data); + rdmalib::Buffer func_buffer(_functions.memory(), _functions.size()); + + active.allocate(); + this->conn = &active.connection(); + // Receive function data from the client - this WC must be posted first + // We do it before connection to ensure that client does not start sending before us func_buffer.register_memory(active.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); - #endif this->conn->post_recv(func_buffer); // Request notification before connecting - avoid missing a WC! 
@@ -290,32 +442,21 @@ namespace server { } else { _polling_state = PollingState::HOT; } - #ifndef USE_LIBFABRIC if(_polling_state == PollingState::WARM_ALWAYS || _polling_state == PollingState::WARM) conn->notify_events(); - #endif if(!active.connect()) return; // Now generic receives for function invocations - #ifdef USE_LIBFABRIC - send.register_memory(active.pd(), FI_WRITE | FI_READ); - rcv.register_memory(active.pd(), FI_WRITE | FI_REMOTE_WRITE); - #else send.register_memory(active.pd(), IBV_ACCESS_LOCAL_WRITE); rcv.register_memory(active.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); this->wc_buffer.connect(this->conn); - #endif spdlog::info("Thread {} Established connection to client!", id); // Send to the client information about thread buffer - rdmalib::Buffer buf(1); - #ifdef USE_LIBFABRIC - buf.register_memory(active.pd(), FI_WRITE | FI_READ); - #else + rdmalib::Buffer, Library> buf(1); buf.register_memory(active.pd(), IBV_ACCESS_LOCAL_WRITE); - #endif buf.data()[0].r_addr = rcv.address(); buf.data()[0].r_key = rcv.rkey(); SPDLOG_DEBUG("Thread {} Sends buffer details to client! 
Addr {} rkey {}", id, buf.data()[0].r_addr, buf.data()[0].r_key); @@ -340,9 +481,7 @@ namespace server { // Submit final accounting information _accounting.send_updated_execution(_mgr_connection, _accounting_buf, _mgr_conn, true, false); _accounting.send_updated_polling(_mgr_connection, _accounting_buf, _mgr_conn, true, false); - #ifndef USE_LIBFABRIC mgr_connection.connection().poll_wc(rdmalib::QueueType::SEND, true, 2); - #endif spdlog::info( "Thread {} finished work, spent {} ns hot polling and {} ns computation, {} executions.", id, _accounting.total_hot_polling_time , _accounting.total_execution_time, repetitions @@ -351,14 +490,15 @@ namespace server { // mgr_connection.disconnect(); } - FastExecutors::FastExecutors(std::string client_addr, int port, + template + FastExecutors::FastExecutors(std::string client_addr, int port, int func_size, int numcores, int msg_size, int recv_buf_size, int max_inline_data, int pin_threads, - const executor::ManagerConnection & mgr_conn + const executor::ManagerConnection & mgr_conn ): _closing(false), _numcores(numcores), @@ -375,13 +515,15 @@ namespace server { ); } - FastExecutors::~FastExecutors() + template + FastExecutors::~FastExecutors() { spdlog::info("FastExecutor is closing threads..."); close(); } - void FastExecutors::close() + template + void FastExecutors::close() { if(_closing) return; @@ -411,13 +553,14 @@ namespace server { _closing = true; } - void FastExecutors::allocate_threads(int timeout, int iterations) + template + void FastExecutors::allocate_threads(int timeout, int iterations) { int pin_threads = _pin_threads; for(int i = 0; i < _numcores; ++i) { _threads_data[i].max_repetitions = iterations; _threads.emplace_back( - &Thread::thread_work, + &Thread_t::thread_work, &_threads_data[i], timeout ); diff --git a/server/executor/fast_executor.hpp b/server/executor/fast_executor.hpp index fbeac6a..d4b4c10 100644 --- a/server/executor/fast_executor.hpp +++ b/server/executor/fast_executor.hpp @@ -17,6 +17,7 
@@ #include "functions.hpp" #include "common.hpp" +#include "structures.hpp" #include using namespace std::chrono_literals; @@ -107,11 +108,12 @@ namespace server { }; // FIXME: is not movable or copyable at the moment - template + template struct Thread { using Connection_t = typename rdmalib::rdmalib_traits::Connection; using RecvBuffer_t = typename rdmalib::rdmalib_traits::RecvBuffer; using Submission_t = typename rdmalib::rdmalib_traits::Submission; + using RDMAActive_t = typename rdmalib::rdmalib_traits::RDMAActive; constexpr static int invocation_mask = 0x00007FFF; constexpr static int solicited_mask = 0x00008000; @@ -157,8 +159,48 @@ namespace server { { } + typename Accounting::timepoint_t work(int invoc_id, int func_id, bool solicited, uint32_t in_size) + { + return static_cast(this)->work(invoc_id, func_id, solicited, in_size); + } + void hot(uint32_t hot_timeout) + { + static_cast(this)->hot(hot_timeout); + } + void warm() + { + static_cast(this)->warm(); + } + void thread_work(int timeout) + { + static_cast(this)->thread_work(timeout); + } + }; + + struct LibfabricThread : Thread { + using Library = libfabric; + LibfabricThread(std::string addr_, int port_, int id_, int functions_size, + int buf_size, int recv_buffer_size, int max_inline_data_, + const executor::ManagerConnection & mgr_conn): + Thread(addr_, port_, id_, functions_size, buf_size, recv_buffer_size, max_inline_data_, + _mgr_conn) {} + + typename Accounting::timepoint_t work(int invoc_id, int func_id, bool solicited, uint32_t in_size); + void hot(int timeout); + void warm(); + void thread_work(int timeout); + }; + + struct VerbsThread : Thread { + using Library = ibverbs; + VerbsThread(std::string addr_, int port_, int id_, int functions_size, + int buf_size, int recv_buffer_size, int max_inline_data_, + const executor::ManagerConnection & mgr_conn): + Thread(addr_, port_, id_, functions_size, buf_size, recv_buffer_size, max_inline_data_, + _mgr_conn) {} + typename Accounting::timepoint_t 
work(int invoc_id, int func_id, bool solicited, uint32_t in_size); - void hot(uint32_t hot_timeout); + void hot(int timeout); void warm(); void thread_work(int timeout); }; @@ -166,7 +208,8 @@ namespace server { template struct FastExecutors { - std::vector> _threads_data; + using Thread_t = typename server_traits::Thread; + std::vector _threads_data; std::vector _threads; bool _closing; int _numcores; diff --git a/server/structures.hpp b/server/structures.hpp index 0678d97..65310f8 100644 --- a/server/structures.hpp +++ b/server/structures.hpp @@ -5,16 +5,33 @@ #include "rdmalib/connection.hpp" #include -#include -#include -#include +#include namespace server { + template struct ThreadStatus { + using Connection_t = typename rdmalib::rdmalib_traits::Connection; rdmalib::functions::FuncType func; uint32_t invoc_id; - rdmalib::Connection * connection; + Connection_t * connection; + }; + + template + struct server_traits; + + // Forward declare + struct LibfabricThread; + struct VerbsThread; + + template <> + struct server_traits { + using Thread = LibfabricThread; + }; + + template <> + struct server_traits { + using Thread = VerbsThread; }; } From 6394a14fd2b203c08006adbffb6e25ac9d3ad4ff Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Fri, 22 Sep 2023 17:46:50 +0000 Subject: [PATCH 88/91] Executor builds now --- rdmalib/include/rdmalib/connection.hpp | 5 +++-- rdmalib/include/rdmalib/rdmalib.hpp | 2 +- rdmalib/include/rdmalib/util.hpp | 10 ++++++++++ rdmalib/lib/connection.cpp | 7 +++++++ server/executor/fast_executor.cpp | 22 +++++++++++++++------- server/executor/fast_executor.hpp | 6 +++++- 6 files changed, 41 insertions(+), 11 deletions(-) diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index 4881cca..6641cb9 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -60,7 +60,6 @@ namespace rdmalib { using id_t = typename library_traits::id_t; using channel_t = typename 
library_traits::channel_t; using ScatterGatherElement_t = typename ::rdmalib::rdmalib_traits::ScatterGatherElement; - //using RemoteBuffer_t = RemoteBuffer; using RemoteBuffer_t = typename ::rdmalib::rdmalib_traits::RemoteBuffer; qp_t _qp; @@ -137,6 +136,7 @@ namespace rdmalib { struct LibfabricConnection : Connection { + using Library = libfabric; template using Buffer = Buffer; @@ -208,6 +208,7 @@ namespace rdmalib { struct VerbsConnection : Connection { + using Library = ibverbs; id_t _id; channel_t _channel; @@ -235,6 +236,7 @@ namespace rdmalib { int32_t post_recv(ScatterGatherElement_t && elem, int32_t id, int count=1); int32_t post_cas(ScatterGatherElement_t && elems, const RemoteBuffer_t & rbuf, uint64_t compare, uint64_t swap); int32_t post_atomic_fadd(ScatterGatherElement_t && elems, const RemoteBuffer_t & rbuf, uint64_t add); + int32_t post_atomic_fadd(const Buffer & _accounting_buf, const RemoteBuffer_t & rbuf, uint64_t add); void notify_events(bool only_solicited = false); ibv_cq* wait_events(); @@ -242,7 +244,6 @@ namespace rdmalib { int32_t _post_write(ScatterGatherElement_t && elems, ibv_send_wr wr, bool force_inline, bool force_solicited); int32_t post_write(ScatterGatherElement_t && elems, const RemoteBuffer_t & rbuf, bool force_inline); - //int32_t post_write(ScatterGatherElement_t && elems, const RemoteBuffer_t & rbuf, bool force_inline); // TODO experiment std::tuple poll_wc(QueueType type, bool blocking=true, int count=-1); diff --git a/rdmalib/include/rdmalib/rdmalib.hpp b/rdmalib/include/rdmalib/rdmalib.hpp index 0c82cc1..cde70a9 100644 --- a/rdmalib/include/rdmalib/rdmalib.hpp +++ b/rdmalib/include/rdmalib/rdmalib.hpp @@ -115,7 +115,7 @@ namespace rdmalib { { return static_cast(this)->pd(); } - Connection_t & connection() { return _conn; } + Connection_t & connection() { return *_conn; } bool is_connected() { return _conn.get(); } }; diff --git a/rdmalib/include/rdmalib/util.hpp b/rdmalib/include/rdmalib/util.hpp index ec68a6c..7c635a7 
100644 --- a/rdmalib/include/rdmalib/util.hpp +++ b/rdmalib/include/rdmalib/util.hpp @@ -15,6 +15,16 @@ namespace rdmalib { namespace impl { template void expect_zero(U && u) + { + if(u) { + spdlog::error("Expected zero, found: {}, errno {}, message {}", u, errno, strerror(errno)); + traceback(); + } + assert(!u); + } + + template + void expect_zero_verbose(U && u) { if(u) { #ifdef USE_LIBFABRIC diff --git a/rdmalib/lib/connection.cpp b/rdmalib/lib/connection.cpp index faa41cb..4eec27d 100644 --- a/rdmalib/lib/connection.cpp +++ b/rdmalib/lib/connection.cpp @@ -626,6 +626,13 @@ namespace rdmalib { return _req_count - 1; } + int32_t VerbsConnection::post_atomic_fadd(const Buffer & _accounting_buf, const RemoteBuffer_t & rbuf, uint64_t add) + { + ScatterGatherElement_t accounting_sge; + accounting_sge.add(_accounting_buf, _accounting_buf.size()); + return post_atomic_fadd(std::move(accounting_sge), rbuf, add); + } + std::tuple LibfabricConnection::poll_wc(QueueType type, bool blocking, int count, bool update) { int ret = 0; diff --git a/server/executor/fast_executor.cpp b/server/executor/fast_executor.cpp index 03e6134..2a551d2 100644 --- a/server/executor/fast_executor.cpp +++ b/server/executor/fast_executor.cpp @@ -100,9 +100,9 @@ namespace server { conn->post_write( send.sge(out_size, 0), {header->r_address, header->r_key}, - (invoc_id << 16) | 0, - out_size <= max_inline_data, - solicited + //(invoc_id << 16) | 0, + out_size <= max_inline_data + //solicited ); //_perf.point(4); auto end = std::chrono::high_resolution_clock::now(); @@ -357,7 +357,9 @@ namespace server { // Receive function data from the client - this WC must be posted first // We do it before connection to ensure that client does not start sending before us func_buffer.register_memory(active.pd(), FI_READ | FI_WRITE | FI_REMOTE_WRITE); - this->conn->post_recv(func_buffer); + ScatterGatherElement_t func_sge; + func_sge.add(func_buffer, func_buffer.size()); + 
this->conn->post_recv(std::move(func_sge),-1,1); // Request notification before connecting - avoid missing a WC! // Do it only when starting from a warm directly @@ -379,11 +381,13 @@ namespace server { // Send to the client information about thread buffer rdmalib::Buffer, Library> buf(1); + ScatterGatherElement_t buf_sge; buf.register_memory(active.pd(), FI_WRITE | FI_READ); buf.data()[0].r_addr = rcv.address(); buf.data()[0].r_key = rcv.rkey(); SPDLOG_DEBUG("Thread {} Sends buffer details to client! Addr {} rkey {}", id, buf.data()[0].r_addr, buf.data()[0].r_key); - this->conn->post_send(buf, 0, buf.size() <= max_inline_data); + buf_sge.add(buf, buf.size()); + this->conn->post_send(std::move(buf_sge), 0, buf.size() <= max_inline_data); this->conn->poll_wc(rdmalib::QueueType::SEND, true, 1); SPDLOG_DEBUG("Thread {} Sent buffer details to client!", id); @@ -431,7 +435,9 @@ namespace server { // Receive function data from the client - this WC must be posted first // We do it before connection to ensure that client does not start sending before us func_buffer.register_memory(active.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); - this->conn->post_recv(func_buffer); + ScatterGatherElement_t func_buf_sge; + func_buf_sge.add(func_buffer, func_buffer.size()); + this->conn->post_recv(std::move(func_buf_sge),-1); // Request notification before connecting - avoid missing a WC! // Do it only when starting from a warm directly @@ -460,7 +466,9 @@ namespace server { buf.data()[0].r_addr = rcv.address(); buf.data()[0].r_key = rcv.rkey(); SPDLOG_DEBUG("Thread {} Sends buffer details to client! 
Addr {} rkey {}", id, buf.data()[0].r_addr, buf.data()[0].r_key); - this->conn->post_send(buf, 0, buf.size() <= max_inline_data); + ScatterGatherElement_t buf_sge; + buf_sge.add(buf, buf.size()); + this->conn->post_send(std::move(buf_sge), 0, buf.size() <= max_inline_data); this->conn->poll_wc(rdmalib::QueueType::SEND, true, 1); SPDLOG_DEBUG("Thread {} Sent buffer details to client!", id); diff --git a/server/executor/fast_executor.hpp b/server/executor/fast_executor.hpp index d4b4c10..50e8df7 100644 --- a/server/executor/fast_executor.hpp +++ b/server/executor/fast_executor.hpp @@ -28,6 +28,7 @@ namespace server { struct Accounting { using Connection_t = typename rdmalib::rdmalib_traits::Connection; using RecvBuffer_t = typename rdmalib::rdmalib_traits::RecvBuffer; + using ScatterGatherElement_t = typename rdmalib::rdmalib_traits::ScatterGatherElement; typedef std::chrono::high_resolution_clock clock_t; typedef std::chrono::time_point timepoint_t; @@ -53,8 +54,10 @@ namespace server { ) { if(force || execution_time > BILLING_GRANULARITY) { + // ScatterGatherElement_t accounting_sge; + // accounting_sge.add(_accounting_buf, _accounting_buf.size()); mgr_connection->post_atomic_fadd( - _accounting_buf, + _accounting_buf, // Give raw buff here { _mgr_conn.r_addr + 8, _mgr_conn.r_key}, execution_time ); @@ -114,6 +117,7 @@ namespace server { using RecvBuffer_t = typename rdmalib::rdmalib_traits::RecvBuffer; using Submission_t = typename rdmalib::rdmalib_traits::Submission; using RDMAActive_t = typename rdmalib::rdmalib_traits::RDMAActive; + using ScatterGatherElement_t = typename rdmalib::rdmalib_traits::ScatterGatherElement; constexpr static int invocation_mask = 0x00007FFF; constexpr static int solicited_mask = 0x00008000; From 2ee3afdfe8207648c46fa650eeb0b696af107faa Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Mon, 25 Sep 2023 01:22:41 +0000 Subject: [PATCH 89/91] Added debug script --- scripts/run_debug.sh | 22 ++++++++++++++++++++++ 1 file changed, 22 
insertions(+) create mode 100755 scripts/run_debug.sh diff --git a/scripts/run_debug.sh b/scripts/run_debug.sh new file mode 100755 index 0000000..0beebba --- /dev/null +++ b/scripts/run_debug.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +# run the server with ./scripts/run_debug.sh server [debug] +# run the client (a warm benchmarker) with ./scripts/run_debug.sh bench [debug] + +cmake --build . + +cmd='' + +if [[ "$1" == "server" ]]; then + cmd="./bin/executor_manager -c config/executor_manager.json --device-database config/devices.json --skip-resource-manager -v" +elif [[ "$1" == "bench" ]]; then + cmd="./benchmarks/warm_benchmarker --config config/benchmark.json --device-database config/devices.json --name empty --functions ./examples/libfunctions.so --executors-database config/executors_database.json -s 1000 -v" +fi + +if [[ "$2" == "debug" ]]; then + cmd="gdb --args $cmd" +fi + +final="PATH=$PATH:bin/ $cmd" +eval "$final" + From 6ed92094802d60a9e3c7e5003aa2df724162ad00 Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Mon, 25 Sep 2023 01:23:08 +0000 Subject: [PATCH 90/91] [breaking] Moved template code to headers --- rdmalib/include/rdmalib/buffer.hpp | 118 ++++++++++++++++++- rdmalib/include/rdmalib/connection.hpp | 46 ++++++++ rdmalib/include/rdmalib/rdmalib.hpp | 34 ++++++ rdmalib/lib/buffer.cpp | 106 ----------------- rdmalib/lib/connection.cpp | 47 -------- rdmalib/lib/rdmalib.cpp | 14 +-- rdmalib/lib/server.cpp | 36 ------ rfaas/include/rfaas/connection.hpp | 73 ++++++++++++ rfaas/include/rfaas/executor.hpp | 36 ++++++ rfaas/lib/connection.cpp | 72 ----------- rfaas/lib/executor.cpp | 36 ------ server/executor/cli.cpp | 2 + server/executor/fast_executor.cpp | 88 -------------- server/executor/fast_executor.hpp | 88 ++++++++++++++ server/executor/{opts.cpp => opts.hpp} | 2 +- server/executor_manager/executor_process.cpp | 4 +- 16 files changed, 396 insertions(+), 406 deletions(-) rename server/executor/{opts.cpp => opts.hpp} (99%) diff --git 
a/rdmalib/include/rdmalib/buffer.hpp b/rdmalib/include/rdmalib/buffer.hpp index 907965b..9efb618 100644 --- a/rdmalib/include/rdmalib/buffer.hpp +++ b/rdmalib/include/rdmalib/buffer.hpp @@ -2,15 +2,22 @@ #ifndef __RDMALIB_BUFFER_HPP__ #define __RDMALIB_BUFFER_HPP__ +#include + +#include +#include +#include +#include +#include + #include #include #include -#include -#include - #include +#include +#include namespace rdmalib { @@ -87,7 +94,110 @@ namespace rdmalib ~VerbsBuffer(); }; - } + template + Buffer::Buffer() : _size(0), + _header(0), + _bytes(0), + _byte_size(0), + _ptr(nullptr), + _mr(nullptr), + _own_memory(false) + { + } + + template + Buffer::Buffer(Buffer &&obj) : _size(obj._size), + _header(obj._header), + _bytes(obj._bytes), + _byte_size(obj._byte_size), + _ptr(obj._ptr), + _mr(obj._mr), + _own_memory(obj._own_memory) + { + obj._size = obj._bytes = obj._header = 0; + obj._ptr = obj._mr = nullptr; + } + + template + Buffer &Buffer::operator=(Buffer &&obj) + { + _size = obj._size; + _bytes = obj._bytes; + _bytes = obj._byte_size; + _header = obj._header; + _ptr = obj._ptr; + _mr = obj._mr; + _own_memory = obj._own_memory; + + obj._size = obj._bytes = 0; + obj._ptr = obj._mr = nullptr; + return *this; + } + + template + Buffer::Buffer(uint32_t size, uint32_t byte_size, uint32_t header) : _size(size), + _header(header), + _bytes(size * byte_size + header), + _byte_size(byte_size), + _mr(nullptr), + _own_memory(true) + { + // size_t alloc = _bytes; + // if(alloc < 4096) { + // alloc = 4096; + // spdlog::warn("Page too small, allocating {} bytes", alloc); + // } + // page-aligned address for maximum performance + _ptr = mmap(nullptr, _bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + SPDLOG_DEBUG( + "Allocated {} bytes, address {}", + _bytes, fmt::ptr(_ptr)); + } + + template + Buffer::Buffer(void *ptr, uint32_t size, uint32_t byte_size) : _size(size), + _header(0), + _bytes(size * byte_size), + _byte_size(byte_size), + _ptr(ptr), + 
_mr(nullptr), + _own_memory(false) + { + SPDLOG_DEBUG( + "Allocated {} bytes, address {}", + _bytes, fmt::ptr(_ptr)); + } + template + uint32_t Buffer::data_size() const + { + return this->_size; + } + + template + uint32_t Buffer::size() const + { + return this->_size + this->_header; + } + + template + uint32_t Buffer::bytes() const + { + return this->_bytes; + } + template + uintptr_t Buffer::address() const + { + assert(this->_mr); + return reinterpret_cast(this->_ptr); + } + + template + void *Buffer::ptr() const + { + return this->_ptr; + } + + } /* end impl block */ template struct RemoteBuffer diff --git a/rdmalib/include/rdmalib/connection.hpp b/rdmalib/include/rdmalib/connection.hpp index 6641cb9..2f998d5 100644 --- a/rdmalib/include/rdmalib/connection.hpp +++ b/rdmalib/include/rdmalib/connection.hpp @@ -248,6 +248,52 @@ namespace rdmalib { std::tuple poll_wc(QueueType type, bool blocking=true, int count=-1); }; + + template + void LibfabricConnection::initialize_batched_recv(const rdmalib::impl::Buffer & buf, size_t offset) + { + for(int i = 0; i < _rbatch; i++){ + _rwc_sges[i] = buf.sge(offset, i*offset); + //for(auto & sg : _rwc_sges[i]._sges) + //sg.addr += i*offset; + } + } + + template + void VerbsConnection::initialize_batched_recv(const rdmalib::impl::Buffer & buf, size_t offset) + { + for(int i = 0; i < _rbatch; i++){ + _rwc_sges[i] = buf.sge(offset, i*offset); + //for(auto & sg : _rwc_sges[i]._sges) + //sg.addr += i*offset; + _batch_wrs[i].sg_list = _rwc_sges[i].array(); + _batch_wrs[i].num_sge = _rwc_sges[i].size(); + } + } + + template + uint32_t Connection::private_data() const + { + return this->_private_data; + } + + template + ConnectionStatus Connection::status() const + { + return this->_status; + } + + template + void Connection::set_status(ConnectionStatus status) + { + this->_status = status; + } + + template + void Connection::set_private_data(uint32_t private_data) + { + this->_private_data = private_data; + } } #endif diff --git 
a/rdmalib/include/rdmalib/rdmalib.hpp b/rdmalib/include/rdmalib/rdmalib.hpp index cde70a9..cbe62bb 100644 --- a/rdmalib/include/rdmalib/rdmalib.hpp +++ b/rdmalib/include/rdmalib/rdmalib.hpp @@ -8,6 +8,7 @@ #include #include #include +#include // #ifdef USE_LIBFABRIC #include @@ -23,6 +24,7 @@ extern "C" { #include #include +#include #include namespace rdmalib { @@ -237,6 +239,38 @@ namespace rdmalib { void accept(VerbsConnection* connection); void set_nonblocking_poll(); }; + + namespace server { + + template + ServerStatus::ServerStatus(): + _address(""), + _port(0) + {} + + template + ServerStatus::ServerStatus(std::string address, int port): + _address(address), + _port(port) + {} + + template + ServerStatus ServerStatus::deserialize(std::istream & in) + { + ServerStatus status; + cereal::JSONInputArchive archive_in(in); + archive_in(status); + return status; + } + + template + void ServerStatus::serialize(std::ostream & out) const + { + cereal::JSONOutputArchive archive_out(out); + archive_out(*this); + } + } + } #endif diff --git a/rdmalib/lib/buffer.cpp b/rdmalib/lib/buffer.cpp index 1809d9a..eac7380 100644 --- a/rdmalib/lib/buffer.cpp +++ b/rdmalib/lib/buffer.cpp @@ -17,81 +17,6 @@ namespace rdmalib { namespace impl { - - template - Buffer::Buffer() : _size(0), - _header(0), - _bytes(0), - _byte_size(0), - _ptr(nullptr), - _mr(nullptr), - _own_memory(false) - { - } - - template - Buffer::Buffer(Buffer &&obj) : _size(obj._size), - _header(obj._header), - _bytes(obj._bytes), - _byte_size(obj._byte_size), - _ptr(obj._ptr), - _mr(obj._mr), - _own_memory(obj._own_memory) - { - obj._size = obj._bytes = obj._header = 0; - obj._ptr = obj._mr = nullptr; - } - - template - Buffer &Buffer::operator=(Buffer &&obj) - { - _size = obj._size; - _bytes = obj._bytes; - _bytes = obj._byte_size; - _header = obj._header; - _ptr = obj._ptr; - _mr = obj._mr; - _own_memory = obj._own_memory; - - obj._size = obj._bytes = 0; - obj._ptr = obj._mr = nullptr; - return *this; - } - - 
template - Buffer::Buffer(uint32_t size, uint32_t byte_size, uint32_t header) : _size(size), - _header(header), - _bytes(size * byte_size + header), - _byte_size(byte_size), - _mr(nullptr), - _own_memory(true) - { - // size_t alloc = _bytes; - // if(alloc < 4096) { - // alloc = 4096; - // spdlog::warn("Page too small, allocating {} bytes", alloc); - // } - // page-aligned address for maximum performance - _ptr = mmap(nullptr, _bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); - SPDLOG_DEBUG( - "Allocated {} bytes, address {}", - _bytes, fmt::ptr(_ptr)); - } - - template - Buffer::Buffer(void *ptr, uint32_t size, uint32_t byte_size) : _size(size), - _header(0), - _bytes(size * byte_size), - _byte_size(byte_size), - _ptr(ptr), - _mr(nullptr), - _own_memory(false) - { - SPDLOG_DEBUG( - "Allocated {} bytes, address {}", - _bytes, fmt::ptr(_ptr)); - } - LibfabricBuffer::~LibfabricBuffer() { SPDLOG_DEBUG( @@ -132,24 +57,6 @@ namespace rdmalib _bytes, fmt::ptr(_mr), fmt::ptr(_mr->addr), _mr->lkey, _mr->rkey); } - template - uint32_t Buffer::data_size() const - { - return this->_size; - } - - template - uint32_t Buffer::size() const - { - return this->_size + this->_header; - } - - template - uint32_t Buffer::bytes() const - { - return this->_bytes; - } - LibfabricBuffer::lkey_t LibfabricBuffer::lkey() const { assert(this->_mr); @@ -175,19 +82,6 @@ namespace rdmalib return this->_mr->rkey; } - template - uintptr_t Buffer::address() const - { - assert(this->_mr); - return reinterpret_cast(this->_ptr); - } - - template - void *Buffer::ptr() const - { - return this->_ptr; - } - } } diff --git a/rdmalib/lib/connection.cpp b/rdmalib/lib/connection.cpp index 4eec27d..b1d1740 100644 --- a/rdmalib/lib/connection.cpp +++ b/rdmalib/lib/connection.cpp @@ -117,29 +117,6 @@ namespace rdmalib { _batch_wrs[_rbatch-1].next = NULL; } - - template - void LibfabricConnection::initialize_batched_recv(const rdmalib::impl::Buffer & buf, size_t offset) - { - for(int i = 0; i < 
_rbatch; i++){ - _rwc_sges[i] = buf.sge(offset, i*offset); - //for(auto & sg : _rwc_sges[i]._sges) - //sg.addr += i*offset; - } - } - - template - void VerbsConnection::initialize_batched_recv(const rdmalib::impl::Buffer & buf, size_t offset) - { - for(int i = 0; i < _rbatch; i++){ - _rwc_sges[i] = buf.sge(offset, i*offset); - //for(auto & sg : _rwc_sges[i]._sges) - //sg.addr += i*offset; - _batch_wrs[i].sg_list = _rwc_sges[i].array(); - _batch_wrs[i].num_sge = _rwc_sges[i].size(); - } - } - void LibfabricConnection::initialize(fid_fabric* fabric, fid_domain* pd, fi_info* info, fid_eq* ec, fid_cntr* write_cntr, fid_cq* rx_channel, fid_cq* tx_channel) { // Create the endpoint and set its flags up so that we get completions on RDM @@ -247,30 +224,6 @@ namespace rdmalib { return this->_channel; } - template - uint32_t Connection::private_data() const - { - return this->_private_data; - } - - template - ConnectionStatus Connection::status() const - { - return this->_status; - } - - template - void Connection::set_status(ConnectionStatus status) - { - this->_status = status; - } - - template - void Connection::set_private_data(uint32_t private_data) - { - this->_private_data = private_data; - } - int32_t LibfabricConnection::post_send(const ScatterGatherElement_t & elems, int32_t id, bool force_inline) { // FIXME: extend with multiple sges diff --git a/rdmalib/lib/rdmalib.cpp b/rdmalib/lib/rdmalib.cpp index 22b319b..2f86c5b 100644 --- a/rdmalib/lib/rdmalib.cpp +++ b/rdmalib/lib/rdmalib.cpp @@ -209,7 +209,6 @@ namespace rdmalib { LibfabricAddress::~LibfabricAddress() { - #ifdef USE_LIBFABRIC // TODO Check how to free those and if it's necessary at all. // When closing the addringo we obtain a double free or corruption problem. // It seems that the problem is coming from the the ep_attr. 
@@ -217,21 +216,10 @@ namespace rdmalib { // impl::expect_zero(fi_close(&fabric->fid)); // if (addrinfo) // fi_freeinfo(addrinfo); - #endif } VerbsAddress::~VerbsAddress() { - #ifdef USE_LIBFABRIC - // TODO Check how to free those and if it's necessary at all. - // When closing the addringo we obtain a double free or corruption problem. - // It seems that the problem is coming from the the ep_attr. - // if (fabric) - // impl::expect_zero(fi_close(&fabric->fid)); - // if (addrinfo) - // fi_freeinfo(addrinfo); - #else rdma_freeaddrinfo(addrinfo); - #endif } LibfabricRDMAActive::LibfabricRDMAActive(const std::string & ip, int port, int recv_buf, int max_inline_data): @@ -460,7 +448,7 @@ namespace rdmalib { void VerbsRDMAActive::disconnect() { - spdlog::debug("[RDMAActive] Disonnecting connection with id {}", fmt::ptr(_conn->id())); + spdlog::debug("[RDMAActive] Disonnecting connection with id {}", fmt::ptr(_conn->id())); impl::expect_zero(rdma_disconnect(_conn->id())); _conn.reset(); _pd = nullptr; diff --git a/rdmalib/lib/server.cpp b/rdmalib/lib/server.cpp index d37dd1d..e69de29 100644 --- a/rdmalib/lib/server.cpp +++ b/rdmalib/lib/server.cpp @@ -1,36 +0,0 @@ - -#include - -#include - -namespace rdmalib { namespace server { - - template - ServerStatus::ServerStatus(): - _address(""), - _port(0) - {} - - template - ServerStatus::ServerStatus(std::string address, int port): - _address(address), - _port(port) - {} - - template - ServerStatus ServerStatus::deserialize(std::istream & in) - { - ServerStatus status; - cereal::JSONInputArchive archive_in(in); - archive_in(status); - return status; - } - - template - void ServerStatus::serialize(std::ostream & out) const - { - cereal::JSONOutputArchive archive_out(out); - archive_out(*this); - } - -}} diff --git a/rfaas/include/rfaas/connection.hpp b/rfaas/include/rfaas/connection.hpp index 758af91..dc8dac6 100644 --- a/rfaas/include/rfaas/connection.hpp +++ b/rfaas/include/rfaas/connection.hpp @@ -38,6 +38,79 @@ namespace 
rfaas { bool submit(); }; + template + manager_connection::manager_connection(std::string address, int port, + int rcv_buf, int max_inline_data): + _address(address), + _port(port), + _active(_address, _port, rcv_buf), + _rcv_buffer(rcv_buf), + _allocation_buffer(rcv_buf + 1), + _max_inline_data(max_inline_data) + { + _active.allocate(); + } + + template + bool manager_connection::connect() + { + SPDLOG_DEBUG("Connecting to manager at {}:{}", _address, _port); + bool ret = _active.connect(); + if(!ret) { + spdlog::error("Couldn't connect to manager at {}:{}", _address, _port); + return false; + } + #ifdef USE_LIBFABRIC + _allocation_buffer.register_memory(_active.pd(), FI_WRITE | FI_REMOTE_WRITE); + #else + _allocation_buffer.register_memory(_active.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); + #endif + // Initialize batch receive WCs + _active.connection().initialize_batched_recv(_allocation_buffer, sizeof(rdmalib::AllocationRequest)); + _rcv_buffer.connect(&_active.connection()); + return ret; + } + + template + void manager_connection::disconnect() + { + SPDLOG_DEBUG("Disconnecting from manager at {}:{}", _address, _port); + // Send deallocation request only if we're connected + if(_active.is_connected()) { + request() = (rdmalib::AllocationRequest) {-1, 0, 0, 0, 0, 0, 0, ""}; + ScatterGatherElement_t sge; + size_t obj_size = sizeof(rdmalib::AllocationRequest); + sge.add(_allocation_buffer, obj_size, obj_size*_rcv_buffer._rcv_buf_size); + _active.connection().post_send(sge); + _active.connection().poll_wc(rdmalib::QueueType::SEND, true); + _active.disconnect(); + } + } + + template + typename manager_connection::Connection_t & manager_connection::connection() + { + return _active.connection(); + } + + template + rdmalib::AllocationRequest & manager_connection::request() + { + return *(_allocation_buffer.data() + _rcv_buffer._rcv_buf_size); + } + + template + bool manager_connection::submit() + { + ScatterGatherElement_t sge; + size_t obj_size = 
sizeof(rdmalib::AllocationRequest); + sge.add(_allocation_buffer, obj_size, obj_size*_rcv_buffer._rcv_buf_size); + _active.connection().post_send(sge); + _active.connection().poll_wc(rdmalib::QueueType::SEND, true); + // FIXME: check failure + return true; + } + } #endif diff --git a/rfaas/include/rfaas/executor.hpp b/rfaas/include/rfaas/executor.hpp index 4ff223d..5c73e8f 100644 --- a/rfaas/include/rfaas/executor.hpp +++ b/rfaas/include/rfaas/executor.hpp @@ -698,6 +698,42 @@ namespace rfaas { } }; + template + executor_state::executor_state(Connection_t* conn, int rcv_buf_size): + conn(conn), + _rcv_buffer(rcv_buf_size) + { + } + + template + executor::executor(std::string address, int port, int rcv_buf_size, int max_inlined_msg): + _state(address, port, rcv_buf_size + 1), + _rcv_buffer(rcv_buf_size), + _execs_buf(MAX_REMOTE_WORKERS), + _address(address), + _port(port), + _rcv_buf_size(rcv_buf_size), + _executions(0), + _max_inlined_msg(max_inlined_msg), + _perf(1000) + { + events = 0; + _active_polling = false; + _end_requested = false; + } + + template + executor::executor(device_data & dev): + executor(dev.ip_address, dev.port, dev.default_receive_buffer_size, dev.max_inline_data) + {} + + template + executor::~executor() + { + this->deallocate(); + _perf.export_csv("client_perf.csv", {"start", "function parsed", "function post written", "buffer refilled", "received result", "parsed result", "catched unlikely case", "polled send"}); + } + } #endif diff --git a/rfaas/lib/connection.cpp b/rfaas/lib/connection.cpp index cd096c5..4dd7f52 100644 --- a/rfaas/lib/connection.cpp +++ b/rfaas/lib/connection.cpp @@ -11,78 +11,6 @@ namespace rfaas { - template - manager_connection::manager_connection(std::string address, int port, - int rcv_buf, int max_inline_data): - _address(address), - _port(port), - _active(_address, _port, rcv_buf), - _rcv_buffer(rcv_buf), - _allocation_buffer(rcv_buf + 1), - _max_inline_data(max_inline_data) - { - _active.allocate(); - } - - 
template - bool manager_connection::connect() - { - SPDLOG_DEBUG("Connecting to manager at {}:{}", _address, _port); - bool ret = _active.connect(); - if(!ret) { - spdlog::error("Couldn't connect to manager at {}:{}", _address, _port); - return false; - } - #ifdef USE_LIBFABRIC - _allocation_buffer.register_memory(_active.pd(), FI_WRITE | FI_REMOTE_WRITE); - #else - _allocation_buffer.register_memory(_active.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); - #endif - // Initialize batch receive WCs - _active.connection().initialize_batched_recv(_allocation_buffer, sizeof(rdmalib::AllocationRequest)); - _rcv_buffer.connect(&_active.connection()); - return ret; - } - - template - void manager_connection::disconnect() - { - SPDLOG_DEBUG("Disconnecting from manager at {}:{}", _address, _port); - // Send deallocation request only if we're connected - if(_active.is_connected()) { - request() = (rdmalib::AllocationRequest) {-1, 0, 0, 0, 0, 0, 0, ""}; - ScatterGatherElement_t sge; - size_t obj_size = sizeof(rdmalib::AllocationRequest); - sge.add(_allocation_buffer, obj_size, obj_size*_rcv_buffer._rcv_buf_size); - _active.connection().post_send(sge); - _active.connection().poll_wc(rdmalib::QueueType::SEND, true); - _active.disconnect(); - } - } - - template - typename manager_connection::Connection_t & manager_connection::connection() - { - return _active.connection(); - } - - template - rdmalib::AllocationRequest & manager_connection::request() - { - return *(_allocation_buffer.data() + _rcv_buffer._rcv_buf_size); - } - - template - bool manager_connection::submit() - { - ScatterGatherElement_t sge; - size_t obj_size = sizeof(rdmalib::AllocationRequest); - sge.add(_allocation_buffer, obj_size, obj_size*_rcv_buffer._rcv_buf_size); - _active.connection().post_send(sge); - _active.connection().poll_wc(rdmalib::QueueType::SEND, true); - // FIXME: check failure - return true; - } } diff --git a/rfaas/lib/executor.cpp b/rfaas/lib/executor.cpp index 3c83da0..98b7ec0 
100644 --- a/rfaas/lib/executor.cpp +++ b/rfaas/lib/executor.cpp @@ -34,30 +34,6 @@ namespace rfaas { return _timeout; } - template - executor_state::executor_state(Connection_t* conn, int rcv_buf_size): - conn(conn), - _rcv_buffer(rcv_buf_size) - { - } - - template - executor::executor(std::string address, int port, int rcv_buf_size, int max_inlined_msg): - _state(address, port, rcv_buf_size + 1), - _rcv_buffer(rcv_buf_size), - _execs_buf(MAX_REMOTE_WORKERS), - _address(address), - _port(port), - _rcv_buf_size(rcv_buf_size), - _executions(0), - _max_inlined_msg(max_inlined_msg), - _perf(1000) - { - events = 0; - _active_polling = false; - _end_requested = false; - } - libfabric_executor::libfabric_executor(std::string address, int port, int rcv_buf_size, int max_inlined_msg): executor(address, port, rcv_buf_size, max_inlined_msg) { @@ -72,18 +48,6 @@ namespace rfaas { _execs_buf.register_memory(_state.pd(), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); } - template - executor::executor(device_data & dev): - executor(dev.ip_address, dev.port, dev.default_receive_buffer_size, dev.max_inline_data) - {} - - template - executor::~executor() - { - this->deallocate(); - _perf.export_csv("client_perf.csv", {"start", "function parsed", "function post written", "buffer refilled", "received result", "parsed result", "catched unlikely case", "polled send"}); - } - rdmalib::Buffer libfabric_executor::load_library(std::string path) { _func_names.clear(); diff --git a/server/executor/cli.cpp b/server/executor/cli.cpp index 44f39c5..dc6d928 100644 --- a/server/executor/cli.cpp +++ b/server/executor/cli.cpp @@ -11,9 +11,11 @@ #include #include +#include #include "rdmalib/connection.hpp" #include "server.hpp" #include "fast_executor.hpp" +#include "opts.hpp" int main(int argc, char ** argv) { diff --git a/server/executor/fast_executor.cpp b/server/executor/fast_executor.cpp index 2a551d2..66caf56 100644 --- a/server/executor/fast_executor.cpp +++ 
b/server/executor/fast_executor.cpp @@ -498,94 +498,6 @@ namespace server { // mgr_connection.disconnect(); } - template - FastExecutors::FastExecutors(std::string client_addr, int port, - int func_size, - int numcores, - int msg_size, - int recv_buf_size, - int max_inline_data, - int pin_threads, - const executor::ManagerConnection & mgr_conn - ): - _closing(false), - _numcores(numcores), - _max_repetitions(0), - _pin_threads(pin_threads) - //_mgr_conn(mgr_conn) - { - // Reserve place to ensure that no reallocations happen - _threads_data.reserve(numcores); - for(int i = 0; i < numcores; ++i) - _threads_data.emplace_back( - client_addr, port, i, func_size, msg_size, - recv_buf_size, max_inline_data, mgr_conn - ); - } - - template - FastExecutors::~FastExecutors() - { - spdlog::info("FastExecutor is closing threads..."); - close(); - } - - template - void FastExecutors::close() - { - if(_closing) - return; - // FIXME: this should be only for 'warm' - //{ - // std::lock_guard g(m); - // _closing = true; - // // wake threads, letting them exit - // wakeup(); - //} - // make sure we join before destructing - SPDLOG_DEBUG("Wait on {} threads", _threads.size()); - for(auto & thread : _threads) - // Might have been closed earlier - if(thread.joinable()) - thread.join(); - SPDLOG_DEBUG("Finished wait on {} threads", _threads.size()); - - for(auto & thread : _threads_data) { - thread._perf.export_csv("executor_perf.csv", {"found request", "parsed request", "obtained the header and function", "finished executing", "results post written", "accounting updated", "polling accounting updated", "send queue polled", "buffer refilled"}); - spdlog::info("Thread {} Repetitions {} Avg time {} ms", - thread.id, - thread.repetitions, - static_cast(thread._accounting.total_execution_time) / thread.repetitions / 1000.0 - ); - } - _closing = true; - } - - template - void FastExecutors::allocate_threads(int timeout, int iterations) - { - int pin_threads = _pin_threads; - for(int i = 0; i < 
_numcores; ++i) { - _threads_data[i].max_repetitions = iterations; - _threads.emplace_back( - &Thread_t::thread_work, - &_threads_data[i], - timeout - ); - // FIXME: make sure that native handle is actually from pthreads - if(pin_threads != -1) { - spdlog::info("Pin thread to core {}", pin_threads); - cpu_set_t cpuset; - CPU_ZERO(&cpuset); - CPU_SET(pin_threads++, &cpuset); - rdmalib::impl::expect_zero(pthread_setaffinity_np( - _threads[i].native_handle(), - sizeof(cpu_set_t), &cpuset - )); - } - } - } - //void FastExecutors::serial_thread_poll_func(int) //{ // uint64_t sum = 0; diff --git a/server/executor/fast_executor.hpp b/server/executor/fast_executor.hpp index 50e8df7..49ea3d4 100644 --- a/server/executor/fast_executor.hpp +++ b/server/executor/fast_executor.hpp @@ -238,6 +238,94 @@ namespace server { void allocate_threads(int, int); }; + template + FastExecutors::FastExecutors(std::string client_addr, int port, + int func_size, + int numcores, + int msg_size, + int recv_buf_size, + int max_inline_data, + int pin_threads, + const executor::ManagerConnection & mgr_conn + ): + _closing(false), + _numcores(numcores), + _max_repetitions(0), + _pin_threads(pin_threads) + //_mgr_conn(mgr_conn) + { + // Reserve place to ensure that no reallocations happen + _threads_data.reserve(numcores); + for(int i = 0; i < numcores; ++i) + _threads_data.emplace_back( + client_addr, port, i, func_size, msg_size, + recv_buf_size, max_inline_data, mgr_conn + ); + } + + template + FastExecutors::~FastExecutors() + { + spdlog::info("FastExecutor is closing threads..."); + close(); + } + + template + void FastExecutors::close() + { + if(_closing) + return; + // FIXME: this should be only for 'warm' + //{ + // std::lock_guard g(m); + // _closing = true; + // // wake threads, letting them exit + // wakeup(); + //} + // make sure we join before destructing + SPDLOG_DEBUG("Wait on {} threads", _threads.size()); + for(auto & thread : _threads) + // Might have been closed earlier + 
if(thread.joinable()) + thread.join(); + SPDLOG_DEBUG("Finished wait on {} threads", _threads.size()); + + for(auto & thread : _threads_data) { + thread._perf.export_csv("executor_perf.csv", {"found request", "parsed request", "obtained the header and function", "finished executing", "results post written", "accounting updated", "polling accounting updated", "send queue polled", "buffer refilled"}); + spdlog::info("Thread {} Repetitions {} Avg time {} ms", + thread.id, + thread.repetitions, + static_cast(thread._accounting.total_execution_time) / thread.repetitions / 1000.0 + ); + } + _closing = true; + } + + template + void FastExecutors::allocate_threads(int timeout, int iterations) + { + int pin_threads = _pin_threads; + for(int i = 0; i < _numcores; ++i) { + _threads_data[i].max_repetitions = iterations; + _threads.emplace_back( + &Thread_t::thread_work, + &_threads_data[i], + timeout + ); + // FIXME: make sure that native handle is actually from pthreads + if(pin_threads != -1) { + spdlog::info("Pin thread to core {}", pin_threads); + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(pin_threads++, &cpuset); + rdmalib::impl::expect_zero(pthread_setaffinity_np( + _threads[i].native_handle(), + sizeof(cpu_set_t), &cpuset + )); + } + } + } + } #endif diff --git a/server/executor/opts.cpp b/server/executor/opts.hpp similarity index 99% rename from server/executor/opts.cpp rename to server/executor/opts.hpp index 5ea6c1c..255b6a9 100644 --- a/server/executor/opts.cpp +++ b/server/executor/opts.hpp @@ -1,7 +1,7 @@ #include -#include "server.hpp" +// #include "server.hpp" namespace server { namespace types { diff --git a/server/executor_manager/executor_process.cpp b/server/executor_manager/executor_process.cpp index 8e0bf57..31d8e5e 100644 --- a/server/executor_manager/executor_process.cpp +++ b/server/executor_manager/executor_process.cpp @@ -131,7 +131,6 @@ namespace rfaas::executor_manager { #ifdef USE_GNI_AUTH "--authentication-cookie", 
authentication_cookie.c_str(), #endif - nullptr }; } else if(sandbox_type == SandboxType::SARUS) { @@ -166,7 +165,6 @@ namespace rfaas::executor_manager { #ifdef USE_GNI_AUTH "--authentication-cookie", authentication_cookie.c_str(), #endif - nullptr }; } else if(sandbox_type == SandboxType::DOCKER) { @@ -228,7 +226,6 @@ namespace rfaas::executor_manager { "--mgr-secret", mgr_secret.c_str(), "--mgr-buf-addr", mgr_buf_addr.c_str(), "--mgr-buf-rkey", mgr_buf_rkey.c_str(), - nullptr }; } @@ -239,6 +236,7 @@ namespace rfaas::executor_manager { } ); std::copy(additional_args.begin(), additional_args.end(), std::back_inserter(cstrings_argv)); + cstrings_argv.push_back(nullptr); SPDLOG_DEBUG("Executor launch arguments"); for(const char* str : cstrings_argv) From ee375f574cf274653372c00f19babf69f40abf2f Mon Sep 17 00:00:00 2001 From: Matt Nappo Date: Mon, 25 Sep 2023 15:58:11 +0000 Subject: [PATCH 91/91] Repo cleanup --- errors | 203 --------------------------------------------------------- 1 file changed, 203 deletions(-) delete mode 100644 errors diff --git a/errors b/errors deleted file mode 100644 index 4b6aad8..0000000 --- a/errors +++ /dev/null @@ -1,203 +0,0 @@ -/home/ubuntu/rfaas-refactor/rfaas/lib/resources.cpp: In member function ‘std::vector rfaas::servers::select(int)’: -/home/ubuntu/rfaas-refactor/rfaas/lib/resources.cpp:43:40: warning: unused parameter ‘cores’ [-Wunused-parameter] - 43 | std::vector servers::select(int cores) - | ~~~~^~~~~ -In file included from /home/ubuntu/rfaas-refactor/server/executor/server.hpp:15, - from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: -/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:25:10: error: template argument required for ‘struct RecvBuffer’ - 25 | struct RecvBuffer; - | ^~~~~~~~~~ -/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:48:7: error: template placeholder type ‘Connection<...auto...>’ must be followed by a simple declarator-id - 48 | rdmalib::Connection* mgr_connection, 
rdmalib::Buffer & _accounting_buf, - | ^~~~~~~ -In file included from /home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/rdmalib.hpp:25, - from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:12: -/home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/connection.hpp:55:10: note: ‘template struct rdmalib::Connection’ declared here - 55 | struct Connection { - | ^~~~~~~~~~ -In file included from /home/ubuntu/rfaas-refactor/server/executor/server.hpp:15, - from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: -/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:48:42: error: expected ‘)’ before ‘,’ token - 48 | rdmalib::Connection* mgr_connection, rdmalib::Buffer & _accounting_buf, - | ^ - | ) -/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:47:39: note: to match this ‘(’ - 47 | inline void send_updated_execution( - | ^ -/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:48:68: error: wrong number of template arguments (1, should be 2) - 48 | rdmalib::Connection* mgr_connection, rdmalib::Buffer & _accounting_buf, - | ^ -In file included from /home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/rdmalib.hpp:24, - from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:12: -/home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/buffer.hpp:138:10: note: provided for ‘template struct rdmalib::Buffer’ - 138 | struct Buffer : impl::Buffer, Library> - | ^~~~~~ -In file included from /home/ubuntu/rfaas-refactor/server/executor/server.hpp:15, - from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: -/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:78:7: error: template placeholder type ‘Connection<...auto...>’ must be followed by a simple declarator-id - 78 | rdmalib::Connection* mgr_connection, rdmalib::Buffer & _accounting_buf, - | ^~~~~~~ -In file included from /home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/rdmalib.hpp:25, - from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:12: 
-/home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/connection.hpp:55:10: note: ‘template struct rdmalib::Connection’ declared here - 55 | struct Connection { - | ^~~~~~~~~~ -In file included from /home/ubuntu/rfaas-refactor/server/executor/server.hpp:15, - from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: -/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:78:42: error: expected ‘)’ before ‘,’ token - 78 | rdmalib::Connection* mgr_connection, rdmalib::Buffer & _accounting_buf, - | ^ - | ) -/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:77:37: note: to match this ‘(’ - 77 | inline void send_updated_polling( - | ^ -/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:78:68: error: wrong number of template arguments (1, should be 2) - 78 | rdmalib::Connection* mgr_connection, rdmalib::Buffer & _accounting_buf, - | ^ -In file included from /home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/rdmalib.hpp:24, - from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:12: -/home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/buffer.hpp:138:10: note: provided for ‘template struct rdmalib::Buffer’ - 138 | struct Buffer : impl::Buffer, Library> - | ^~~~~~ -In file included from /home/ubuntu/rfaas-refactor/server/executor/server.hpp:15, - from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: -/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:122:25: error: wrong number of template arguments (1, should be 2) - 122 | rdmalib::Buffer send, rcv; - | ^ -In file included from /home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/rdmalib.hpp:24, - from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:12: -/home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/buffer.hpp:138:10: note: provided for ‘template struct rdmalib::Buffer’ - 138 | struct Buffer : impl::Buffer, Library> - | ^~~~~~ -In file included from /home/ubuntu/rfaas-refactor/server/executor/server.hpp:15, - from 
/home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: -/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:123:5: error: invalid use of template-name ‘rdmalib::RecvBuffer’ without an argument list - 123 | rdmalib::RecvBuffer wc_buffer; - | ^~~~~~~ -/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:124:5: error: template placeholder type ‘Connection<...auto...>’ must be followed by a simple declarator-id - 124 | rdmalib::Connection* conn; - | ^~~~~~~ -In file included from /home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/rdmalib.hpp:25, - from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:12: -/home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/connection.hpp:55:10: note: ‘template struct rdmalib::Connection’ declared here - 55 | struct Connection { - | ^~~~~~~~~~ -In file included from /home/ubuntu/rfaas-refactor/server/executor/server.hpp:15, - from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: -/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:125:5: error: template placeholder type ‘Connection<...auto...>’ must be followed by a simple declarator-id - 125 | rdmalib::Connection* _mgr_connection; - | ^~~~~~~ -In file included from /home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/rdmalib.hpp:25, - from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:12: -/home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/connection.hpp:55:10: note: ‘template struct rdmalib::Connection’ declared here - 55 | struct Connection { - | ^~~~~~~~~~ -In file included from /home/ubuntu/rfaas-refactor/server/executor/server.hpp:15, - from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: -/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:128:29: error: wrong number of template arguments (1, should be 2) - 128 | rdmalib::Buffer _accounting_buf; - | ^ -In file included from /home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/rdmalib.hpp:24, - from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:12: 
-/home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/buffer.hpp:138:10: note: provided for ‘template struct rdmalib::Buffer’ - 138 | struct Buffer : impl::Buffer, Library> - | ^~~~~~ -In file included from /home/ubuntu/rfaas-refactor/server/executor/server.hpp:15, - from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: -/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp: In constructor ‘server::Thread::Thread(std::string, int, int, int, int, int, int, const executor::ManagerConnection&)’: -/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:146:41: error: ‘rdmalib::functions::Submission’ has not been declared - 146 | rcv(buf_size, rdmalib::functions::Submission::DATA_HEADER_SIZE), - | ^~~~~~~~~~ -/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:149:7: error: class ‘server::Thread’ does not have any field named ‘conn’ - 149 | conn(nullptr), - | ^~~~ -/home/ubuntu/rfaas-refactor/server/executor/fast_executor.hpp:146:7: error: expression list treated as compound expression in mem-initializer [-fpermissive] - 146 | rcv(buf_size, rdmalib::functions::Submission::DATA_HEADER_SIZE), - | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -In file included from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: -/home/ubuntu/rfaas-refactor/server/executor/server.hpp: At global scope: -/home/ubuntu/rfaas-refactor/server/executor/server.hpp:119:5: error: invalid use of template-name ‘rdmalib::RDMAPassive’ without an argument list - 119 | rdmalib::RDMAPassive _state; - | ^~~~~~~ -/home/ubuntu/rfaas-refactor/server/executor/server.hpp:120:5: error: invalid use of template-name ‘rdmalib::server::ServerStatus’ without an argument list - 120 | rdmalib::server::ServerStatus _status; - | ^~~~~~~ -/home/ubuntu/rfaas-refactor/server/executor/server.hpp:124:5: error: template placeholder type ‘Connection<...auto...>’ must be followed by a simple declarator-id - 124 | rdmalib::Connection* _conn; - | ^~~~~~~ -In file included from 
/home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/rdmalib.hpp:25, - from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:12: -/home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/connection.hpp:55:10: note: ‘template struct rdmalib::Connection’ declared here - 55 | struct Connection { - | ^~~~~~~~~~ -In file included from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: -/home/ubuntu/rfaas-refactor/server/executor/server.hpp:125:5: error: invalid use of template-name ‘rdmalib::RecvBuffer’ without an argument list - 125 | rdmalib::RecvBuffer _wc_buffer; - | ^~~~~~~ -/home/ubuntu/rfaas-refactor/server/executor/server.hpp:141:43: error: wrong number of template arguments (1, should be 2) - 141 | void register_buffer(rdmalib::Buffer & buf, bool is_recv_buffer) - | ^ -In file included from /home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/rdmalib.hpp:24, - from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:12: -/home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/buffer.hpp:138:10: note: provided for ‘template struct rdmalib::Buffer’ - 138 | struct Buffer : impl::Buffer, Library> - | ^~~~~~ -In file included from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: -/home/ubuntu/rfaas-refactor/server/executor/server.hpp:161:23: error: template placeholder type ‘Connection<...auto...>’ must be followed by a simple declarator-id - 161 | void reload_queue(rdmalib::Connection & conn, int32_t idx); - | ^~~~~~~ -In file included from /home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/rdmalib.hpp:25, - from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:12: -/home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/connection.hpp:55:10: note: ‘template struct rdmalib::Connection’ declared here - 55 | struct Connection { - | ^~~~~~~~~~ -In file included from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: -/home/ubuntu/rfaas-refactor/server/executor/server.hpp:161:49: error: expected ‘)’ before ‘,’ token - 161 | void 
reload_queue(rdmalib::Connection & conn, int32_t idx); - | ~ ^ - | ) -/home/ubuntu/rfaas-refactor/server/executor/server.hpp:161:51: error: variable or field ‘int32_t’ declared void - 161 | void reload_queue(rdmalib::Connection & conn, int32_t idx); - | ^~~~~~~ -/home/ubuntu/rfaas-refactor/server/executor/server.hpp:161:51: error: expected ‘;’ at end of member declaration - 161 | void reload_queue(rdmalib::Connection & conn, int32_t idx); - | ^~~~~~~ - | ; -/home/ubuntu/rfaas-refactor/server/executor/server.hpp:161:59: error: ‘idx’ does not name a type - 161 | void reload_queue(rdmalib::Connection & conn, int32_t idx); - | ^~~ -/home/ubuntu/rfaas-refactor/server/executor/server.hpp:163:28: error: deduced class type ‘RDMAPassive’ in function return type - 163 | rdmalib::RDMAPassive & state(); - | ^~~~~ -In file included from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:12: -/home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/rdmalib.hpp:158:10: note: ‘template struct rdmalib::RDMAPassive’ declared here - 158 | struct RDMAPassive { - | ^~~~~~~~~~~ -In file included from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: -/home/ubuntu/rfaas-refactor/server/executor/server.hpp:164:26: error: deduced class type ‘Connection’ in function return type - 164 | rdmalib::Connection* poll_communication(); - | ^~~~~~~~~~~~~~~~~~ -In file included from /home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/rdmalib.hpp:25, - from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:12: -/home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/connection.hpp:55:10: note: ‘template struct rdmalib::Connection’ declared here - 55 | struct Connection { - | ^~~~~~~~~~ -In file included from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: -/home/ubuntu/rfaas-refactor/server/executor/server.hpp:165:43: error: deduced class type ‘ServerStatus’ in function return type - 165 | const rdmalib::server::ServerStatus & status() const; - | ^~~~~~ -In file included from 
/home/ubuntu/rfaas-refactor/server/executor/cli.cpp:13: -/home/ubuntu/rfaas-refactor/rdmalib/include/rdmalib/server.hpp:20:10: note: ‘template struct rdmalib::server::ServerStatus’ declared here - 20 | struct ServerStatus { - | ^~~~~~~~~~~~ -In file included from /home/ubuntu/rfaas-refactor/server/executor/cli.cpp:15: -/home/ubuntu/rfaas-refactor/server/executor/server.hpp: In member function ‘void server::Server::register_buffer(int&, bool)’: -/home/ubuntu/rfaas-refactor/server/executor/server.hpp:147:13: error: request for member ‘register_memory’ in ‘buf’, which is of non-class type ‘int’ - 147 | buf.register_memory(_state.pd(), IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE); - | ^~~~~~~~~~~~~~~ -/home/ubuntu/rfaas-refactor/server/executor/server.hpp:154:13: error: request for member ‘register_memory’ in ‘buf’, which is of non-class type ‘int’ - 154 | buf.register_memory(_state.pd(), IBV_ACCESS_LOCAL_WRITE); - | ^~~~~~~~~~~~~~~ -gmake[2]: *** [CMakeFiles/executor.dir/build.make:76: CMakeFiles/executor.dir/server/executor/cli.cpp.o] Error 1 -gmake[1]: *** [CMakeFiles/Makefile2:226: CMakeFiles/executor.dir/all] Error 2 -gmake: *** [Makefile:136: all] Error 2