Skip to content

Commit

Permalink
Added SW-support for AES, DPI and compression
Browse files Browse the repository at this point in the history
  • Loading branch information
Maximilian committed Nov 14, 2024
1 parent e9599bb commit af836e6
Show file tree
Hide file tree
Showing 7 changed files with 121 additions and 15 deletions.
28 changes: 23 additions & 5 deletions examples_sw/apps/rdma_service/client/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,9 @@ int main(int argc, char *argv[])
sg.rdma.len = min_size;
sg.rdma.local_stream = strmHost;

// Get a pointer (hMem) to the local queue-pair buffer so that values can be written into the payload of the RDMA-packets
uint64_t *hMem = (uint64_t*)(cthread.getQpair()->local.vaddr);

// Set the Coyote Operation, which can either be a REMOTE_WRITE or a REMOTE_READ, depending on the settings for the experiment
CoyoteOper coper = oper ? CoyoteOper::REMOTE_RDMA_WRITE : CoyoteOper::REMOTE_RDMA_READ;;

Expand All @@ -213,17 +216,21 @@ int main(int argc, char *argv[])
// Lambda-function for throughput-benchmarking
auto benchmark_thr = [&]() {
// For the desired number of repetitions per size, invoke the cThread-Function with the coyote-Operation
for(int i = 0; i < n_reps_thr; i++)
for(int i = 0; i < n_reps_thr; i++) {
# ifdef VERBOSE
std::cout << "rdma_client: invoke the operation " << std::endl;
# endif
cthread.invoke(coper, &sg);
hMem[sg.rdma.len/8-1] = hMem[sg.rdma.len/8-1] + 1;
std::cout << "CLIENT: Sent out message #" << i << " at message-size " << sg.rdma.len << " with content " << hMem[sg.rdma.len/8-1] << std::endl;
}

// Increment the hMem-value
// hMem[sg.rdma.len/8-1] = hMem[sg.rdma.len/8-1] + 1;

// Check the number of completed RDMA-transactions, wait until all operations have been completed. Check for stalling in-between.
while(cthread.checkCompleted(CoyoteOper::LOCAL_WRITE) < n_reps_thr) {
# ifdef VERBOSE
std::cout << "rdma_client: Current number of completed operations: " << cthread.checkCompleted(CoyoteOper::LOCAL_WRITE) << std::endl;
# endif
// std::cout << "CLIENT: Current number of completed operations: " << cthread.checkCompleted(CoyoteOper::LOCAL_WRITE) << std::endl;
// stalled is an atomic boolean used for event-handling (?) that would indicate a stalled operation
if( stalled.load() ) throw std::runtime_error("Stalled, SIGINT caught");
}
Expand Down Expand Up @@ -255,14 +262,25 @@ int main(int argc, char *argv[])
# ifdef VERBOSE
std::cout << "rdma_client: invoke the operation " << std::endl;
# endif

// Increment the hMem-value
hMem[sg.rdma.len/8-1] = hMem[sg.rdma.len/8-1] + 1;
cthread.invoke(coper, &sg);

std::cout << "CLIENT: Sent out message #" << i << " at message-size " << sg.rdma.len << " with content " << hMem[sg.rdma.len/8-1] << std::endl;

bool message_written = false;
while(cthread.checkCompleted(CoyoteOper::LOCAL_WRITE) < i+1) {
# ifdef VERBOSE
std::cout << "rdma_client: Current number of completed operations: " << cthread.checkCompleted(CoyoteOper::LOCAL_WRITE) << std::endl;
# endif
# endif

// As long as the completion is not yet received, check for a possible stall-event
if( stalled.load() ) throw std::runtime_error("Stalled, SIGINT caught");
}

std::cout << "CLIENT: Received an ACK for this message!" << std::endl;
std::cout << "CLIENT: Received the following memory content: " << hMem[sg.rdma.len/8-1] << std::endl;
}
};

Expand Down
27 changes: 23 additions & 4 deletions examples_sw/apps/rdma_service/server/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,9 @@ int main(int argc, char *argv[])
memset(&sg, 0, sizeof(rdmaSg));
sg.rdma.len = min_size; sg.rdma.local_stream = strmHost;

// Get a memory handle to manipulate values in the RDMA payloads
uint64_t *hMem = (uint64_t*)(cthread->getQpair()->local.vaddr);

while(sg.rdma.len <= max_size) {
// Sync via the cThread that is part of the cService-daemon that was just started in the background
# ifdef VERBOSE
Expand All @@ -139,14 +142,19 @@ int main(int argc, char *argv[])

if(rdwr) {
// THR - wait until all expected WRITEs are coming in. Incoming RDMA_WRITEs are LOCAL_WRITEs on this side
while(cthread->checkCompleted(CoyoteOper::LOCAL_WRITE) < n_reps_thr) { }

while(cthread->checkCompleted(CoyoteOper::LOCAL_WRITE) < n_reps_thr) {
std::cout << "CLIENT: Current number of completed operations: " << cthread->checkCompleted(CoyoteOper::LOCAL_WRITE) << std::endl;
}

// THR - issuing the same amount of "Write-Backs" to the client
for(int i = 0; i < n_reps_thr; i++)
for(int i = 0; i < n_reps_thr; i++) {
# ifdef VERBOSE
std::cout << "rdma_server: invoke the operation " << std::endl;
# endif
hMem[sg.rdma.len/8-1] = hMem[sg.rdma.len/8-1] + 1;
cthread->invoke(CoyoteOper::REMOTE_RDMA_WRITE, &sg);
std::cout << "SERVER: Sent out message #" << i << " at message-size " << sg.rdma.len << " with content " << hMem[sg.rdma.len/8-1] << std::endl;
}

// Sync via the thread that is located within the cService-daemon
# ifdef VERBOSE
Expand All @@ -161,7 +169,18 @@ int main(int argc, char *argv[])
// LAT - iterate over the number of ping-pong-exchanges according to the desired experiment setting
for(int i = 0; i < n_reps_lat; i++) {
// Wait for the next incoming WRITE
while(cthread->checkCompleted(CoyoteOper::LOCAL_WRITE) < i+1) { }
bool message_written = false;
while(cthread->checkCompleted(CoyoteOper::LOCAL_WRITE) < i+1) {
if(!message_written) {
std::cout << "RDMA-Server: Waiting for an incoming RDMA-WRITE at currently " << i << "." << std::endl;
message_written = true;
}
}

// Increment the number in the payload before writing back
hMem[sg.rdma.len/8-1] = hMem[sg.rdma.len/8-1] + 1;

std::cout << "RDMA-Server: Invoking a RDMA-WRITE from the Server to the Client at currently " << (i+1) << "." << std::endl;
cthread->invoke(CoyoteOper::REMOTE_RDMA_WRITE, &sg);
}
} else {
Expand Down
2 changes: 1 addition & 1 deletion sw/include/bThread.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ class bThread {
*/

// Constructor-Call
bThread(int32_t vfid, pid_t hpid, uint32_t dev, cSched *csched = nullptr, void (*uisr)(int) = nullptr);
bThread(int32_t vfid, pid_t hpid, uint32_t dev, cSched *csched = nullptr, void (*uisr)(int) = nullptr, bool encryption_required = false, bool compression_required = false, bool dpi_required = false);

// Destructor-Call
~bThread();
Expand Down
16 changes: 12 additions & 4 deletions sw/include/cDefs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,7 @@ struct csAlloc {

/**
* Queue pairs
*/
*/

// One queue - a queue pair has a local and a remote copy of this
struct ibvQ {
Expand All @@ -519,6 +519,11 @@ struct ibvQ {
// Global ID for identifying a network interface in RDMA-networks (either InfiniBand or RoCE). For us, it's mostly a concatenation of repeated IP-addresses
char gid[33] = { 0 };

// Balboa capabilities: AES-key, compression-bit and DPI-bit
__uint128_t aes_key;
bool compression_enabled;
bool dpi_enabled;

// Converter GID to integer
uint32_t gidToUint(int idx) {
if(idx > 24) {
Expand All @@ -541,13 +546,16 @@ struct ibvQ {
}

/**
 * Print this queue's parameters to stdout.
 *
 * Two lines are emitted, each prefixed with `name`:
 *   1. QPN, PSN, virtual address, size and IP address
 *   2. the Balboa capabilities: AES key, compression flag and DPI flag
 *
 * @param name label printed in front of each line (e.g. "Local " / "Remote")
 */
void print(const char *name) {
    // printf has no conversion for __uint128_t, so the key is split into two 64-bit halves.
    uint64_t aes_high = (uint64_t)(aes_key >> 64);
    uint64_t aes_low = (uint64_t)(aes_key);

    // Basic queue description: printed exactly once.
    printf("%s: QPN 0x%06x, PSN 0x%06x, VADDR %016lx, SIZE %08x, IP 0x%08x\n",
           name, qpn, psn, (uint64_t)vaddr, size, ip_addr);

    // Capabilities on a single line; both key halves are zero-padded so the
    // 128-bit key is always shown as 32 hex digits.
    printf("%s: AES-key 0x%016lx%016lx, Compression %d, DPI %d\n",
           name, aes_high, aes_low, compression_enabled, dpi_enabled);
}
};

/**
* Queue pair - combination of a local and a remote ibvQ
* Queue pair - combination of a local and a remote ibvQ
*/
struct ibvQp {
public:
Expand Down
21 changes: 21 additions & 0 deletions sw/include/cLib.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,27 @@ class cLib {
// Received remote QP is located in the receive buffer and is getting copied over to the thread, which manages all QPs
memcpy(&cthread->getQpair()->remote, recv_buff, sizeof(ibvQ));

// Negotiate the Balboa-capabilities by comparing local and remote queue

// AES-encryption: The larger aes-key becomes the common one. If both AES-keys are set to 0, no encryption is used for this QP
if(cthread->getQpair()->local.aes_key > cthread->getQpair()->remote.aes_key) {
cthread->getQpair()->remote.aes_key = cthread->getQpair()->local.aes_key;
} else {
cthread->getQpair()->local.aes_key = cthread->getQpair()->remote.aes_key;
}

// Compression agreement: If at least one party wants compression, it is used for this communication flow
if(cthread->getQpair()->local.compression_enabled || cthread->getQpair()->remote.compression_enabled) {
cthread->getQpair()->remote.compression_enabled = true;
cthread->getQpair()->local.compression_enabled = true;
}

// DPI agreement: If at least one party wants to use DPI, it is used for this communication flow
if(cthread->getQpair()->local.dpi_enabled || cthread->getQpair()->remote.dpi_enabled) {
cthread->getQpair()->remote.dpi_enabled = true;
cthread->getQpair()->local.dpi_enabled = true;
}

// Output: Print local and remote QPs
std::cout << "Queue pair: " << std::endl;
cthread->getQpair()->local.print("Local ");
Expand Down
20 changes: 19 additions & 1 deletion sw/src/bThread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ static unsigned seed = std::chrono::system_clock::now().time_since_epoch().count
*
* Constructor that sets variables for vfid, cscheduler and lastly the plock (enum open_or_create and a generated name)
*/
bThread::bThread(int32_t vfid, pid_t hpid, uint32_t dev, cSched *csched, void (*uisr)(int)) : vfid(vfid), csched(csched),
bThread::bThread(int32_t vfid, pid_t hpid, uint32_t dev, cSched *csched, void (*uisr)(int), bool encryption_required, bool compression_required, bool dpi_required) : vfid(vfid), csched(csched),
plock(open_or_create, ("vpga_mtx_user_" + std::to_string(vfid)).c_str())
{
DBG3("bThread: opening vFPGA-" << vfid << ", hpid " << hpid);
Expand Down Expand Up @@ -188,6 +188,7 @@ bThread::bThread(int32_t vfid, pid_t hpid, uint32_t dev, cSched *csched, void (*
// Random number generators
std::default_random_engine rand_gen(seed);
std::uniform_int_distribution<int> distr(0, std::numeric_limits<std::uint32_t>::max());
std::uniform_int_distribution<uint64_t> distr_aes(1, std::numeric_limits<std::uint64_t>::max());

// Read the IP-address via a ioctl-system call and store it in tmp
if (ioctl(fd, IOCTL_GET_IP_ADDRESS, &tmp))
Expand All @@ -208,6 +209,23 @@ bThread::bThread(int32_t vfid, pid_t hpid, uint32_t dev, cSched *csched, void (*
qpair->local.psn = distr(rand_gen) & 0xFFFFFF; // Generate a random PSN to start with on the local side
qpair->local.rkey = 0; // Local rkey is hard-coded to 0

// Balboa-capabilities

// AES-Encryption
if(encryption_required) {
// If AES is required, create a random AES-key as part of the Queue
qpair->local.aes_key = distr_aes(rand_gen);
} else {
// If no AES-encryption is required, set the AES-key to 0.
qpair->local.aes_key = 0;
}

// Compression-bit
qpair->local.compression_enabled = compression_required;

// DPI-bit
qpair->local.dpi_enabled = dpi_required;

# ifdef VERBOSE
std::cout << "bThread: RDMA is enabled, created the local QP with QPN " << qpair->local.qpn << ", local PSN " << qpair->local.psn << ", and local rkey " << qpair->local.rkey << "." << std::endl;
# endif
Expand Down
22 changes: 22 additions & 0 deletions sw/src/cService.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,28 @@ void cService::acceptConnectionRemote() {
# endif

cthread->getQpair()->remote = r_qp; // store the received remote QP

// Negotiate the Balboa-capabilities by comparing local and remote queue

// AES-encryption: The larger aes-key becomes the common one. If both AES-keys are set to 0, no encryption is used for this QP
if(cthread->getQpair()->local.aes_key > cthread->getQpair()->remote.aes_key) {
cthread->getQpair()->remote.aes_key = cthread->getQpair()->local.aes_key;
} else {
cthread->getQpair()->local.aes_key = cthread->getQpair()->remote.aes_key;
}

// Compression agreement: If at least one party wants compression, it is used for this communication flow
if(cthread->getQpair()->local.compression_enabled || cthread->getQpair()->remote.compression_enabled) {
cthread->getQpair()->remote.compression_enabled = true;
cthread->getQpair()->local.compression_enabled = true;
}

// DPI agreement: If at least one party wants to use DPI, it is used for this communication flow
if(cthread->getQpair()->local.dpi_enabled || cthread->getQpair()->remote.dpi_enabled) {
cthread->getQpair()->remote.dpi_enabled = true;
cthread->getQpair()->local.dpi_enabled = true;
}

cthread->getMem({CoyoteAlloc::HPF, r_qp.size, true}); // Allocate memory for receiving data for RDMA

# ifdef VERBOSE
Expand Down

0 comments on commit af836e6

Please sign in to comment.