Skip to content

Commit

Permalink
format source code
Browse files Browse the repository at this point in the history
  • Loading branch information
Chen Shaoyuan committed Dec 19, 2024
1 parent 67bc451 commit 1c40d93
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 105 deletions.
5 changes: 2 additions & 3 deletions mooncake-transfer-engine/include/topology.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#include <string>

namespace mooncake {
/// Discovers the device topology of the local host.
///
/// @return A styled JSON document (as text). Each key names a location
///         ("cpu:<numa node>", plus "cuda:<device index>" when built with
///         USE_CUDA) and maps to a two-element array:
///         [preferred HCA names, other available HCA names].
std::string discoverTopologyMatrix();
}  // namespace mooncake
162 changes: 66 additions & 96 deletions mooncake-transfer-engine/src/topology.cpp
Original file line number Diff line number Diff line change
@@ -1,84 +1,74 @@
#include <vector>
#include <glog/logging.h>
#include <jsoncpp/json/json.h>

#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <fstream>
#include <iostream>

#include <jsoncpp/json/json.h>
#include <glog/logging.h>
#include <vector>

#ifdef USE_CUDA
#include "cuda_runtime.h"
#endif

#include <sys/types.h>
#include <dirent.h>
#include <ctype.h>
#include <string.h>
#include <dirent.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

#include "topology.h"

// One RDMA device (HCA) as reported by list_infiniband_devices().
struct InfinibandDevice {
    // Device name; this is the string placed in the topology's HCA lists.
    std::string name;
    // PCI address of the device; passed to get_pci_distance() as an entry
    // name under /sys/bus/pci/devices.
    std::string pci_bus_id;
    // NUMA node the device is attached to; compared against the ids of the
    // /sys/devices/system/node/node<N> directories.
    int numa_node;
};

struct TopologyEntry
{
struct TopologyEntry {
std::string name;
std::vector<std::string> preferred_hca;
std::vector<std::string> avail_hca;

Json::Value to_json()
{
Json::Value to_json() {
Json::Value matrix(Json::arrayValue);
Json::Value hca_list(Json::arrayValue);
for (auto &hca : preferred_hca)
{
for (auto &hca : preferred_hca) {
hca_list.append(hca);
}
matrix.append(hca_list);
hca_list.clear();
for (auto &hca : avail_hca)
{
for (auto &hca : avail_hca) {
hca_list.append(hca);
}
matrix.append(hca_list);
return matrix;
}
};

static std::vector<InfinibandDevice> list_infiniband_devices()
{
static std::vector<InfinibandDevice> list_infiniband_devices() {
DIR *dir = opendir("/sys/class/infiniband");
struct dirent *entry;
std::vector<InfinibandDevice> devices;

if (dir == NULL)
{
if (dir == NULL) {
LOG(WARNING) << "failed to list /sys/class/infiniband";
return {};
}
while ((entry = readdir(dir)))
{
if (entry->d_name[0] == '.')
{
while ((entry = readdir(dir))) {
if (entry->d_name[0] == '.') {
continue;
}

std::string device_name = entry->d_name;

char path[PATH_MAX];
char resolved_path[PATH_MAX];
snprintf(path, sizeof(path), "/sys/class/infiniband/%s/../..", entry->d_name);
if (realpath(path, resolved_path) == NULL)
{
snprintf(path, sizeof(path), "/sys/class/infiniband/%s/../..",
entry->d_name);
if (realpath(path, resolved_path) == NULL) {
LOG(ERROR) << "realpath: " << strerror(errno);
continue;
}
Expand All @@ -96,140 +86,120 @@ static std::vector<InfinibandDevice> list_infiniband_devices()
return devices;
}

static std::vector<TopologyEntry> discover_cpu_topology(const std::vector<InfinibandDevice> &all_hca)
{
static std::vector<TopologyEntry> discover_cpu_topology(
const std::vector<InfinibandDevice> &all_hca) {
DIR *dir = opendir("/sys/devices/system/node");
struct dirent *entry;
std::vector<TopologyEntry> topology;

if (dir == NULL)
{
if (dir == NULL) {
LOG(WARNING) << "failed to list /sys/devices/system/node";
return {};
}
while ((entry = readdir(dir)))
{
while ((entry = readdir(dir))) {
const char *prefix = "node";
if (entry->d_type != DT_DIR || strncmp(entry->d_name, prefix, strlen(prefix)) != 0)
{
if (entry->d_type != DT_DIR ||
strncmp(entry->d_name, prefix, strlen(prefix)) != 0) {
continue;
}
int node_id = atoi(entry->d_name + strlen(prefix));
std::vector<std::string> preferred_hca;
std::vector<std::string> avail_hca;
for (const auto &hca : all_hca)
{
if (hca.numa_node == node_id)
{
for (const auto &hca : all_hca) {
if (hca.numa_node == node_id) {
preferred_hca.push_back(hca.name);
}
else
{
} else {
avail_hca.push_back(hca.name);
}
}
topology.push_back(TopologyEntry{.name = "cpu:" + std::to_string(node_id),
.preferred_hca = std::move(preferred_hca),
.avail_hca = std::move(avail_hca)});
topology.push_back(
TopologyEntry{.name = "cpu:" + std::to_string(node_id),
.preferred_hca = std::move(preferred_hca),
.avail_hca = std::move(avail_hca)});
}
(void)closedir(dir);
return topology;
}

#ifdef USE_CUDA

// Estimates how far apart two PCI devices sit in the sysfs device tree.
//
// Both names are resolved through /sys/bus/pci/devices/<bus> with
// realpath(). The longest common *character* prefix of the two resolved
// paths is skipped, and the result is the number of '/' characters left in
// both remainders.
//
// NOTE: because the prefix is compared character by character rather than
// per path component, two sibling devices whose paths differ only inside the
// last component also yield 0.
//
// @param bus1  Entry name under /sys/bus/pci/devices (a PCI address).
// @param bus2  Entry name under /sys/bus/pci/devices (a PCI address).
// @return The distance as described above, or -1 if either path cannot be
//         resolved.
static int get_pci_distance(const char *bus1, const char *bus2) {
    char buf[PATH_MAX];
    char path1[PATH_MAX];
    char path2[PATH_MAX];

    snprintf(buf, sizeof(buf), "/sys/bus/pci/devices/%s", bus1);
    if (realpath(buf, path1) == nullptr) {
        return -1;
    }
    snprintf(buf, sizeof(buf), "/sys/bus/pci/devices/%s", bus2);
    if (realpath(buf, path2) == nullptr) {
        return -1;
    }

    // Skip the part shared by both resolved paths.
    const char *ptr1 = path1;
    const char *ptr2 = path2;
    while (*ptr1 && *ptr1 == *ptr2) {
        ptr1++;
        ptr2++;
    }

    // Count the path separators remaining on each side.
    int distance = 0;
    for (; *ptr1; ptr1++) {
        distance += (*ptr1 == '/');
    }
    for (; *ptr2; ptr2++) {
        distance += (*ptr2 == '/');
    }

    return distance;
}

static std::vector<TopologyEntry> discover_cuda_topology(const std::vector<InfinibandDevice> &all_hca)
{
static std::vector<TopologyEntry> discover_cuda_topology(
const std::vector<InfinibandDevice> &all_hca) {
std::vector<TopologyEntry> topology;
int device_count;
if (cudaGetDeviceCount(&device_count) != cudaSuccess)
{
if (cudaGetDeviceCount(&device_count) != cudaSuccess) {
device_count = 0;
}
for (int i = 0; i < device_count; i++)
{
for (int i = 0; i < device_count; i++) {
char pci_bus_id[20];
if (cudaDeviceGetPCIBusId(pci_bus_id, sizeof(pci_bus_id), i) != cudaSuccess)
{
if (cudaDeviceGetPCIBusId(pci_bus_id, sizeof(pci_bus_id), i) !=
cudaSuccess) {
continue;
}
for (char *ch = pci_bus_id; (*ch = tolower(*ch)); ch++)
;

std::vector<std::string> preferred_hca;
std::vector<std::string> avail_hca;
for (const auto &hca : all_hca)
{
if (get_pci_distance(hca.pci_bus_id.c_str(), pci_bus_id) == 0)
{
for (const auto &hca : all_hca) {
if (get_pci_distance(hca.pci_bus_id.c_str(), pci_bus_id) == 0) {
preferred_hca.push_back(hca.name);
}
else
{
} else {
avail_hca.push_back(hca.name);
}
}
topology.push_back(TopologyEntry{.name = "cuda:" + std::to_string(i),
.preferred_hca = std::move(preferred_hca),
.avail_hca = std::move(avail_hca)});
topology.push_back(
TopologyEntry{.name = "cuda:" + std::to_string(i),
.preferred_hca = std::move(preferred_hca),
.avail_hca = std::move(avail_hca)});
}
return topology;
}

#endif // USE_CUDA

namespace mooncake {
// Discovers the host topology and renders it as styled JSON text.
//
// The result is a JSON object with one member per location: "cpu:<N>" for
// each NUMA node and, when built with USE_CUDA, "cuda:<i>" for each CUDA
// device. Every member holds the array produced by TopologyEntry::to_json().
std::string discoverTopologyMatrix() {
    auto all_hca = list_infiniband_devices();
    Json::Value value(Json::objectValue);
    for (auto &ent : discover_cpu_topology(all_hca)) {
        value[ent.name] = ent.to_json();
    }
#ifdef USE_CUDA
    for (auto &ent : discover_cuda_topology(all_hca)) {
        value[ent.name] = ent.to_json();
    }
#endif
    return value.toStyledString();
}
}  // namespace mooncake
11 changes: 5 additions & 6 deletions mooncake-transfer-engine/tests/topology_test.cpp
Original file line number Diff line number Diff line change
@@ -1,20 +1,19 @@
#include <gtest/gtest.h>
#include "topology.h"

#include <glog/logging.h>
#include <gtest/gtest.h>

#include "transfer_metadata.h"
#include "topology.h"

// Smoke test: discovers the local topology, logs it, and feeds the JSON text
// to TransferMetadata::parseNicPriorityMatrix.
// NOTE(review): the result of parseNicPriorityMatrix is not checked here, so
// this only verifies that discovery and parsing run to completion.
// NOTE(review): the suite name is spelled "Toplogy"; it is kept as-is because
// renaming it would change the test's externally visible identifier.
TEST(ToplogyTest, GetTopologyMatrix) {
    std::string topo = mooncake::discoverTopologyMatrix();
    LOG(INFO) << topo;
    mooncake::TransferMetadata::PriorityMatrix matrix;
    std::vector<std::string> rnic_list;
    mooncake::TransferMetadata::parseNicPriorityMatrix(topo, matrix, rnic_list);
}

int main(int argc, char **argv)
{
int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

0 comments on commit 1c40d93

Please sign in to comment.