forked from BeStateless/RAMCloud
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdpdk-config.patch
312 lines (295 loc) · 12.4 KB
/
dpdk-config.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
Add command-line parameters for DPDK configuration.
From: Aaron Jones <[email protected]>
Adds the following command-line arguments to RAMCloud to allow more control
over how DPDK is configured.
--dpdkArgs - Specify the paramters to rte_eal_init. If blank then
RAMCloud's default will be used.
--dpdkSkipInit - Specify whether or not rte_eal_init should be called.
RAMCloud client programs may want to call this function
themselves to customize the configuration. If this is
specified then --dpdkArgs is silently ignored.
--dpdkVlanTag - Specify the VLAN tag under which all RAMCloud traffic is
sent.
Updates DPDK initialization in src/DpdkDriver.cc to process these arguments.
---
src/DpdkDriver.cc | 104 +++++++++++++++++++++++++++++++++--------------
src/DpdkDriver.h | 9 ++++
src/OptionParser.cc | 15 +++++++
src/OptionParser.h | 33 +++++++++++++++
src/TransportManager.cc | 6 ++-
5 files changed, 133 insertions(+), 34 deletions(-)
diff --git a/src/DpdkDriver.cc b/src/DpdkDriver.cc
index a8ee1c52..a5408b0e 100644
--- a/src/DpdkDriver.cc
+++ b/src/DpdkDriver.cc
@@ -97,6 +97,7 @@ DpdkDriver::DpdkDriver()
, bandwidthMbps(10000)
, highestPriorityAvail(7)
, lowestPriorityAvail(0)
+ , vlanTag(0)
, fileLogger(NOTICE, "DPDK: ")
{
localMac.construct("01:23:45:67:89:ab");
@@ -121,7 +122,11 @@ DpdkDriver::DpdkDriver()
* Selects which physical port to use for communication.
*/
-DpdkDriver::DpdkDriver(Context* context, int port)
+DpdkDriver::DpdkDriver(Context* context,
+ int port,
+ std::string args,
+ bool skipInit,
+ uint16_t tag)
: context(context)
, packetBufPool()
, packetBufsUtilized(0)
@@ -136,6 +141,7 @@ DpdkDriver::DpdkDriver(Context* context, int port)
// Assume we are allowed to use all 8 ethernet priorities.
, highestPriorityAvail(7)
, lowestPriorityAvail(0)
+ , vlanTag(tag)
, fileLogger(NOTICE, "DPDK: ")
{
struct ether_addr mac;
@@ -143,6 +149,12 @@ DpdkDriver::DpdkDriver(Context* context, int port)
struct rte_eth_conf portConf;
int ret;
+ if (vlanTag >= 4095) {
+ throw DriverException(HERE,
+ "Bad vlan tag, expected vlanTag < 4095, got %i",
+ static_cast<int>(vlanTag));
+ }
+
portId = downCast<uint8_t>(port);
// Initialize the DPDK environment with some default parameters.
@@ -151,34 +163,43 @@ DpdkDriver::DpdkDriver(Context* context, int port)
// This is a bug in DPDK as of 9/2016; if the bug gets fixed, then
// the --file-prefix argument can be removed.
LOG(NOTICE, "Using DPDK version %s", rte_version());
- char nameBuffer[1000];
- if (gethostname(nameBuffer, sizeof(nameBuffer)) != 0) {
- throw DriverException(HERE, format("gethostname failed: %s",
- strerror(errno)));
- }
- nameBuffer[sizeof(nameBuffer)-1] = 0; // Needed if name was too long.
- const char *argv[] = {"rc", "--file-prefix", nameBuffer, "-c", "1",
- "-n", "1", NULL};
- int argc = static_cast<int>(sizeof(argv) / sizeof(argv[0])) - 1;
-
- rte_openlog_stream(fileLogger.getFile());
- ret = rte_eal_init(argc, const_cast<char**>(argv));
- if (ret < 0) {
- throw DriverException(HERE, "rte_eal_init failed");
- }
-
- // create an memory pool for accommodating packet buffers
- mbufPool = rte_mempool_create("mbuf_pool", NB_MBUF,
- MBUF_SIZE, 32,
- sizeof32(struct rte_pktmbuf_pool_private),
- rte_pktmbuf_pool_init, NULL,
- rte_pktmbuf_init, NULL,
- rte_socket_id(), 0);
-
- if (!mbufPool) {
- throw DriverException(HERE, format(
- "Failed to allocate memory for packet buffers: %s",
- rte_strerror(rte_errno)));
+ if (!skipInit)
+ {
+ auto dpdkInit = [&](int argc, char** argv) {
+ for (int i = 0; i < argc; ++i)
+ LOG(NOTICE, "DPDK command-line %i = %s", i, argv[i]);
+ rte_openlog_stream(fileLogger.getFile());
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0) {
+ throw DriverException(HERE, "rte_eal_init failed");
+ }
+ };
+
+ if (args.empty()) {
+ // Initialize the DPDK environment with some default parameters.
+ // --file-prefix is needed to avoid false lock conflicts if servers
+ // run on different nodes, but with a shared NFS home directory.
+ // This is a bug in DPDK as of 9/2016; if the bug gets fixed, then
+ // the --file-prefix argument can be removed.
+ char nameBuffer[1000];
+ if (gethostname(nameBuffer, sizeof(nameBuffer)) != 0) {
+ throw DriverException(HERE, format("gethostname failed: %s",
+ strerror(errno)));
+ }
+ nameBuffer[sizeof(nameBuffer)-1] = 0; // Needed if name was too long.
+ const char *argv[] = {"rc", "--file-prefix", nameBuffer, "-c", "1",
+ "-n", "1", NULL};
+ int argc = static_cast<int>(sizeof(argv) / sizeof(argv[0])) - 1;
+ dpdkInit(argc, const_cast<char**>(argv));
+ } else {
+ std::replace(args.begin(), args.end(), ' ', '\0');
+ int argc = static_cast<int>(std::count(args.begin(), args.end(), '\0') + 1);
+ char* cstr = const_cast<char*>(args.c_str());
+ char* argv[argc];
+ for (int i = 0, pos = 0; i < argc; ++i, pos = args.find_first_of('\0', pos) + 1)
+ argv[i] = cstr + pos;
+ dpdkInit(argc, argv);
+ }
}
// ensure that DPDK was able to detect a compatible and available NIC
@@ -217,10 +238,29 @@ DpdkDriver::DpdkDriver(Context* context, int port)
}
}
+ int dpdk_socket = rte_eth_dev_socket_id(portId);
+ if (dpdk_socket < 0) {
+ throw DriverException(HERE, format("Unable to get socket for ethernet device."));
+ }
+
+ // create an memory pool for accommodating packet buffers
+ mbufPool = rte_mempool_create("mbuf_pool", NB_MBUF,
+ MBUF_SIZE, 32,
+ sizeof32(struct rte_pktmbuf_pool_private),
+ rte_pktmbuf_pool_init, NULL,
+ rte_pktmbuf_init, NULL,
+ dpdk_socket, 0);
+
+ if (!mbufPool) {
+ throw DriverException(HERE, format(
+ "Failed to allocate memory for packet buffers: %s",
+ rte_strerror(rte_errno)));
+ }
+
// setup and initialize the receive and transmit NIC queues,
// and activate the port.
- rte_eth_rx_queue_setup(portId, 0, NDESC, 0, NULL, mbufPool);
- rte_eth_tx_queue_setup(portId, 0, NDESC, 0, NULL);
+ rte_eth_rx_queue_setup(portId, 0, NDESC, dpdk_socket, NULL, mbufPool);
+ rte_eth_tx_queue_setup(portId, 0, NDESC, dpdk_socket, NULL);
// set the MTU that the NIC port should support
ret = rte_eth_dev_set_mtu(portId, MAX_PAYLOAD_SIZE);
@@ -528,7 +568,7 @@ DpdkDriver::sendPacket(const Address* addr,
// Fill out the PCP field and the Ethernet frame type of the encapsulated
// frame (DEI and VLAN ID are not relevant and trivially set to 0).
struct vlan_hdr* vlanHdr = reinterpret_cast<struct vlan_hdr*>(p);
- vlanHdr->vlan_tci = rte_cpu_to_be_16(PRIORITY_TO_PCP[priority]);
+ vlanHdr->vlan_tci = rte_cpu_to_be_16(PRIORITY_TO_PCP[priority] | vlanTag);
vlanHdr->eth_proto = rte_cpu_to_be_16(NetUtil::EthPayloadType::RAMCLOUD);
p += VLAN_TAG_LEN;
diff --git a/src/DpdkDriver.h b/src/DpdkDriver.h
index ba6d2314..4b5935ca 100644
--- a/src/DpdkDriver.h
+++ b/src/DpdkDriver.h
@@ -57,7 +57,11 @@ class DpdkDriver : public Driver
#if TESTING
explicit DpdkDriver();
#endif
- explicit DpdkDriver(Context* context, int port = 0);
+ explicit DpdkDriver(Context* context,
+ int port = 0,
+ std::string args = "",
+ bool skipInit = false,
+ uint16_t tag = 0);
virtual ~DpdkDriver();
virtual int getHighestPacketPriority();
virtual uint32_t getMaxPacketSize();
@@ -176,6 +180,9 @@ class DpdkDriver : public Driver
/// be `highestPriorityAvail - lowestPriorityAvail + 1`.
int lowestPriorityAvail;
+ /// The VLAN tag to put into outgoing packets.
+ uint16_t vlanTag;
+
/// Used to redirect log entries from the DPDK log into the RAMCloud log.
FileLogger fileLogger;
diff --git a/src/OptionParser.cc b/src/OptionParser.cc
index 1945cc64..7876a725 100644
--- a/src/OptionParser.cc
+++ b/src/OptionParser.cc
@@ -234,6 +234,21 @@ OptionParser::setup(int argc, char* argv[])
default_value(-1),
"Selects the Ethernet port that the DPDK driver should use, "
"or -1 if DPDK should not be enabled.")
+ ("dpdkArgs",
+ ProgramOptions::value<std::string>(&options.dpdkArgs)->
+ default_value(""),
+ "The arguments to give to rte_eal_init to initialize DPDK. If"
+ " empty then the arguments will be generated.")
+ ("dpdkSkipInit",
+ ProgramOptions::value<bool>(&options.dpdkSkipInit)->
+ default_value(false),
+ "Whether or not to skip rte_eal_init because it has already been"
+ " called. Default is false, which means rte_eal_init will be"
+ " called.")
+ ("dpdkVlanTag",
+ ProgramOptions::value<uint16_t>(&options.dpdkVlanTag)->
+ default_value(0),
+ "The VLAN tag to include in outgoing DPDK packets.")
("portTimeout",
ProgramOptions::value<int32_t>(&options.portTimeout)->
default_value(-1), // Overriding to the initial value.
diff --git a/src/OptionParser.h b/src/OptionParser.h
index d57a3f16..c9ef9759 100644
--- a/src/OptionParser.h
+++ b/src/OptionParser.h
@@ -46,6 +46,9 @@ class CommandLineOptions {
, portTimeout(0)
, clusterName()
, dpdkPort(0)
+ , dpdkArgs("")
+ , dpdkSkipInit(false)
+ , dpdkVlanTag(0)
{
}
@@ -118,6 +121,33 @@ class CommandLineOptions {
return dpdkPort;
}
+ /**
+ * Returns the arguments to give to rte_eal_init to initialize DPDK. If
+ * blank, one will be generated.
+ */
+ std::string getDpdkArgs() const
+ {
+ return dpdkArgs;
+ }
+
+ /**
+ * Returns whether or not rte_eal_init should be skipped when initializing
+ * DPDK. This can be done because DPDK has already been initialized by an
+ * application instantiating a RAMCloud::RAMCloud.
+ */
+ bool getDpdkSkipInit() const
+ {
+ return dpdkSkipInit;
+ }
+
+ /**
+ * Gets the VLAN tag that should be used in packets outgoing from RAMCloud.
+ */
+ uint16_t getDpdkVlanTag() const
+ {
+ return dpdkVlanTag;
+ }
+
string coordinatorLocator; ///< See getCoordinatorLocator().
string localLocator; ///< See getLocalLocator().
string externalStorageLocator; ///< See getExternalStorageLocator().
@@ -126,6 +156,9 @@ class CommandLineOptions {
int32_t portTimeout; ///< See getSessionTimeout().
string clusterName; ///< See getClusterName().
int dpdkPort; ///< See getDpdkPort().
+ std::string dpdkArgs; ///< See getDpdkArgs().
+ bool dpdkSkipInit; ///< See getDpdkSkipInit().
+ uint16_t dpdkVlanTag; ///< See getDpdkVlanTag().
};
/**
diff --git a/src/TransportManager.cc b/src/TransportManager.cc
index 59425165..ede7e501 100644
--- a/src/TransportManager.cc
+++ b/src/TransportManager.cc
@@ -157,7 +157,11 @@ TransportManager::TransportManager(Context* context)
if (context->options != NULL) {
int dpdkPort = context->options->getDpdkPort();
if (dpdkPort >= 0) {
- dpdkDriver = new DpdkDriver(context, dpdkPort);
+ dpdkDriver = new DpdkDriver(context,
+ dpdkPort,
+ context->options->getDpdkArgs(),
+ context->options->getDpdkSkipInit(),
+ context->options->getDpdkVlanTag());
basicDpdkTransportFactory.setDpdkDriver(dpdkDriver);
}
}