-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbottleneck.proto
284 lines (256 loc) · 18.2 KB
/
bottleneck.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
syntax = "proto3";
// One sampled row of metrics used for bottleneck analysis.
//
// Field groups, by field number:
//     1 - 14   transfer metrics derived from `ss`
//    15 - 29   disk metrics from `iostat -x`
//    30 - 36   per-process I/O counters from /proc/{pid}/io
//    37 - 86   per-process status from /proc/{pid}/stat
//    87 - 88   CPU / memory usage from `ps`
//    89 - 91   TCP receive-buffer limits (tcp_rmem)
//    92 - 94   TCP send-buffer limits (tcp_wmem)
//    95 - 181  undocumented counters (see the note on that group)
//   182        experiment label
message BottleneckLog {
  // ---- Fields 1 - 14: transfer metrics ------------------------------------
  // Produced by some computation over the values reported by
  //   ss -t -i state ESTABLISHED dst [dst-ip];
  // Example of the output that is parsed:
  //   Recv-Q Send-Q Local Address:Port Peer Address:Port
  //   0 5814120 [::ffff:134.197.113.70]:36662 [::ffff:134.197.113.71]:50505 users:(("java",pid=12466,fd=16))
  //   htcp wscale:13,13 rto:204 rtt:0.224/0.045 mss:1448 pmtu:1500 rcvmss:536
  //   advmss:1448 cwnd:456 ssthresh:424 bytes_acked:17599506586 segs_out:12192101
  //   segs_in:129549 data_segs_out:12192099 send 23581.7Mbps lastrcv:18044 pacing_rate 28298.1Mbps
  //   delivery_rate 7348.1Mbps busy:18024ms rwnd_limited:17584ms(97.6%) unacked:176 retrans:0/244
  //   reordering:174 rcv_space:14480 rcv_ssthresh:42242 notsent:5560320 minrtt:0.029

  // NOTE(review): presumably derived from the `rtt:` value - confirm.
  float avg_rtt_value = 1;

  // Author's alias: p_avg_value.
  float pacing_rate = 2;

  // Author's alias: avg_cwnd_value.
  float cwnd_rate = 3;

  // Author's alias: avg_rto_value.
  float avg_retransmission_timeout_value = 4;

  // NOTE(review): presumably derived from `bytes_acked:` - confirm.
  float byte_ack = 5;

  // NOTE(review): presumably derived from `segs_out:` - confirm.
  float seg_out = 6;

  // Retransmission.
  float retrans = 7;

  // Author's alias: avg_mss_rate.
  float mss_value = 8;

  // Author's alias: avg_ssthresh_value.
  float ssthresh_value = 9;

  // NOTE(review): presumably derived from `segs_in:` - confirm.
  float segs_in = 10;

  // NOTE(review): presumably derived from the `send` rate - confirm.
  float avg_send_value = 11;

  // Unacked packets.
  float unacked_value = 12;

  // Author's alias: avg_rcv_space.
  float rcv_space = 13;

  // Last of the transfer metrics.
  // NOTE(review): the source column for this value is not documented.
  float send_buffer_value = 14;

  // ---- Fields 15 - 29: disk metrics ---------------------------------------
  // Obtained with
  //   iostat -x [Device_Name];

  // The number (after merges) of read requests completed per second for the
  // device.
  optional float read_req = 15;

  // The number (after merges) of write requests completed per second for the
  // device.
  optional float write_req = 16;

  // The number of sectors (kilobytes, megabytes) read from the device per
  // second.
  optional float rkB = 17;

  // The number of sectors (kilobytes, megabytes) written to the device per
  // second.
  optional float wkB = 18;

  // The number of read requests merged per second that were queued to the
  // device.
  optional float rrqm = 19;

  // The number of write requests merged per second that were queued to the
  // device.
  optional float wrqm = 20;

  // The percentage of read requests merged together before being sent to the
  // device.
  optional float rrqm_perc = 21;

  // The percentage of write requests merged together before being sent to
  // the device.
  optional float wrqm_perc = 22;

  // The average time (in milliseconds) for read requests issued to the
  // device to be served. This includes the time spent by the requests in
  // queue and the time spent servicing them.
  optional float r_await = 23;

  // The average time (in milliseconds) for write requests issued to the
  // device to be served. This includes the time spent by the requests in
  // queue and the time spent servicing them.
  optional float w_await = 24;

  // The average queue length of the requests that were issued to the device.
  // Note: in previous versions, this field was known as avgqu-sz.
  optional float aqu_sz = 25;

  // The average size (in kilobytes) of the read requests that were issued to
  // the device.
  optional float rareq_sz = 26;

  // The average size (in kilobytes) of the write requests that were issued
  // to the device.
  optional float wareq_sz = 27;

  // The average service time (in milliseconds) for I/O requests that were
  // issued to the device.
  // Warning: do not trust this field any more. It will be removed in a
  // future sysstat version.
  optional float svctm = 28;

  // Percentage of elapsed time during which I/O requests were issued to the
  // device (bandwidth utilization for the device). Device saturation occurs
  // when this value is close to 100% for devices serving requests serially.
  // For devices serving requests in parallel, such as RAID arrays and modern
  // SSDs, this number does not reflect their performance limits.
  optional float util = 29;

  // ---- Fields 30 - 36: per-process I/O counters ---------------------------
  // Obtained with
  //   cat /proc/{pid}/io;
  // For details see https://man7.org/linux/man-pages/man5/proc.5.html and
  // search for "/proc/[pid]/io".

  // Characters read: the number of bytes which this task has caused to be
  // read from storage.
  optional uint64 rchar = 30;

  // Characters written: the number of bytes which this task has caused, or
  // shall cause, to be written to disk.
  optional uint64 wchar = 31;

  // Attempt to count the number of read I/O operations - that is, system
  // calls such as read(2) and pread(2).
  optional uint64 syscr = 32;

  // Attempt to count the number of write I/O operations - that is, system
  // calls such as write(2) and pwrite(2).
  optional uint64 syscw = 33;

  // Attempt to count the number of bytes which this process really did cause
  // to be fetched from the storage layer. This is accurate for block-backed
  // filesystems.
  optional uint64 read_bytes_io = 34;

  // Attempt to count the number of bytes which this process caused to be
  // sent to the storage layer.
  optional uint64 write_bytes_io = 35;

  // The big inaccuracy here is truncate. If a process writes 1 MB to a file
  // and then deletes the file, it will in fact perform no writeout, but it
  // will have been accounted as having caused 1 MB of write. In other words,
  // this field represents the number of bytes which this process caused to
  // not happen, by truncating pagecache. A task can cause "negative" I/O
  // too: if this task truncates some dirty pagecache, some I/O which another
  // task has been accounted for (in its write_bytes) will not be happening.
  optional uint64 cancelled_write_bytes = 36;

  // ---- Fields 37 - 86: per-process status ---------------------------------
  // Obtained with
  //   cat /proc/{pid}/stat;
  // For details see https://man7.org/linux/man-pages/man5/proc.5.html and
  // search for "/proc/[pid]/stat".

  // The process ID.
  optional int32 pid = 37;

  // The PID of the parent of this process.
  optional int32 ppid = 38;

  // The process group ID of the process.
  optional int32 pgrp = 39;

  // The session ID of the process.
  optional int32 session = 40;

  // The controlling terminal of the process. (The minor device number is
  // contained in the combination of bits 31 to 20 and 7 to 0; the major
  // device number is in bits 15 to 8.)
  optional int32 tty_nr = 41;

  // The ID of the foreground process group of the controlling terminal of
  // the process.
  optional int32 tpgid = 42;

  // The kernel flags word of the process. For bit meanings, see the PF_*
  // defines in the Linux kernel source file include/linux/sched.h. Details
  // depend on the kernel version. The format for this field was %lu before
  // Linux 2.6.
  optional uint32 flags = 43;

  // The number of minor faults the process has made which have not required
  // loading a memory page from disk.
  uint64 minflt = 44;

  // The number of minor faults that the process's waited-for children have
  // made.
  uint64 cminflt = 45;

  // The number of major faults the process has made which have required
  // loading a memory page from disk.
  uint64 majflt = 46;

  // The number of major faults that the process's waited-for children have
  // made.
  uint64 cmajflt = 47;

  // Amount of time that this process has been scheduled in user mode,
  // measured in clock ticks (divide by sysconf(_SC_CLK_TCK)). This includes
  // guest time, guest_time (time spent running a virtual CPU, see below), so
  // that applications that are not aware of the guest time field do not lose
  // that time from their calculations.
  uint64 utime = 48;

  // Amount of time that this process has been scheduled in kernel mode,
  // measured in clock ticks (divide by sysconf(_SC_CLK_TCK)).
  uint64 stime = 49;

  // Amount of time that this process's waited-for children have been
  // scheduled in user mode, measured in clock ticks (divide by
  // sysconf(_SC_CLK_TCK)). (See also times(2).) This includes guest time,
  // cguest_time (time spent running a virtual CPU, see below).
  int64 cutime = 50;

  // Amount of time that this process's waited-for children have been
  // scheduled in kernel mode, measured in clock ticks (divide by
  // sysconf(_SC_CLK_TCK)).
  int64 cstime = 51;

  // (Explanation for Linux 2.6) For processes running a real-time scheduling
  // policy (policy below; see sched_setscheduler(2)), this is the negated
  // scheduling priority, minus one; that is, a number in the range -2 to
  // -100, corresponding to real-time priorities 1 to 99.
  optional int64 priority = 52;

  // The nice value (see setpriority(2)), a value in the range 19 (low
  // priority) to -20 (high priority).
  optional int64 nice = 53;

  // Number of threads in this process (since Linux 2.6). Before kernel 2.6,
  // this field was hard coded to 0 as a placeholder for an earlier removed
  // field.
  int64 num_threads = 54;

  // The time in jiffies before the next SIGALRM is sent to the process due
  // to an interval timer.
  int64 itrealvalue = 55;

  // The time the process started after system boot.
  optional uint64 starttime = 56;

  // Virtual memory size in bytes.
  uint64 vsize = 57;

  // Resident Set Size: number of pages the process has in real memory. This
  // is just the pages which count toward text, data, or stack space. It does
  // not include pages which have not been demand-loaded in, or which are
  // swapped out. This value is inaccurate; see /proc/[pid]/statm.
  int64 rss = 58;

  // Current soft limit in bytes on the rss of the process; see the
  // description of RLIMIT_RSS in getrlimit(2).
  uint64 rsslim = 59;

  // The address above which program text can run.
  optional uint64 startcode = 60;

  // The address below which program text can run.
  optional uint64 endcode = 61;

  // The address of the start (i.e., bottom) of the stack.
  optional uint64 startstack = 62;

  // The current value of ESP (stack pointer), as found in the kernel stack
  // page for the process.
  optional uint64 kstkesp = 63;

  // The current EIP (instruction pointer).
  optional uint64 kstkeip = 64;

  // [Obsolete] The bitmap of pending signals, displayed as a decimal number.
  // Obsolete because it does not provide information on real-time signals;
  // use /proc/[pid]/status instead.
  optional uint64 signal = 65;

  // [Obsolete] The bitmap of blocked signals, displayed as a decimal number.
  // Obsolete because it does not provide information on real-time signals;
  // use /proc/[pid]/status instead.
  optional uint64 blocked = 66;

  // [Obsolete] The bitmap of ignored signals, displayed as a decimal number.
  optional uint64 sigignore = 67;

  // [Obsolete] The bitmap of caught signals, displayed as a decimal number.
  optional uint64 sigcatch = 68;

  // The "channel" in which the process is waiting: the address of a location
  // in the kernel where the process is sleeping.
  optional uint64 wchan = 69;

  // Number of pages swapped (not maintained).
  uint64 nswap = 70;

  // Cumulative nswap for child processes (not maintained).
  uint64 cnswap = 71;

  // Signal to be sent to parent when we die.
  optional int32 exit_signal = 72;

  // CPU number last executed on.
  optional int32 processor = 73;

  // Real-time scheduling priority, a number in the range 1 to 99 for
  // processes scheduled under a real-time policy, or 0 for non-real-time
  // processes (see sched_setscheduler(2)).
  uint32 rt_priority = 74;

  // Scheduling policy (see sched_setscheduler(2)). Decode using the SCHED_*
  // constants in linux/sched.h.
  optional uint32 policy = 75;

  // Aggregated block I/O delays, measured in clock ticks (centiseconds).
  uint64 delayacct_blkio_ticks = 76;

  // Guest time of the process (time spent running a virtual CPU for a guest
  // operating system), measured in clock ticks (divide by
  // sysconf(_SC_CLK_TCK)).
  uint64 guest_time = 77;

  // Guest time of the process's children, measured in clock ticks (divide by
  // sysconf(_SC_CLK_TCK)).
  int64 cguest_time = 78;

  // Address above which program initialized and uninitialized (BSS) data are
  // placed.
  optional uint64 start_data = 79;

  // Address below which program initialized and uninitialized (BSS) data are
  // placed.
  optional uint64 end_data = 80;

  // Address above which program heap can be expanded with brk(2).
  optional uint64 start_brk = 81;

  // Address above which program command-line arguments (argv) are placed.
  optional uint64 arg_start = 82;

  // Address below which program command-line arguments (argv) are placed.
  optional uint64 arg_end = 83;

  // Address above which program environment is placed.
  optional uint64 env_start = 84;

  // Address below which program environment is placed.
  optional uint64 env_end = 85;

  // The thread's exit status in the form reported by waitpid(2).
  optional int32 exit_code = 86;

  // ---- Fields 87 - 88: CPU / memory usage ---------------------------------
  // Obtained with
  //   ps -p [pid] -o '%cpu,%mem';

  // Total CPU usage percentage.
  float cpu_usage_percentage = 87;

  // Total memory usage percentage.
  float mem_usage_percentage = 88;

  // ---- Fields 89 - 91: TCP receive-buffer limits --------------------------
  // Obtained with
  //   cat /proc/sys/net/ipv4/tcp_rmem
  // Further documentation: https://man7.org/linux/man-pages/man7/tcp.7.html
  // (search for tcp_rmem).

  // Minimum size of the receive buffer used by each TCP socket.
  uint64 tcp_rcv_buffer_min = 89;

  // Default size of the receive buffer for a TCP socket.
  uint64 tcp_rcv_buffer_default = 90;

  // Maximum size of the receive buffer used by each TCP socket.
  uint64 tcp_rcv_buffer_max = 91;

  // ---- Fields 92 - 94: TCP send-buffer limits -----------------------------
  // Obtained with
  //   cat /proc/sys/net/ipv4/tcp_wmem
  // Further documentation: https://man7.org/linux/man-pages/man7/tcp.7.html
  // (search for tcp_wmem).

  // Minimum size of the send buffer used by each TCP socket.
  uint64 tcp_snd_buffer_min = 92;

  // Default size of the send buffer for a TCP socket.
  uint64 tcp_snd_buffer_default = 93;

  // Maximum size of the send buffer used by each TCP socket.
  uint64 tcp_snd_buffer_max = 94;

  // ---- Fields 95 - 181: undocumented counters -----------------------------
  // NOTE(review): the schema gives no source command, unit, or meaning for
  // these fields. The names (ost_*, mds_*, ldlm_*, obd_ping) look like
  // Lustre client RPC statistics, and the _md1 / _md2 suffixes presumably
  // distinguish two metadata targets - confirm with the producer of this
  // data before relying on any of that.

  // Unsuffixed counters (95 - 111).

  optional uint64 req_waittime = 95;
  optional uint64 req_active = 96;
  optional uint64 read_bytes = 97;
  optional uint64 write_bytes = 98;
  optional uint64 ost_setattr = 99;
  optional uint64 ost_read = 100;
  optional uint64 ost_write = 101;
  optional uint64 ost_get_info = 102;
  optional uint64 ost_connect = 103;
  optional uint64 ost_punch = 104;
  optional uint64 ost_statfs = 105;
  optional uint64 ost_sync = 106;
  optional uint64 ost_quotactl = 107;
  optional uint64 ldlm_cancel = 108;
  optional uint64 obd_ping = 109;
  optional uint64 pending_read_pages = 110;
  optional uint64 read_RPCs_in_flight = 111;

  // Counters suffixed _md1 (112 - 147).

  optional uint64 avg_waittime_md1 = 112;
  optional uint64 inflight_md1 = 113;
  optional uint64 unregistering_md1 = 114;
  optional uint64 timeouts_md1 = 115;
  optional uint64 req_waittime_md1 = 116;
  optional uint64 req_active_md1 = 117;
  optional uint64 mds_getattr_md1 = 118;
  optional uint64 mds_getattr_lock_md1 = 119;
  optional uint64 mds_close_md1 = 120;
  optional uint64 mds_readpage_md1 = 121;
  optional uint64 mds_connect_md1 = 122;
  optional uint64 mds_get_root_md1 = 123;
  optional uint64 mds_statfs_md1 = 124;
  optional uint64 mds_sync_md1 = 125;
  optional uint64 mds_quotactl_md1 = 126;
  optional uint64 mds_getxattr_md1 = 127;
  optional uint64 mds_hsm_state_set_md1 = 128;
  optional uint64 ldlm_cancel_md1 = 129;
  optional uint64 obd_ping_md1 = 130;
  optional uint64 seq_query_md1 = 131;
  optional uint64 fld_query_md1 = 132;
  optional uint64 close_md1 = 133;
  optional uint64 create_md1 = 134;
  optional uint64 enqueue_md1 = 135;
  optional uint64 getattr_md1 = 136;
  optional uint64 intent_lock_md1 = 137;
  optional uint64 link_md1 = 138;
  optional uint64 rename_md1 = 139;
  optional uint64 setattr_md1 = 140;
  optional uint64 fsync_md1 = 141;
  optional uint64 read_page_md1 = 142;
  optional uint64 unlink_md1 = 143;
  optional uint64 setxattr_md1 = 144;
  optional uint64 getxattr_md1 = 145;
  optional uint64 intent_getattr_async_md1 = 146;
  optional uint64 revalidate_lock_md1 = 147;

  // Counters suffixed _md2 (148 - 181). This set has no counterpart of
  // mds_getattr_lock_md1 or mds_get_root_md1.

  optional uint64 avg_waittime_md2 = 148;
  optional uint64 inflight_md2 = 149;
  optional uint64 unregistering_md2 = 150;
  optional uint64 timeouts_md2 = 151;
  optional uint64 req_waittime_md2 = 152;
  optional uint64 req_active_md2 = 153;
  optional uint64 mds_getattr_md2 = 154;
  optional uint64 mds_close_md2 = 155;
  optional uint64 mds_readpage_md2 = 156;
  optional uint64 mds_connect_md2 = 157;
  optional uint64 mds_statfs_md2 = 158;
  optional uint64 mds_sync_md2 = 159;
  optional uint64 mds_quotactl_md2 = 160;
  optional uint64 mds_getxattr_md2 = 161;
  optional uint64 mds_hsm_state_set_md2 = 162;
  optional uint64 ldlm_cancel_md2 = 163;
  optional uint64 obd_ping_md2 = 164;
  optional uint64 seq_query_md2 = 165;
  optional uint64 fld_query_md2 = 166;
  optional uint64 close_md2 = 167;
  optional uint64 create_md2 = 168;
  optional uint64 enqueue_md2 = 169;
  optional uint64 getattr_md2 = 170;
  optional uint64 intent_lock_md2 = 171;
  optional uint64 link_md2 = 172;
  optional uint64 rename_md2 = 173;
  optional uint64 setattr_md2 = 174;
  optional uint64 fsync_md2 = 175;
  optional uint64 read_page_md2 = 176;
  optional uint64 unlink_md2 = 177;
  optional uint64 setxattr_md2 = 178;
  optional uint64 getxattr_md2 = 179;
  optional uint64 intent_getattr_async_md2 = 180;
  optional uint64 revalidate_lock_md2 = 181;

  // ---- Field 182: experiment label ----------------------------------------

  // The label of the experiment used.
  int32 label_value = 182;
}
// One experiment run: the collected BottleneckLog rows together with the
// thread counts, stress levels, and link settings recorded for that run.
//
// NOTE(review): units and valid ranges for the numeric settings are not
// stated anywhere in this schema - confirm with the producer before
// interpreting them.
message BottleneckFile {
  // Distribution applied to the link delay.
  //
  // NORMAL is the zero value and link_delay_distribution has no `optional`
  // label, so a message that never sets the field reads back as NORMAL.
  // NOTE(review): the value names match the delay distribution tables of
  // tc-netem (normal, pareto, paretonormal) - presumably that is where they
  // come from; confirm.
  // NOTE(review): the type name and the unprefixed values do not follow the
  // usual protobuf naming conventions; renaming them would break generated
  // code, so they are kept as-is.
  enum DELAY_DISTRIBUTION {
    NORMAL = 0;
    PARETO = 1;
    PARETONORMAL = 2;
  }

  // Identifies the network / testbed of the run.
  //
  // NOTE(review): the meaning of the individual values is not documented
  // here. DTNS is the zero value; network_type is declared `optional`, so
  // an unset field can still be told apart from an explicit DTNS.
  enum Network {
    DTNS = 0;
    AWS = 1;
    CC = 2;
    AWS_FXS = 3;
  }

  // Metric samples collected during the run.
  repeated BottleneckLog rows = 1;

  // Thread-count settings for the read, write, and network stages.
  // NOTE(review): presumably the transfer tool's concurrency - confirm.
  int32 read_threads = 2;
  int32 write_threads = 3;
  int32 network_threads = 4;

  // Stress levels applied to CPU, I/O, and memory.
  // NOTE(review): scale and unit are not documented; note that mem_stress
  // is a float while the other two are int32.
  int32 cpu_stress = 5;
  int32 io_stress = 6;
  float mem_stress = 7;

  // Link impairment settings.
  // NOTE(review): the names mirror tc-netem options (loss, delay,
  // duplicate, corrupt, reorder) - presumably percentages / milliseconds as
  // in netem; confirm.
  float link_loss = 8;
  float link_delay = 9;
  DELAY_DISTRIBUTION link_delay_distribution = 10;
  float link_variation = 11;
  float link_duplicate = 12;
  float link_corrupt = 13;
  float link_reorder = 14;
  float link_latency = 15;

  // Network / testbed of the run; presence is tracked explicitly.
  optional Network network_type = 16;

  // Creation time of the record, in UTC according to the field name.
  // NOTE(review): the unit is not documented. A 32-bit float has a 24-bit
  // significand, so if this holds Unix epoch seconds it cannot resolve
  // individual seconds for present-day values - confirm, and prefer a new,
  // separately numbered field over changing this field's type.
  optional float created_time_utc = 17;
}
// Top-level container holding any number of experiment runs.
message BottleneckFiles {
  // The experiment runs, one BottleneckFile per entry.
  repeated BottleneckFile logs = 1;
}