-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathmetrics.proto
764 lines (663 loc) · 26.4 KB
/
metrics.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
// Copyright(c) 2017-2018 Zededa, Inc.
// All rights reserved.
syntax = "proto3";
import "google/protobuf/timestamp.proto";
import "evecommon/evecommon.proto";
package org.lfedge.eve.metrics;
option go_package = "github.com/lf-edge/eve-api/go/metrics";
option java_package = "org.lfedge.eve.metrics";
// Metrics from devices and applications
enum ZmetricTypes {
  ZmNop = 0;
  ZmDevice = 1;
  // Note: value 2 is not defined in this enum.
  ZmApp = 3;
}
// memoryMetric - DEPRECATED. DO NOT USE THIS.
message memoryMetric {
  // In MBytes.
  uint32 usedMem = 2;
  // In MBytes.
  uint32 availMem = 3;
  // Obsolete - no longer used.
  double usedPercentage = 4;
  // Obsolete - no longer used.
  double availPercentage = 5;
}
// App Memory Metric
message AppMemoryMetric {
  // allocatedMB - Memory allocated to the app instance by EVE, independent
  // of the state of the App Instance.
  uint32 allocatedMB = 1;

  // usedMB - Memory used by the app from totalMB memory.
  // usedMB <= totalMB in normal cases.
  // Enforcement of allocatedMB could be lax, hence usedMB can sometimes
  // exceed allocatedMB.
  // NOTE(review): this message has no totalMB field; the comment presumably
  // means allocatedMB - confirm.
  uint32 usedMB = 2;
}
// DeviceMemoryMetric - device-level memory totals, all in MB.
message DeviceMemoryMetric {
  // memoryMB - Total device memory.
  // Same value as reported in the memory field of the device info message;
  // repeated here so the controller can process the metrics message
  // conveniently.
  uint32 memoryMB = 1;

  // allocated_appsMB - Total memory allocated to AppInstances.
  // In steady state, this is the sum of AppMemoryMetric.allocatedMB over all
  // the app instances.
  uint32 allocated_appsMB = 2;

  // allocated_eveMB - Total memory allocated for Eve.
  uint32 allocated_eveMB = 3;

  // used_eveMB - Memory currently used by Eve.
  // used_eveMB <= allocated_eveMB in the normal case, but Eve may not be able
  // to enforce it, i.e., used_eveMB might be greater than allocated_eveMB.
  uint32 used_eveMB = 4;
}
// networkMetric - tx/rx byte, packet, drop and error counters for one named
// entry (see iName).
message networkMetric {
  // Field numbers 6 and 7 belonged to deprecated fields. They are reserved so
  // they can never be reassigned with a different meaning.
  reserved 6, 7;

  // iName - Set to SystemAdapter.Name which is the Logicallabel in phyio.
  // Name from config; displayName for network instance.
  string iName = 1;
  // alias - Set to SystemAdapter.alias
  string alias = 20;
  // In bytes.
  uint64 txBytes = 2;
  // In bytes.
  uint64 rxBytes = 3;
  uint64 txDrops = 4;
  uint64 rxDrops = 5;
  uint64 txPkts = 8;
  uint64 rxPkts = 9;
  uint64 txErrors = 10;
  uint64 rxErrors = 11;
  uint64 txAclDrops = 12;
  uint64 rxAclDrops = 13;
  uint64 txAclRateLimitDrops = 14;
  uint64 rxAclRateLimitDrops = 15;
  // localName - Set to IfName of the Adapter.
  // Local vif name e.g., nbu*
  string localName = 16;
}
// CellularMetric - signal strength and packet stats for one cellular modem.
message CellularMetric {
  // Logical label assigned to the physical cellular modem.
  string logicallabel = 1;
  CellularSignalStrength signal_strength = 2;
  CellularPacketStats packet_stats = 3;
}
// CellularSignalStrength contains cellular signal strength information.
// A value equal to the maximum int32 (0x7FFFFFFF) means the metric is
// unspecified/unavailable.
message CellularSignalStrength {
  // Received Signal Strength Indicator (RSSI), in dBm (decibel-milliwatts).
  int32 rssi = 1;
  // Reference Signal Received Quality (RSRQ), in dB (decibels).
  int32 rsrq = 2;
  // Reference Signal Receive Power (RSRP), in dBm (decibel-milliwatts).
  int32 rsrp = 3;
  // Signal-to-Noise Ratio (SNR), in dB (decibels).
  int32 snr = 4;
}
// Stats collected by the cellular modem itself.
message CellularPacketStats {
  NetworkStats rx = 1;
  NetworkStats tx = 2;
}
// Failures and successes for communication to zedcloud
// for each management port
message zedcloudMetric {
  // Interface name for management port.
  string ifName = 1;
  uint64 failures = 2;
  uint64 success = 3;
  google.protobuf.Timestamp lastFailure = 4;
  google.protobuf.Timestamp lastSuccess = 5;
  repeated urlcloudMetric urlMetrics = 6;
  // Networking success, authen verify fail count for V2 API.
  uint64 authVerifyFailure = 7;
}
// Information for each API URL
message urlcloudMetric {
  string url = 1;
  // tryMsgCount / tryByteCount: things we might have tried to send,
  // or it failed before we could send them.
  int64 tryMsgCount = 2;
  int64 tryByteCount = 3;
  int64 sentMsgCount = 4;
  int64 sentByteCount = 5;
  int64 recvMsgCount = 6;
  int64 recvByteCount = 7;
  // Total time spent servicing requests that succeeded.
  int64 total_time_spent = 8;
  // Total counts of the TLS session resumption.
  int64 sess_resume_count = 9;
}
// Failures and successes for the object decryption
message CipherMetric {
  string agent_name = 1;
  uint64 failure_count = 2;
  uint64 success_count = 3;
  google.protobuf.Timestamp last_failure = 4;
  google.protobuf.Timestamp last_success = 5;
  // One TypeCounter entry per CipherError code (see TypeCounter).
  repeated TypeCounter tc = 6;
}
// Various error codes.
// CIPHER_ERROR_NO_DATA isn't really an error; it means that there was
// neither encrypted nor cleartext data.
// Must match CipherError in pkg/pillar/types
enum CipherError {
  CIPHER_ERROR_INVALID = 0;
  // Not yet received ECDH controller cert
  CIPHER_ERROR_NOT_READY = 1;
  // ECDH decrypt failed
  CIPHER_ERROR_DECRYPT_FAILED = 2;
  // Failed protobuf decode post decryption
  CIPHER_ERROR_UNMARSHAL_FAILED = 3;
  // Failure then using cleartext
  CIPHER_ERROR_CLEARTEXT_FALLBACK = 4;
  // Failed and no cleartext to fall back to
  CIPHER_ERROR_MISSING_FALLBACK = 5;
  // Only cleartext received
  CIPHER_ERROR_NO_CIPHER = 6;
  // No data to encrypt/decrypt
  CIPHER_ERROR_NO_DATA = 7;
}
// Counters for each of the different CipherError codes
message TypeCounter {
  CipherError error_code = 1;
  uint64 count = 2;
}
// appCpuMetric - CPU time counters and uptime timestamp.
message appCpuMetric {
  // Field numbers 2 and 3 belonged to deprecated fields. They are reserved so
  // they can never be reassigned with a different meaning.
  reserved 2, 3;

  google.protobuf.Timestamp upTime = 4;
  // Should be deprecated.
  // CPU total in secs. In docker stats, it's the container user usage.
  uint64 total = 5;
  // Docker host system cpu total in secs. Inc user, system and idle.
  uint64 systemTotal = 6;
  // Like 'total' but in nanoseconds. Overflow may happen after ~40 years
  // without EVE restart.
  uint64 total_ns = 7;
}
// StorageVDevMetrics - contains virtual device statistics from ZFS storage
message StorageVDevMetrics {
  // VolumeUUID.
  // NOTE(review): the original comment read "(how and in ZInfoVolume.uuid)";
  // presumably this is the same value as ZInfoVolume.uuid - confirm.
  // Non-empty if metrics are collected from /dev/...
  // For example: /dev/zd* (zvol)
  string volume_u_u_i_d = 1;
  // Space allocated (in bytes)
  uint64 alloc = 2;
  // Total device capacity (in bytes)
  uint64 total = 3;
  // Deflated capacity on this device available to ZFS. (in bytes)
  uint64 deflated_space = 4;
  // Replaceable dev size. Displays the minimum volume
  // required to replace this device. (in bytes)
  uint64 replaceable_size = 5;
  // Expandable dev size. This value determines how much space
  // the pool can expand from this device. (in bytes)
  uint64 expandable_size = 6;
  // Counter read errors.
  uint64 read_errors = 7;
  // Counter write errors.
  uint64 write_errors = 8;
  // Counter checksum errors.
  uint64 checksum_errors = 9;
  // Total number of bytes read.
  uint64 bytes_read = 10;
  // Total number of bytes written.
  uint64 bytes_write = 11;
  // The total number of read operations sent to
  // the pool or device, including metadata requests.
  uint64 ops_count_read = 12;
  // The total number of write operations sent to the pool or device.
  uint64 ops_count_write = 13;
  // Number of I/Os currently in progress.
  uint64 i_os_in_progress = 14;
  // The total number of milliseconds spent by all reads.
  uint64 read_ticks = 15;
  // The total number of milliseconds spent by all writes.
  uint64 write_ticks = 16;
  // The number of milliseconds spent doing I/Os.
  uint64 i_os_total_ticks = 17;
  // The weighted number of milliseconds spent doing I/Os.
  // This can also be used to estimate average queue wait time for requests.
  uint64 weighted_i_o_ticks = 18;
}
// StorageDiskMetric - contains disk metrics from ZFS storage
message StorageDiskMetric {
  org.lfedge.eve.common.DiskDescription disk_name = 1;
  // Metrics for the disk
  StorageVDevMetrics metrics = 2;
}
// StorageChildrenMetric - contains child for zpool metrics
// from ZFS storage for RAID or Mirrors.
// The message is self-referential through `children`.
message StorageChildrenMetric {
  // The GUID of the child. Unique value.
  uint64 g_u_i_d = 1;
  // Metrics for disks included in RAID or mirror
  repeated StorageDiskMetric disks = 2;
  // Children of this dataset
  repeated StorageChildrenMetric children = 3;
  // Metrics for the whole RAID or mirror
  StorageVDevMetrics metrics = 4;
}
// StorageMetric - contains zpool metrics including child devices
message StorageMetric {
  // Tags 2, 3 and 4 are deprecated. They are reserved so they can never be
  // reassigned with a different meaning.
  reserved 2, 3, 4;

  string pool_name = 1;
  // Time of the collection metrics.
  google.protobuf.Timestamp collection_time = 5;
  // Metrics for zpool
  StorageVDevMetrics zpool_metrics = 6;
  // Metrics for RAID or Mirror
  repeated StorageChildrenMetric children_datasets = 7;
  // Metrics for disks that are not included in the RAID or mirror
  repeated StorageDiskMetric disks = 8;
  // Metrics for zvols
  repeated StorageVDevMetrics zvols = 9;
}
// deviceMetric - device-level metrics carried in ZMetricMsg.dm.
message deviceMetric {
  // Field numbers 1 and 5 belonged to deprecated fields. They are reserved so
  // they can never be reassigned with a different meaning.
  reserved 1, 5;

  // memory - OBSOLETE. Use DeviceMemoryMetric (deviceMemory) instead.
  memoryMetric memory = 2;
  repeated networkMetric network = 3;
  repeated zedcloudMetric zedcloud = 4;
  repeated diskMetric disk = 6;
  appCpuMetric cpuMetric = 7;
  repeated MetricItem metricItems = 8;
  // In MB
  uint64 runtimeStorageOverheadMB = 9;
  // In MB
  uint64 appRunTimeStorageMB = 10;
  // systemServicesMemoryMB - OBSOLETE. Will no longer be used.
  // In MB
  memoryMetric systemServicesMemoryMB = 11;
  logMetric log = 12;
  // Object decryption
  repeated CipherMetric cipher = 13;
  AclMetric acl = 14;
  newlogMetric newlog = 15;
  zedboxStats zedbox = 16;
  // deviceMemory - Memory metrics.
  DeviceMemoryMetric deviceMemory = 17;
  // Last change to EdgeDevConfig received by device from controller
  google.protobuf.Timestamp last_received_config = 18;
  // Last change to EdgeDevConfig processed by device
  google.protobuf.Timestamp last_processed_config = 19;
  repeated CellularMetric cellular = 20;
  FlowlogMetric flowlog = 21;
  // Time to wait between metrics messages before marking an edge-node as
  // inactive in the controller. 'dormantTimeInSeconds' is subject to change
  // based on network connectivity.
  uint64 dormant_time_in_seconds = 22;
  // Storage metrics from ZFS
  repeated StorageMetric storage_metrics = 23;
}
// AclMetric - ACL rule count.
message AclMetric {
  // Total number of rules found in filter (FORWARD), raw (PREROUTING) tables
  uint64 total_rule_count = 1;
}
// appContainerMetric - metrics for one container.
message appContainerMetric {
  // The unique key for the container in a VM or IoT Edge
  string appContainerName = 1;
  // Status string e.g. Uptime 3 hours
  string status = 2;
  // Number of PIDs inside the container
  uint32 PIDs = 3;
  // Container cpu usage
  appCpuMetric cpu = 4;
  // memory - OBSOLETE. Use appContainerMemory instead.
  // Container memory usage, will fill 'usedMem', 'availMem' initially
  memoryMetric memory = 5;
  // Container network usage, will fill 'txBytes', 'rxBytes' initially
  networkMetric network = 6;
  // Container Block IO, will fill 'readBytes', 'writeBytes' initially
  diskMetric disk = 7;
  AppMemoryMetric appContainerMemory = 8;
}
// MetricItemType - how a MetricItem value should be interpreted.
enum MetricItemType {
  // E.g., a string like an ESSID
  MetricItemOther = 0;
  // Goes up and down over time
  MetricItemGauge = 1;
  // Monotonically increasing (until reboot)
  MetricItemCounter = 2;
  // Toggles on and off; count transitions
  MetricItemState = 3;
}
// Open-ended metrics from different parts of the device such as LTE modem
// metrics.
message MetricItem {
  // E.g., "lte-signal-strength"
  string key = 1;
  MetricItemType type = 2;
  oneof metricItemValue {
    bool boolValue = 3;
    // If timer this is in seconds
    uint32 uint32Value = 4;
    uint64 uint64Value = 5;
    float floatValue = 6;
    // Use with care
    string stringValue = 7;
  }
}
// For each partition; counts since boot
message diskMetric {
  // E.g., "mmcblk0p2"
  string disk = 1;
  // E.g., "/config"
  string mountPath = 2;
  // In MB
  // NOTE(review): the field name says bytes while the documented unit is MB -
  // confirm which one producers actually report.
  uint64 readBytes = 3;
  // In MB (same NOTE(review) as readBytes applies)
  uint64 writeBytes = 4;
  // Number of ops
  uint64 readCount = 5;
  // Number of ops
  uint64 writeCount = 6;
  // In MBytes; if we know the mountpath
  uint64 total = 7;
  // In MBytes; if we know the mountpath
  uint64 used = 8;
  // In MBytes; if we know the mountpath
  uint64 free = 9;
}
// appDiskMetric - per-disk usage for an app (see appMetric.disk).
message appDiskMetric {
  // E.g., "mmcblk0p2"
  string disk = 1;
  // In MBytes
  uint64 provisioned = 2;
  // In MBytes
  uint64 used = 3;
  // Type of disk, e.g., QCOW2, RAW etc.
  string diskType = 4;
  // Dirty flag
  bool dirty = 5;
}
// appMetric - metrics for one app (see ZMetricMsg.am).
message appMetric {
  string AppID = 1;
  string appVersion = 10;
  string AppName = 2;
  appCpuMetric cpu = 3;
  // memory - OBSOLETE. Use appMemory instead.
  memoryMetric memory = 4;
  repeated networkMetric network = 5;
  repeated appDiskMetric disk = 6;
  repeated appContainerMetric container = 7;
  AppMemoryMetric appMemory = 8;
  repeated AppPatchEnvelopeMetric patch_envelope = 9;
}
// Provide information on PatchEnvelope usage per app
message AppPatchEnvelopeMetric {
  // UUID of PatchEnvelope
  string uuid = 1;
  // Version of PatchEnvelope
  string version = 2;
  // Number of times the app instance called patch APIs
  uint64 patch_api_call_count = 3;
  // Number of times the app instance actually downloaded
  // the whole patch envelope or part of it
  uint64 download_count = 4;
}
// We track device and app logs separately with these counters.
// An event is counted as a log input that can be sent onwards (to cloud) or
// dropped for a few reasons.
// Thus:
//   totalAppLogInput = numAppEventsSent + numAppEventErrors + num4xxResponses
//   totalDeviceLogInput = numDeviceEventsSent + numDeviceEventErrors
message logMetric {
  // Sent events count also includes events that the device tried to send but
  // got deferred, because deferred logs will eventually reach cloud after the
  // network is restored.
  uint64 numDeviceEventsSent = 1;
  uint64 numDeviceBundlesSent = 2;
  uint64 numAppEventsSent = 3;
  uint64 numAppBundlesSent = 4;

  // num4xxResponses: This counter is akin to event errors/drops.
  //
  // A 4xx response for a device event bundle does not make sense, unless
  // there is a programming error in EVE code that sends device event bundles
  // to cloud.
  // But app instances can disappear from cloud, and any further logs sent by
  // EVE to cloud will result in a 4xx response back.
  // Any time an app log bundle gets rejected with a 4xx response from cloud,
  // num4xxResponses should be incremented by the number of events present in
  // that rejected bundle.
  uint64 num4xxResponses = 5;

  google.protobuf.Timestamp lastDeviceBundleSendTime = 6;
  google.protobuf.Timestamp lastAppBundleSendTime = 7;
  bool isLogProcessingDeferred = 8;
  uint64 numTimesDeferred = 9;
  google.protobuf.Timestamp lastLogDeferTime = 10;
  // Note: field numbers 11 and 12 are not defined in this message.
  uint64 totalDeviceLogInput = 13;
  uint64 totalAppLogInput = 14;
  // E.g., size of an individual event is larger than the max proto buf size
  // limit.
  uint64 numDeviceEventErrors = 15;
  uint64 numAppEventErrors = 16;
  uint64 numDeviceBundleProtoBytesSent = 17;
  uint64 numAppBundleProtoBytesSent = 18;
  // Split of totalDeviceLogInput per source string
  map<string, uint64> input_sources = 19;
}
// Lisp stats
// PktStat - a packet count paired with a byte count.
message PktStat {
  uint64 Packets = 1;
  uint64 Bytes = 2;
}
// Vpn Stats
message ZMetricConn {
  PktStat InPkts = 1;
  PktStat OutPkts = 2;
  PktStat ErrPkts = 3;
  // NOTE(review): "Carier" looks like a misspelling of "Carrier"; the name is
  // part of the generated API and is kept as is.
  PktStat CarierErrPkts = 4;
}
// ZMetricVpn - four ZMetricConn counter sets for a VPN
// (used as ZMetricNetworkInstance.vpnm).
message ZMetricVpn {
  ZMetricConn ConnStat = 1;
  ZMetricConn IkeStat = 2;
  ZMetricConn NatTStat = 3;
  ZMetricConn EspStat = 4;
}
// For other services with no specific metrics.
// Intentionally empty.
message ZMetricNone {}
// flow stats
message ZMetricFlowLink {
  // Number 2 / name "eid" belonged to a deprecated string member of the Link
  // oneof. They are reserved so they can never be reassigned.
  reserved 2;
  reserved "eid";

  oneof Link {
    // IpSec: Subnet
    string subNet = 1;
  }
  // IpSec: SpiId
  string spiId = 3;
}
// ZMetricFlowEndPoint - one endpoint of a flow, with its links and traffic
// counters.
message ZMetricFlowEndPoint {
  // Number 2 / name "rloc" belonged to a deprecated string member of the
  // Endpoint oneof. They are reserved so they can never be reassigned.
  reserved 2;
  reserved "rloc";

  oneof Endpoint {
    // IpSec: Endpoint Ip Address
    string ipAddr = 1;
  }
  repeated ZMetricFlowLink link = 5;
  // Traffic
  PktStat stats = 10;
}
// ZMetricFlow - one flow with its local and remote endpoints.
message ZMetricFlow {
  // Number 4 / name "iid" belonged to a deprecated uint64 field. They are
  // reserved so they can never be reassigned.
  reserved 4;
  reserved "iid";

  // IpSec Conn-Id
  string id = 1;
  // IpSec Tun-Name
  string name = 2;
  // IpSec-Vpn
  uint32 type = 3;
  // In seconds
  uint64 estTime = 5;
  ZMetricFlowEndPoint lEndPoint = 10;
  repeated ZMetricFlowEndPoint rEndPoint = 11;
}
// NetworkStats - packet, error, drop and byte counters for one direction
// (used as the rx/tx members of ZMetricNetworkStats and CellularPacketStats).
message NetworkStats {
  uint64 totalPackets = 1;
  uint64 errors = 2;
  uint64 drops = 3;
  uint64 totalBytes = 4;
}
// ZMetricNetworkStats - NetworkStats for the rx and tx directions.
message ZMetricNetworkStats {
  NetworkStats rx = 1;
  NetworkStats tx = 2;
}
// Metrics published for a NI multipath route with probing-based selection
// of the output port.
// The route is identified by dst_network.
// current_port (and currentIntf) indicate which port is currently selected.
// The rest are probing metrics and status info used to make the selection.
// In older EVE versions, probing was less flexible and only allowed selecting
// a single port for the entire network instance and all its routes. In that
// case dst_network is published empty and there is only one instance of
// ZProbeNIMetrics published for the network instance.
//
// Per NI probing stats.
message ZProbeNIMetrics {
  // Current picked uplink interface
  string currentIntf = 1;
  // Remote URL or IP address for probing
  string remoteEndpoint = 2;
  // Local ping interval in seconds
  uint32 pingIntv = 3;
  // Remote probe interval in seconds
  uint32 remotePingIntv = 4;
  // Total number of uplink intf in consideration
  uint32 uplinkCnt = 5;

  // Level 2: probing stats of a single uplink interface.
  message ZProbeIntfMetric {
    // Uplink interface name, e.g. eth1, wlan0
    string intfName = 11;
    // Intf nexthop IP address
    string gatewayNexhtop = 12;
    // Local gateway status UP or not
    bool gatewayUP = 13;
    // Remote url/IP status UP or not
    bool remoteHostUP = 14;
    // Local ping success count
    uint32 nexthopUpCount = 15;
    // Local ping failure count
    uint32 nexthopDownCount = 16;
    // Remote probing success count
    uint32 remoteUpCount = 17;
    // Remote probing failure count
    uint32 remoteDownCount = 18;
    // Remote host probe latency in msec
    uint32 remoteProbeLatency = 19;
  }

  // Per uplink intf probing stats
  repeated ZProbeIntfMetric intfMetric = 10;
  string dst_network = 20;
  // Logical label of the currently selected output port for the route.
  // Note that currentIntf reports the interface name of this port.
  string current_port = 21;
}
// ZMetricNetworkInstance - metrics for one network instance
// (see ZMetricMsg.nm).
message ZMetricNetworkInstance {
  // Numbers 21 and 31 belonged to the deprecated Lisp fields
  // (ZMetricLisp lispm = 21, ZMetricLispGlobal lispGlobalStats = 31).
  // Numbers and names are reserved so they can never be reassigned.
  reserved 21, 31;
  reserved "lispm", "lispGlobalStats";

  // UUID
  string networkID = 2;
  string networkVersion = 3;
  // ZNetworkInstType
  uint32 instType = 5;
  // From NetworkInstance config
  string displayname = 6;
  // Forwarding enabled
  bool activated = 7;
  // Aggregate counters for bridge
  repeated networkMetric network = 10;
  // NI uplink probing stats.
  // Deprecated - newer EVE versions publish a list of probe metrics
  // (probe_metrics), with one entry for every multipath route with probing
  // enabled.
  ZProbeNIMetrics probeMetric = 12 [deprecated = true];
  repeated ZProbeNIMetrics probe_metrics = 13;
  oneof InstanceContent {
    ZMetricVpn vpnm = 20;
    ZMetricNone nonem = 22;
  }
  repeated ZMetricFlow flowStats = 30;
  // Network bridge interface statistics
  ZMetricNetworkStats networkStats = 40;
  // Information about vlans attached to this network instance
  vlanInfo vlan_info = 41;
}
// ZMetricVolume - read/write and capacity counters for one volume
// (see ZMetricMsg.vm).
message ZMetricVolume {
  string uuid = 1;
  string displayName = 2;
  uint64 readBytes = 3;
  uint64 writeBytes = 4;
  uint64 readCount = 5;
  uint64 writeCount = 6;
  uint64 totalBytes = 7;
  uint64 usedBytes = 8;
  uint64 freeBytes = 9;
}
// Metrics for a single process
message ZMetricProcess {
  int32 pid = 1;
  string name = 2;
  bool user_process = 3;
  bool watched = 4;
  int32 num_fds = 5;
  int32 num_threads = 6;
  double user_time = 7;
  double system_time = 8;
  double cpu_percent = 9;
  google.protobuf.Timestamp create_time = 10;
  uint64 vm_bytes = 11;
  uint64 rss_bytes = 12;
  float memory_percent = 13;
  // We report the stack for !watched processes which have been running for a
  // while
  string stack = 14;
}
// This is the request payload for POST /api/v1/edgeDevice/metrics
// ZMetricMsg carries periodic metrics; typically one message is sent for
// all of the objects on a device (the device itself plus all of the app
// instances and all of the network instances). As such there is normally no
// need to retransmit these messages if there is some network failure; the
// next periodic message will include the cumulative numbers.
// The message is assumed to be protected by a TLS session bound to the
// device certificate.
message ZMetricMsg {
  // Field numbers 2 and 6 belonged to deprecated fields. They are reserved so
  // they can never be reassigned with a different meaning.
  reserved 2, 6;

  string devID = 1;
  google.protobuf.Timestamp atTimeStamp = 3;
  oneof MetricContent {
    deviceMetric dm = 4;
  }
  repeated appMetric am = 5;
  repeated ZMetricNetworkInstance nm = 7;
  repeated ZMetricVolume vm = 8;
  repeated ZMetricProcess pr = 9;
  KubeClusterMetrics cm = 10;
}
// newlogMetric - stats for newlog
message newlogMetric {
  // Status of failed to send to controller
  bool failedToSend = 1;
  // Start time of fail to send
  google.protobuf.Timestamp failSentStartTime = 2;

  // Total bytes uploaded to controller
  uint64 totalBytesUpload = 3;
  // Total 4xx responses
  uint32 num4xxResponses = 4;
  // Current upload interval in seconds
  uint32 currentUploadIntv = 5;
  // Current logfile timeout value in seconds
  uint32 logfileTimeout = 6;
  // Maximum gzip file size so far
  uint32 maxGzipFileSize = 7;
  // Average gzip file size
  uint32 avgGzipFileSize = 8;

  // Stats for upload-to-controller latency: the min, max and average upload
  // delay in msec, and the last upload delay in msec.
  uint32 mimUploadMsec = 9;
  uint32 maxUploadMsec = 10;
  uint32 avgUploadMsec = 11;
  uint32 lastUploadMsec = 12;

  // Stats for the controller server: the current and average CPU usage in
  // percentage, and the current and average processing delay in msec (from
  // receiving newlog batch to reply to device).
  float currentCPULoadPct = 13;
  float averageCPULoadPct = 14;
  uint32 currentProcessDelay = 15;
  uint32 averageProcessDelay = 16;

  // logfileMetrics is shared for both device log and application log.
  logfileMetrics deviceMetrics = 17;
  logfileMetrics appMetrics = 18;

  // Top 10 device log (not app) source in total bytes in percentage
  map<string, uint32> top10_input_sources = 19;
  // Counter for not uploaded gzip files removed due to exceeding user defined
  // or default quota
  uint32 gzipFilesRemoved = 20;
  // Counter for 429 returned status code for uploading
  uint32 tooManyRequest = 21;
  // Counter for gzip files bypassing the uploading to cloud
  uint32 skipUploadAppFile = 22;
  // Total size of logs on device
  uint64 total_size_logs = 23;
  // Timestamp of the latest device log saved on device.
  // NOTE(review): the field name says "oldest" while this description says
  // "latest" - confirm which one is reported.
  google.protobuf.Timestamp oldest_saved_device_log = 24;
}
// logfileMetrics - is shared for both device log and application log
message logfileMetrics {
  // Stats for gzip files that have been sent to controller
  uint64 numGzipFileSent = 1;
  // Total bytes written into gzip files
  uint64 numGzipBytesWrite = 2;
  // Total bytes written into logfiles before compression
  uint64 numBytesWrite = 3;
  // Number of gzip files currently remaining in the gzip directory
  uint32 numGzipFileInDir = 4;
  // Total input events
  uint64 numInputEvent = 5;

  // Stats for log upload file retries
  uint64 numGzipFileRetry = 6;
  // The number of files that can not be sent and are kept on device
  uint32 numGzipFileKeptLocal = 7;
  // Timestamp of the most recent gzip file sent
  google.protobuf.Timestamp recentGzipFileTime = 8;
  // Timestamp for sending that file
  google.protobuf.Timestamp lastGzipFileSendTime = 9;
}
// zedboxStats - for zedbox process items
message zedboxStats {
  uint32 numGoRoutines = 1;
}
// Contains information about the access vlans attached to network instance
message vlanInfo {
  // Number of ports attached to this network instance that are designated
  // trunk
  uint32 num_trunk_ports = 1;
  // Vlan id to its usage count map
  map<uint32, uint32> vlan_counts = 2;
}
// Flowlog stats.
message FlowlogMetric {
  // Counting FlowMessage instances.
  // Note that FlowMessage is used to package and carry a list of flows and
  // DNS requests.
  FlowlogCounters messages = 1;
  // Counting FlowMessage.Flows.
  FlowlogCounters flows = 2;
  // Counting FlowMessage.DnsReqs.
  FlowlogCounters dns_requests = 3;
}
// Counters for published/dropped flowlog messages/flows or DNS requests.
// Note that every record is eventually either successfully published or
// dropped. In the process of publishing a flowlog record, one or more failed
// attempts can be made. This means that the total number of fully processed
// records (i.e. not queued anymore) equals the sum of "success" and "drops",
// while "failed_attempts" is an orthogonal metric.
message FlowlogCounters {
  // Flow records successfully published to zedcloud.
  uint64 success = 1;
  // Flow records dropped because the flowlog queue would not fit them.
  // This is either because the queue was already full when the record was
  // created, or the publish attempts would keep failing and the queue was
  // nearing its capacity.
  uint64 drops = 2;
  // The number of failed attempts to publish a flow record.
  uint64 failed_attempts = 3;
}
// Resources used as reported by kubernetes api for the node objects.
message KubeNodeMetrics {
  // Name of the node
  string name = 1;
  // CPU cores in milli cores
  uint32 cpu_cores = 2;
  // CPU usage in percentage
  float cpu_usage = 3;
  // Memory usage in bytes.
  // NOTE(review): a uint32 byte count cannot represent 4 GiB or more;
  // confirm the unit or consider a wider field in a future revision.
  uint32 memory_usage = 4;
  // Memory usage in percentage
  float memory_usage_percentage = 5;
}
// Resources used as reported by kubernetes api for running eve user apps.
message KubeAppMetrics {
  // Name of the EVE application
  string name = 1;
  // CPU cores in milli cores
  uint32 cpu_cores = 2;
  // Memory usage in bytes.
  // NOTE(review): a uint32 byte count cannot represent 4 GiB or more;
  // confirm the unit or consider a wider field in a future revision.
  uint32 memory_usage = 3;
}
// Parent cluster metrics object to be sent by the node which has leader
// election.
message KubeClusterMetrics {
  // Cluster UUID which will match config.EdgeNodeCluster.cluster_id passed
  // in to the node by the controller.
  string cluster_id = 1;
  // Metrics of the nodes in the cluster
  repeated KubeNodeMetrics nodes = 2;
  // Metrics of the EVE pod applications in the cluster
  repeated KubeAppMetrics eve_pod_apps = 3;
  // Metrics of the EVE vmi applications in the cluster
  repeated KubeAppMetrics eve_vmi_apps = 4;
}