Skip to content

Commit

Permalink
ctop: Add multicond attribute
Browse files Browse the repository at this point in the history
Signed-off-by: iipeace <[email protected]>
  • Loading branch information
iipeace committed Nov 3, 2023
1 parent af230f5 commit 737f51f
Show file tree
Hide file tree
Showing 2 changed files with 135 additions and 61 deletions.
41 changes: 23 additions & 18 deletions guider/guider.conf
Original file line number Diff line number Diff line change
Expand Up @@ -46,28 +46,31 @@
- FIELD
< common >
- apply: activation flag (true/false)
- perm: permission requirement (root)
- interval: duration condition for event (tick)
- comp: check compare operation (big, less, eq)
- type: interval value type (avg/min/max)
- taskmon: monitoring status for tasks (true/false)
- message: event message (string)
- explain: description (string)
- after: handle the event only once uptime has reached this value
- before: handle the event only while uptime has not exceeded this value
- caution: caution (string)
- command: command set for handling the started event
- lcommand: command set for handling the finished event
- signal: signal timer for command task (tick{:SIGNAL})
- oneshot: one-time event handling flag (true/false)
- refresh: restart timer for items disabled by oneshot (tick)
- comp: check compare operation (big, less, eq)
- include: task filter for inclusion
- except: task filter for exclusion
- explain: description (string)
- goneshot: global oneshot flag (true/false)
- skip: global skip count flag (tick)
- interval: duration condition for event (tick)
- lcommand: command set for handling the finished event
- lock: global lock flag to handle the event exclusively (true/false)
- max: maximum count for event handling (tick)
- re: regex filter flag (true/false)
- before: handle the event only while uptime has not exceeded this value
- after: handle the event only once uptime has reached this value
- rbefore: handle the event only while runtime has not exceeded this value
- message: event message (string)
- multicond: multiple condition flag for event (true/false)
- oneshot: one-time event handling flag (true/false)
- perm: permission requirement (root)
- rafter: handle the event only once runtime has reached this value
- rbefore: handle the event only while runtime has not exceeded this value
- re: regex filter flag for logs (true/false)
- refresh: restart timer for items disabled by oneshot (tick)
- signal: signal timer for command task (tick{:SIGNAL})
- skip: global skip count flag (tick)
- taskmon: monitoring status for tasks (true/false)
- type: interval value type (avg/min/max)

< cpu >
- total: total CPU usage for system/process/thread (%)
Expand Down Expand Up @@ -340,9 +343,11 @@
{
"apply": "false",
"total": 90,
"interval": 1,
"kernel": 80,
"interval": 2,
"multicond": "true",
"lcommand": ["SAVE"],
"message": "system CPU usage is high",
"message": "system kernel CPU usage is high",
"explain": "save monitoring results when the event is finished"
}
],
Expand Down
155 changes: 112 additions & 43 deletions guider/guider.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
__credits__ = "Peace Lee"
__license__ = "GPLv2"
__version__ = "3.9.8"
__revision__ = "231102"
__revision__ = "231103"
__maintainer__ = "Peace Lee"
__email__ = "[email protected]"
__repository__ = "https://github.com/iipeace/guider"
Expand Down Expand Up @@ -25628,6 +25628,7 @@ class SysMgr(object):
thrTimerList = []
thrEvtList = {}
thrEvtHist = {}
thrEvtCondList = {}
thrEvtCntList = {}
eventLockList = {}
eventCommandList = {}
Expand Down Expand Up @@ -35169,7 +35170,7 @@ def printHelp(force=False, isExit=True):
- {5:1} {7:1} from when specific function calls return
# {0:1} {1:1} {9:1} -c "write|getret:start, *"

- {5:1} with colorful elapsed time exceeds 0.1 second {4:1} {7:1}
- {5:1} with colorful elapsed time exceeds 0.1 seconds {4:1} {7:1}
# {0:1} {1:1} {9:1} -c "write|getret" -q ELAPSED:0.1

- {5:1} and call them again repeatedly {4:1} {7:1}
Expand Down Expand Up @@ -36175,7 +36176,7 @@ def _getDesc(s, t=0):
- {2:1} with stdev and histogram for elapsed time {3:1}
# {0:1} {1:1} -g a.out -q STDEV, PRINTHIST

- {2:1} {3:1} with colorful elapsed time exceeds 0.1 second
- {2:1} {3:1} with colorful elapsed time exceeds 0.1 seconds
# {0:1} {1:1} -g a.out -q ELAPSED:0.1

- Monitor only successful syscalls {3:1}
Expand Down Expand Up @@ -37425,7 +37426,7 @@ def _getDesc(s, t=0):
- {4:1} {6:1} without strip for buffer contents
# {0:1} {1:1} -I "ls -al" -t write -q NOSTRIP

- {4:1} with colorful elapsed time exceeds 0.1 second
- {4:1} with colorful elapsed time exceeds 0.1 seconds
# {0:1} {1:1} -g a.out -c write -q ELAPSED:0.1

- {3:1} {5:1} and print strings in specific maximum size
Expand Down Expand Up @@ -40879,7 +40880,7 @@ def _getDesc(s, t=0):
# {0:1} {1:1} write:TEST
# {0:1} {1:1} -g write:TEST

- Read specific files infinitely with printing elapsed time
- Read specific files infinitely with elapsed time exceeds specific seconds for a chunk
# {0:1} {1:1} "read:TEST1,TEST2" -R -q ELAPSED:0
# {0:1} {1:1} "read:TEST1,TEST2" -R -q ELAPSED:0.1

Expand Down Expand Up @@ -63343,7 +63344,11 @@ def _readChunk(fobj, chunk=4096, sync=False, elapsed=False):
if elapsed != -1:
diff = time.time() - prev
if diff >= elapsed:
SysMgr.printWarn("elapsed %.3f" % (diff), True)
SysMgr.printWarn(
"elapsed %.3f sec for reading %s"
% (diff, UtilMgr.convSize2Unit(chunk)),
True,
)

yield ret

Expand All @@ -63362,7 +63367,11 @@ def _writeChunk(fobj, chunk=4096, sync=False, elapsed=False):
if elapsed != -1:
diff = time.time() - prev
if diff >= elapsed:
SysMgr.printWarn("elapsed %.3f" % (diff), True)
SysMgr.printWarn(
"elapsed %.3f sec for writing %s"
% (diff, UtilMgr.convSize2Unit(chunk)),
True,
)

yield ret

Expand Down Expand Up @@ -128367,7 +128376,7 @@ def printSystemUsage(self):
self.addSysInterval("mem", "pg", pgDirty)
self.addSysInterval("mem", "slab", totalSlabMem)
self.addSysInterval("mem", "cache", totalCacheMem)
self.addSysInterval("mem", "kernel", totalKernelMem)
self.addSysInterval("mem", "kernelmem", totalKernelMem)
self.addSysInterval("mem", "nrMinFlt", nrMinFault)
self.addSysInterval("mem", "pgRclmBg", pgRclmBg)
self.addSysInterval("mem", "pgRclmFg", pgRclmFg)
Expand Down Expand Up @@ -135557,6 +135566,9 @@ def _getIntval(item):
# init event item #
self.reportData["event"] = {}

# init conditional event list #
SysMgr.thrEvtCondList = {}

# check image created #
if SysMgr.imagePath:
self.reportData["event"]["IMAGE_CREATED"] = SysMgr.imagePath
Expand Down Expand Up @@ -135810,8 +135822,11 @@ def _getIntval(item):
"CmaFree",
"cmaDev",
]
convTable = {
"kernel": "kernelmem",
}
for item in items:
intval = _getIntval(item)
intval = _getIntval(convTable.get(item, item))
self.checkThreshold("mem", item, "MEM", "big", intval=intval)
except SystemExit:
sys.exit(0)
Expand Down Expand Up @@ -136332,6 +136347,39 @@ def _check(attr, name, stat):
except:
SysMgr.printWarn("failed to check task thresholds", reason=True)

# handle events having multicond condition #
for en, ev in SysMgr.thrEvtCondList.items():
thr, res, conds = ev
if not res:
continue

(
oneshot,
goneshot,
refresh,
lock,
maxcnt,
skip,
) = TaskAnalyzer.getThresholdAttr(thr)

evtinfo = en.split("_", 2)
details = "%s_%s" % (evtinfo[1], "-".join(conds))

self.setThresholdEvent(
thr,
"multicond",
evtinfo[0].upper(),
None,
target="true",
attr=details,
oneshot=oneshot,
goneshot=goneshot,
refresh=refresh,
lock=lock,
maxcnt=maxcnt,
skip=skip,
)

# handle events #
self.handleThresholdEvents()

Expand Down Expand Up @@ -136386,6 +136434,23 @@ def setThresholdEvent(
# init value #
value = None

# condition event checker #
def _checkMultiCond(flag):
    """Record the result of one condition for a multicond event.

    Uses ``item``, ``comval``, ``event`` and ``attr`` from the enclosing
    scope and keeps per-event state in ``SysMgr.thrEvtCondList`` as
    ``[comval, result, [checked items]]``.

    Returns False when the item itself is "multicond", when the event
    config does not set "multicond" to "true", or when an earlier
    condition for the same event has already failed; otherwise stores
    ``flag`` for the event and returns it.
    """
    # the synthetic "multicond" item is never tracked itself #
    if item == "multicond":
        return False

    # only events that enable the attribute are tracked #
    if comval.get("multicond") != "true":
        return False

    key = "%s_%s_%s" % (event, attr, comval)
    entry = SysMgr.thrEvtCondList.get(key)

    # first condition seen for this event #
    if not entry:
        SysMgr.thrEvtCondList[key] = [comval, flag, [item]]
        return flag

    # a previously failed condition keeps the event failed #
    if entry[1] is False:
        return False

    # update the result and remember the checked item #
    entry[1] = flag
    entry[2].append(item)
    return flag

# check return condition #
if not item in comval:
return
Expand Down Expand Up @@ -136413,6 +136478,7 @@ def setThresholdEvent(
value = target
elif intval and "interval" in comval:
if comval["interval"] > len(intval):
_checkMultiCond(False)
return

# check items in intervals #
Expand All @@ -136432,24 +136498,23 @@ def setThresholdEvent(
else:
result = sum(intval) / len(intval)

threshold = UtilMgr.convUnit2Size(thresholdVal)
if (
(comp == "big" and threshold <= result)
or (comp == "less" and threshold >= result)
or (comp == "eq" and threshold == result)
):
value = result
cond = result
else:
cond = target

# compare values #
if comp:
threshold = UtilMgr.convUnit2Size(thresholdVal)
if (
(comp == "big" and threshold <= target)
or (comp == "less" and threshold >= target)
or (comp == "eq" and threshold == target)
(comp == "big" and threshold <= cond)
or (comp == "less" and threshold >= cond)
or (comp == "eq" and threshold == cond)
):
value = target
value = cond

# check value #
# check result #
if value is None:
_checkMultiCond(False)
return

# check time conditions #
Expand All @@ -136459,31 +136524,41 @@ def setThresholdEvent(
if not key in comval:
continue

ok = True

# get threshold value #
threshold = UtilMgr.convUnit2Time(comval[key])

# check skip conditions #
if key == "before":
if SysMgr.uptime > threshold:
return
ok = False
elif key == "after":
if SysMgr.uptime < threshold:
return
ok = False
elif key == "rbefore":
runtime = SysMgr.getRuntime(sec=True)
if runtime > threshold:
return
ok = False
elif key == "rafter":
runtime = SysMgr.getRuntime(sec=True)
if runtime < threshold:
return
ok = False

if not ok:
_checkMultiCond(False)
return
except SystemExit:
sys.exit(0)
except:
SysMgr.printWarn(
"failed to check '%s' condition" % key, True, True
)

# check multicond condition #
if _checkMultiCond(True):
return

# set event name #
ename = "%s_%s_%s" % (event, attr, item)
eventList = []
Expand Down Expand Up @@ -136518,6 +136593,7 @@ def setThresholdEvent(
"ignored '%s' event because of the lock of [%s]"
% (ename, ", ".join(SysMgr.eventLockList.keys()))
)
_checkMultiCond(False)
return

# handle goneshot flag #
Expand Down Expand Up @@ -136970,23 +137046,17 @@ def _checkThreshold(cond):
elif "except" in cond:
if "task" in addval:
pid = next(iter(addval["task"]))
comm = addval["task"][pid]["comm"].lstrip("*")

if type(cond["except"]) is list:
elist = cond["except"]
else:
elist = [cond["except"]]

if UtilMgr.isValidStr(comm, elist):
return False
etarget = addval["task"][pid]["comm"].lstrip("*")
elif "dev" in addval:
etarget = addval["dev"]
else:
if type(cond["except"]) is list:
elist = cond["except"]
else:
elist = [cond["except"]]
etarget = None

if UtilMgr.isValidStr(addval["dev"], elist):
return False
elist = cond["except"]
if etarget and UtilMgr.isValidStr(
etarget, elist if type(elist) is list else [elist]
):
return False

# check oneshot flag #
(
Expand Down Expand Up @@ -137518,10 +137588,9 @@ def _setDefaultInfo(data, pid, comm, runtime=None):
# check resource threshold #
self.checkResourceThreshold()

# print system status to file if condition is met #
# legacy: print system status to file if condition is met #
if (
"event" in self.reportData
and self.reportData["event"]
self.reportData.get("event")
and SysMgr.reportFileEnable
and SysMgr.outPath
):
Expand Down

0 comments on commit 737f51f

Please sign in to comment.