Skip to content

Commit

Permalink
refactor: reduce ipmi log (#971)
Browse files (browse the repository at this point in the history)
* refactor: reduce ipmi log

* refactor: reduce ipmi log

* refactor: reduce ipmi log

* refactor: reduce ipmi log
  • Loading branch information
tanxiao1990 authored Jun 14, 2024
1 parent beda1a1 commit 87a6144
Show file tree
Hide file tree
Showing 12 changed files with 77 additions and 52 deletions.
10 changes: 6 additions & 4 deletions inputs/ipmi/exporter/collector_bmc.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ var (
)
)

type BMCCollector struct{}
type BMCCollector struct {
debugMod bool
}

func (c BMCCollector) Name() CollectorName {
return BMCCollectorName
Expand All @@ -54,18 +56,18 @@ func (c BMCCollector) Args() []string {
func (c BMCCollector) Collect(result freeipmi.Result, ch chan<- prometheus.Metric, target ipmiTarget) (int, error) {
firmwareRevision, err := freeipmi.GetBMCInfoFirmwareRevision(result)
if err != nil {
log.Println("msg", "Failed to collect BMC data", "target", targetName(target.host), "error", err)
log.Println("E!", "Failed to collect BMC data", "target", targetName(target.host), "error", err)
return 0, err
}
manufacturerID, err := freeipmi.GetBMCInfoManufacturerID(result)
if err != nil {
log.Println("msg", "Failed to collect BMC data", "target", targetName(target.host), "error", err)
log.Println("E!", "Failed to collect BMC data", "target", targetName(target.host), "error", err)
return 0, err
}
systemFirmwareVersion, err := freeipmi.GetBMCInfoSystemFirmwareVersion(result)
if err != nil {
// This one is not always available.
log.Println("msg", "Failed to parse bmc-info data", "target", targetName(target.host), "error", err)
log.Println("E!", "Failed to parse bmc-info data", "target", targetName(target.host), "error", err)
systemFirmwareVersion = "N/A"
}
ch <- prometheus.MustNewConstMetric(
Expand Down
23 changes: 13 additions & 10 deletions inputs/ipmi/exporter/collector_bmc_watchdog.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@
package exporter

import (
"github.com/prometheus/client_golang/prometheus"
"log"

"github.com/prometheus/client_golang/prometheus"

"flashcat.cloud/categraf/inputs/ipmi/exporter/freeipmi"
)

Expand Down Expand Up @@ -81,7 +82,9 @@ var (
)
)

type BMCWatchdogCollector struct{}
type BMCWatchdogCollector struct {
debugMod bool
}

func (c BMCWatchdogCollector) Name() CollectorName {
return BMCWatchdogCollectorName
Expand All @@ -98,42 +101,42 @@ func (c BMCWatchdogCollector) Args() []string {
func (c BMCWatchdogCollector) Collect(result freeipmi.Result, ch chan<- prometheus.Metric, target ipmiTarget) (int, error) {
timerState, err := freeipmi.GetBMCWatchdogTimerState(result)
if err != nil {
log.Println("msg", "Failed to collect BMC watchdog timer", "target", targetName(target.host), "error", err)
log.Println("E!", "Failed to collect BMC watchdog timer", "target", targetName(target.host), "error", err)
return 0, err
}
currentTimerUse, err := freeipmi.GetBMCWatchdogTimerUse(result)
if err != nil {
log.Println("msg", "Failed to collect BMC watchdog timer use", "target", targetName(target.host), "error", err)
log.Println("E!", "Failed to collect BMC watchdog timer use", "target", targetName(target.host), "error", err)
return 0, err
}
loggingState, err := freeipmi.GetBMCWatchdogLoggingState(result)
if err != nil {
log.Println("msg", "Failed to collect BMC watchdog logging", "target", targetName(target.host), "error", err)
log.Println("E!", "Failed to collect BMC watchdog logging", "target", targetName(target.host), "error", err)
return 0, err
}
currentTimeoutAction, err := freeipmi.GetBMCWatchdogTimeoutAction(result)
if err != nil {
log.Println("msg", "Failed to collect BMC watchdog timeout action", "target", targetName(target.host), "error", err)
log.Println("E!", "Failed to collect BMC watchdog timeout action", "target", targetName(target.host), "error", err)
return 0, err
}
currentPretimeoutInterrupt, err := freeipmi.GetBMCWatchdogPretimeoutInterrupt(result)
if err != nil {
log.Println("msg", "Failed to collect BMC watchdog pretimeout interrupt", "target", targetName(target.host), "error", err)
log.Println("E!", "Failed to collect BMC watchdog pretimeout interrupt", "target", targetName(target.host), "error", err)
return 0, err
}
pretimeoutInterval, err := freeipmi.GetBMCWatchdogPretimeoutInterval(result)
if err != nil {
log.Println("msg", "Failed to collect BMC watchdog pretimeout interval", "target", targetName(target.host), "error", err)
log.Println("E!", "Failed to collect BMC watchdog pretimeout interval", "target", targetName(target.host), "error", err)
return 0, err
}
initialCountdown, err := freeipmi.GetBMCWatchdogInitialCountdown(result)
if err != nil {
log.Println("msg", "Failed to collect BMC watchdog initial countdown", "target", targetName(target.host), "error", err)
log.Println("E!", "Failed to collect BMC watchdog initial countdown", "target", targetName(target.host), "error", err)
return 0, err
}
currentCountdown, err := freeipmi.GetBMCWatchdogCurrentCountdown(result)
if err != nil {
log.Println("msg", "Failed to collect BMC watchdog current countdown", "target", targetName(target.host), "error", err)
log.Println("E!", "Failed to collect BMC watchdog current countdown", "target", targetName(target.host), "error", err)
return 0, err
}

Expand Down
10 changes: 6 additions & 4 deletions inputs/ipmi/exporter/collector_chassis.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@ var (
)
)

type ChassisCollector struct{}
type ChassisCollector struct {
debugMod bool
}

func (c ChassisCollector) Name() CollectorName {
return ChassisCollectorName
Expand All @@ -66,17 +68,17 @@ func (c ChassisCollector) Args() []string {
func (c ChassisCollector) Collect(result freeipmi.Result, ch chan<- prometheus.Metric, target ipmiTarget) (int, error) {
currentChassisPowerState, err := freeipmi.GetChassisPowerState(result)
if err != nil {
log.Println("msg", "Failed to collect chassis data", "target", targetName(target.host), "error", err)
log.Println("E!", "Failed to collect chassis data", "target", targetName(target.host), "error", err)
return 0, err
}
currentChassisDriveFault, err := freeipmi.GetChassisDriveFault(result)
if err != nil {
log.Println("msg", "Failed to collect chassis data", "target", targetName(target.host), "error", err)
log.Println("E!", "Failed to collect chassis data", "target", targetName(target.host), "error", err)
return 0, err
}
currentChassisCoolingFault, err := freeipmi.GetChassisCoolingFault(result)
if err != nil {
log.Println("msg", "Failed to collect chassis data", "target", targetName(target.host), "error", err)
log.Println("E!", "Failed to collect chassis data", "target", targetName(target.host), "error", err)
return 0, err
}
ch <- prometheus.MustNewConstMetric(
Expand Down
6 changes: 4 additions & 2 deletions inputs/ipmi/exporter/collector_dcmi.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ var (
)
)

type DCMICollector struct{}
type DCMICollector struct {
debugMod bool
}

func (c DCMICollector) Name() CollectorName {
return DCMICollectorName
Expand All @@ -54,7 +56,7 @@ func (c DCMICollector) Args() []string {
func (c DCMICollector) Collect(result freeipmi.Result, ch chan<- prometheus.Metric, target ipmiTarget) (int, error) {
currentPowerConsumption, err := freeipmi.GetCurrentPowerConsumption(result)
if err != nil {
log.Println("msg", "Failed to collect DCMI data", "target", targetName(target.host), "error", err)
log.Println("E!", "Failed to collect DCMI data", "target", targetName(target.host), "error", err)
return 0, err
}
// Returned value negative == Power Measurement is not avail
Expand Down
12 changes: 8 additions & 4 deletions inputs/ipmi/exporter/collector_ipmi.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,9 @@ var (
)
)

type IPMICollector struct{}
type IPMICollector struct {
debugMod bool
}

func (c IPMICollector) Name() CollectorName {
return IPMICollectorName
Expand All @@ -151,7 +153,7 @@ func (c IPMICollector) Collect(result freeipmi.Result, ch chan<- prometheus.Metr
targetHost := targetName(target.host)
results, err := freeipmi.GetSensorData(result, excludeIds)
if err != nil {
log.Println("msg", "Failed to collect sensor data", "target", targetHost, "error", err)
log.Println("E!", "Failed to collect sensor data", "target", targetHost, "error", err)
return 0, err
}
for _, data := range results {
Expand All @@ -167,11 +169,13 @@ func (c IPMICollector) Collect(result freeipmi.Result, ch chan<- prometheus.Metr
case "N/A":
state = math.NaN()
default:
log.Println("msg", "Unknown sensor state", "target", targetHost, "state", data.State)
log.Println("W!", "Unknown sensor state", "target", targetHost, "state", data.State)
state = math.NaN()
}

log.Println("msg", "Got values", "target", targetHost, "data", fmt.Sprintf("%+v", data))
if c.debugMod {
log.Println("D!", "Got values", "target", targetHost, "data", fmt.Sprintf("%+v", data))
}

switch data.Unit {
case "RPM":
Expand Down
15 changes: 10 additions & 5 deletions inputs/ipmi/exporter/collector_notwindows.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,14 @@ func markCollectorUp(ch chan<- prometheus.Metric, name string, up int) {
}

// Collect implements Prometheus.Collector.
func Collect(ch chan<- prometheus.Metric, host, binPath string, config IPMIConfig) {
func Collect(ch chan<- prometheus.Metric, host, binPath string, config IPMIConfig, debugMod bool) {
start := time.Now()
defer func() {
duration := time.Since(start).Seconds()
log.Println("msg", "Scrape duration", "target", targetName(host), "duration", duration)

if debugMod {
log.Println("D!", "Scrape duration", "target", targetName(host), "duration", duration)
}
ch <- prometheus.MustNewConstMetric(
durationDesc,
prometheus.GaugeValue,
Expand All @@ -97,15 +100,17 @@ func Collect(ch chan<- prometheus.Metric, host, binPath string, config IPMIConfi
config: config,
}

for _, collector := range config.GetCollectors() {
for _, collector := range config.GetCollectors(debugMod) {
var up int
log.Println("msg", "Running collector", "target", target.host, "collector", collector.Name())
if debugMod {
log.Println("D!", "Running collector", "target", target.host, "collector", collector.Name())
}

fqcmd := path.Join(binPath, collector.Cmd())
args := collector.Args()
cfg := config.GetFreeipmiConfig()

result := freeipmi.Execute(fqcmd, args, cfg, target.host)
result := freeipmi.Execute(fqcmd, args, cfg, target.host, debugMod)

up, _ = collector.Collect(result, ch, target)
markCollectorUp(ch, string(collector.Name()), up)
Expand Down
8 changes: 5 additions & 3 deletions inputs/ipmi/exporter/collector_sel.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@ var (
)
)

type SELCollector struct{}
type SELCollector struct {
debugMod bool
}

func (c SELCollector) Name() CollectorName {
return SELCollectorName
Expand All @@ -61,12 +63,12 @@ func (c SELCollector) Args() []string {
func (c SELCollector) Collect(result freeipmi.Result, ch chan<- prometheus.Metric, target ipmiTarget) (int, error) {
entriesCount, err := freeipmi.GetSELInfoEntriesCount(result)
if err != nil {
log.Println("msg", "Failed to collect SEL data", "target", targetName(target.host), "error", err)
log.Println("E!", "Failed to collect SEL data", "target", targetName(target.host), "error", err)
return 0, err
}
freeSpace, err := freeipmi.GetSELInfoFreeSpace(result)
if err != nil {
log.Println("msg", "Failed to collect SEL data", "target", targetName(target.host), "error", err)
log.Println("E!", "Failed to collect SEL data", "target", targetName(target.host), "error", err)
return 0, err
}
ch <- prometheus.MustNewConstMetric(
Expand Down
10 changes: 6 additions & 4 deletions inputs/ipmi/exporter/collector_sm_lan_mode.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ var (
)
)

type SMLANModeCollector struct{}
type SMLANModeCollector struct {
debugMod bool
}

func (c SMLANModeCollector) Name() CollectorName {
return SMLANModeCollectorName
Expand All @@ -56,11 +58,11 @@ func (c SMLANModeCollector) Args() []string {
func (c SMLANModeCollector) Collect(result freeipmi.Result, ch chan<- prometheus.Metric, target ipmiTarget) (int, error) {
octets, err := freeipmi.GetRawOctets(result)
if err != nil {
log.Println("msg", "Failed to collect LAN mode data", "target", targetName(target.host), "error", err)
log.Println("E!", "Failed to collect LAN mode data", "target", targetName(target.host), "error", err)
return 0, err
}
if len(octets) != 3 {
log.Println("msg", "Unexpected number of octets", "target", targetName(target.host), "octets", octets)
log.Println("E!", "Unexpected number of octets", "target", targetName(target.host), "octets", octets)
return 0, fmt.Errorf("unexpected number of octets in raw response: %d", len(octets))
}

Expand All @@ -69,7 +71,7 @@ func (c SMLANModeCollector) Collect(result freeipmi.Result, ch chan<- prometheus
value, _ := strconv.Atoi(octets[2])
ch <- prometheus.MustNewConstMetric(lanModeDesc, prometheus.GaugeValue, float64(value))
default:
log.Println("msg", "Unexpected lan mode status (ipmi-raw)", "target", targetName(target.host), "sgatus", octets[2])
log.Println("E!", "Unexpected lan mode status (ipmi-raw)", "target", targetName(target.host), "sgatus", octets[2])
return 0, fmt.Errorf("unexpected lan mode status: %s", octets[2])
}

Expand Down
2 changes: 1 addition & 1 deletion inputs/ipmi/exporter/collector_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ type IPMIConfig struct {
Timeout uint32
}

func Collect(ch chan<- prometheus.Metric, host, binPath string, config IPMIConfig) {
func Collect(ch chan<- prometheus.Metric, host, binPath string, config IPMIConfig, debugMod bool) {
return
}
24 changes: 12 additions & 12 deletions inputs/ipmi/exporter/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,29 +67,29 @@ func (c ConfiguredCollector) Collect(output freeipmi.Result, ch chan<- prometheu
return c.collector.Collect(output, ch, target)
}

func (c CollectorName) GetInstance() (Collector, error) {
func (c CollectorName) GetInstance(debugMod bool) (Collector, error) {
// This is where a new Collector would have to be "registered"
switch c {
case IPMICollectorName:
return IPMICollector{}, nil
return IPMICollector{debugMod: debugMod}, nil
case BMCCollectorName:
return BMCCollector{}, nil
return BMCCollector{debugMod: debugMod}, nil
case BMCWatchdogCollectorName:
return BMCWatchdogCollector{}, nil
return BMCWatchdogCollector{debugMod: debugMod}, nil
case SELCollectorName:
return SELCollector{}, nil
return SELCollector{debugMod: debugMod}, nil
case DCMICollectorName:
return DCMICollector{}, nil
return DCMICollector{debugMod: debugMod}, nil
case ChassisCollectorName:
return ChassisCollector{}, nil
return ChassisCollector{debugMod: debugMod}, nil
case SMLANModeCollectorName:
return SMLANModeCollector{}, nil
return SMLANModeCollector{debugMod: debugMod}, nil
}
return nil, fmt.Errorf("invalid Collector: %s", string(c))
}

func (c CollectorName) IsValid() error {
_, err := c.GetInstance()
func (c CollectorName) IsValid(debugMod bool) error {
_, err := c.GetInstance(debugMod)
return err
}

Expand Down Expand Up @@ -141,11 +141,11 @@ func checkOverflow(m map[string]interface{}, ctx string) error {
return nil
}

func (c IPMIConfig) GetCollectors() []Collector {
func (c IPMIConfig) GetCollectors(debugMod bool) []Collector {
result := []Collector{}
for _, co := range c.Collectors {
// At this point validity has already been checked
i, _ := co.GetInstance()
i, _ := co.GetInstance(debugMod)
cc := ConfiguredCollector{
collector: i,
command: c.CollectorCmd[i.Name()],
Expand Down
7 changes: 5 additions & 2 deletions inputs/ipmi/exporter/freeipmi/freeipmi.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ func freeipmiConfigPipe(config string) (string, error) {
return pipe, nil
}

func Execute(cmd string, args []string, config string, target string) Result {
func Execute(cmd string, args []string, config string, target string, debugMod bool) Result {
pipe, err := freeipmiConfigPipe(config)
if err != nil {
return Result{nil, err}
Expand All @@ -152,7 +152,10 @@ func Execute(cmd string, args []string, config string, target string) Result {
args = append(args, "-h", target)
}

log.Println("msg", "Executing", "command", cmd, "args", fmt.Sprintf("%+v", args))
if debugMod {
log.Println("D!", "Executing", "command", cmd, "args", fmt.Sprintf("%+v", args))
}

out, err := exec.Command(cmd, args...).CombinedOutput()
if err != nil {
err = fmt.Errorf("error running %s: %s", cmd, err)
Expand Down
2 changes: 1 addition & 1 deletion inputs/ipmi/instances.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func (m *Instance) Gather(slist *types.SampleList) {
metricChan := make(chan prometheus.Metric, 500)

go func() {
exporter.Collect(metricChan, m.Target, m.Path, m.IPMIConfig)
exporter.Collect(metricChan, m.Target, m.Path, m.IPMIConfig, m.DebugMod)
close(metricChan)
}()
for metric := range metricChan {
Expand Down

0 comments on commit 87a6144

Please sign in to comment.