Skip to content

Commit

Permalink
debugd: return systemd logs if restarting a unit fails (#3159)
Browse files Browse the repository at this point in the history
* Wait until logcollection containers are ready
* Add systemd unit journal entries to error message

---------

Signed-off-by: Daniel Weiße <[email protected]>
  • Loading branch information
daniel-weisse authored Jun 12, 2024
1 parent 08770a6 commit c682558
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 1 deletion.
10 changes: 9 additions & 1 deletion debugd/internal/debugd/deploy/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ type SystemdUnit struct {
type ServiceManager struct {
log *slog.Logger
dbus dbusClient
journal journalReader
fs afero.Fs
systemdUnitFilewriteLock sync.Mutex
}
Expand All @@ -71,6 +72,7 @@ func NewServiceManager(log *slog.Logger) *ServiceManager {
return &ServiceManager{
log: log,
dbus: &dbusWrapper{},
journal: &journalctlWrapper{},
fs: fs,
systemdUnitFilewriteLock: sync.Mutex{},
}
Expand Down Expand Up @@ -99,6 +101,11 @@ type dbusConn interface {
Close()
}

type journalReader interface {
// ReadJournal reads the journal for a specific unit.
readJournal(unit string) string
}

// SystemdAction will perform a systemd action on a service unit (start, stop, restart, reload).
func (s *ServiceManager) SystemdAction(ctx context.Context, request ServiceManagerRequest) error {
log := s.log.With(slog.String("unit", request.Unit), slog.String("action", request.Action.String()))
Expand Down Expand Up @@ -139,7 +146,8 @@ func (s *ServiceManager) SystemdAction(ctx context.Context, request ServiceManag
return nil

default:
return fmt.Errorf("performing action %q on systemd unit %q failed: expected %q but received %q", request.Action.String(), request.Unit, "done", result)
serviceJournal := s.journal.readJournal(request.Unit)
return fmt.Errorf("performing action %q on systemd unit %q failed: expected %q but received %q. systemd unit journal entries: %s", request.Action.String(), request.Unit, "done", result, serviceJournal)
}
}

Expand Down
9 changes: 9 additions & 0 deletions debugd/internal/debugd/deploy/service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ func TestSystemdAction(t *testing.T) {
manager := ServiceManager{
log: logger.NewTest(t),
dbus: &tc.dbus,
journal: &stubJournalReader{},
fs: fs,
systemdUnitFilewriteLock: sync.Mutex{},
}
Expand Down Expand Up @@ -183,6 +184,7 @@ func TestWriteSystemdUnitFile(t *testing.T) {
manager := ServiceManager{
log: logger.NewTest(t),
dbus: &tc.dbus,
journal: &stubJournalReader{},
fs: fs,
systemdUnitFilewriteLock: sync.Mutex{},
}
Expand Down Expand Up @@ -296,6 +298,7 @@ func TestOverrideServiceUnitExecStart(t *testing.T) {
manager := ServiceManager{
log: logger.NewTest(t),
dbus: &tc.dbus,
journal: &stubJournalReader{},
fs: fs,
systemdUnitFilewriteLock: sync.Mutex{},
}
Expand Down Expand Up @@ -367,3 +370,9 @@ func (c *fakeDbusConn) ReloadContext(_ context.Context) error {
}

func (c *fakeDbusConn) Close() {}

type stubJournalReader struct{}

func (s *stubJournalReader) readJournal(_ string) string {
return ""
}
83 changes: 83 additions & 0 deletions debugd/internal/debugd/deploy/wrappers.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package deploy

import (
"context"
"os/exec"

"github.com/coreos/go-systemd/v22/dbus"
)
Expand Down Expand Up @@ -48,3 +49,85 @@ func (c *dbusConnWrapper) ReloadContext(ctx context.Context) error {
func (c *dbusConnWrapper) Close() {
c.conn.Close()
}

type journalctlWrapper struct{}

func (j *journalctlWrapper) readJournal(unit string) string {
out, _ := exec.CommandContext(context.Background(), "journalctl", "-u", unit, "--no-pager").CombinedOutput()
return string(out)
}

/*
// Preferably, we would use the systemd journal API directly.
// However, this requires linking against systemd libraries, so we go with the easier journalctl command for now.
type sdJournalWrapper struct{}
// readJournal reads the journal for a specific unit.
func (s *sdJournalWrapper) readJournal(unit string) string {
journal, err := sdjournal.NewJournal()
if err != nil {
log.Printf("opening journal: %s", err)
return ""
}
defer journal.Close()
// Filter the journal for the specified unit
filters := []string{
fmt.Sprintf("_SYSTEMD_UNIT=%s", unit),
fmt.Sprintf("UNIT=%s", unit),
fmt.Sprintf("OBJECT_SYSTEMD_UNIT=%s", unit),
fmt.Sprintf("_SYSTEMD_SLICE=%s", unit),
fmt.Sprintf("_SYSTEMD_USER_UNIT=%s", unit),
fmt.Sprintf("USER_UNIT=%s", unit),
fmt.Sprintf("COREDUMP_USER_UNIT=%s", unit),
fmt.Sprintf("OBJECT_SYSTEMD_USER_UNIT=%s", unit),
fmt.Sprintf("_SYSTEMD_USER_SLICE=%s", unit),
}
for _, filter := range filters {
if err := journal.AddMatch(filter); err != nil {
log.Printf("applying filter %q: %s", filter, err)
return ""
}
if err := journal.AddDisjunction(); err != nil {
log.Printf("adding disjunction to journal filter: %s", err)
return ""
}
}
// Seek to the beginning of the journal
if err := journal.SeekHead(); err != nil {
log.Printf("seeking journal tail: %s", err)
return ""
}
// Iterate over the journal entries
var previousCursor string
journalLog := &strings.Builder{}
for {
if _, err := journal.Next(); err != nil {
log.Printf("getting next entry in journal: %s", err)
return ""
}
entry, err := journal.GetEntry()
if err != nil {
log.Printf("getting journal entry: %s", err)
return ""
}
// Abort if we reached the end of the journal, i.e. the cursor didn't change
if entry.Cursor == previousCursor {
break
}
previousCursor = entry.Cursor
if _, err := journalLog.WriteString(entry.Fields[sdjournal.SD_JOURNAL_FIELD_MESSAGE] + "\n"); err != nil {
log.Printf("copying journal entry to buffer: %s", err)
return ""
}
}
return strings.TrimSpace(journalLog.String())
}
*/
8 changes: 8 additions & 0 deletions debugd/internal/debugd/logcollector/logcollector.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,10 @@ func startPod(ctx context.Context, logger *slog.Logger) error {
if err := runLogstashCmd.Start(); err != nil {
return fmt.Errorf("failed to start logstash: %w", err)
}
if out, err := exec.CommandContext(ctx, "podman", "wait", "logstash", "--condition=running", "--interval=15s").CombinedOutput(); err != nil {
logger.Error("Logstash container failed to reach healthy status", "err", err, "output", out)
return fmt.Errorf("waiting for logstash container to reach healthy status: %w; output: %s", err, out)
}

// start filebeat container
filebeatLog := newCmdLogger(logger.WithGroup("filebeat"))
Expand All @@ -225,6 +229,10 @@ func startPod(ctx context.Context, logger *slog.Logger) error {
if err := runFilebeatCmd.Start(); err != nil {
return fmt.Errorf("failed to run filebeat: %w", err)
}
if out, err := exec.CommandContext(ctx, "podman", "wait", "filebeat", "--condition=running", "--interval=15s").CombinedOutput(); err != nil {
logger.Error("Filebeat container failed to reach healthy status", "err", err, "output", out)
return fmt.Errorf("waiting for filebeat container to reach healthy status: %w; output: %s", err, out)
}

return nil
}
Expand Down

0 comments on commit c682558

Please sign in to comment.