Skip to content

Commit

Permalink
Add docs, wrap errors and stop uptime reports
Browse files Browse the repository at this point in the history
  • Loading branch information
AbdelrahmanElawady committed Dec 3, 2023
1 parent a757aac commit 7c2ea19
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 41 deletions.
2 changes: 1 addition & 1 deletion cmds/modules/zui/header.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ func headerRenderer(ctx context.Context, c zbus.Client, h *widgets.Paragraph, r
}

cache := green("OK")
if app.CheckFlag(app.LimitedCache) {
if app.CheckFlag(app.LimitedCache) || app.CheckFlag(app.ReadonlyCache) {
cache = red("no ssd disks detected")
}

Expand Down
13 changes: 8 additions & 5 deletions cmds/modules/zui/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,11 @@ func action(ctx *cli.Context) error {
resources.SetRect(0, 14, width, 22)
resources.Border = false

errorsGrid := ui.NewGrid()
errorsGrid.Title = "Errors"
errorsGrid.SetRect(0, 22, width, 26)
errorsParagraph := widgets.NewParagraph()
errorsParagraph.Title = "Errors"
errorsParagraph.SetRect(0, 22, width, 26)
errorsParagraph.Border = true
errorsParagraph.WrapText = true

var flag signalFlag

Expand All @@ -111,7 +113,7 @@ func action(ctx *cli.Context) error {
log.Error().Err(err).Msg("failed to start resources renderer")
}

mod := zui.New(ctx.Context, errorsGrid, &flag)
mod := zui.New(ctx.Context, errorsParagraph, &flag)

server.Register(zbus.ObjectID{Name: module, Version: "0.0.1"}, mod)

Expand All @@ -123,7 +125,7 @@ func action(ctx *cli.Context) error {
}()

render := func() {
ui.Render(header, netgrid, resources, errorsGrid)
ui.Render(header, netgrid, resources, errorsParagraph)
}

render()
Expand All @@ -138,6 +140,7 @@ func action(ctx *cli.Context) error {
case "<Resize>":
payload := e.Payload.(ui.Resize)
header.SetRect(0, 0, payload.Width, 3)
errorsParagraph.SetRect(0, 22, payload.Width, 26)
// grid.SetRect(0, 3, payload.Width, payload.Height)
ui.Clear()
render()
Expand Down
29 changes: 29 additions & 0 deletions docs/tasks/healthcheck.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# HealthCheck

## Overview

The health check task runs a set of checks over ZOS components to determine whether the node is in a usable state, and sets flags that tell the Power Daemon to stop sending uptime reports if the node is unusable.

## Configuration

- Name: `healthcheck`
- Schedule: Every 20 mins.

## Details

- Checks whether the node cache disk is usable by trying to write some data to it. If the write fails, it sets the Readonly flag.

## Result Sample

```json
{
"description": "health check task runs multiple checks to ensure the node is in a usable state and set flags for the power daemon to stop reporting uptime if it is not usable",
"name": "healthcheck",
"result": {
"cache": [
"failed to write to cache: open /var/cache/healthcheck: operation not permitted"
]
},
"timestamp": 1701599580
}
```
6 changes: 4 additions & 2 deletions pkg/perf/healthcheck/healthcheck.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,16 @@ func (h *healthcheckTask) Run(ctx context.Context) (interface{}, error) {

func cacheCheck(ctx context.Context) (string, error) {
const label = "cache"
_, err := os.Create("/var/cache/healthcheck")
const checkFile = "/var/cache/healthcheck"

_, err := os.Create(checkFile)
if err != nil {
if err := app.SetFlag(app.ReadonlyCache); err != nil {
log.Error().Err(err).Msg("failed to set readonly flag")
}
return label, fmt.Errorf("failed to write to cache: %w", err)
}
defer os.Remove("/var/cache/healthcheck")
defer os.Remove(checkFile)

if err := app.DeleteFlag(app.ReadonlyCache); err != nil {
log.Error().Err(err).Msg("failed to delete readonly flag")
Expand Down
20 changes: 19 additions & 1 deletion pkg/power/uptime.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"github.com/rs/zerolog/log"
"github.com/shirou/gopsutil/host"
substrate "github.com/threefoldtech/tfchain/clients/tfchain-client-go"
"github.com/threefoldtech/zos/pkg/app"
"github.com/threefoldtech/zos/pkg/utils"
)

Expand Down Expand Up @@ -61,7 +62,7 @@ func (u *Uptime) SendNow() (types.Hash, error) {
}

func (u *Uptime) uptime(ctx context.Context) error {
for {
report := func() error {
log.Debug().Msg("updating node uptime")
hash, err := u.SendNow()
if err != nil {
Expand All @@ -71,6 +72,23 @@ func (u *Uptime) uptime(ctx context.Context) error {
u.Mark.Signal()

log.Info().Str("hash", hash.Hex()).Msg("node uptime hash")
return nil
}
for {
unusable := false
if app.CheckFlag(app.ReadonlyCache) {
log.Error().Msg("node cache is read only")
unusable = true
}
if app.CheckFlag(app.LimitedCache) {
log.Error().Msg("node is running on limited cache")
unusable = true
}
if unusable {
log.Error().Msg("node is not usable skipping uptime reports")
} else if err := report(); err != nil {
return err
}

select {
case <-ctx.Done():
Expand Down
50 changes: 18 additions & 32 deletions pkg/zui/zui.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,18 @@ import (
"sync"
"time"

ui "github.com/gizak/termui/v3"
"github.com/gizak/termui/v3/widgets"
"github.com/threefoldtech/zos/pkg"
)

type module struct {
grid *ui.Grid
render Signaler
labels []labelData
table *widgets.Table
mu *sync.Mutex
render Signaler
labels []labelData
paragraph *widgets.Paragraph
mu *sync.Mutex
}

// Signaler is the interface used to signal ZUI to render some element.
// Its single method, Signal, takes no arguments and returns nothing.
type Signaler interface {
Signal()
}
Expand All @@ -28,32 +27,22 @@ type labelData struct {
errors []string
}

func New(ctx context.Context, grid *ui.Grid, render Signaler) pkg.ZUI {
table := widgets.NewTable()
grid.Set(
ui.NewRow(1.0, table),
)
table.Title = "Errors"
table.FillRow = true
table.RowSeparator = false

table.Rows = [][]string{
{"[No Errors!](fg:green)"},
}

// New returns a new ZUI module.
func New(ctx context.Context, p *widgets.Paragraph, render Signaler) pkg.ZUI {
zuiModule := &module{
grid: grid,
render: render,
table: table,
labels: make([]labelData, 0),
mu: &sync.Mutex{},
render: render,
labels: make([]labelData, 0),
paragraph: p,
mu: &sync.Mutex{},
}
go zuiModule.renderErrors(ctx)
return zuiModule
}

var _ pkg.ZUI = (*module)(nil)

// PushErrors pushes the given errors to the ZUI module to be displayed.
// It can also stop displaying a certain label by sending an empty errors slice.
func (m *module) PushErrors(label string, errors []string) {
m.mu.Lock()
defer m.mu.Unlock()
Expand All @@ -76,22 +65,19 @@ func (m *module) renderErrors(ctx context.Context) {
labels := make([]labelData, len(m.labels))
copy(labels, m.labels)
m.mu.Unlock()
display(labels, m.table, m.render)
display(labels, m.paragraph, m.render)
// in case nothing got displayed
<-time.After(2 * time.Second)
}
}
}

func display(labels []labelData, table *widgets.Table, render Signaler) {
table.Rows = [][]string{
{"[No Errors!](fg:green)"},
}
func display(labels []labelData, p *widgets.Paragraph, render Signaler) {
p.Text = "[No Errors!](fg:green)"

for _, label := range labels {
for _, e := range label.errors {
table.Rows = [][]string{
{fmt.Sprintf("%s: [%s](fg:red)", label.label, e)},
}
p.Text = fmt.Sprintf("%s: [%s](fg:red)", label.label, e)
render.Signal()
<-time.After(2 * time.Second)
}
Expand Down

0 comments on commit 7c2ea19

Please sign in to comment.