diff --git a/Dockerfile b/Dockerfile index 75d0a43..cd75cbd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,6 @@ -FROM alpine:3.8 as runner +FROM alpine:latest COPY bioscfg /usr/sbin/bioscfg RUN chmod +x /usr/sbin/bioscfg -ENTRYPOINT bioscfg +ENTRYPOINT ["/usr/sbin/bioscfg"] \ No newline at end of file diff --git a/Makefile b/Makefile index 1301c62..0cecd32 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ GIT_SUMMARY := $(shell git describe --tags --dirty --always) VERSION := $(shell git describe --tags 2> /dev/null) BUILD_DATE := $(shell date +%s) GIT_COMMIT_FULL := $(shell git rev-parse HEAD) -GO_VERSION := $(shell expr `go version |cut -d ' ' -f3 |cut -d. -f2` \>= 16) +GO_VERSION := $(shell expr `go version |cut -d ' ' -f3 |cut -d. -f2` \>= 22) DOCKER_IMAGE := "ghcr.io/metal-toolbox/bioscfg" REPO := "https://github.com/metal-toolbox/bioscfg.git" @@ -30,38 +30,36 @@ gen-mock: ## build-osx build-osx: ifeq ($(GO_VERSION), 0) - $(error build requies go version 1.22.1 or higher) + $(error build requies go version 1.22 or higher) endif - go build -o bioscfg \ - -ldflags \ + CGO_ENABLED=0 go build -o bioscfg \ + -ldflags \ "-X $(LDFLAG_LOCATION).GitCommit=$(GIT_COMMIT) \ - -X $(LDFLAG_LOCATION).GitBranch=$(GIT_BRANCH) \ - -X $(LDFLAG_LOCATION).GitSummary=$(GIT_SUMMARY) \ - -X $(LDFLAG_LOCATION).AppVersion=$(VERSION) \ - -X $(LDFLAG_LOCATION).BuildDate=$(BUILD_DATE)" - + -X $(LDFLAG_LOCATION).GitBranch=$(GIT_BRANCH) \ + -X $(LDFLAG_LOCATION).GitSummary=$(GIT_SUMMARY) \ + -X $(LDFLAG_LOCATION).AppVersion=$(VERSION) \ + -X $(LDFLAG_LOCATION).BuildDate=$(BUILD_DATE)" ## Build linux bin build-linux: ifeq ($(GO_VERSION), 0) - $(error build requies go version 1.22.1 or higher) + $(error build requies go version 1.22 or higher) endif - GOOS=linux GOARCH=amd64 go build -o bioscfg \ - -ldflags \ + CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o bioscfg \ + -ldflags \ "-X $(LDFLAG_LOCATION).GitCommit=$(GIT_COMMIT) \ - -X $(LDFLAG_LOCATION).GitBranch=$(GIT_BRANCH) \ - -X 
$(LDFLAG_LOCATION).GitSummary=$(GIT_SUMMARY) \ - -X $(LDFLAG_LOCATION).AppVersion=$(VERSION) \ - -X $(LDFLAG_LOCATION).BuildDate=$(BUILD_DATE)" - + -X $(LDFLAG_LOCATION).GitBranch=$(GIT_BRANCH) \ + -X $(LDFLAG_LOCATION).GitSummary=$(GIT_SUMMARY) \ + -X $(LDFLAG_LOCATION).AppVersion=$(VERSION) \ + -X $(LDFLAG_LOCATION).BuildDate=$(BUILD_DATE)" ## build docker image and tag as ghcr.io/metal-toolbox/bioscfg:latest build-image: build-linux @echo ">>>> NOTE: You may want to execute 'make build-image-nocache' depending on the Docker stages changed" - docker build --rm=true -f Dockerfile -t ${DOCKER_IMAGE}:latest . \ - --label org.label-schema.schema-version=1.0 \ - --label org.label-schema.vcs-ref=$(GIT_COMMIT_FULL) \ - --label org.label-schema.vcs-url=$(REPO) + docker build --rm=true -f Dockerfile -t ${DOCKER_IMAGE}:latest . \ + --label org.label-schema.schema-version=1.0 \ + --label org.label-schema.vcs-ref=$(GIT_COMMIT_FULL) \ + --label org.label-schema.vcs-url=$(REPO) ## tag and push devel docker image to local registry push-image-devel: build-image @@ -73,6 +71,11 @@ push-image-devel: build-image push-image: docker push ${DOCKER_IMAGE}:latest +## Clean all caches +clean-all: + golangci-lint cache clean + go clean -modcache -testcache -cache -fuzzcache + # https://gist.github.com/prwhite/8168133 # COLORS GREEN := $(shell tput -Txterm setaf 2) @@ -80,7 +83,6 @@ YELLOW := $(shell tput -Txterm setaf 3) WHITE := $(shell tput -Txterm setaf 7) RESET := $(shell tput -Txterm sgr0) - TARGET_MAX_CHAR_NUM=20 ## Show help help: diff --git a/chart/Chart.yaml b/chart/Chart.yaml index 2903d62..11594b0 100644 --- a/chart/Chart.yaml +++ b/chart/Chart.yaml @@ -1,6 +1,10 @@ apiVersion: v2 name: bioscfg -description: A helm chart for deploying the bioscfg controller. 
-type: application -version: 0.0.1 -appVersion: "0.0.1" +description: A chart to control BMCs +version: v0.1.1 +keywords: + - bmc + - bios +home: "https://github.com/metal-toolbox/bioscfg" +sources: + - "https://github.com/metal-toolbox/bioscfg" \ No newline at end of file diff --git a/chart/templates/bioscfg-configmap.yaml b/chart/templates/bioscfg-configmap.yaml deleted file mode 100644 index 014566d..0000000 --- a/chart/templates/bioscfg-configmap.yaml +++ /dev/null @@ -1,25 +0,0 @@ ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: bioscfg-config - namespace: default -data: - config.yaml: | - log_level: debug - concurrency: 5 - nats: - app_name: bioscfg - consumer: - pull: true - ack_wait: 5m - max_ack_pending: 10 - queue_group: bioscfg - stream: - name: controllers - subjects: - - com.hollow.sh.controllers.commands.> - - com.hollow.sh.controllers.responses.> - acknowledgements: true - duplicate_window: 5m - retention: workQueue diff --git a/chart/templates/configmap.yaml b/chart/templates/configmap.yaml new file mode 100644 index 0000000..cf2edf1 --- /dev/null +++ b/chart/templates/configmap.yaml @@ -0,0 +1,10 @@ +{{ if .Values.enable }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: bioscfg-config +data: + config.yaml: |- +{{ toYaml .Values.env | indent 4 }} +{{ end }} \ No newline at end of file diff --git a/chart/templates/bioscfg-deployment.yaml b/chart/templates/deployment.yaml similarity index 56% rename from chart/templates/bioscfg-deployment.yaml rename to chart/templates/deployment.yaml index 3bc7941..90bc995 100644 --- a/chart/templates/bioscfg-deployment.yaml +++ b/chart/templates/deployment.yaml @@ -17,16 +17,16 @@ spec: terminationGracePeriodSeconds: 1200 containers: - name: bioscfg - image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + image: {{ .Values.image.repository.url }}/bioscfg:{{ .Values.image.repository.tag }} + imagePullPolicy: {{ .Values.image.pullPolicy }} command: [ "bioscfg", + "run", "--config", 
"/etc/bioscfg/config.yaml", "--enable-pprof", "--log-level", "debug", - "--facility-code", - "{{ .Values.location }}" ] volumeMounts: - name: config-volume @@ -35,32 +35,17 @@ spec: mountPath: /etc/nats readOnly: true env: - - name: BIOSCFG_NATS_URL - value: "{{ .Values.env.NATS_URL }}" - - name: BIOSCFG_NATS_CONNECT_TIMEOUT - value: "{{ .Values.env.NATS_CONNECT_TIMEOUT }}" - - name: BIOSCFG_NATS_KV_REPLICAS - value: "{{ .Values.env.NATS_KV_REPLICAS }}" - - name: BIOSCFG_NATS_CREDS_FILE - value: /etc/nats/nats.creds - - name: BIOSCFG_FLEETDB_ENDPOINT - value: "{{ .Values.env.FLEETDB_ENDPOINT }}" - - name: BIOSCFG_FLEETDB_DISABLE_OAUTH - value: "{{ .Values.env.FLEETDB_DISABLE_OAUTH }}" - - name: BIOSCFG_FLEETDB_FACILITY_CODE - value: "{{ .Values.location }}" - - name: BIOSCFG_FLEETDB_OIDC_AUDIENCE_ENDPOINT - value: "{{ .Values.env.FLEETDB_OIDC_AUDIENCE_ENDPOINT }}" - - name: BIOSCFG_FLEETDB_OIDC_ISSUER_ENDPOINT - value: "{{ .Values.env.FLEETDB_OIDC_ISSUER_ENDPOINT }}" - - name: BIOSCFG_FLEETDB_OIDC_CLIENT_SCOPES - value: "{{ .Values.env.FLEETDB_OIDC_CLIENT_SCOPES }}" + {{- if .Values.env.endpoints.fleetdb.authenticate }} + - name: FLIPFLOP_ENDPOINTS_FLEETDB_OIDC_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: bioscfg-secrets + key: fleetdb-oidc-client-secret + {{- end }} - name: OTEL_EXPORTER_OTLP_ENDPOINT - value: "{{ .Values.env.OTEL_EXPORTER_OTLP_ENDPOINT }}" + value: "{{ .Values.env.endpoints.otel.url }}" - name: OTEL_EXPORTER_OTLP_INSECURE - value: "{{ .Values.env.OTEL_EXPORTER_OTLP_INSECURE }}" - - name: BIOSCFG_FLEETDB_OIDC_CLIENT_ID - value: "{{ .Values.env.FLEETDB_OIDC_CLIENT_ID }}" + value: "{{ not .Values.env.endpoints.otel.authenticate }}" securityContext: capabilities: drop: diff --git a/chart/values.yaml b/chart/values.yaml index df3e8d2..9855c0e 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -1,14 +1,29 @@ -location: "sandbox" +enable: true image: - repository: "localhost:5001/bioscfg" - tag: latest pullPolicy: Always + repository: + 
tag: latest + url: localhost:5001 env: - FLEETDB_ENDPOINT: http://fleetdb:8000 - FLEETDB_DISABLE_OAUTH: true - NATS_URL: "nats://nats:4222" - NATS_CONNECT_TIMEOUT: 60s - NATS_KV_REPLICAS: 1 - # telemetry configuration - OTEL_EXPORTER_OTLP_ENDPOINT: jaeger:4317 - OTEL_EXPORTER_OTLP_INSECURE: true \ No newline at end of file + test: myTest + test1: myFirstTest + facility: sandbox + log_level: debug + concurrency: 5 + dryrun: false + endpoints: + fleetdb: + authenticate: false + oidc_audience_url: + oidc_client_id: + oidc_issuer_url: + oidc_client_scopes: + url: http://fleetdb:8000 + nats: + connect_timeout: 60s + kv_replication: 1 + creds_file: /etc/nats/nats.creds + url: nats://nats:4222 + otel: + authenticate: false + url: jaeger:4317 \ No newline at end of file diff --git a/cmd/bioscfg.go b/cmd/bioscfg.go new file mode 100644 index 0000000..ff5615e --- /dev/null +++ b/cmd/bioscfg.go @@ -0,0 +1,26 @@ +package cmd + +import ( + "fmt" + "os" + + "github.com/metal-toolbox/bioscfg/internal/bioscfg" + "github.com/spf13/cobra" +) + +// bioscfgCmd represents the bioscfg command +var bioscfgCmd = &cobra.Command{ + Use: "run", + Short: "Run the BiosCfg Controller", + Run: func(cmd *cobra.Command, _ []string) { + err := bioscfg.Run(cmd.Context(), ConfigFile, LogLevel, EnableProfiling) + if err != nil { + fmt.Println(err) + os.Exit(1) + } + }, +} + +func init() { + rootCmd.AddCommand(bioscfgCmd) +} diff --git a/cmd/root.go b/cmd/root.go index 0b9f446..52fae87 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -18,26 +18,21 @@ package cmd import ( "fmt" - "log/slog" "os" - "github.com/metal-toolbox/bioscfg/internal/model" "github.com/spf13/cobra" ) var ( - args = &model.Args{} + LogLevel string + ConfigFile string + EnableProfiling bool ) // rootCmd represents the base command when called without any subcommands var rootCmd = &cobra.Command{ - Use: "bioscfg", - Short: "bioscfg remotely manages BIOS settings", - Run: func(cmd *cobra.Command, _ []string) { - if err := 
runWorker(cmd.Context(), args); err != nil { - os.Exit(1) - } - }, + Use: "controller", + Short: "Fleet Services Controllers", } // Execute adds all child commands to the root command and sets flags appropriately. @@ -51,19 +46,11 @@ func Execute() { func init() { rootCmd.PersistentFlags(). - StringVar(&args.ConfigFile, "config", "", "configuration file (default is $HOME/.bioscfg.yml)") + StringVar(&ConfigFile, "config", "", "configuration file (default is $HOME/.bioscfg.yml)") rootCmd.PersistentFlags(). - StringVar(&args.LogLevel, "log-level", "info", "set logging level - debug, trace") + StringVar(&LogLevel, "log-level", "info", "set logging level - debug, trace") rootCmd.PersistentFlags(). - BoolVarP(&args.EnableProfiling, "enable-pprof", "", false, "Enable profiling endpoint at: http://localhost:9091") - - rootCmd.PersistentFlags(). - StringVarP(&args.FacilityCode, "facility-code", "f", "", "The facility code this bioscfg instance is associated with") - - if err := rootCmd.MarkPersistentFlagRequired("facility-code"); err != nil { - slog.Error("failed to mark required flag", "error", err) - os.Exit(1) - } + BoolVarP(&EnableProfiling, "enable-pprof", "", false, "Enable profiling endpoint at: http://localhost:9091") } diff --git a/cmd/run.go b/cmd/run.go deleted file mode 100644 index 24f07dd..0000000 --- a/cmd/run.go +++ /dev/null @@ -1,92 +0,0 @@ -package cmd - -import ( - "context" - "log/slog" - _ "net/http/pprof" // nolint:gosec // profiling endpoint listens on localhost. 
- "os" - "os/signal" - "syscall" - - "github.com/equinix-labs/otel-init-go/otelinit" - "github.com/metal-toolbox/bioscfg/internal/configuration" - "github.com/metal-toolbox/bioscfg/internal/handlers" - "github.com/metal-toolbox/bioscfg/internal/log" - "github.com/metal-toolbox/bioscfg/internal/metrics" - "github.com/metal-toolbox/bioscfg/internal/model" - "github.com/metal-toolbox/bioscfg/internal/profiling" - "github.com/metal-toolbox/bioscfg/internal/store" - "github.com/metal-toolbox/bioscfg/internal/version" - "github.com/metal-toolbox/ctrl" -) - -func runWorker(ctx context.Context, args *model.Args) error { - config, err := configuration.Load(args) - if err != nil { - slog.Error("Failed to load configuration", "error", err) - return err - } - - slog.Info("Configuration loaded", config.AsLogFields()...) - - log.SetLevel(config.LogLevel) - - // serve metrics endpoint - metrics.ListenAndServe() - version.ExportBuildInfoMetric() - - if config.EnableProfiling { - profiling.Enable() - } - - ctx, otelShutdown := otelinit.InitOpenTelemetry(ctx, model.AppName) - defer otelShutdown(ctx) - - repository, err := store.NewRepository(ctx, config) - if err != nil { - slog.Error("Failed to create repository", "error", err) - return err - } - - termChan := make(chan os.Signal, 1) - signal.Notify(termChan, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT) - - ctx, cancel := context.WithCancel(ctx) - - // Cancel the context when we receive a termination signal. 
- go func() { - s := <-termChan - slog.Info("Received signal for termination, exiting...", "signal", s.String()) - cancel() - }() - - nc := ctrl.NewNatsController( - model.AppName, - config.FacilityCode, - model.AppSubject, - config.NatsConfig.NatsURL, - config.NatsConfig.CredsFile, - model.AppSubject, - ctrl.WithConcurrency(config.Concurrency), - ctrl.WithKVReplicas(config.NatsConfig.KVReplicas), - ctrl.WithConnectionTimeout(config.NatsConfig.ConnectTimeout), - ctrl.WithLogger(log.NewLogrusLogger(config.LogLevel)), - ) - - if err = nc.Connect(ctx); err != nil { - slog.Error("Failed to connect to NATS", "error", err) - return err - } - - slog.With(version.Current().AsLogFields()...).Info("bioscfg worker running") - - err = nc.ListenEvents(ctx, func() ctrl.TaskHandler { - return handlers.NewHandlerFactory(repository) - }) - if err != nil { - slog.Error("Failed to listen for events", "error", err) - return err - } - - return nil -} diff --git a/config.yaml b/config.yaml new file mode 100644 index 0000000..b8abf2c --- /dev/null +++ b/config.yaml @@ -0,0 +1,20 @@ +facility: sandbox +log_level: debug +concurrency: 5 +dryrun: false +endpoints: + fleetdb: + authenticate: false + oidc_audience_url: + oidc_client_id: + oidc_issuer_url: + oidc_client_scopes: + url: http://fleetdb:8000 + nats: + connect_timeout: 60s + kv_replication: 1 + creds_file: /etc/nats/nats.creds + url: nats://nats:4222 + otel: + authenticate: false + url: jaeger:4317 \ No newline at end of file diff --git a/go.mod b/go.mod index b5b14c8..156b9f0 100644 --- a/go.mod +++ b/go.mod @@ -3,8 +3,8 @@ module github.com/metal-toolbox/bioscfg go 1.22.1 require ( - github.com/banzaicloud/logrus-runtime-formatter v0.0.0-20190729070250-5ae5475bae5e github.com/bmc-toolbox/bmclib/v2 v2.2.4 + github.com/bombsimon/logrusr/v2 v2.0.1 github.com/coreos/go-oidc v2.2.1+incompatible github.com/equinix-labs/otel-init-go v0.0.9 github.com/google/uuid v1.6.0 @@ -12,7 +12,7 @@ require ( github.com/jeremywohl/flatten v1.0.1 
github.com/metal-toolbox/ctrl v0.2.9 github.com/metal-toolbox/fleetdb v1.19.5 - github.com/metal-toolbox/rivets v1.3.8-0.20240923144748-4fa59d630b50 + github.com/metal-toolbox/rivets v1.3.8 github.com/mitchellh/copystructure v1.2.0 github.com/mitchellh/mapstructure v1.5.0 github.com/pkg/errors v0.9.1 @@ -24,14 +24,17 @@ require ( go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 go.opentelemetry.io/otel v1.28.0 go.opentelemetry.io/otel/trace v1.28.0 + golang.org/x/net v0.28.0 golang.org/x/oauth2 v0.22.0 ) require ( + cloud.google.com/go/kms v1.17.1 // indirect dario.cat/mergo v1.0.0 // indirect github.com/Jeffail/gabs/v2 v2.7.0 // indirect github.com/VictorLowther/simplexml v0.0.0-20180716164440-0bff93621230 // indirect github.com/VictorLowther/soap v0.0.0-20150314151524-8e36fca84b22 // indirect + github.com/banzaicloud/logrus-runtime-formatter v0.0.0-20190729070250-5ae5475bae5e // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/bmc-toolbox/common v0.0.0-20240723142833-87832458b53b // indirect github.com/bytedance/sonic v1.12.1 // indirect @@ -133,7 +136,6 @@ require ( golang.org/x/arch v0.9.0 // indirect golang.org/x/crypto v0.26.0 // indirect golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect - golang.org/x/net v0.28.0 // indirect golang.org/x/sys v0.24.0 // indirect golang.org/x/text v0.17.0 // indirect golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9 // indirect diff --git a/go.sum b/go.sum index ee05935..43373b7 100644 --- a/go.sum +++ b/go.sum @@ -30,6 +30,7 @@ cloud.google.com/go v0.97.0/go.mod h1:GF7l59pYBVlXQIBLx3a761cZ41F9bBH3JUlihCt2Ud cloud.google.com/go v0.99.0/go.mod h1:w0Xx2nLzqWJPuozYQX+hFfCSI8WioryfRDzkoI/Y2ZA= cloud.google.com/go v0.100.2/go.mod h1:4Xra9TjzAeYHrl5+oeLlzbM2k3mjVhZh4UqTZ//w99A= cloud.google.com/go v0.115.0 h1:CnFSK6Xo3lDYRoBKEcAtia6VSC837/ZkJuRduSFnr14= +cloud.google.com/go v0.115.0/go.mod h1:8jIM5vVgoAEoiVxQ/O4BFTfHqulPZgs/ufEzMcFMdWU= cloud.google.com/go/auth v0.7.2 
h1:uiha352VrCDMXg+yoBtaD0tUF4Kv9vrtrWPYXwutnDE= cloud.google.com/go/auth v0.7.2/go.mod h1:VEc4p5NNxycWQTMQEDQF0bd6aTMb6VgYDXEwiJJQAbs= cloud.google.com/go/auth/oauth2adapt v0.2.3 h1:MlxF+Pd3OmSudg/b1yZ5lJwoXCEaeedAguodky1PcKI= @@ -51,10 +52,12 @@ cloud.google.com/go/compute/metadata v0.5.0/go.mod h1:aHnloV2TPI38yx4s9+wAZhHykW cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= cloud.google.com/go/firestore v1.6.1/go.mod h1:asNXNOzBdyVQmEU+ggO8UPodTkEVFW5Qx+rwHnAz+EY= -cloud.google.com/go/iam v1.1.7 h1:z4VHOhwKLF/+UYXAJDFwGtNF0b6gjsW1Pk9Ml0U/IoM= -cloud.google.com/go/iam v1.1.7/go.mod h1:J4PMPg8TtyurAUvSmPj8FF3EDgY1SPRZxcUGrn7WXGA= -cloud.google.com/go/kms v1.15.8 h1:szIeDCowID8th2i8XE4uRev5PMxQFqW+JjwYxL9h6xs= -cloud.google.com/go/kms v1.15.8/go.mod h1:WoUHcDjD9pluCg7pNds131awnH429QGvRM3N/4MyoVs= +cloud.google.com/go/iam v1.1.8 h1:r7umDwhj+BQyz0ScZMp4QrGXjSTI3ZINnpgU2nlB/K0= +cloud.google.com/go/iam v1.1.8/go.mod h1:GvE6lyMmfxXauzNq8NbgJbeVQNspG+tcdL/W8QO1+zE= +cloud.google.com/go/kms v1.17.1 h1:5k0wXqkxL+YcXd4viQzTqCgzzVKKxzgrK+rCZJytEQs= +cloud.google.com/go/kms v1.17.1/go.mod h1:DCMnCF/apA6fZk5Cj4XsD979OyHAqFasPuA5Sd0kGlQ= +cloud.google.com/go/longrunning v0.5.7 h1:WLbHekDbjK1fVFD3ibpFFVoyizlLRl73I7YKuAKilhU= +cloud.google.com/go/longrunning v0.5.7/go.mod h1:8GClkudohy1Fxm3owmBGid8W0pSgodEMwEAztp38Xng= cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA= @@ -217,6 +220,7 @@ github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vb github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= github.com/go-logfmt/logfmt v0.4.0/go.mod 
h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= +github.com/go-logr/logr v1.0.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= @@ -500,6 +504,7 @@ github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= @@ -551,8 +556,8 @@ github.com/metal-toolbox/ctrl v0.2.9 h1:Q1Hqpqyb71/gg2PcX/qrfoDE8FlydJt4rPQb7/Z8 github.com/metal-toolbox/ctrl v0.2.9/go.mod h1:QVATUIWFx3dbjOoEX0EnJHtRvypRlXZ9HUGaPLRyTG8= github.com/metal-toolbox/fleetdb v1.19.5 h1:ERgdFAUtWnT/AeVhCGclsENmwPhU88JUcgOZAdxWKYI= github.com/metal-toolbox/fleetdb v1.19.5/go.mod h1:k9MZXQsJX4NfBoANst6g1468papSs0tzsSyzN3gGWuQ= -github.com/metal-toolbox/rivets v1.3.8-0.20240923144748-4fa59d630b50 h1:v5aGsD3WnCOD6IB8o9F4XqR9kB/Vr/+LUQTmaG+aQYI= -github.com/metal-toolbox/rivets v1.3.8-0.20240923144748-4fa59d630b50/go.mod h1:8irU6eXgOa3QkjdcGi/aY4vqoMqCkbwVz7iVTYYPCX8= +github.com/metal-toolbox/rivets v1.3.8 h1:BxzBPBYPMGBwJurIe+8Xji2YL7vHZUHbOmMpszWfPYw= +github.com/metal-toolbox/rivets v1.3.8/go.mod h1:8irU6eXgOa3QkjdcGi/aY4vqoMqCkbwVz7iVTYYPCX8= 
github.com/microsoft/go-mssqldb v0.17.0/go.mod h1:OkoNGhGEs8EZqchVTtochlXruEhEOaO4S0d2sB5aeGQ= github.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso= github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI= @@ -671,6 +676,7 @@ github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPx github.com/sirupsen/logrus v1.4.1/go.mod h1:ni0Sbl8bgC9z8RoU9G6nDWqqs/fq4eDPysMBDgk/93Q= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= +github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/slack-go/slack v0.13.1 h1:6UkM3U1OnbhPsYeb1IMkQ6HSNOSikWluwOncJt4Tz/o= @@ -1025,6 +1031,7 @@ golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20210514084401-e8d321eab015/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210603125802-9665404d3644/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210608053332-aa57babbf139/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210616045830-e2b7044e8c71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/internal/bioscfg/bioscfg.go b/internal/bioscfg/bioscfg.go new file mode 100644 index 0000000..50a1d9a --- /dev/null +++ b/internal/bioscfg/bioscfg.go @@ -0,0 
+1,111 @@ +package bioscfg + +import ( + "context" + + "github.com/metal-toolbox/bioscfg/internal/config" + "github.com/metal-toolbox/bioscfg/internal/store/fleetdb" + "github.com/metal-toolbox/ctrl" + rctypes "github.com/metal-toolbox/rivets/condition" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +var ( + pkgName = "internal/bioscfg" +) + +// BiosCfg BiosCfg Controller Struct +type BiosCfg struct { + cfg *config.Configuration + logger *logrus.Entry + fleetdb *fleetdb.Store + nc *ctrl.NatsController +} + +// New create a new BiosCfg Controller +func New(ctx context.Context, cfg *config.Configuration, logger *logrus.Entry) (*BiosCfg, error) { + bc := &BiosCfg{ + cfg: cfg, + logger: logger, + } + + err := bc.initDependences(ctx) + if err != nil { + return nil, err + } + + return bc, nil +} + +// Listen listen to Nats for tasks +func (bc *BiosCfg) Listen(ctx context.Context) error { + handleFactory := func() ctrl.TaskHandler { + return &TaskHandler{ + cfg: bc.cfg, + logger: bc.logger, + controllerID: bc.nc.ID(), + fleetdb: bc.fleetdb, + } + } + + err := bc.nc.ListenEvents(ctx, handleFactory) + if err != nil { + return err + } + + return nil +} + +// initDependences Initialize network dependencies +func (bc *BiosCfg) initDependences(ctx context.Context) error { + err := bc.initNats(ctx) + if err != nil { + return errors.Wrap(err, "failed to initialize connection to nats") + } + + err = bc.initFleetDB(ctx) + if err != nil { + return errors.Wrap(err, "failed to initialize connection to fleetdb") + } + + return nil +} + +func (bc *BiosCfg) initNats(ctx context.Context) error { + bc.nc = ctrl.NewNatsController( + string(rctypes.BiosControl), + bc.cfg.FacilityCode, + string(rctypes.BiosControl), + bc.cfg.Endpoints.Nats.URL, + bc.cfg.Endpoints.Nats.CredsFile, + rctypes.BiosControl, + ctrl.WithConcurrency(bc.cfg.Concurrency), + ctrl.WithKVReplicas(bc.cfg.Endpoints.Nats.KVReplicationFactor), + ctrl.WithLogger(bc.logger.Logger), + 
ctrl.WithConnectionTimeout(bc.cfg.Endpoints.Nats.ConnectTimeout), + ) + + err := bc.nc.Connect(ctx) + if err != nil { + bc.logger.Error(err) + return err + } + + return nil +} + +func (bc *BiosCfg) initFleetDB(ctx context.Context) error { + store, err := fleetdb.New( + ctx, + &bc.cfg.Endpoints.FleetDB, + bc.logger.Logger, + ) + if err != nil { + return err + } + + bc.fleetdb = store + + return nil +} diff --git a/internal/bioscfg/errors.go b/internal/bioscfg/errors.go new file mode 100644 index 0000000..1d32181 --- /dev/null +++ b/internal/bioscfg/errors.go @@ -0,0 +1,9 @@ +package bioscfg + +import "errors" + +var ( + errInvalidConditionParams = errors.New("invalid condition parameters") + errTaskConv = errors.New("error in generic Task conversion") + errUnsupportedAction = errors.New("unsupported action") +) diff --git a/internal/bioscfg/handler.go b/internal/bioscfg/handler.go new file mode 100644 index 0000000..25d911f --- /dev/null +++ b/internal/bioscfg/handler.go @@ -0,0 +1,154 @@ +package bioscfg + +import ( + "context" + "time" + + "github.com/metal-toolbox/bioscfg/internal/config" + "github.com/metal-toolbox/bioscfg/internal/model" + "github.com/metal-toolbox/bioscfg/internal/store/bmc" + "github.com/metal-toolbox/bioscfg/internal/store/fleetdb" + "github.com/metal-toolbox/ctrl" + rctypes "github.com/metal-toolbox/rivets/condition" + "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/trace" +) + +type TaskHandler struct { + logger *logrus.Entry + cfg *config.Configuration + fleetdb *fleetdb.Store + bmcClient bmc.BMC + publisher ctrl.Publisher + server *model.Asset + task *Task + startTS time.Time + controllerID string +} + +func (th *TaskHandler) HandleTask(ctx context.Context, genTask *rctypes.Task[any, any], publisher ctrl.Publisher) error { + ctx, span := otel.Tracer(pkgName).Start( + ctx, + "bioscfg.HandleTask", + ) + defer span.End() + + var err error + th.publisher = publisher + + // Ungeneric the task + th.task, 
err = NewTask(genTask) + if err != nil { + th.logger.WithFields(logrus.Fields{ + "conditionID": genTask.ID, + "controllerID": th.controllerID, + "err": err.Error(), + }).Error("asset lookup error") + return err + } + + // Get Server + th.server, err = th.fleetdb.AssetByID(ctx, th.task.Parameters.AssetID) + if err != nil { + th.logger.WithFields(logrus.Fields{ + "assetID": th.task.Parameters.AssetID.String(), + "conditionID": th.task.ID, + "controllerID": th.controllerID, + "err": err.Error(), + }).Error("asset lookup error") + + return ctrl.ErrRetryHandler + } + + // New log entry for this condition + th.logger = th.logger.WithFields( + logrus.Fields{ + "controllerID": th.controllerID, + "conditionID": th.task.ID.String(), + "serverID": th.server.ID.String(), + "bmc": th.server.BmcAddress.String(), + "action": th.task.Parameters.Action, + }, + ) + + // Get BMC Client + if th.cfg.Dryrun { // Fake BMC + th.bmcClient = bmc.NewDryRunBMCClient(th.server) + th.logger.Warn("Running BMC in Dryrun mode") + } else { + th.bmcClient = bmc.NewBMCClient(th.server, th.logger) + } + + err = th.bmcClient.Open(ctx) + if err != nil { + th.logger.WithError(err).Error("bmc connection failed to connect") + return err + } + defer func() { + if err := th.bmcClient.Close(ctx); err != nil { + th.logger.WithError(err).Error("bmc connection close error") + } + }() + + return th.Run(ctx) +} + +func (th *TaskHandler) Run(ctx context.Context) error { + ctx, span := otel.Tracer(pkgName).Start( + ctx, + "TaskHandler.Run", + trace.WithSpanKind(trace.SpanKindConsumer), + ) + defer span.End() + + th.logger.Info("running condition action") + err := th.publishActive(ctx, "running condition action") + if err != nil { + return err + } + + switch th.task.Parameters.Action { + case rctypes.ResetSettings: + return th.ResetBios(ctx) + default: + return th.failedWithError(ctx, string(th.task.Parameters.Action), errUnsupportedAction) + } +} + +// ResetBios reset the bios of the server +func (th *TaskHandler) 
ResetBios(ctx context.Context) error { + // Get Power State + state, err := th.bmcClient.GetPowerState(ctx) + if err != nil { + return th.failedWithError(ctx, "error getting power state", err) + } + + err = th.publishActivef(ctx, "current power state: %s", state) + if err != nil { + return err + } + + // Reset Bios + err = th.bmcClient.ResetBios(ctx) + if err != nil { + return th.failedWithError(ctx, "error reseting bios", err) + } + + err = th.publishActive(ctx, "BIOS settings reset") + if err != nil { + return err + } + + // Reboot (if ON) + if state == model.PowerStateOn { + err = th.bmcClient.SetPowerState(ctx, model.PowerStateReset) + if err != nil { + return th.failedWithError(ctx, "failed to reboot server", err) + } + + return th.successful(ctx, "rebooting server") + } + + return th.successful(ctx, "skipping server reboot, not on") +} diff --git a/internal/bioscfg/publish.go b/internal/bioscfg/publish.go new file mode 100644 index 0000000..551ea3a --- /dev/null +++ b/internal/bioscfg/publish.go @@ -0,0 +1,113 @@ +package bioscfg + +import ( + "context" + "fmt" + "time" + + "github.com/metal-toolbox/bioscfg/internal/metrics" + rctypes "github.com/metal-toolbox/rivets/condition" + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" +) + +func (th *TaskHandler) publish(ctx context.Context, status string, state rctypes.State) error { + th.task.State = state + th.task.Status.Append(status) + + genTask, err := th.task.ToGeneric() + if err != nil { + th.logger.WithError(errTaskConv).Error() + return err + } + + if errDelay := sleepInContext(ctx, 10*time.Second); errDelay != nil { + return context.Canceled + } + + return th.publisher.Publish(ctx, + genTask, + false, + ) +} + +func (th *TaskHandler) publishActive(ctx context.Context, status string) error { + err := th.publish(ctx, status, rctypes.Active) + if err != nil { + th.logger.Infof("failed to publish condition status: %s", status) + return err + } + + th.logger.Infof("condition active: 
%s", status) + return nil +} + +func (th *TaskHandler) publishActivef(ctx context.Context, status string, args ...interface{}) error { + if len(args) > 0 { + status = fmt.Sprintf(status, args) + } + + return th.publishActive(ctx, status) +} + +// failed condition helper method +func (th *TaskHandler) failed(ctx context.Context, status string) error { + err := th.publish(ctx, status, rctypes.Failed) + + th.registerConditionMetrics(string(rctypes.Failed)) + + if err != nil { + th.logger.Infof("failed to publish condition status: %s", status) + return err + } + + th.logger.Warnf("condition failed: %s", status) + return nil +} + +func (th *TaskHandler) failedWithError(ctx context.Context, status string, err error) error { + newError := th.failed(ctx, errors.Wrap(err, status).Error()) + if newError != nil { + if err != nil { + return errors.Wrap(newError, err.Error()) + } + + return newError + } + + return err +} + +// successful condition helper method +func (th *TaskHandler) successful(ctx context.Context, status string) error { + err := th.publish(ctx, status, rctypes.Succeeded) + + th.registerConditionMetrics(string(rctypes.Succeeded)) + + if err != nil { + th.logger.Warnf("failed to publish condition status: %s", status) + return err + } + + th.logger.Infof("condition complete: %s", status) + return nil +} + +func (th *TaskHandler) registerConditionMetrics(status string) { + metrics.ConditionRunTimeSummary.With( + prometheus.Labels{ + "condition": string(rctypes.ServerControl), + "state": status, + }, + ).Observe(time.Since(th.startTS).Seconds()) +} + +// sleepInContext +func sleepInContext(ctx context.Context, t time.Duration) error { + select { + case <-time.After(t): + return nil + case <-ctx.Done(): + return context.Canceled + } +} diff --git a/internal/bioscfg/run.go b/internal/bioscfg/run.go new file mode 100644 index 0000000..5b8d19a --- /dev/null +++ b/internal/bioscfg/run.go @@ -0,0 +1,52 @@ +package bioscfg + +import ( + "context" + + 
"github.com/equinix-labs/otel-init-go/otelinit" + "github.com/metal-toolbox/bioscfg/internal/config" + "github.com/metal-toolbox/bioscfg/internal/metrics" + "github.com/metal-toolbox/bioscfg/internal/model" + "github.com/metal-toolbox/bioscfg/internal/profiling" + "github.com/metal-toolbox/bioscfg/internal/version" + "github.com/sirupsen/logrus" +) + +func Run(ctx context.Context, configFile, logLevel string, enableProfiling bool) error { + cfg, err := config.Load(configFile, logLevel) + if err != nil { + return err + } + + logger := logrus.New() + // TODO; Replace cfg.LogLevel with logrus.LogLevel, it should marshall/unmarshall? + logger.Level, err = logrus.ParseLevel(cfg.LogLevel) + if err != nil { + return err + } + + metrics.ListenAndServe() + version.ExportBuildInfoMetric() + if enableProfiling { + profiling.Enable() + } + + ctx, otelShutdown := otelinit.InitOpenTelemetry(ctx, model.Name) + defer otelShutdown(ctx) + + v, err := version.Current().AsMap() + if err != nil { + return err + } + loggerEntry := logger.WithFields(v) + loggerEntry.Infof("Initializing %s", model.Name) + + controller, err := New(ctx, cfg, loggerEntry) + if err != nil { + return err + } + + loggerEntry.Infof("Success! 
%s is starting to listen for conditions", model.Name) + + return controller.Listen(ctx) +} diff --git a/internal/bioscfg/task.go b/internal/bioscfg/task.go new file mode 100644 index 0000000..8f74604 --- /dev/null +++ b/internal/bioscfg/task.go @@ -0,0 +1,89 @@ +package bioscfg + +import ( + "encoding/json" + + rctypes "github.com/metal-toolbox/rivets/condition" + rtypes "github.com/metal-toolbox/rivets/types" + "github.com/mitchellh/copystructure" + "github.com/pkg/errors" +) + +type Task rctypes.Task[*rctypes.BiosControlTaskParameters, json.RawMessage] + +func NewTask(task *rctypes.Task[any, any]) (*Task, error) { + paramsJSON, ok := task.Parameters.(json.RawMessage) + if !ok { + return nil, errInvalidConditionParams + } + + params := rctypes.BiosControlTaskParameters{} + if err := json.Unmarshal(paramsJSON, ¶ms); err != nil { + return nil, err + } + + // deep copy fields referenced by pointer + asset, err := copystructure.Copy(task.Server) + if err != nil { + return nil, errors.Wrap(errTaskConv, err.Error()+": Task.Server") + } + + fault, err := copystructure.Copy(task.Fault) + if err != nil { + return nil, errors.Wrap(errTaskConv, err.Error()+": Task.Fault") + } + + return &Task{ + StructVersion: task.StructVersion, + ID: task.ID, + Kind: task.Kind, + State: task.State, + Status: task.Status, + Parameters: ¶ms, + Fault: fault.(*rctypes.Fault), + FacilityCode: task.FacilityCode, + Server: asset.(*rtypes.Server), + WorkerID: task.WorkerID, + TraceID: task.TraceID, + SpanID: task.SpanID, + CreatedAt: task.CreatedAt, + UpdatedAt: task.UpdatedAt, + CompletedAt: task.CompletedAt, + }, nil +} + +func (task *Task) ToGeneric() (*rctypes.Task[any, any], error) { + paramsJSON, err := task.Parameters.Marshal() + if err != nil { + return nil, errors.Wrap(errTaskConv, err.Error()+": Task.Parameters") + } + + // deep copy fields referenced by pointer + asset, err := copystructure.Copy(task.Server) + if err != nil { + return nil, errors.Wrap(errTaskConv, err.Error()+": 
Task.Server") + } + + fault, err := copystructure.Copy(task.Fault) + if err != nil { + return nil, errors.Wrap(errTaskConv, err.Error()+": Task.Fault") + } + + return &rctypes.Task[any, any]{ + StructVersion: task.StructVersion, + ID: task.ID, + Kind: task.Kind, + State: task.State, + Status: task.Status, + Parameters: paramsJSON, + Fault: fault.(*rctypes.Fault), + FacilityCode: task.FacilityCode, + Server: asset.(*rtypes.Server), + WorkerID: task.WorkerID, + TraceID: task.TraceID, + SpanID: task.SpanID, + CreatedAt: task.CreatedAt, + UpdatedAt: task.UpdatedAt, + CompletedAt: task.CompletedAt, + }, nil +} diff --git a/internal/config/config.go b/internal/config/config.go new file mode 100644 index 0000000..7955856 --- /dev/null +++ b/internal/config/config.go @@ -0,0 +1,138 @@ +package config + +import ( + "os" + "strings" + + "github.com/jeremywohl/flatten" + "github.com/metal-toolbox/bioscfg/internal/store/fleetdb" + "github.com/metal-toolbox/rivets/events" + "github.com/mitchellh/mapstructure" + "github.com/pkg/errors" + "github.com/spf13/viper" +) + +var ( + ErrConfig = errors.New("configuration error") +) + +type Configuration struct { + FacilityCode string `mapstructure:"facility"` + LogLevel string `mapstructure:"log_level"` + Endpoints Endpoints `mapstructure:"endpoints"` + Dryrun bool `mapstructure:"dryrun"` + Concurrency int `mapstructure:"concurrency"` +} + +type Endpoints struct { + // NatsOptions defines the NATs events broker configuration parameters. 
+ Nats events.NatsOptions `mapstructure:"nats"` + + // FleetDBConfig defines the fleetdb client configuration parameters + FleetDB fleetdb.Config `mapstructure:"fleetdb"` +} + +func Load(cfgFilePath, loglevel string) (*Configuration, error) { + v := viper.New() + cfg := &Configuration{} + + err := cfg.envBindVars(v) + if err != nil { + return nil, err + } + + v.SetConfigType("yaml") + v.SetEnvKeyReplacer(strings.NewReplacer(".", "_")) + v.AutomaticEnv() + + err = readInFile(v, cfg, cfgFilePath) + if err != nil { + return nil, err + } + + if loglevel != "" { + cfg.LogLevel = loglevel + } + + err = cfg.validate() + return cfg, err +} + +// Reads in the cfgFile when available and overrides from environment variables. +func readInFile(v *viper.Viper, cfg *Configuration, path string) error { + if cfg == nil { + return ErrConfig + } + + if path != "" { + fh, err := os.Open(path) + if err != nil { + return errors.Wrap(ErrConfig, err.Error()) + } + + if err = v.ReadConfig(fh); err != nil { + return errors.Wrap(ErrConfig, "ReadConfig error:"+err.Error()) + } + } else { + v.AddConfigPath(".") + v.SetConfigName("config") + err := v.ReadInConfig() + if err != nil { + return err + } + } + + err := v.Unmarshal(cfg) + if err != nil { + return err + } + + return nil +} + +func (cfg *Configuration) validate() error { + if cfg == nil { + return ErrConfig + } + + if cfg.FacilityCode == "" { + return errors.Wrap(ErrConfig, "no facility codes") + } + + if cfg.LogLevel == "" { + cfg.LogLevel = "info" + } + + if cfg.Concurrency == 0 { + cfg.Concurrency = 1 + } + + return nil +} + +// envBindVars binds environment variables to the struct +// without a configuration file being unmarshalled, +// this is a workaround for a viper bug, +// +// This can be replaced by the solution in https://github.com/spf13/viper/pull/1429 +// once that PR is merged. 
+func (cfg *Configuration) envBindVars(v *viper.Viper) error { + envKeysMap := map[string]interface{}{} + if err := mapstructure.Decode(cfg, &envKeysMap); err != nil { + return err + } + + // Flatten nested conf map + flat, err := flatten.Flatten(envKeysMap, "", flatten.DotStyle) + if err != nil { + return errors.Wrap(err, "Unable to flatten config") + } + + for k := range flat { + if err := v.BindEnv(k); err != nil { + return errors.Wrap(ErrConfig, "env var bind error: "+err.Error()) + } + } + + return nil +} diff --git a/internal/configuration/configuration.go b/internal/configuration/configuration.go deleted file mode 100644 index fe7425d..0000000 --- a/internal/configuration/configuration.go +++ /dev/null @@ -1,271 +0,0 @@ -package configuration - -import ( - "net/url" - "os" - "strings" - "time" - - "github.com/jeremywohl/flatten" - "github.com/metal-toolbox/bioscfg/internal/model" - "github.com/mitchellh/mapstructure" - "github.com/pkg/errors" - "github.com/spf13/viper" -) - -var ( - // NATs streaming configuration - defaultNatsConnectTimeout = 100 * time.Millisecond -) - -// NatsConfig holds NATS specific configuration -type NatsConfig struct { - NatsURL string - CredsFile string - KVReplicas int - ConnectTimeout time.Duration -} - -func newNatsConfig() *NatsConfig { - return &NatsConfig{ - ConnectTimeout: defaultNatsConnectTimeout, - } -} - -// Configuration holds application configuration read from a YAML or set by env variables. -// nolint:govet // prefer readability over field alignment optimization for this case. -type Configuration struct { - // LogLevel is the app verbose logging level. - // one of - info, debug, trace - LogLevel string `mapstructure:"log_level"` - - // Concurrency is the number of concurrent tasks that can be running at once. - Concurrency int `mapstructure:"concurrency"` - - // FacilityCode limits this service to events in a facility. 
- FacilityCode string `mapstructure:"facility_code"` - - // FleetDBOptions defines the fleetdb client configuration parameters - FleetDBOptions *FleetDBOptions `mapstructure:"fleetdb"` - - // NatsConfig defines the NATs events broker configuration parameters. - NatsConfig *NatsConfig `mapstructure:"nats"` - - EnableProfiling bool `mapstructure:"enable_profiling"` -} - -// New creates an empty configuration struct. -func New() *Configuration { - config := &Configuration{} - - // these are initialized here so viper can read in configuration from env vars - // once https://github.com/spf13/viper/pull/1429 is merged, this can go. - config.FleetDBOptions = &FleetDBOptions{} - config.NatsConfig = newNatsConfig() - - return config -} - -func (c *Configuration) AsLogFields() []any { - return []any{ - "logLevel", c.LogLevel, - "concurrency", c.Concurrency, - "facilityCode", c.FacilityCode, - "disableOAuth", c.FleetDBOptions.DisableOAuth, - "fleetDBUrl", c.FleetDBOptions.Endpoint, - "natsURL", c.NatsConfig.NatsURL, - "enableProfiling", c.EnableProfiling, - } -} - -func (c *Configuration) LoadArgs(args *model.Args) { - c.LogLevel = args.LogLevel - c.EnableProfiling = args.EnableProfiling - c.FacilityCode = args.FacilityCode -} - -// FleetDBOptions defines configuration for the fleetdb client. -// https://github.com/metal-toolbox/fleetdb -type FleetDBOptions struct { - Endpoint string `mapstructure:"endpoint"` - OidcIssuerEndpoint string `mapstructure:"oidc_issuer_endpoint"` - OidcAudienceEndpoint string `mapstructure:"oidc_audience_endpoint"` - OidcClientSecret string `mapstructure:"oidc_client_secret"` - OidcClientID string `mapstructure:"oidc_client_id"` - OidcClientScopes []string `mapstructure:"oidc_client_scopes"` - DisableOAuth bool `mapstructure:"disable_oauth"` -} - -// Load the application configuration -// Reads in the configFile when available and overrides from environment variables. 
-func Load(args *model.Args) (*Configuration, error) { - viperConfig := viper.New() - viperConfig.SetConfigType("yaml") - viperConfig.SetEnvPrefix(model.AppName) - viperConfig.SetEnvKeyReplacer(strings.NewReplacer(".", "_")) - viperConfig.AutomaticEnv() - - if args.ConfigFile != "" { - fh, err := os.Open(args.ConfigFile) - if err != nil { - return nil, errors.Wrap(model.ErrConfig, err.Error()) - } - - if err = viperConfig.ReadConfig(fh); err != nil { - return nil, errors.Wrap(model.ErrConfig, "ReadConfig error: "+err.Error()) - } - } - - config := New() - config.LoadArgs(args) - - if err := config.envBindVars(viperConfig); err != nil { - return nil, errors.Wrap(model.ErrConfig, "env var bind error: "+err.Error()) - } - - if err := viperConfig.Unmarshal(config); err != nil { - return nil, errors.Wrap(model.ErrConfig, "Unmarshal error: "+err.Error()) - } - - config.envVarAppOverrides(viperConfig) - - if err := config.envVarNatsOverrides(viperConfig); err != nil { - return nil, errors.Wrap(model.ErrConfig, "nats env overrides error: "+err.Error()) - } - - if err := config.envVarFleetDBOverrides(viperConfig); err != nil { - return nil, errors.Wrap(model.ErrConfig, "fleetdb env overrides error: "+err.Error()) - } - - return config, nil -} - -func (c *Configuration) envVarAppOverrides(viperConfig *viper.Viper) { - logLevel := viperConfig.GetString("log.level") - if logLevel != "" { - c.LogLevel = logLevel - } -} - -// envBindVars binds environment variables to the struct -// without a configuration file being unmarshalled, -// this is a workaround for a viper bug, -// -// This can be replaced by the solution in https://github.com/spf13/viper/pull/1429 -// once that PR is merged. 
-func (c *Configuration) envBindVars(viperConfig *viper.Viper) error { - envKeysMap := map[string]interface{}{} - if err := mapstructure.Decode(c, &envKeysMap); err != nil { - return err - } - - // Flatten nested conf map - flat, err := flatten.Flatten(envKeysMap, "", flatten.DotStyle) - if err != nil { - return errors.Wrap(err, "Unable to flatten configuration") - } - - for k := range flat { - if err := viperConfig.BindEnv(k); err != nil { - return errors.Wrap(model.ErrConfig, "env var bind error: "+err.Error()) - } - } - - return nil -} - -// nolint:gocyclo // nats env configuration load is cyclomatic -func (c *Configuration) envVarNatsOverrides(viperConfig *viper.Viper) error { - if c.NatsConfig == nil { - c.NatsConfig = newNatsConfig() - } - - if viperConfig.GetString("nats.url") != "" { - c.NatsConfig.NatsURL = viperConfig.GetString("nats.url") - } - - if c.NatsConfig.NatsURL == "" { - return errors.New("missing parameter: nats.url") - } - - if viperConfig.GetString("nats.creds.file") != "" { - c.NatsConfig.CredsFile = viperConfig.GetString("nats.creds.file") - } - - if viperConfig.GetDuration("nats.connect.timeout") != 0 { - c.NatsConfig.ConnectTimeout = viperConfig.GetDuration("nats.connect.timeout") - } - - if viperConfig.GetInt("nats.kv.replicas") != 0 { - c.NatsConfig.KVReplicas = viperConfig.GetInt("nats.kv.replicas") - } - - return nil -} - -// nolint:gocyclo // parameter validation is cyclomatic -func (c *Configuration) envVarFleetDBOverrides(viperConfig *viper.Viper) error { - if c.FleetDBOptions == nil { - c.FleetDBOptions = &FleetDBOptions{} - } - - if viperConfig.GetString("fleetdb.endpoint") != "" { - c.FleetDBOptions.Endpoint = viperConfig.GetString("fleetdb.endpoint") - } - - // Validate endpoint - _, err := url.Parse(c.FleetDBOptions.Endpoint) - if err != nil { - return errors.New("fleetdb endpoint URL error: " + err.Error()) - } - - if viperConfig.GetString("fleetdb.disable.oauth") != "" { - c.FleetDBOptions.DisableOAuth = 
viperConfig.GetBool("fleetdb.disable.oauth") - } - - if c.FleetDBOptions.DisableOAuth { - return nil - } - - if viperConfig.GetString("fleetdb.oidc.issuer.endpoint") != "" { - c.FleetDBOptions.OidcIssuerEndpoint = viperConfig.GetString("fleetdb.oidc.issuer.endpoint") - } - - if c.FleetDBOptions.OidcIssuerEndpoint == "" { - return errors.New("fleetdb oidc.issuer.endpoint not defined") - } - - if viperConfig.GetString("fleetdb.oidc.audience.endpoint") != "" { - c.FleetDBOptions.OidcAudienceEndpoint = viperConfig.GetString("fleetdb.oidc.audience.endpoint") - } - - if c.FleetDBOptions.OidcAudienceEndpoint == "" { - return errors.New("fleetdb oidc.audience.endpoint not defined") - } - - if viperConfig.GetString("fleetdb.oidc.client.secret") != "" { - c.FleetDBOptions.OidcClientSecret = viperConfig.GetString("fleetdb.oidc.client.secret") - } - - if c.FleetDBOptions.OidcClientSecret == "" { - return errors.New("fleetdb.oidc.client.secret not defined") - } - - if viperConfig.GetString("fleetdb.oidc.client.id") != "" { - c.FleetDBOptions.OidcClientID = viperConfig.GetString("fleetdb.oidc.client.id") - } - - if c.FleetDBOptions.OidcClientID == "" { - return errors.New("fleetdb.oidc.client.id not defined") - } - - if viperConfig.GetString("fleetdb.oidc.client.scopes") != "" { - c.FleetDBOptions.OidcClientScopes = viperConfig.GetStringSlice("fleetdb.oidc.client.scopes") - } - - if len(c.FleetDBOptions.OidcClientScopes) == 0 { - return errors.New("fleetdb oidc.client.scopes not defined") - } - - return nil -} diff --git a/internal/handlers/handlers.go b/internal/handlers/handlers.go deleted file mode 100644 index 75dfc3c..0000000 --- a/internal/handlers/handlers.go +++ /dev/null @@ -1,76 +0,0 @@ -package handlers - -import ( - "context" - "log/slog" - _ "net/http/pprof" // nolint:gosec // pprof path is only exposed over localhost - - "github.com/bmc-toolbox/bmclib/v2" - "github.com/metal-toolbox/bioscfg/internal/model" - "github.com/metal-toolbox/bioscfg/internal/store" - 
"github.com/metal-toolbox/bioscfg/internal/tasks" - "github.com/metal-toolbox/ctrl" - rctypes "github.com/metal-toolbox/rivets/condition" -) - -// HandlerFactory has the data and business logic for the application -type HandlerFactory struct { - repository store.Repository -} - -// NewHandlerFactory returns a new instance of the Handler -func NewHandlerFactory(repository store.Repository) *HandlerFactory { - return &HandlerFactory{ - repository: repository, - } -} - -func (h *HandlerFactory) getAsset(ctx context.Context, params *rctypes.BiosControlTaskParameters) (*model.Asset, error) { - asset, err := h.repository.AssetByID(ctx, params.AssetID) - if err != nil { - // TODO: Check error type - return nil, ctrl.ErrRetryHandler - } - - slog.Debug("Found asset", asset.AsLogFields()...) - - return asset, nil -} - -// Handle will handle the received condition -func (h *HandlerFactory) HandleTask( - ctx context.Context, - genTask *rctypes.Task[any, any], - publisher ctrl.Publisher, -) error { - slog.Debug("Handling condition", "condition", genTask) - - task, err := tasks.NewTask(genTask) - if err != nil { - return err - } - - server, err := h.getAsset(ctx, task.Parameters) - if err != nil { - return err - } - - var oldTask tasks.Task - - switch task.Parameters.Action { - case rctypes.ResetSettings: - oldTask = tasks.NewBiosResetTask(server) - default: - slog.With(server.AsLogFields()...).Error("Invalid action", "action", task.Parameters.Action) - return model.ErrInvalidAction - } - - runner := tasks.NewTaskRunner(publisher, oldTask, task) - client := bmclib.NewClient(server.BmcAddress.String(), server.BmcUsername, server.BmcPassword) - - if err := runner.Run(ctx, client); err != nil { - slog.Error("Failed running task", "error", err, "task", oldTask.Name()) - } - - return nil -} diff --git a/internal/log/log.go b/internal/log/log.go deleted file mode 100644 index 872c8bc..0000000 --- a/internal/log/log.go +++ /dev/null @@ -1,84 +0,0 @@ -package log - -import ( - 
"log/slog" - "os" - - runtime "github.com/banzaicloud/logrus-runtime-formatter" - "github.com/sirupsen/logrus" -) - -type Level string - -const ( - LevelTrace Level = "trace" - LevelDebug Level = "debug" - LevelInfo Level = "info" - LevelWarn Level = "warn" - LevelError Level = "error" -) - -var levelVar *slog.LevelVar - -// InitLogger will initialize the default logger instance. -func InitLogger() { - levelVar = &slog.LevelVar{} - levelVar.Set(slog.LevelInfo) - - logger := slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{Level: levelVar})) - - slog.SetDefault(logger) -} - -// SetLevel will set the logging level of the default logger at runtime. -func SetLevel(loglevel string) { - switch Level(loglevel) { - case LevelDebug: - levelVar.Set(slog.LevelDebug) - case LevelInfo, "": - levelVar.Set(slog.LevelInfo) - case LevelWarn: - levelVar.Set(slog.LevelWarn) - case LevelError: - levelVar.Set(slog.LevelError) - default: - levelVar.Set(slog.LevelInfo) - slog.Warn("Unknown log level, defaulting to info", "loglevel", loglevel) - } -} - -// NewLogrusLogger will generate a new logrus logger instance -func NewLogrusLogger(logLevel string) *logrus.Logger { - logger := logrus.New() - - logger.SetOutput(os.Stdout) - - switch Level(logLevel) { - case LevelDebug: - logger.Level = logrus.DebugLevel - case LevelTrace: - logger.Level = logrus.TraceLevel - case LevelInfo, "": - logger.Level = logrus.InfoLevel - case LevelWarn: - logger.Level = logrus.WarnLevel - case LevelError: - logger.Level = logrus.ErrorLevel - default: - logger.Level = logrus.InfoLevel - logger.WithField("logLevel", logLevel).Warn("Unknown log level, defaulting to info") - } - - logger.Level = logrus.InfoLevel - - runtimeFormatter := &runtime.Formatter{ - ChildFormatter: &logrus.JSONFormatter{}, - File: true, - Line: true, - BaseNameOnly: true, - } - - logger.SetFormatter(runtimeFormatter) - - return logger -} diff --git a/internal/model/model.go b/internal/model/model.go index b393758..508c6f0 
100644 --- a/internal/model/model.go +++ b/internal/model/model.go @@ -10,9 +10,8 @@ type ( StoreKind string ) -const ( - AppName = "bioscfg" - AppSubject = "biosControl" +var ( + Name = "bioscfg" ) // nolint:govet // prefer to keep field ordering as is @@ -32,21 +31,3 @@ type Asset struct { // Facility this Asset is hosted in. FacilityCode string } - -func (a *Asset) AsLogFields() []any { - return []any{ - "asset_id", a.ID.String(), - "address", a.BmcAddress.String(), - "vendor", a.Vendor, - "model", a.Model, - "serial", a.Serial, - "facility", a.FacilityCode, - } -} - -type Args struct { - LogLevel string - ConfigFile string - FacilityCode string - EnableProfiling bool -} diff --git a/internal/store/bmc/bmc.go b/internal/store/bmc/bmc.go new file mode 100644 index 0000000..c441513 --- /dev/null +++ b/internal/store/bmc/bmc.go @@ -0,0 +1,248 @@ +package bmc + +import ( + "context" + "crypto/tls" + "net" + "net/http" + "net/http/cookiejar" + "path" + "runtime" + "strings" + "time" + + "github.com/bmc-toolbox/bmclib/v2" + "github.com/bmc-toolbox/bmclib/v2/constants" + "github.com/bmc-toolbox/bmclib/v2/providers" + logrusrv2 "github.com/bombsimon/logrusr/v2" + "github.com/metal-toolbox/bioscfg/internal/model" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel" + "golang.org/x/net/publicsuffix" +) + +const ( + logoutTimeout = 1 * time.Minute + loginTimeout = 1 * time.Minute +) + +var ( + errBMCLogin = errors.New("bmc login error") + errBMCLogout = errors.New("bmc logout error") + errBMCNotImplemented = errors.New("method not implemented") +) + +// Bmc is an implementation of the Queryor interface +type Client struct { + client *bmclib.Client + asset *model.Asset + logger *logrus.Entry +} + +// NewBMCClient creates a new Queryor interface for a BMC +func NewBMCClient(asset *model.Asset, logger *logrus.Entry) *Client { + client := newBmclibClient(asset, logger) + + return &Client{ + client, + asset, + logger, + } +} + +// Open creates a 
BMC session +func (b *Client) Open(ctx context.Context) error { + if b.client == nil { + return errors.Wrap(errBMCLogin, "client not initialized") + } + defer b.tracelog() + + return b.client.Open(ctx) +} + +// Close logs out of the BMC +func (b *Client) Close(traceCtx context.Context) error { + if b.client == nil { + return nil + } + + ctxClose, cancel := context.WithTimeout(traceCtx, logoutTimeout) + defer cancel() + + defer b.tracelog() + + if err := b.client.Close(ctxClose); err != nil { + return errors.Wrap(errBMCLogout, err.Error()) + } + + return nil +} + +// GetPowerState returns the device power status +func (b *Client) GetPowerState(ctx context.Context) (string, error) { + defer b.tracelog() + return b.client.GetPowerState(ctx) +} + +// SetPowerState sets the given power state on the device +func (b *Client) SetPowerState(ctx context.Context, state string) error { + defer b.tracelog() + _, err := b.client.SetPowerState(ctx, state) + return err +} + +// SetBootDevice sets the boot device of the remote device, and validates it was set +// +//nolint:gocritic // its a TODO +func (b *Client) SetBootDevice(ctx context.Context, device string, persistent, efiBoot bool) error { + ok, err := b.client.SetBootDevice(ctx, device, persistent, efiBoot) + if err != nil { + return err + } + + if !ok { + return errors.New("setting boot device failed") + } + + // Now lets validate the boot device order + // TODO; This is a WIP. 
We do not know yet if This is the right bmc call to get boot device + // override, err := b.client.GetBootDeviceOverride(ctx) + // if err != nil { + // return err + // } + + // if device != string(override.Device) { + // return errors.New("setting boot device failed to propagate") + // } + + // if efiBoot != override.IsEFIBoot { + // return errors.New("setting boot device EFI boot failed to propagate") + // } + + // if persistent != override.IsPersistent { + // return errors.New("setting boot device Persistent boot failed to propagate") + // } + + return nil +} + +// GetBootDevice gets the boot device information of the remote device +func (b *Client) GetBootDevice(_ context.Context) (device string, persistent, efiBoot bool, err error) { + return "", false, false, errors.Wrap(errBMCNotImplemented, "GetBootDevice") +} + +// PowerCycleBMC sets a power cycle action on the BMC of the remote device +func (b *Client) PowerCycleBMC(ctx context.Context) error { + defer b.tracelog() + _, err := b.client.ResetBMC(ctx, "GracefulRestart") + return err +} + +func (b *Client) HostBooted(ctx context.Context) (bool, error) { + defer b.tracelog() + status, _, err := b.client.PostCode(ctx) + if err != nil { + return false, err + } + return status == constants.POSTStateOS, nil +} + +func (b *Client) ResetBios(ctx context.Context) error { + defer b.tracelog() + return b.client.ResetBiosConfiguration(ctx) +} + +func (b *Client) tracelog() { + pc, _, _, _ := runtime.Caller(1) + funcName := path.Base(runtime.FuncForPC(pc).Name()) + + mapstr := func(m map[string]string) string { + if m == nil { + return "" + } + + var s []string + for k, v := range m { + s = append(s, k+": "+v) + } + + return strings.Join(s, ", ") + } + + b.logger.WithFields( + logrus.Fields{ + "attemptedProviders": strings.Join(b.client.GetMetadata().ProvidersAttempted, ","), + "successfulProvider": b.client.GetMetadata().SuccessfulProvider, + "successfulOpens": strings.Join(b.client.GetMetadata().SuccessfulOpenConns, 
","), + "successfulCloses": strings.Join(b.client.GetMetadata().SuccessfulCloseConns, ","), + "failedProviderDetail": mapstr(b.client.GetMetadata().FailedProviderDetail), + }).Trace(funcName + ": connection metadata") +} + +func newHTTPClient() *http.Client { + jar, err := cookiejar.New(&cookiejar.Options{PublicSuffixList: publicsuffix.List}) + if err != nil { + panic(err) + } + + // nolint:gomnd // time duration declarations are clear as is. + return &http.Client{ + Timeout: time.Second * 600, + Jar: jar, + Transport: &http.Transport{ + // nolint:gosec // BMCs don't have valid certs. + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + DisableKeepAlives: true, + Dial: (&net.Dialer{ + Timeout: 180 * time.Second, + KeepAlive: 180 * time.Second, + }).Dial, + TLSHandshakeTimeout: 180 * time.Second, + ResponseHeaderTimeout: 600 * time.Second, + IdleConnTimeout: 180 * time.Second, + }, + } +} + +// newBmclibClient initializes a bmclib client with the given credentials +func newBmclibClient(asset *model.Asset, l *logrus.Entry) *bmclib.Client { + logger := logrus.New() + logger.Formatter = l.Logger.Formatter + + // setup a logr logger for bmclib + // bmclib uses logr, for which the trace logs are logged with log.V(3), + // this is a hax so the logrusr lib will enable trace logging + // since any value that is less than (logrus.LogLevel - 4) >= log.V(3) is ignored + // https://github.com/bombsimon/logrusr/blob/master/logrusr.go#L64 + switch l.Logger.GetLevel() { + case logrus.TraceLevel: + logger.Level = 7 + case logrus.DebugLevel: + logger.Level = 5 + } + + logruslogr := logrusrv2.New(logger) + + bmcClient := bmclib.NewClient( + asset.BmcAddress.String(), + asset.BmcUsername, + asset.BmcPassword, + bmclib.WithLogger(logruslogr), + bmclib.WithHTTPClient(newHTTPClient()), + bmclib.WithPerProviderTimeout(loginTimeout), + bmclib.WithRedfishEtagMatchDisabled(true), + bmclib.WithTracerProvider(otel.GetTracerProvider()), + ) + + bmcClient.Registry.Drivers = 
bmcClient.Registry.Supports( + providers.FeatureBmcReset, + providers.FeatureBootDeviceSet, + providers.FeaturePowerSet, + providers.FeaturePowerState, + ) + + // NOTE: remove the .Using("redfish") before this ends up in prod + // this is kept here since ipmitool doesn't work well in the docker sandbox env. + return bmcClient.Using("redfish") +} diff --git a/internal/store/bmc/dryRun.go b/internal/store/bmc/dryRun.go new file mode 100644 index 0000000..ff2edb1 --- /dev/null +++ b/internal/store/bmc/dryRun.go @@ -0,0 +1,184 @@ +package bmc + +import ( + "context" + "errors" + "time" + + "github.com/metal-toolbox/bioscfg/internal/model" +) + +type server struct { + powerStatus string + bootTime time.Time + bootDevice string + previousBootDevice string + persistent bool + efiBoot bool +} + +var ( + errBmcCantFindServer = errors.New("dryrun BMC couldnt find server to set state") + errBmcServerOffline = errors.New("dryrun BMC couldnt set boot device, server is off") + serverStates = make(map[string]server) +) + +// DryRunBMC is an simulated implementation of the Queryor interface +type DryRunBMCClient struct { + id string +} + +// NewDryRunBMCClient creates a new Queryor interface for a simulated BMC +func NewDryRunBMCClient(asset *model.Asset) *DryRunBMCClient { + _, ok := serverStates[asset.ID.String()] + if !ok { + serverStates[asset.ID.String()] = getDefaultSettings() + } + + return &DryRunBMCClient{ + asset.ID.String(), + } +} + +// Open simulates creating a BMC session +func (b *DryRunBMCClient) Open(_ context.Context) error { + return nil +} + +// Close simulates logging out of the BMC +func (b *DryRunBMCClient) Close(_ context.Context) error { + return nil +} + +// GetPowerState simulates returning the device power status +func (b *DryRunBMCClient) GetPowerState(_ context.Context) (string, error) { + server, err := b.getServer() + if err != nil { + return "", err + } + + return server.powerStatus, nil +} + +// SetPowerState simulates setting the given power 
state on the device +func (b *DryRunBMCClient) SetPowerState(_ context.Context, state string) error { + server, err := b.getServer() + if err != nil { + return err + } + + if isRestarting(state) { + server.bootTime = getRestartTime(state) + } + + server.powerStatus = state + serverStates[b.id] = *server + return nil +} + +// SetBootDevice simulates setting the boot device of the remote device +func (b *DryRunBMCClient) SetBootDevice(_ context.Context, device string, persistent, efiBoot bool) error { + server, err := b.getServer() + if err != nil { + return err + } + + if server.powerStatus != "on" { + return errBmcServerOffline + } + + server.previousBootDevice = server.bootDevice + server.bootDevice = device + server.persistent = persistent + server.efiBoot = efiBoot + + return nil +} + +// GetBootDevice simulates getting the boot device information of the remote device +func (b *DryRunBMCClient) GetBootDevice(_ context.Context) (device string, persistent, efiBoot bool, err error) { + server, err := b.getServer() + if err != nil { + return "", false, false, err + } + + if server.powerStatus != "on" { + return "", false, false, errBmcServerOffline + } + + return server.bootDevice, server.persistent, server.efiBoot, nil +} + +// PowerCycleBMC simulates a power cycle action on the BMC of the remote device +func (b *DryRunBMCClient) PowerCycleBMC(_ context.Context) error { + return nil +} + +// HostBooted reports whether or not the device has booted the host OS +func (b *DryRunBMCClient) HostBooted(_ context.Context) (bool, error) { + return true, nil +} + +func (b *DryRunBMCClient) ResetBios(ctx context.Context) error { + _, ok := serverStates[b.id] + if !ok { + return errBmcCantFindServer + } + + serverStates[b.id] = getDefaultSettings() + + return b.SetPowerState(ctx, "cycle") +} + +// getServer gets a simulateed server state, and update power status and boot device if required +func (b *DryRunBMCClient) getServer() (*server, error) { + state, ok := 
serverStates[b.id] + if !ok { + return nil, errBmcCantFindServer + } + + if isRestarting(state.powerStatus) { + if time.Now().After(state.bootTime) { + state.powerStatus = "on" + + if !state.persistent { + state.bootDevice = state.previousBootDevice + } + } + } + + return &state, nil +} + +func isRestarting(state string) bool { + switch state { + case "reset", "cycle": + return true + default: + return false + } +} + +func getRestartTime(state string) time.Time { + switch state { + case "reset": + return time.Now().Add(time.Second * 30) // Soft reboot should take longer than a hard reboot + case "cycle": + return time.Now().Add(time.Second * 20) + default: + return time.Now() // No reboot necessary + } +} + +func getDefaultSettings() server { + status := server{} + + status.powerStatus = "on" + status.bootDevice = "disk" + status.previousBootDevice = "disk" + status.persistent = true + status.efiBoot = false + status.bootTime = time.Now() + + return status +} diff --git a/internal/store/bmc/interface.go b/internal/store/bmc/interface.go new file mode 100644 index 0000000..20f3953 --- /dev/null +++ b/internal/store/bmc/interface.go @@ -0,0 +1,18 @@ +package bmc + +import ( + "context" +) + +// Queryor interface abstracts calls to remote devices +type BMC interface { + Open(ctx context.Context) error + Close(ctx context.Context) error + GetPowerState(ctx context.Context) (state string, err error) + SetPowerState(ctx context.Context, state string) error + SetBootDevice(ctx context.Context, device string, persistent, efiBoot bool) error + GetBootDevice(ctx context.Context) (device string, persistent, efiBoot bool, err error) + PowerCycleBMC(ctx context.Context) error + HostBooted(ctx context.Context) (bool, error) + ResetBios(ctx context.Context) error +} diff --git a/internal/store/fleetdb/client.go b/internal/store/fleetdb/client.go index 76b945c..731e1a0 100644 --- a/internal/store/fleetdb/client.go +++ b/internal/store/fleetdb/client.go @@ -3,58 +3,53 @@ package 
fleetdb import ( "context" "io" - "log/slog" "net/http" "net/url" "time" "github.com/coreos/go-oidc" "github.com/hashicorp/go-retryablehttp" - "github.com/metal-toolbox/bioscfg/internal/configuration" - fleetdbapi "github.com/metal-toolbox/fleetdb/pkg/api/v1" - "github.com/pkg/errors" + "github.com/sirupsen/logrus" "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" "golang.org/x/oauth2/clientcredentials" + + fleetdbapi "github.com/metal-toolbox/fleetdb/pkg/api/v1" ) var ( // timeout for requests made by this client. - timeout = 30 * time.Second - ErrConfig = errors.New("error in fleetdb client configuration") + timeout = 30 * time.Second ) // NewFleetDBClient instantiates and returns a serverService client -func NewFleetDBClient(ctx context.Context, cfg *configuration.FleetDBOptions) (*fleetdbapi.Client, error) { - if cfg == nil { - return nil, errors.Wrap(ErrConfig, "configuration is nil") +func NewFleetDBClient(ctx context.Context, cfg *Config, logger *logrus.Logger) (*fleetdbapi.Client, error) { + err := cfg.validate() + if err != nil { + return nil, err } - if cfg.DisableOAuth { - return newFleetDBClientWithOtel(cfg, cfg.Endpoint) + if cfg.Authenticate { + return newFleetDBClientWithOAuthOtel(ctx, cfg, logger) } - return newFleetDBClientWithOAuthOtel(ctx, cfg, cfg.Endpoint) + return newFleetDBClientWithOtel(cfg, logger) } // returns a fleetdb retryable client with Otel -func newFleetDBClientWithOtel(cfg *configuration.FleetDBOptions, endpoint string) (*fleetdbapi.Client, error) { - if cfg == nil { - return nil, errors.Wrap(ErrConfig, "configuration is nil") - } - +func newFleetDBClientWithOtel(cfg *Config, logger *logrus.Logger) (*fleetdbapi.Client, error) { // init retryable http client retryableClient := retryablehttp.NewClient() - // log hook fo 500 errors since the retryablehttp client masks them + // log hook for 500 errors since the retryablehttp client masks them logHookFunc := func(_ retryablehttp.Logger, r *http.Response) { if
r.StatusCode == http.StatusInternalServerError { b, err := io.ReadAll(r.Body) if err != nil { - slog.Warn("fleetdb query returned 500 status code; error reading body", "error", err) + logger.Warn("fleetdb query returned 500 error, got error reading body: ", err.Error()) return } - slog.Warn("fleetdb query returned 500 status code", "body", string(b)) + logger.Warn("fleetdb query returned 500 error, body: ", string(b)) } } @@ -69,18 +64,14 @@ func newFleetDBClientWithOtel(cfg *configuration.FleetDBOptions, endpoint string return fleetdbapi.NewClientWithToken( "dummy", - endpoint, + cfg.URL, client, ) } // returns a fleetdb retryable http client with Otel and Oauth wrapped in -func newFleetDBClientWithOAuthOtel(ctx context.Context, cfg *configuration.FleetDBOptions, endpoint string) (*fleetdbapi.Client, error) { - if cfg == nil { - return nil, errors.Wrap(ErrConfig, "configuration is nil") - } - - slog.Info("fleetdb client ctor") +func newFleetDBClientWithOAuthOtel(ctx context.Context, cfg *Config, logger *logrus.Logger) (*fleetdbapi.Client, error) { + logger.Info("fleetdb client ctor") // init retryable http client retryableClient := retryablehttp.NewClient() @@ -89,7 +80,7 @@ func newFleetDBClientWithOAuthOtel(ctx context.Context, cfg *configuration.Fleet retryableClient.HTTPClient = otelhttp.DefaultClient // setup oidc provider - provider, err := oidc.NewProvider(ctx, cfg.OidcIssuerEndpoint) + provider, err := oidc.NewProvider(ctx, cfg.OidcIssuerURL) if err != nil { return nil, err } @@ -107,7 +98,7 @@ func newFleetDBClientWithOAuthOtel(ctx context.Context, cfg *configuration.Fleet ClientSecret: cfg.OidcClientSecret, TokenURL: provider.Endpoint().TokenURL, Scopes: cfg.OidcClientScopes, - EndpointParams: url.Values{"audience": []string{cfg.OidcAudienceEndpoint}}, + EndpointParams: url.Values{"audience": []string{cfg.OidcAudienceURL}}, } // wrap OAuth transport, cookie jar in the retryable client @@ -122,7 +113,7 @@ func newFleetDBClientWithOAuthOtel(ctx 
context.Context, cfg *configuration.Fleet return fleetdbapi.NewClientWithToken( cfg.OidcClientSecret, - endpoint, + cfg.URL, client, ) } diff --git a/internal/store/fleetdb/config.go b/internal/store/fleetdb/config.go new file mode 100644 index 0000000..d45e640 --- /dev/null +++ b/internal/store/fleetdb/config.go @@ -0,0 +1,56 @@ +package fleetdb + +import ( + "net/url" + + "github.com/pkg/errors" +) + +// Config defines configuration for the FleetDB client. +// https://github.com/metal-toolbox/fleetdb +type Config struct { + URL string `mapstructure:"url"` + OidcIssuerURL string `mapstructure:"oidc_issuer_url"` + OidcAudienceURL string `mapstructure:"oidc_audience_url"` + OidcClientSecret string `mapstructure:"oidc_client_secret"` + OidcClientID string `mapstructure:"oidc_client_id"` + OidcClientScopes []string `mapstructure:"oidc_client_scopes"` + Authenticate bool `mapstructure:"authenticate"` +} + +func (cfg *Config) validate() error { + if cfg == nil { + return errors.Wrap(ErrFleetDBConfig, "config was nil") + } + + if cfg.URL == "" { + return errors.Wrap(ErrFleetDBConfig, "url was empty") + } + + _, err := url.Parse(cfg.URL) + if err != nil { + return errors.Wrap(ErrFleetDBConfig, "url failed to parse: "+err.Error()) + } + + if !cfg.Authenticate { + return nil + } + + if cfg.OidcIssuerURL == "" { + return errors.Wrap(ErrFleetDBConfig, "oidc issuer url was empty") + } + + if cfg.OidcClientSecret == "" { + return errors.Wrap(ErrFleetDBConfig, "oidc secret was empty") + } + + if cfg.OidcClientID == "" { + return errors.Wrap(ErrFleetDBConfig, "oidc client id was empty") + } + + if len(cfg.OidcClientScopes) == 0 { + return errors.Wrap(ErrFleetDBConfig, "oidc scopes was empty") + } + + return nil +} diff --git a/internal/store/fleetdb/errors.go b/internal/store/fleetdb/errors.go new file mode 100644 index 0000000..016031b --- /dev/null +++ b/internal/store/fleetdb/errors.go @@ -0,0 +1,15 @@ +package fleetdb + +import "github.com/pkg/errors" + +var
( + ErrSlugs = errors.New("slugs error") + ErrServerServiceRegisterChanges = errors.New("error in server service API register changes") + ErrAssetObject = errors.New("asset object error") + ErrAssetObjectConversion = errors.New("error converting asset object") + ErrFleetDBObject = errors.New("serverService object error") + ErrChangeList = errors.New("error building change list") + ErrServerServiceAttrObject = errors.New("error in server service attribute object") + ErrFleetDBConfig = errors.New("fleetdb configuration error") + ErrInventoryQuery = errors.New("fleetdb query returned error") +) diff --git a/internal/store/fleetdb/fleetdb.go b/internal/store/fleetdb/fleetdb.go index 97d75f7..2ff86dc 100644 --- a/internal/store/fleetdb/fleetdb.go +++ b/internal/store/fleetdb/fleetdb.go @@ -3,43 +3,40 @@ package fleetdb import ( "context" "encoding/json" - "log/slog" "net" "github.com/google/uuid" - "github.com/metal-toolbox/bioscfg/internal/configuration" "github.com/metal-toolbox/bioscfg/internal/model" - fleetdbapi "github.com/metal-toolbox/fleetdb/pkg/api/v1" "github.com/metal-toolbox/rivets/fleetdb" "github.com/pkg/errors" + "github.com/sirupsen/logrus" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/codes" + + fleetdbapi "github.com/metal-toolbox/fleetdb/pkg/api/v1" ) const ( pkgName = "internal/store" ) -var ( - ErrInventoryQuery = errors.New("fleetdb query returned error") - ErrFleetDBObject = errors.New("fleetdb object error") -) - // Store is an asset inventory store type Store struct { api *fleetdbapi.Client - config *configuration.FleetDBOptions + logger *logrus.Logger + config *Config } // New returns a fleetdb store queryor to lookup and publish assets to, from the store. 
-func New(ctx context.Context, cfg *configuration.FleetDBOptions) (*Store, error) { - apiclient, err := NewFleetDBClient(ctx, cfg) +func New(ctx context.Context, cfg *Config, logger *logrus.Logger) (*Store, error) { + apiclient, err := NewFleetDBClient(ctx, cfg, logger) if err != nil { return nil, err } s := &Store{ api: apiclient, + logger: logger, config: cfg, } @@ -79,8 +76,7 @@ func toAsset(server *fleetdbapi.Server, credential *fleetdbapi.ServerCredential) serverAttributes, err := serverAttributes(server.Attributes) if err != nil { - slog.Error("error getting server attributes", "error", err) - return nil, errors.Wrap(ErrFleetDBObject, err.Error()) + return nil, errors.Wrap(err, "error getting server attributes") } asset := &model.Asset{ diff --git a/internal/store/fleetdb/fleetdb_test.go b/internal/store/fleetdb/fleetdb_test.go index 9bc76f5..a2243e9 100644 --- a/internal/store/fleetdb/fleetdb_test.go +++ b/internal/store/fleetdb/fleetdb_test.go @@ -2,6 +2,7 @@ package fleetdb import ( "net" + "testing" "github.com/google/uuid" diff --git a/internal/store/store.go b/internal/store/store.go deleted file mode 100644 index 36f2a2b..0000000 --- a/internal/store/store.go +++ /dev/null @@ -1,19 +0,0 @@ -package store - -import ( - "context" - - "github.com/google/uuid" - "github.com/metal-toolbox/bioscfg/internal/configuration" - "github.com/metal-toolbox/bioscfg/internal/model" - "github.com/metal-toolbox/bioscfg/internal/store/fleetdb" -) - -type Repository interface { - // AssetByID returns asset based on the identifier. 
- AssetByID(ctx context.Context, assetID uuid.UUID) (*model.Asset, error) -} - -func NewRepository(ctx context.Context, config *configuration.Configuration) (Repository, error) { - return fleetdb.New(ctx, config.FleetDBOptions) -} diff --git a/internal/tasks/steps.go b/internal/tasks/steps.go deleted file mode 100644 index 5763d41..0000000 --- a/internal/tasks/steps.go +++ /dev/null @@ -1,140 +0,0 @@ -package tasks - -import ( - "context" - "log/slog" - - "github.com/bmc-toolbox/bmclib/v2" - "github.com/metal-toolbox/bioscfg/internal/model" - rctypes "github.com/metal-toolbox/rivets/condition" - "github.com/pkg/errors" -) - -// StepStatus has status about a step, to be reported as part of the overall task. -type StepStatus struct { - Step string `json:"step"` - Status string `json:"status"` - Details string `json:"details,omitempty"` - Error string `json:"error,omitempty"` -} - -// NewStepStatus will create a new step status struct -func NewStepStatus(stepName string, state rctypes.State, details string, err error) *StepStatus { - status := &StepStatus{ - Step: stepName, - Status: string(state), - Details: details, - } - - if err != nil { - status.Error = err.Error() - } - - return status -} - -func (s *StepStatus) AsLogFields() []any { - return []any{ - "task", s.Step, - "status", s.Status, - "details", s.Details, - "error", s.Error, - } -} - -// Step is a unit of work. Multiple steps accomplish a task. -type Step interface { - // Name of this step - Name() string - // Run will execute the code to accomplish this step - Run(ctx context.Context, client *bmclib.Client, data sharedData) (string, error) -} - -type getServerPowerStateStep struct { - name string -} - -// GetServerPowerStateStep will get the current power state of a server, -// and store it in sharedData. 
-func GetServerPowerStateStep() Step { - return &getServerPowerStateStep{ - name: "GetServerPowerState", - } -} - -func (t *getServerPowerStateStep) Name() string { - return t.name -} - -func (t *getServerPowerStateStep) Run(ctx context.Context, client *bmclib.Client, data sharedData) (string, error) { - state, err := client.GetPowerState(ctx) - if err != nil { - return "Failed to get current power state", err - } - - data[currentPowerStateKey] = state - - return "Current power state: " + state, nil -} - -type biosResetStep struct { - name string -} - -// BiosResetStep will use the client to reset the BIOS settings. -func BiosResetStep() Step { - return &biosResetStep{ - name: "BiosReset", - } -} - -func (t *biosResetStep) Name() string { - return t.name -} - -func (t *biosResetStep) Run(ctx context.Context, client *bmclib.Client, _ sharedData) (string, error) { - err := client.ResetBiosConfiguration(ctx) - if err != nil { - return "Failed to reset bios settings", err - } - - return "BIOS settings reset", nil -} - -type serverRebootStep struct { - name string -} - -// ServerRebootStep will reboot the server, if necessary, per the information in sharedData -func ServerRebootStep() Step { - return &serverRebootStep{ - name: "ServerReboot", - } -} - -func (t *serverRebootStep) Name() string { - return t.name -} - -func (t *serverRebootStep) Run(ctx context.Context, client *bmclib.Client, data sharedData) (string, error) { - powerState, ok := data[currentPowerStateKey].(string) - if !ok { - return "Reboot requirement unknown", errors.New("missing power state") - } - - var details string - - if powerState == model.PowerStateOn { - slog.Info("Rebooting server", "powerState", powerState) - _, err := client.SetPowerState(ctx, model.PowerStateReset) - if err != nil { - return "Failed to reset power state", err - } - details = "Rebooting server" - } else { - slog.Info("Skipping server reboot", "ok", ok, "powerState", powerState) - details = "Reboot not required" - } - - 
return details, nil -} diff --git a/internal/tasks/tasks.go b/internal/tasks/tasks.go deleted file mode 100644 index 1bd7480..0000000 --- a/internal/tasks/tasks.go +++ /dev/null @@ -1,314 +0,0 @@ -package tasks - -import ( - "context" - "encoding/json" - "log/slog" - "runtime/debug" - - "github.com/bmc-toolbox/bmclib/v2" - "github.com/metal-toolbox/bioscfg/internal/model" - "github.com/metal-toolbox/ctrl" - rctypes "github.com/metal-toolbox/rivets/condition" - rtypes "github.com/metal-toolbox/rivets/types" - "github.com/mitchellh/copystructure" - "github.com/pkg/errors" -) - -var ( - currentPowerStateKey = "currentPowerState" - errInvalidConditionParams = errors.New("invalid condition parameters") - errTaskConv = errors.New("error in generic Task conversion") -) - -// Miscellaneous -type sharedData map[string]interface{} - -// TaskStatus has status about a task, and it's steps. -type TaskStatus struct { - Task string `json:"task"` - Status string `json:"status"` - Details string `json:"details,omitempty"` - Error string `json:"error,omitempty"` - ActiveStep string `json:"active_step,omitempty"` - Steps []*StepStatus `json:"steps"` -} - -// NewTaskStatus will generate a new task status struct -func NewTaskStatus(taskName string, state rctypes.State) *TaskStatus { - return &TaskStatus{ - Task: taskName, - Status: string(state), - } -} - -func (r *TaskStatus) AsLogFields() []string { - return []string{ - "task", r.Task, - "status", r.Status, - "details", r.Details, - "error", r.Error, - } -} - -func (r *TaskStatus) Marshal() ([]byte, error) { - respBytes, err := json.Marshal(r) - if err != nil { - return nil, errors.Wrap(err, "failed to marshal response to json") - } - - return respBytes, nil -} - -// Task is a unit of work to address a condition from condition orchestrator. -// The task multiple steps to accomplish the task. 
-type Task interface { - // Name of the task - Name() string - // Asset is the server that will be affected by this task - Asset() *model.Asset - // Steps is the multiple units of work that will accomplish this task - Steps() []Step -} - -type biosResetTask struct { - name string - asset *model.Asset - steps []Step -} - -// NewBiosResetTask creates the task for resetting the BIOS of a server to default settings. -func NewBiosResetTask(asset *model.Asset) Task { - return &biosResetTask{ - name: "BiosResetSettings", - asset: asset, - steps: []Step{ - GetServerPowerStateStep(), - BiosResetStep(), - ServerRebootStep(), - }, - } -} - -func (j *biosResetTask) Name() string { - return j.name -} - -func (j *biosResetTask) Steps() []Step { - return j.steps -} - -func (j *biosResetTask) Asset() *model.Asset { - return j.asset -} - -type RCTask rctypes.Task[*rctypes.BiosControlTaskParameters, json.RawMessage] - -func NewTask(task *rctypes.Task[any, any]) (*RCTask, error) { - paramsJSON, ok := task.Parameters.(json.RawMessage) - if !ok { - return nil, errInvalidConditionParams - } - - params := rctypes.BiosControlTaskParameters{} - if err := json.Unmarshal(paramsJSON, ¶ms); err != nil { - return nil, err - } - - // deep copy fields referenced by pointer - asset, err := copystructure.Copy(task.Server) - if err != nil { - return nil, errors.Wrap(errTaskConv, err.Error()+": Task.Server") - } - - fault, err := copystructure.Copy(task.Fault) - if err != nil { - return nil, errors.Wrap(errTaskConv, err.Error()+": Task.Fault") - } - - return &RCTask{ - StructVersion: task.StructVersion, - ID: task.ID, - Kind: task.Kind, - State: task.State, - Status: task.Status, - Parameters: ¶ms, - Fault: fault.(*rctypes.Fault), - FacilityCode: task.FacilityCode, - Server: asset.(*rtypes.Server), - WorkerID: task.WorkerID, - TraceID: task.TraceID, - SpanID: task.SpanID, - CreatedAt: task.CreatedAt, - UpdatedAt: task.UpdatedAt, - CompletedAt: task.CompletedAt, - }, nil -} - -func (task *RCTask) 
ToGeneric() (*rctypes.Task[any, any], error) { - paramsJSON, err := task.Parameters.Marshal() - if err != nil { - return nil, errors.Wrap(errTaskConv, err.Error()+": Task.Parameters") - } - - // deep copy fields referenced by pointer - asset, err := copystructure.Copy(task.Server) - if err != nil { - return nil, errors.Wrap(errTaskConv, err.Error()+": Task.Server") - } - - fault, err := copystructure.Copy(task.Fault) - if err != nil { - return nil, errors.Wrap(errTaskConv, err.Error()+": Task.Fault") - } - - return &rctypes.Task[any, any]{ - StructVersion: task.StructVersion, - ID: task.ID, - Kind: task.Kind, - State: task.State, - Status: task.Status, - Parameters: paramsJSON, - Fault: fault.(*rctypes.Fault), - FacilityCode: task.FacilityCode, - Server: asset.(*rtypes.Server), - WorkerID: task.WorkerID, - TraceID: task.TraceID, - SpanID: task.SpanID, - CreatedAt: task.CreatedAt, - UpdatedAt: task.UpdatedAt, - CompletedAt: task.CompletedAt, - }, nil -} - -// TaskRunner Will run the task by executing the individual steps in the task, -// and reports task status using the publisher. 
-type TaskRunner struct { - publisher ctrl.Publisher - task *RCTask - oldTask Task - taskStatus *TaskStatus -} - -// NewTaskRunner creates a TaskRunner to run a specific Task -func NewTaskRunner(publisher ctrl.Publisher, oldTask Task, task *RCTask) *TaskRunner { - return &TaskRunner{ - publisher: publisher, - task: task, - oldTask: oldTask, - taskStatus: NewTaskStatus(oldTask.Name(), rctypes.Pending), - } -} - -func (r *TaskRunner) Run(ctx context.Context, client *bmclib.Client) (err error) { - slog.With(r.oldTask.Asset().AsLogFields()...).Info("Running task", "task", r.oldTask.Name()) - - data := sharedData{} - r.initTaskLog() - - defer func() { - if rec := recover(); rec != nil { - err = r.handlePanic(ctx, rec) - } - }() - - r.publishTaskUpdate(ctx, rctypes.Active, "Opening client", nil) - - if err = client.Open(ctx); err != nil { - r.publishFailed(ctx, 0, "Failed to open client", err) - return errors.Wrap(err, "failed to open client") - } - defer client.Close(ctx) - - for stepID, step := range r.oldTask.Steps() { - r.publishStepUpdate(ctx, stepID, "Running step") - - details, err := step.Run(ctx, client, data) - if err != nil { - r.publishFailed(ctx, stepID, "Step failure", err) - return err - } - - r.publishStepSuccess(ctx, stepID, details) - } - - r.publishTaskSuccess(ctx) - - return nil -} - -func (r *TaskRunner) initTaskLog() { - steps := r.oldTask.Steps() - r.taskStatus.Steps = make([]*StepStatus, len(steps)) - - for i, step := range steps { - r.taskStatus.Steps[i] = NewStepStatus(step.Name(), rctypes.Pending, "", nil) - } -} - -func (r *TaskRunner) handlePanic(ctx context.Context, rec any) error { - msg := "Panic occurred while running task" - slog.Error("!!panic occurred", "rec", rec, "stack", string(debug.Stack())) - slog.Error(msg) - err := errors.New("Task fatal error, check logs for details") - - r.publishTaskUpdate(ctx, rctypes.Failed, msg, err) - - return err -} - -func (r *TaskRunner) publishStepUpdate(ctx context.Context, stepID int, details 
string) { - r.publish(ctx, stepID, rctypes.Active, rctypes.Active, details, nil) -} - -func (r *TaskRunner) publishStepSuccess(ctx context.Context, stepID int, details string) { - r.publish(ctx, stepID, rctypes.Succeeded, rctypes.Active, details, nil) -} - -func (r *TaskRunner) publishFailed(ctx context.Context, stepID int, details string, err error) { - slog.With(r.oldTask.Asset().AsLogFields()...).Error("Task failed", "task", r.oldTask.Name()) - r.publish(ctx, stepID, rctypes.Failed, rctypes.Failed, details, err) -} - -func (r *TaskRunner) publishTaskSuccess(ctx context.Context) { - slog.With(r.oldTask.Asset().AsLogFields()...).Info("Task completed successfully", "task", r.oldTask.Name()) - r.publishTaskUpdate(ctx, rctypes.Succeeded, "Task completed successfully", nil) -} - -func (r *TaskRunner) publish(ctx context.Context, stepID int, stepState, taskState rctypes.State, details string, err error) { - step := r.oldTask.Steps()[stepID] - stepStatus := NewStepStatus(step.Name(), stepState, details, err) - - slog.With(r.oldTask.Asset().AsLogFields()...).With(stepStatus.AsLogFields()...).Info(details, "step", step.Name()) - - r.taskStatus.Steps[stepID] = stepStatus - - var taskDetails string - if err != nil { - taskDetails = "Task failed at step " + step.Name() - } - - r.publishTaskUpdate(ctx, taskState, taskDetails, err) -} - -func (r *TaskRunner) publishTaskUpdate(ctx context.Context, state rctypes.State, details string, err error) { - r.task.State = state - r.task.Status.Append(details) - - if err != nil { - r.taskStatus.Error = err.Error() - } - - slog.With(r.oldTask.Asset().AsLogFields()...).Info("Task update", "task", r.oldTask.Name()) - - genTask, err := r.task.ToGeneric() - if err != nil { - r.taskStatus.Error = err.Error() - return - } - - err = r.publisher.Publish(ctx, genTask, false) - if err != nil { - return - } -} diff --git a/internal/tasks/tasks_test.go b/internal/tasks/tasks_test.go deleted file mode 100644 index 5627ba6..0000000 --- 
a/internal/tasks/tasks_test.go +++ /dev/null @@ -1,69 +0,0 @@ -package tasks - -import ( - "context" - "testing" - - "github.com/bmc-toolbox/bmclib/v2" - "github.com/metal-toolbox/bioscfg/internal/model" - "github.com/metal-toolbox/rivets/condition" - "github.com/stretchr/testify/assert" -) - -type fakeStep struct{} - -func (s *fakeStep) Name() string { - return "fake step" -} - -func (s *fakeStep) Run(_ context.Context, _ *bmclib.Client, _ sharedData) (string, error) { - return "", nil -} - -type fakeTask struct { - asset *model.Asset - steps []Step -} - -func newFakeTask() *fakeTask { - return &fakeTask{ - asset: &model.Asset{}, - steps: []Step{&fakeStep{}}, - } -} - -func newFakeRCTask() *RCTask { - return &RCTask{} -} - -func (t *fakeTask) Name() string { - return "fake task" -} - -func (t *fakeTask) Asset() *model.Asset { - return t.asset -} - -func (t *fakeTask) Steps() []Step { - return t.steps -} - -type fakePublisher struct { - t *testing.T -} - -func (m *fakePublisher) Publish(_ context.Context, _ *condition.Task[any, any], _ bool) error { - return nil -} - -func TestTaskRunnerHandlePanic(t *testing.T) { - task := newFakeTask() - rcTask := newFakeRCTask() - runner := NewTaskRunner(&fakePublisher{t: t}, task, rcTask) - - err := runner.Run(context.Background(), nil) - - if assert.NotNil(t, err) { - assert.Equal(t, "Task fatal error, check logs for details", err.Error()) - } -} diff --git a/internal/version/version.go b/internal/version/version.go index b5100d5..361d561 100644 --- a/internal/version/version.go +++ b/internal/version/version.go @@ -1,6 +1,7 @@ package version import ( + "encoding/json" "runtime" rdebug "runtime/debug" "strings" @@ -49,6 +50,22 @@ func Current() *Version { } } +func (v *Version) AsMap() (map[string]any, error) { + var asMap map[string]interface{} + + bytes, err := json.Marshal(v) + if err != nil { + return nil, err + } + + err = json.Unmarshal(bytes, &asMap) + if err != nil { + return nil, err + } + + return asMap, nil +} + 
func ExportBuildInfoMetric() { buildInfo := promauto.NewGaugeVec( prometheus.GaugeOpts{ diff --git a/main.go b/main.go index d899e73..30b0781 100644 --- a/main.go +++ b/main.go @@ -17,10 +17,8 @@ package main import ( "github.com/metal-toolbox/bioscfg/cmd" - "github.com/metal-toolbox/bioscfg/internal/log" ) func main() { - log.InitLogger() cmd.Execute() }