Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

core/services: fix ocrWrapper saveError contexts #13139

Merged
merged 1 commit into from
May 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/big-trees-help.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"chainlink": patch
---

core/services: fix ocrWrapper saveError contexts #internal
4 changes: 2 additions & 2 deletions core/services/ocr/delegate.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ import (
ocr "github.com/smartcontractkit/libocr/offchainreporting"
ocrtypes "github.com/smartcontractkit/libocr/offchainreporting/types"

commonlogger "github.com/smartcontractkit/chainlink-common/pkg/logger"
"github.com/smartcontractkit/chainlink-common/pkg/sqlutil"
"github.com/smartcontractkit/chainlink-common/pkg/utils/mailbox"

Expand Down Expand Up @@ -155,9 +154,10 @@ func (d *Delegate) ServicesForSpec(ctx context.Context, jb job.Job) (services []
v2Bootstrappers = peerWrapper.P2PConfig().V2().DefaultBootstrappers()
}

ocrLogger := commonlogger.NewOCRWrapper(lggr, d.cfg.OCR().TraceLogging(), func(msg string) {
ocrLogger := ocrcommon.NewOCRWrapper(lggr, d.cfg.OCR().TraceLogging(), func(ctx context.Context, msg string) {
d.jobORM.TryRecordError(ctx, jb.ID, msg)
})
services = append(services, ocrLogger)

lc := toLocalConfig(chain.Config().EVM(), chain.Config().EVM().OCR(), d.cfg.Insecure(), *concreteSpec, d.cfg.OCR())
if err = ocr.SanityCheckLocalConfig(lc); err != nil {
Expand Down
90 changes: 58 additions & 32 deletions core/services/ocr2/delegate.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ import (
ocr2keepers20runner "github.com/smartcontractkit/chainlink-automation/pkg/v2/runner"
ocr2keepers21config "github.com/smartcontractkit/chainlink-automation/pkg/v3/config"
ocr2keepers21 "github.com/smartcontractkit/chainlink-automation/pkg/v3/plugin"
commonlogger "github.com/smartcontractkit/chainlink-common/pkg/logger"
"github.com/smartcontractkit/chainlink-common/pkg/loop"
"github.com/smartcontractkit/chainlink-common/pkg/loop/reportingplugins"
"github.com/smartcontractkit/chainlink-common/pkg/loop/reportingplugins/ocr3"
Expand Down Expand Up @@ -397,10 +396,6 @@ func (d *Delegate) ServicesForSpec(ctx context.Context, jb job.Job) ([]job.Servi
return nil, errors.New("peerWrapper is not started. OCR2 jobs require a started and running p2p v2 peer")
}

ocrLogger := commonlogger.NewOCRWrapper(lggr, d.cfg.OCR2().TraceLogging(), func(msg string) {
lggr.ErrorIf(d.jobORM.RecordError(ctx, jb.ID, msg), "unable to record error")
})

lc, err := validate.ToLocalConfig(d.cfg.OCR2(), d.cfg.Insecure(), *spec)
if err != nil {
return nil, err
Expand Down Expand Up @@ -438,22 +433,22 @@ func (d *Delegate) ServicesForSpec(ctx context.Context, jb job.Job) ([]job.Servi
ctx = lggrCtx.ContextWithValues(ctx)
switch spec.PluginType {
case types.Mercury:
return d.newServicesMercury(ctx, lggr, jb, bootstrapPeers, kb, ocrDB, lc, ocrLogger)
return d.newServicesMercury(ctx, lggr, jb, bootstrapPeers, kb, ocrDB, lc)

case types.LLO:
return d.newServicesLLO(ctx, lggr, jb, bootstrapPeers, kb, ocrDB, lc, ocrLogger)
return d.newServicesLLO(ctx, lggr, jb, bootstrapPeers, kb, ocrDB, lc)

case types.Median:
return d.newServicesMedian(ctx, lggr, jb, bootstrapPeers, kb, kvStore, ocrDB, lc, ocrLogger)
return d.newServicesMedian(ctx, lggr, jb, bootstrapPeers, kb, kvStore, ocrDB, lc)

case types.DKG:
return d.newServicesDKG(lggr, jb, bootstrapPeers, kb, ocrDB, lc, ocrLogger)
return d.newServicesDKG(lggr, jb, bootstrapPeers, kb, ocrDB, lc)

case types.OCR2VRF:
return d.newServicesOCR2VRF(ctx, lggr, jb, bootstrapPeers, kb, ocrDB, lc)

case types.OCR2Keeper:
return d.newServicesOCR2Keepers(ctx, lggr, jb, bootstrapPeers, kb, ocrDB, lc, ocrLogger)
return d.newServicesOCR2Keepers(ctx, lggr, jb, bootstrapPeers, kb, ocrDB, lc)

case types.Functions:
const (
Expand All @@ -463,10 +458,10 @@ func (d *Delegate) ServicesForSpec(ctx context.Context, jb job.Job) ([]job.Servi
)
thresholdPluginDB := NewDB(d.ds, spec.ID, thresholdPluginId, lggr)
s4PluginDB := NewDB(d.ds, spec.ID, s4PluginId, lggr)
return d.newServicesOCR2Functions(ctx, lggr, jb, bootstrapPeers, kb, ocrDB, thresholdPluginDB, s4PluginDB, lc, ocrLogger)
return d.newServicesOCR2Functions(ctx, lggr, jb, bootstrapPeers, kb, ocrDB, thresholdPluginDB, s4PluginDB, lc)

case types.GenericPlugin:
return d.newServicesGenericPlugin(ctx, lggr, jb, bootstrapPeers, kb, ocrDB, lc, ocrLogger, d.capabilitiesRegistry,
return d.newServicesGenericPlugin(ctx, lggr, jb, bootstrapPeers, kb, ocrDB, lc, d.capabilitiesRegistry,
kvStore)

default:
Expand Down Expand Up @@ -525,7 +520,6 @@ func (d *Delegate) newServicesGenericPlugin(
kb ocr2key.KeyBundle,
ocrDB *db,
lc ocrtypes.LocalConfig,
ocrLogger commontypes.Logger,
capabilitiesRegistry core.CapabilitiesRegistry,
keyValueStore core.KeyValueStore,
) (srvs []job.ServiceCtx, err error) {
Expand Down Expand Up @@ -655,6 +649,11 @@ func (d *Delegate) newServicesGenericPlugin(
synchronization.TelemetryType(pCfg.TelemetryType),
)

ocrLogger := ocrcommon.NewOCRWrapper(lggr, d.cfg.OCR2().TraceLogging(), func(ctx context.Context, msg string) {
lggr.ErrorIf(d.jobORM.RecordError(ctx, jb.ID, msg), "unable to record error")
})
srvs = append(srvs, ocrLogger)

switch pCfg.OCRVersion {
case 2:
plugin := reportingplugins.NewLOOPPService(pluginLggr, grpcOpts, cmdFn, pluginConfig, providerClientConn, pr, ta,
Expand Down Expand Up @@ -723,7 +722,6 @@ func (d *Delegate) newServicesMercury(
kb ocr2key.KeyBundle,
ocrDB *db,
lc ocrtypes.LocalConfig,
ocrLogger commontypes.Logger,
) ([]job.ServiceCtx, error) {
if jb.OCR2OracleSpec.FeedID == nil || (*jb.OCR2OracleSpec.FeedID == (common.Hash{})) {
return nil, errors.Errorf("ServicesForSpec: mercury job type requires feedID")
Expand Down Expand Up @@ -775,6 +773,10 @@ func (d *Delegate) newServicesMercury(
// https://smartcontract-it.atlassian.net/browse/MERC-3386
lc.ContractConfigTrackerPollInterval = 1 * time.Second // Mercury requires a fast poll interval, this is the fastest that libocr supports. See: https://github.com/smartcontractkit/offchain-reporting/pull/520

ocrLogger := ocrcommon.NewOCRWrapper(lggr, d.cfg.OCR2().TraceLogging(), func(ctx context.Context, msg string) {
lggr.ErrorIf(d.jobORM.RecordError(ctx, jb.ID, msg), "unable to record error")
})

oracleArgsNoPlugin := libocr2.MercuryOracleArgs{
BinaryNetworkEndpointFactory: d.peerWrapper.Peer2,
V2Bootstrappers: bootstrapPeers,
Expand Down Expand Up @@ -803,6 +805,8 @@ func (d *Delegate) newServicesMercury(
lggr.Infow("Enhanced telemetry is disabled for mercury job", "job", jb.Name)
}

mercuryServices = append(mercuryServices, ocrLogger)

return mercuryServices, err2
}

Expand All @@ -814,7 +818,6 @@ func (d *Delegate) newServicesLLO(
kb ocr2key.KeyBundle,
ocrDB *db,
lc ocrtypes.LocalConfig,
ocrLogger commontypes.Logger,
) ([]job.ServiceCtx, error) {
lggr = logger.Sugared(lggr.Named("LLO"))
spec := jb.OCR2OracleSpec
Expand Down Expand Up @@ -906,6 +909,10 @@ func (d *Delegate) newServicesLLO(
lggr.Infof("Using on-chain signing keys for LLO job %d (%s): %v", jb.ID, jb.Name.ValueOrZero(), kbm)
kr := llo.NewOnchainKeyring(lggr, kbm)

ocrLogger := ocrcommon.NewOCRWrapper(lggr, d.cfg.OCR2().TraceLogging(), func(ctx context.Context, msg string) {
lggr.ErrorIf(d.jobORM.RecordError(ctx, jb.ID, msg), "unable to record error")
})

cfg := llo.DelegateConfig{
Logger: lggr,
DataSource: d.ds,
Expand Down Expand Up @@ -934,7 +941,7 @@ func (d *Delegate) newServicesLLO(
if err != nil {
return nil, err
}
return []job.ServiceCtx{provider, oracle}, nil
return []job.ServiceCtx{provider, ocrLogger, oracle}, nil
}

func (d *Delegate) newServicesMedian(
Expand All @@ -946,7 +953,6 @@ func (d *Delegate) newServicesMedian(
kvStore job.KVStore,
ocrDB *db,
lc ocrtypes.LocalConfig,
ocrLogger commontypes.Logger,
) ([]job.ServiceCtx, error) {
spec := jb.OCR2OracleSpec

Expand All @@ -955,6 +961,10 @@ func (d *Delegate) newServicesMedian(
return nil, ErrJobSpecNoRelayer{Err: err, PluginName: "median"}
}

ocrLogger := ocrcommon.NewOCRWrapper(lggr, d.cfg.OCR2().TraceLogging(), func(ctx context.Context, msg string) {
lggr.ErrorIf(d.jobORM.RecordError(ctx, jb.ID, msg), "unable to record error")
})

oracleArgsNoPlugin := libocr2.OCR2OracleArgs{
BinaryNetworkEndpointFactory: d.peerWrapper.Peer2,
V2Bootstrappers: bootstrapPeers,
Expand Down Expand Up @@ -988,6 +998,8 @@ func (d *Delegate) newServicesMedian(
lggr.Infow("Enhanced telemetry is disabled for job", "job", jb.Name)
}

medianServices = append(medianServices, ocrLogger)

return medianServices, err2
}

Expand All @@ -998,7 +1010,6 @@ func (d *Delegate) newServicesDKG(
kb ocr2key.KeyBundle,
ocrDB *db,
lc ocrtypes.LocalConfig,
ocrLogger commontypes.Logger,
) ([]job.ServiceCtx, error) {
spec := jb.OCR2OracleSpec
rid, err := spec.RelayID()
Expand Down Expand Up @@ -1028,6 +1039,9 @@ func (d *Delegate) newServicesDKG(
if err2 != nil {
return nil, err2
}
ocrLogger := ocrcommon.NewOCRWrapper(lggr, d.cfg.OCR2().TraceLogging(), func(ctx context.Context, msg string) {
lggr.ErrorIf(d.jobORM.RecordError(ctx, jb.ID, msg), "unable to record error")
})
noopMonitoringEndpoint := telemetry.NoopAgent{}
oracleArgsNoPlugin := libocr2.OCR2OracleArgs{
BinaryNetworkEndpointFactory: d.peerWrapper.Peer2,
Expand All @@ -1044,7 +1058,12 @@ func (d *Delegate) newServicesDKG(
OnchainKeyring: kb,
MetricsRegisterer: prometheus.WrapRegistererWith(map[string]string{"job_name": jb.Name.ValueOrZero()}, prometheus.DefaultRegisterer),
}
return dkg.NewDKGServices(jb, dkgProvider, lggr, ocrLogger, d.dkgSignKs, d.dkgEncryptKs, chain.Client(), oracleArgsNoPlugin, d.ds, chain.ID(), spec.Relay)
services, err := dkg.NewDKGServices(jb, dkgProvider, lggr, ocrLogger, d.dkgSignKs, d.dkgEncryptKs, chain.Client(), oracleArgsNoPlugin, d.ds, chain.ID(), spec.Relay)
if err != nil {
return nil, err
}
services = append(services, ocrLogger)
return services, nil
}

func (d *Delegate) newServicesOCR2VRF(
Expand Down Expand Up @@ -1167,12 +1186,10 @@ func (d *Delegate) newServicesOCR2VRF(
"jobName", jb.Name.ValueOrZero(),
"jobID", jb.ID,
)
vrfLogger := commonlogger.NewOCRWrapper(l.With(
"vrfContractID", spec.ContractID), d.cfg.OCR2().TraceLogging(), func(msg string) {
vrfLogger := ocrcommon.NewOCRWrapper(l.With("vrfContractID", spec.ContractID), d.cfg.OCR2().TraceLogging(), func(ctx context.Context, msg string) {
lggr.ErrorIf(d.jobORM.RecordError(ctx, jb.ID, msg), "unable to record error")
})
dkgLogger := commonlogger.NewOCRWrapper(l.With(
"dkgContractID", cfg.DKGContractAddress), d.cfg.OCR2().TraceLogging(), func(msg string) {
dkgLogger := ocrcommon.NewOCRWrapper(l.With("dkgContractID", cfg.DKGContractAddress), d.cfg.OCR2().TraceLogging(), func(ctx context.Context, msg string) {
lggr.ErrorIf(d.jobORM.RecordError(ctx, jb.ID, msg), "unable to record error")
})
dkgReportingPluginFactoryDecorator := func(wrapped ocrtypes.ReportingPluginFactory) ocrtypes.ReportingPluginFactory {
Expand Down Expand Up @@ -1233,7 +1250,6 @@ func (d *Delegate) newServicesOCR2Keepers(
kb ocr2key.KeyBundle,
ocrDB *db,
lc ocrtypes.LocalConfig,
ocrLogger commontypes.Logger,
) ([]job.ServiceCtx, error) {
spec := jb.OCR2OracleSpec
var cfg ocr2keeper.PluginConfig
Expand All @@ -1247,14 +1263,14 @@ func (d *Delegate) newServicesOCR2Keepers(

switch cfg.ContractVersion {
case "v2.1":
return d.newServicesOCR2Keepers21(ctx, lggr, jb, bootstrapPeers, kb, ocrDB, lc, ocrLogger, cfg, spec)
return d.newServicesOCR2Keepers21(ctx, lggr, jb, bootstrapPeers, kb, ocrDB, lc, cfg, spec)
case "v2.1+":
// Future contracts of v2.1 (v2.x) will use the same job spec as v2.1
return d.newServicesOCR2Keepers21(ctx, lggr, jb, bootstrapPeers, kb, ocrDB, lc, ocrLogger, cfg, spec)
return d.newServicesOCR2Keepers21(ctx, lggr, jb, bootstrapPeers, kb, ocrDB, lc, cfg, spec)
case "v2.0":
return d.newServicesOCR2Keepers20(ctx, lggr, jb, bootstrapPeers, kb, ocrDB, lc, ocrLogger, cfg, spec)
return d.newServicesOCR2Keepers20(ctx, lggr, jb, bootstrapPeers, kb, ocrDB, lc, cfg, spec)
default:
return d.newServicesOCR2Keepers20(ctx, lggr, jb, bootstrapPeers, kb, ocrDB, lc, ocrLogger, cfg, spec)
return d.newServicesOCR2Keepers20(ctx, lggr, jb, bootstrapPeers, kb, ocrDB, lc, cfg, spec)
}
}

Expand All @@ -1266,7 +1282,6 @@ func (d *Delegate) newServicesOCR2Keepers21(
kb ocr2key.KeyBundle,
ocrDB *db,
lc ocrtypes.LocalConfig,
ocrLogger commontypes.Logger,
cfg ocr2keeper.PluginConfig,
spec *job.OCR2OracleSpec,
) ([]job.ServiceCtx, error) {
Expand Down Expand Up @@ -1348,6 +1363,9 @@ func (d *Delegate) newServicesOCR2Keepers21(
if cfg.ServiceQueueLength != 0 {
conf.ServiceQueueLength = cfg.ServiceQueueLength
}
ocrLogger := ocrcommon.NewOCRWrapper(lggr, d.cfg.OCR2().TraceLogging(), func(ctx context.Context, msg string) {
lggr.ErrorIf(d.jobORM.RecordError(ctx, jb.ID, msg), "unable to record error")
})

dConf := ocr2keepers21.DelegateConfig{
BinaryNetworkEndpointFactory: d.peerWrapper.Peer2,
Expand Down Expand Up @@ -1394,6 +1412,7 @@ func (d *Delegate) newServicesOCR2Keepers21(
keeperProvider.UpkeepStateStore(),
keeperProvider.TransmitEventProvider(),
pluginService,
ocrLogger,
}

if cfg.CaptureAutomationCustomTelemetry != nil && *cfg.CaptureAutomationCustomTelemetry ||
Expand Down Expand Up @@ -1422,7 +1441,6 @@ func (d *Delegate) newServicesOCR2Keepers20(
kb ocr2key.KeyBundle,
ocrDB *db,
lc ocrtypes.LocalConfig,
ocrLogger commontypes.Logger,
cfg ocr2keeper.PluginConfig,
spec *job.OCR2OracleSpec,
) ([]job.ServiceCtx, error) {
Expand Down Expand Up @@ -1498,6 +1516,10 @@ func (d *Delegate) newServicesOCR2Keepers20(
CacheClean: conf.CacheEvictionInterval,
}

ocrLogger := ocrcommon.NewOCRWrapper(lggr, d.cfg.OCR2().TraceLogging(), func(ctx context.Context, msg string) {
lggr.ErrorIf(d.jobORM.RecordError(ctx, jb.ID, msg), "unable to record error")
})

dConf := ocr2keepers20.DelegateConfig{
BinaryNetworkEndpointFactory: d.peerWrapper.Peer2,
V2Bootstrappers: bootstrapPeers,
Expand Down Expand Up @@ -1532,6 +1554,7 @@ func (d *Delegate) newServicesOCR2Keepers20(
keeperProvider,
rgstry,
logProvider,
ocrLogger,
pluginService,
}, nil
}
Expand All @@ -1546,7 +1569,6 @@ func (d *Delegate) newServicesOCR2Functions(
thresholdOcrDB *db,
s4OcrDB *db,
lc ocrtypes.LocalConfig,
ocrLogger commontypes.Logger,
) ([]job.ServiceCtx, error) {
spec := jb.OCR2OracleSpec

Expand Down Expand Up @@ -1597,6 +1619,10 @@ func (d *Delegate) newServicesOCR2Functions(
return nil, err
}

ocrLogger := ocrcommon.NewOCRWrapper(lggr, d.cfg.OCR2().TraceLogging(), func(ctx context.Context, msg string) {
lggr.ErrorIf(d.jobORM.RecordError(ctx, jb.ID, msg), "unable to record error")
})

functionsOracleArgs := libocr2.OCR2OracleArgs{
BinaryNetworkEndpointFactory: d.peerWrapper.Peer2,
V2Bootstrappers: bootstrapPeers,
Expand Down Expand Up @@ -1682,7 +1708,7 @@ func (d *Delegate) newServicesOCR2Functions(
return nil, errors.Wrap(err, "error calling NewFunctionsServices")
}

return append([]job.ServiceCtx{functionsProvider, thresholdProvider, s4Provider}, functionsServices...), nil
return append([]job.ServiceCtx{functionsProvider, thresholdProvider, s4Provider, ocrLogger}, functionsServices...), nil
}

// errorLog implements [loop.ErrorLog]
Expand Down
18 changes: 9 additions & 9 deletions core/services/ocrbootstrap/delegate.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (

ocr "github.com/smartcontractkit/libocr/offchainreporting2plus"

commonlogger "github.com/smartcontractkit/chainlink-common/pkg/logger"
"github.com/smartcontractkit/chainlink-common/pkg/loop"
"github.com/smartcontractkit/chainlink-common/pkg/sqlutil"
"github.com/smartcontractkit/chainlink-common/pkg/types"
Expand Down Expand Up @@ -163,22 +162,23 @@ func (d *Delegate) ServicesForSpec(ctx context.Context, jb job.Job) (services []
"ContractTransmitterTransmitTimeout", lc.ContractTransmitterTransmitTimeout,
"DatabaseTimeout", lc.DatabaseTimeout,
)
ocrLogger := ocrcommon.NewOCRWrapper(lggr.Named("OCRBootstrap"), d.ocr2Cfg.TraceLogging(), func(ctx context.Context, msg string) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For my own understanding: the benefit of this is that now consumers of ocrLogger will provide their own context to saveError rather than inheriting it from the delegate?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it is tied to the lifecycle of the job now. Inheriting from the delegate method was the real problem. My first hack added a StopChan-backed Close() method to the Delegates instead. That works, but it ties the context to the lifecycle of the delegate itself, which is not cancelled until shutdown. This new approach manages the lifecycle appropriately, at the cost of a little code duplication where we used to be able to share.

logger.Sugared(lggr).ErrorIf(d.jobORM.RecordError(ctx, jb.ID, msg), "unable to record error")
})
bootstrapNodeArgs := ocr.BootstrapperArgs{
BootstrapperFactory: d.peerWrapper.Peer2,
ContractConfigTracker: configProvider.ContractConfigTracker(),
Database: NewDB(d.ds, spec.ID, lggr),
LocalConfig: lc,
Logger: commonlogger.NewOCRWrapper(lggr.Named("OCRBootstrap"), d.ocr2Cfg.TraceLogging(), func(msg string) {
logger.Sugared(lggr).ErrorIf(d.jobORM.RecordError(ctx, jb.ID, msg), "unable to record error")
}),
BootstrapperFactory: d.peerWrapper.Peer2,
ContractConfigTracker: configProvider.ContractConfigTracker(),
Database: NewDB(d.ds, spec.ID, lggr),
LocalConfig: lc,
Logger: ocrLogger,
OffchainConfigDigester: configProvider.OffchainConfigDigester(),
}
lggr.Debugw("Launching new bootstrap node", "args", bootstrapNodeArgs)
bootstrapper, err := ocr.NewBootstrapper(bootstrapNodeArgs)
if err != nil {
return nil, errors.Wrap(err, "error calling NewBootstrapNode")
}
return []job.ServiceCtx{configProvider, job.NewServiceAdapter(bootstrapper)}, nil
return []job.ServiceCtx{configProvider, ocrLogger, job.NewServiceAdapter(bootstrapper)}, nil
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is ocrLogger meant to be wired into the bootstrapper as well as returned as a service on its own?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The bootstrapper depends on the logger, but it doesn't know that the logger needs to be closed, so the logger is not closed by the ServiceAdapter.Close() call.

}

// AfterJobCreated satisfies the job.Delegate interface.
Expand Down
Loading
Loading