From 3e56a048e5eea627d0df7d028bcc0a171f93c0e2 Mon Sep 17 00:00:00 2001 From: Hlib Kanunnikov Date: Tue, 10 Oct 2023 15:01:34 +0200 Subject: [PATCH 1/2] chore(share/ipld): obliterate expensive ipld getter tracing (#2832) The load these traces can produce is massive. In the case of ODS size 128, and the whole block is sampled for namespace data, it will be 48896 spans (128*2-1)*128*2 - (128 * 128). Besides, there is zero value this tracing brought us so far. --- share/ipld/get.go | 15 --------------- share/ipld/get_shares.go | 6 ------ share/ipld/namespace_data.go | 18 ------------------ share/ipld/nmt.go | 4 +--- 4 files changed, 1 insertion(+), 42 deletions(-) diff --git a/share/ipld/get.go b/share/ipld/get.go index f263877dc0..adf2ffa8c5 100644 --- a/share/ipld/get.go +++ b/share/ipld/get.go @@ -10,8 +10,6 @@ import ( "github.com/ipfs/boxo/blockservice" "github.com/ipfs/go-cid" ipld "github.com/ipfs/go-ipld-format" - "go.opentelemetry.io/otel/attribute" - "go.opentelemetry.io/otel/codes" "github.com/celestiaorg/celestia-node/share" ) @@ -100,9 +98,6 @@ func GetLeaves(ctx context.Context, maxShares int, put func(int, ipld.Node), ) { - ctx, span := tracer.Start(ctx, "get-leaves") - defer span.End() - // this buffer ensures writes to 'jobs' are never blocking (bin-tree-feat) jobs := make(chan *job, (maxShares+1)/2) // +1 for the case where 'maxShares' is 1 jobs <- &job{cid: root, ctx: ctx} @@ -120,21 +115,12 @@ func GetLeaves(ctx context.Context, // work over each job concurrently, s.t. shares do not block // processing of each other pool.Submit(func() { - ctx, span := tracer.Start(j.ctx, "process-job") - defer span.End() defer wg.Done() - span.SetAttributes( - attribute.String("cid", j.cid.String()), - attribute.Int("pos", j.sharePos), - ) - nd, err := GetNode(ctx, bGetter, j.cid) if err != nil { // we don't really care about errors here // just fetch as much as possible - span.RecordError(err) - span.SetStatus(codes.Error, err.Error()) return } // check links to know what we should do with the node @@ -142,7 +128,6 @@ func GetLeaves(ctx context.Context, if len(lnks) == 0 { // successfully fetched a share/leaf // ladies and gentlemen, we got em! - span.SetStatus(codes.Ok, "") put(j.sharePos, nd) return } diff --git a/share/ipld/get_shares.go b/share/ipld/get_shares.go index 3fda86941f..98db7012b5 100644 --- a/share/ipld/get_shares.go +++ b/share/ipld/get_shares.go @@ -32,9 +32,6 @@ func GetShare( // Does not return any error, and returns/unblocks only on success // (got all shares) or on context cancellation. func GetShares(ctx context.Context, bg blockservice.BlockGetter, root cid.Cid, shares int, put func(int, share.Share)) { - ctx, span := tracer.Start(ctx, "get-shares") - defer span.End() - putNode := func(i int, leaf format.Node) { put(i, leafToShare(leaf)) } @@ -51,9 +48,6 @@ func GetSharesByNamespace( namespace share.Namespace, maxShares int, ) ([]share.Share, *nmt.Proof, error) { - ctx, span := tracer.Start(ctx, "get-shares-by-namespace") - defer span.End() - data := NewNamespaceData(maxShares, namespace, WithLeaves(), WithProofs()) err := data.CollectLeavesByNamespace(ctx, bGetter, root) if err != nil { diff --git a/share/ipld/namespace_data.go b/share/ipld/namespace_data.go index a3f468ee47..5a6fd2abb4 100644 --- a/share/ipld/namespace_data.go +++ b/share/ipld/namespace_data.go @@ -10,8 +10,6 @@ import ( "github.com/ipfs/boxo/blockservice" "github.com/ipfs/go-cid" ipld "github.com/ipfs/go-ipld-format" - "go.opentelemetry.io/otel/attribute" - "go.opentelemetry.io/otel/codes" "github.com/celestiaorg/nmt" @@ -193,13 +191,6 @@ func (n *NamespaceData) CollectLeavesByNamespace( return err } - ctx, span := tracer.Start(ctx, "get-leaves-by-namespace") - defer span.End() - - span.SetAttributes( - attribute.String("namespace", n.namespace.String()), - ) - // buffer the jobs to avoid blocking, we only need as many // queued as the number of shares in the second-to-last layer jobs := make(chan job, (n.maxShares+1)/2) @@ -227,15 +218,8 @@ func (n *NamespaceData) CollectLeavesByNamespace( return retrievalErr } pool.Submit(func() { - ctx, span := tracer.Start(j.ctx, "process-job") - defer span.End() defer wg.done() - span.SetAttributes( - attribute.String("cid", j.cid.String()), - attribute.Int("pos", j.sharePos), - ) - // if an error is likely to be returned or not depends on // the underlying impl of the blockservice, currently it is not a realistic probability nd, err := GetNode(ctx, bGetter, j.cid) @@ -248,7 +232,6 @@ func (n *NamespaceData) CollectLeavesByNamespace( "pos", j.sharePos, "err", err, ) - span.SetStatus(codes.Error, err.Error()) // we still need to update the bounds n.addLeaf(j.sharePos, nil) return @@ -257,7 +240,6 @@ func (n *NamespaceData) CollectLeavesByNamespace( links := nd.Links() if len(links) == 0 { // successfully fetched a leaf belonging to the namespace - span.SetStatus(codes.Ok, "") // we found a leaf, so we update the bounds n.addLeaf(j.sharePos, nd) return diff --git a/share/ipld/nmt.go b/share/ipld/nmt.go index b43d786a5f..6dba300965 100644 --- a/share/ipld/nmt.go +++ b/share/ipld/nmt.go @@ -15,7 +15,6 @@ import ( logging "github.com/ipfs/go-log/v2" mh "github.com/multiformats/go-multihash" mhcore "github.com/multiformats/go-multihash/core" - "go.opentelemetry.io/otel" "github.com/celestiaorg/celestia-app/pkg/appconsts" "github.com/celestiaorg/celestia-app/pkg/da" @@ -25,8 +24,7 @@ import ( ) var ( - tracer = otel.Tracer("ipld") - log = logging.Logger("ipld") + log = logging.Logger("ipld") ) const ( From 937eb433d937e4652d54b4356d7fce1aeaf5ded1 Mon Sep 17 00:00:00 2001 From: Vlad <13818348+walldiss@users.noreply.github.com> Date: Wed, 11 Oct 2023 18:27:11 +0400 Subject: [PATCH 2/2] fix(share/p2p/peer-manager) fix race for hasPeerCh pointer read (#2835) --- share/p2p/peers/pool.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/share/p2p/peers/pool.go b/share/p2p/peers/pool.go index 609d68e0b3..d0cc45ac44 100644 --- a/share/p2p/peers/pool.go +++ b/share/p2p/peers/pool.go @@ -89,8 +89,11 @@ func (p *pool) next(ctx context.Context) <-chan peer.ID { return } + p.m.RLock() + hasPeerCh := p.hasPeerCh + p.m.RUnlock() select { - case <-p.hasPeerCh: + case <-hasPeerCh: case <-ctx.Done(): return }