diff --git a/CHANGELOG.md b/CHANGELOG.md index e6c8083873..a69b3a9f66 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,17 +2,22 @@ See [RELEASE](./RELEASE.md) for workflow instructions. -## UNRELEASED +## Release v1.3.8 ### Improvements * [#5441](https://github.com/spacemeshos/go-spacemesh/pull/5441) Fix possible nil-pointer dereference in blocks.Generator. + * [#5512](https://github.com/spacemeshos/go-spacemesh/pull/5512) Increase EpochActiveSet limit to 1.5M to prepare for 1M+ ATXs. + * [#5515](https://github.com/spacemeshos/go-spacemesh/pull/5515) Increase fetcher limit to 60MiB to prepare for 1M+ ATXs. +* [#5518](https://github.com/spacemeshos/go-spacemesh/pull/5518) In rare cases the node could create a malfeasance + proof against itself. This is now prevented. + ## Release v1.3.7 ### Improvements @@ -39,10 +44,14 @@ See [RELEASE](./RELEASE.md) for workflow instructions. * [#5498](https://github.com/spacemeshos/go-spacemesh/pull/5498) Reduce the default number of CPU cores that are used for verifying incoming ATXs to half of the available cores. -* [#5500](https://github.com/spacemeshos/go-spacemesh/pull/5500) - Make fetch request timeout configurable. - Add separate metric for failed p2p server requests. +* [#5462](https://github.com/spacemeshos/go-spacemesh/pull/5462) Add separate metric for failed p2p server requests + +* [#5464](https://github.com/spacemeshos/go-spacemesh/pull/5464) Make fetch request timeout configurable. + +* [#5463](https://github.com/spacemeshos/go-spacemesh/pull/5463) Adjust deadline during long reads and writes, reducing "i/o deadline exceeded" errors. + +* [#5494](https://github.com/spacemeshos/go-spacemesh/pull/5494) Make routing discovery more configurable and less spammy by default. ## Release v1.3.5 @@ -92,6 +101,13 @@ See [RELEASE](./RELEASE.md) for workflow instructions. Also, remove unnecessary wait for ATXs to be synced before beginning initialization if the commitment ATX is already selected. 
+* [#5418](https://github.com/spacemeshos/go-spacemesh/pull/5418) Add `grpc-post-listener` to separate post service from + `grpc-private-listener` and not require mTLS for the post service. + + If you are not using a remote post service you do not need to adjust anything. If you are using a remote setup + make sure your post service now connects to `grpc-post-listener` instead of `grpc-private-listener`. If you are + connecting to a remote post service over the internet we strongly recommend using mTLS via `grpc-tls-listener`. + ## Release v1.3.2 ### Improvements @@ -208,6 +224,13 @@ for more information on how to configure the node to work with the PoST service. * [#5384](https://github.com/spacemeshos/go-spacemesh/pull/5384) to improve network stability and performance allow the active set to be set in advance for an epoch. This allows the network to start consensus on the first layer of an epoch. +## Release v1.2.13 + +### Improvements + +* [#5384](https://github.com/spacemeshos/go-spacemesh/pull/5384) to improve network stability and performance allow the + active set to be set in advance for an epoch. + ## Release v1.2.12 ### Improvements @@ -230,6 +253,15 @@ for more information on how to configure the node to work with the PoST service. * further increased cache sizes and and p2p timeouts to compensate for the increased number of nodes on the network. +* [#5329](https://github.com/spacemeshos/go-spacemesh/pull/5329) P2P decentralization improvements. Added support for QUIC + transport and DHT routing discovery for finding peers and relays. Also, added the `ping-peers` feature which is useful + during connectivity troubleshooting. `static-relays` feature can be used to provide a static list of circuit v2 relay + nodes when automatic relay discovery is not desired. All of the relay server resource settings are now configurable.
Most + of the new functionality is disabled by default unless explicitly enabled in the config via `enable-routing-discovery`, + `routing-discovery-advertise`, `enable-quic-transport`, `static-relays` and `ping-peers` options in the `p2p` config + section. The non-conditional changes include values/provides support on all of the nodes, which will enable DHT to + function efficiently for routing discovery. + ## Release v1.2.9 ### Improvements @@ -262,7 +294,6 @@ for more information on how to configure the node to work with the PoST service. ### Improvements * [#5263](https://github.com/spacemeshos/go-spacemesh/pull/5263) randomize peer selection - without this change node can get stuck after restart on requesting data from peer that is misbehaving. log below will be printed repeatedly: diff --git a/activation/handler.go b/activation/handler.go index 4846df3e39..4c7b1f2b7b 100644 --- a/activation/handler.go +++ b/activation/handler.go @@ -8,6 +8,7 @@ import ( "time" "github.com/spacemeshos/post/shared" + "go.uber.org/zap" "golang.org/x/exp/maps" "github.com/spacemeshos/go-spacemesh/atxsdata" @@ -50,6 +51,9 @@ type Handler struct { fetcher system.Fetcher poetCfg PoetConfig + signerMtx sync.Mutex + signers map[types.NodeID]*signing.EdSigner + // inProgress map gathers ATXs that are currently being processed. // It's used to avoid processing the same ATX twice. 
inProgress map[types.ATXID][]chan error @@ -89,10 +93,23 @@ func NewHandler( tortoise: tortoise, poetCfg: poetCfg, + signers: make(map[types.NodeID]*signing.EdSigner), inProgress: make(map[types.ATXID][]chan error), } } +func (h *Handler) Register(sig *signing.EdSigner) { + h.signerMtx.Lock() + defer h.signerMtx.Unlock() + if _, exists := h.signers[sig.NodeID()]; exists { + h.log.Error("signing key already registered", zap.Stringer("id", sig.NodeID())) + return + } + + h.log.Info("registered signing key", zap.Stringer("id", sig.NodeID())) + h.signers[sig.NodeID()] = sig +} + // ProcessAtx validates the active set size declared in the atx, and contextually validates the atx according to atx // validation rules it then stores the atx with flag set to validity of the atx. // @@ -372,48 +389,61 @@ func (h *Handler) storeAtx(ctx context.Context, atx *types.VerifiedActivationTx) return fmt.Errorf("checking if node is malicious: %w", err) } var proof *types.MalfeasanceProof - if err := h.cdb.WithTx(ctx, func(dbtx *sql.Tx) error { - if !malicious { - prev, err := atxs.GetByEpochAndNodeID(dbtx, atx.PublishEpoch, atx.SmesherID) - if err != nil && !errors.Is(err, sql.ErrNotFound) { - return err + if err := h.cdb.WithTx(ctx, func(tx *sql.Tx) error { + if malicious { + if err := atxs.Add(tx, atx); err != nil && !errors.Is(err, sql.ErrObjectExists) { + return fmt.Errorf("add atx to db: %w", err) } - // do ID check to be absolutely sure. 
- if prev != nil && prev.ID() != atx.ID() { - var atxProof types.AtxProof - for i, a := range []*types.VerifiedActivationTx{prev, atx} { - atxProof.Messages[i] = types.AtxProofMsg{ - InnerMsg: types.ATXMetadata{ - PublishEpoch: a.PublishEpoch, - MsgHash: types.BytesToHash(a.HashInnerBytes()), - }, - SmesherID: a.SmesherID, - Signature: a.Signature, - } - } - proof = &types.MalfeasanceProof{ - Layer: atx.PublishEpoch.FirstLayer(), - Proof: types.Proof{ - Type: types.MultipleATXs, - Data: &atxProof, + return nil + } + + prev, err := atxs.GetByEpochAndNodeID(tx, atx.PublishEpoch, atx.SmesherID) + if err != nil && !errors.Is(err, sql.ErrNotFound) { + return err + } + + // do ID check to be absolutely sure. + if prev != nil && prev.ID() != atx.ID() { + if _, ok := h.signers[atx.SmesherID]; ok { + // if we land here we tried to publish 2 ATXs in the same epoch + // don't punish ourselves but fail validation and thereby the handling of the incoming ATX + return fmt.Errorf("%s already published an ATX in epoch %d", atx.SmesherID.ShortString(), atx.PublishEpoch) + } + + var atxProof types.AtxProof + for i, a := range []*types.VerifiedActivationTx{prev, atx} { + atxProof.Messages[i] = types.AtxProofMsg{ + InnerMsg: types.ATXMetadata{ + PublishEpoch: a.PublishEpoch, + MsgHash: types.BytesToHash(a.HashInnerBytes()), }, + SmesherID: a.SmesherID, + Signature: a.Signature, } - encoded, err := codec.Encode(proof) - if err != nil { - h.log.With().Panic("failed to encode malfeasance proof", log.Err(err)) - } - if err := identities.SetMalicious(dbtx, atx.SmesherID, encoded, time.Now()); err != nil { - return fmt.Errorf("add malfeasance proof: %w", err) - } - - h.log.WithContext(ctx).With().Warning("smesher produced more than one atx in the same epoch", - log.Stringer("smesher", atx.SmesherID), - log.Object("prev", prev), - log.Object("curr", atx), - ) } + proof = &types.MalfeasanceProof{ + Layer: atx.PublishEpoch.FirstLayer(), + Proof: types.Proof{ + Type: types.MultipleATXs, + Data: 
&atxProof, + }, + } + encoded, err := codec.Encode(proof) + if err != nil { + h.log.With().Panic("failed to encode malfeasance proof", log.Err(err)) + } + if err := identities.SetMalicious(tx, atx.SmesherID, encoded, time.Now()); err != nil { + return fmt.Errorf("add malfeasance proof: %w", err) + } + + h.log.WithContext(ctx).With().Warning("smesher produced more than one atx in the same epoch", + log.Stringer("smesher", atx.SmesherID), + log.Object("prev", prev), + log.Object("curr", atx), + ) } - if err := atxs.Add(dbtx, atx); err != nil && !errors.Is(err, sql.ErrObjectExists) { + + if err := atxs.Add(tx, atx); err != nil && !errors.Is(err, sql.ErrObjectExists) { return fmt.Errorf("add atx to db: %w", err) } return nil diff --git a/activation/handler_test.go b/activation/handler_test.go index b895b5018e..a8a4a68ca1 100644 --- a/activation/handler_test.go +++ b/activation/handler_test.go @@ -1000,6 +1000,66 @@ func TestHandler_ProcessAtx(t *testing.T) { require.Equal(t, atx2.PublishEpoch.FirstLayer(), got.MalfeasanceProof.Layer) } +func TestHandler_ProcessAtx_OwnNotMalicious(t *testing.T) { + // Arrange + goldenATXID := types.ATXID{2, 3, 4} + atxHdlr := newTestHandler(t, goldenATXID) + + sig, err := signing.NewEdSigner() + require.NoError(t, err) + atxHdlr.Register(sig) + + coinbase := types.GenerateAddress([]byte("aaaa")) + + // Act & Assert + atx1 := newActivationTx( + t, + sig, + 0, + types.EmptyATXID, + types.EmptyATXID, + nil, + types.LayerID(layersPerEpoch).GetEpoch(), + 0, + 100, + coinbase, + 100, + &types.NIPost{}, + ) + atxHdlr.mbeacon.EXPECT().OnAtx(gomock.Any()) + atxHdlr.mtortoise.EXPECT().OnAtx(gomock.Any()) + require.NoError(t, atxHdlr.ProcessAtx(context.Background(), atx1)) + + // processing an already stored ATX returns no error + require.NoError(t, atxHdlr.ProcessAtx(context.Background(), atx1)) + proof, err := identities.GetMalfeasanceProof(atxHdlr.cdb, sig.NodeID()) + require.ErrorIs(t, err, sql.ErrNotFound) + require.Nil(t, proof) + + // 
a second, different atx for the same epoch signed by a registered (own) key is rejected and no malfeasance proof is created + atx2 := newActivationTx( + t, + sig, + 1, + atx1.ID(), + atx1.ID(), + nil, + types.LayerID(layersPerEpoch+1).GetEpoch(), + 0, + 100, + coinbase, + 100, + &types.NIPost{}, + ) + require.ErrorContains(t, + atxHdlr.ProcessAtx(context.Background(), atx2), + fmt.Sprintf("%s already published an ATX", sig.NodeID().ShortString()), + ) + proof, err = identities.GetMalfeasanceProof(atxHdlr.cdb, sig.NodeID()) + require.ErrorIs(t, err, sql.ErrNotFound) + require.Nil(t, proof) +} + func TestHandler_ProcessAtxStoresNewVRFNonce(t *testing.T) { // Arrange goldenATXID := types.ATXID{2, 3, 4} diff --git a/node/node.go b/node/node.go index 0caf08da9a..db998b36ac 100644 --- a/node/node.go +++ b/node/node.go @@ -717,6 +717,7 @@ func (app *App) initServices(ctx context.Context) error { app.addLogger(ATXHandlerLogger, lg), app.Config.POET, ) + atxHandler.Register(app.edSgn) // we can't have an epoch offset which is greater/equal than the number of layers in an epoch