Skip to content

Commit

Permalink
more flakiness fixes (#1795)
Browse files Browse the repository at this point in the history
* more flakiness fixes

* lint

* Print better error

* Print better error

* kill

* kill
  • Loading branch information
tudor-malene authored Feb 15, 2024
1 parent 83469ca commit 1837bcc
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 31 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/build-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ jobs:
# Close specified ports using lsof before testing / local port list compiled from ./integration/constants.go
- name: Close Integration Test Ports
run: |
killall -9 geth-v1.12.2 || true
killall -9 beacon-chain-v4.0.6 || true
killall -9 validator-v4.0.6 || true
lowest_port=8000 # Lowest starting port
highest_port=58000 # Highest port considering the offset
additional_ports=(80 81 99) # Additional specific ports
Expand Down
47 changes: 20 additions & 27 deletions integration/eth2network/eth2_network.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package eth2network
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
Expand Down Expand Up @@ -86,7 +85,7 @@ func NewEth2Network(
timeout time.Duration,
) Eth2Network {
// Build dirs are suffixed with a timestamp so multiple executions don't collide
timestamp := strconv.FormatInt(time.Now().UnixMilli(), 10)
timestamp := strconv.FormatInt(time.Now().UnixMicro(), 10)

// set the paths
buildDir := path.Join(basepath, "../.build/eth2", timestamp)
Expand All @@ -99,6 +98,11 @@ func NewEth2Network(
prysmBinaryPath := path.Join(binDir, _prysmCTLFileNameVersion)
prysmValidatorBinaryPath := path.Join(binDir, _prysmValidatorFileNameVersion)

// catch any issues due to folder collision early
if _, err := os.Stat(buildDir); err == nil {
panic(fmt.Sprintf("folder %s already exists", buildDir))
}

// Node logs and execution-related files are written in the build folder
err := os.MkdirAll(buildDir, os.ModePerm)
if err != nil {
Expand Down Expand Up @@ -239,6 +243,7 @@ func (n *Impl) Start() error {
if err != nil {
panic(err)
}
time.Sleep(time.Second)
}()
}

Expand Down Expand Up @@ -322,37 +327,25 @@ func (n *Impl) Start() error {
// Stop stops the network
func (n *Impl) Stop() error {
for i := 0; i < len(n.dataDirs); i++ {
err := kill(n.gethProcesses[i].Process, 0)
if err != nil {
fmt.Printf("unable to kill geth node - %s\n", err.Error())
}
err = kill(n.prysmBeaconProcesses[i].Process, 0)
if err != nil {
fmt.Printf("unable to kill prysm beacon node - %s\n", err.Error())
}
err = kill(n.prysmValidatorProcesses[i].Process, 0)
if err != nil {
fmt.Printf("unable to kill prysm validator node - %s\n", err.Error())
}
kill(n.gethProcesses[i].Process)
kill(n.prysmBeaconProcesses[i].Process)
kill(n.prysmValidatorProcesses[i].Process)
}
// wait a second for the kill signal
time.Sleep(time.Second)
return nil
}

const maxTryKill = 5

func kill(p *os.Process, cnt int) error {
if killErr := p.Kill(); killErr == nil {
return nil
} else if !errors.Is(killErr, os.ErrProcessDone) {
if cnt >= maxTryKill {
return killErr
}
time.Sleep(time.Second)
return kill(p, cnt+1)
func kill(p *os.Process) {
killErr := p.Kill()
if killErr != nil {
fmt.Printf("Error killing process %s", killErr)
}
time.Sleep(200 * time.Millisecond)
err := p.Release()
if err != nil {
fmt.Printf("Error releasing process %s", err)
}
return nil
}

// GethGenesis returns the Genesis used in geth to boot up the network
Expand Down Expand Up @@ -547,7 +540,7 @@ func (n *Impl) waitForNodeUp(nodeID int, timeout time.Duration) error {
return nil
}
}

fmt.Printf("Geth node error:\n%s\n", n.gethProcesses[nodeID].Stderr)
return fmt.Errorf("node not responsive after %s", timeout)
}

Expand Down
6 changes: 3 additions & 3 deletions integration/simulation/network/geth_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,18 @@ const (
func SetUpGethNetwork(wallets *params.SimWallets, startPort int, nrNodes int, blockDurationSeconds int) (*params.L1SetupData, []ethadapter.EthClient, eth2network.Eth2Network) {
eth2Network, err := StartGethNetwork(wallets, startPort, blockDurationSeconds)
if err != nil {
panic(err)
panic(fmt.Errorf("error starting geth network %w", err))
}

// connect to the first host to deploy
tmpEthClient, err := ethadapter.NewEthClient(Localhost, uint(startPort+100), DefaultL1RPCTimeout, common.HexToAddress("0x0"), testlog.Logger())
if err != nil {
panic(err)
panic(fmt.Errorf("error connecting to te first host %w", err))
}

l1Data, err := DeployObscuroNetworkContracts(tmpEthClient, wallets, true)
if err != nil {
panic(err)
panic(fmt.Errorf("error deploying obscuro contract %w", err))
}

ethClients := make([]ethadapter.EthClient, nrNodes)
Expand Down
3 changes: 2 additions & 1 deletion integration/simulation/network/socket.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,8 @@ func (n *networkOfSocketNodes) Create(simParams *params.SimParams, _ *stats.Stat
if errCheck != nil {
testlog.Logger().Warn("no port found on error", log.ErrKey, err)
}
testlog.Logger().Crit("unable to start obscuro node ", log.ErrKey, err)
fmt.Printf("unable to start obscuro node: %s", err)
testlog.Logger().Error("unable to start obscuro node ", log.ErrKey, err)
}
}

Expand Down

0 comments on commit 1837bcc

Please sign in to comment.