Skip to content

Commit

Permalink
use ssh to simplify the usage (#6)
Browse files Browse the repository at this point in the history
  • Loading branch information
siddontang authored May 26, 2018
1 parent 057716b commit 0b153f4
Show file tree
Hide file tree
Showing 29 changed files with 291 additions and 698 deletions.
14 changes: 2 additions & 12 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,10 @@ default: build

all: build

build: agent chaos verifier

agent:
go build -o bin/chaos-agent cmd/agent/main.go
build: chaos verifier

chaos:
go build -o bin/chaos-tidb cmd/tidb/main.go

verifier:
go build -o bin/chaos-verifier cmd/verifier/main.go

update:
which glide >/dev/null || curl https://glide.sh/get | sh
which glide-vc || go get -v -u github.com/sgotti/glide-vc
glide update --strip-vendor --skip-test
@echo "removing test files"
glide vc --only-code --no-tests
go build -o bin/chaos-verifier cmd/verifier/main.go
20 changes: 12 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,19 @@ Chaos is inspired by [jepsen](https://github.com/jepsen-io/jepsen) and uses [por

## Architecture

Chaos runs your registered database on 5 nodes, and starts an agent in every node too. The agent will
receive the command sent from the controller to control the service, like starting/stopping the service,
or using a nemesis to disturb the whole cluster.
Chaos runs your registered database on 5 nodes and sends commands through `ssh` to control the service, such as starting/stopping the service or using a nemesis to disturb the whole cluster.

![Architecture](./chaos.jpg)
```
+-------------+
+------- | controller | -------+
| +-------------+ |
| | | | |
| +----+ | | |
v v | | v
+----+----+----+ | | +----+----+
| n1 | n2 | n3 | <+ +> | n4 | n5 |
+----+----+----+ +----+----+
```

## Usage

Expand All @@ -27,10 +35,6 @@ In another shell, use `docker exec -it chaos-control bash` to enter the controll
# build the node and your own chaos test
make
# deploy and start node agent
./scripts/deploy_agent.sh
./scripts/start_agent.sh
# run your own chaos like
./bin/chaos-tidb
```
Expand Down
Binary file removed chaos.jpg
Binary file not shown.
52 changes: 0 additions & 52 deletions cmd/agent/main.go

This file was deleted.

4 changes: 1 addition & 3 deletions cmd/tidb/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,19 @@ import (
)

var (
nodePort = flag.Int("node-port", 8080, "node port")
requestCount = flag.Int("request-count", 500, "client test request count")
runTime = flag.Duration("run-time", 10*time.Minute, "client test run time")
clientCase = flag.String("case", "bank", "client test case, like bank")
historyFile = flag.String("history", "./history.log", "history file")
nemesises = flag.String("nemesis", "", "nemesis, seperated by name, like random_kill,all_kill")
verifyNames = flag.String("verifiers", "", "verifier names, seperate by comma, tidb-bank,tidb-bank-tso")
verifyNames = flag.String("verifiers", "", "verifier names, seperate by comma, tidb_bank,tidb_bank_tso")
)

func main() {
flag.Parse()

cfg := &control.Config{
DB: "tidb",
NodePort: *nodePort,
RequestCount: *requestCount,
RunTime: *runTime,
History: *historyFile,
Expand Down
6 changes: 4 additions & 2 deletions cmd/verifier/verify/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@ func Verify(ctx context.Context, historyFile string, verfier_names string) {
for _, name := range strings.Split(verfier_names, ",") {
var verifier history.Verifier
switch name {
case "tidb-bank":
case "tidb_bank":
verifier = tidb.BankVerifier{}
case "tidb-bank-tso":
case "tidb_bank_tso":
verifier = tidb.BankTsoVerifier{}
case "":
continue
default:
log.Printf("%s is not supported", name)
continue
Expand Down
58 changes: 26 additions & 32 deletions db/tidb/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,15 @@ package tidb
import (
"context"
"fmt"
"io/ioutil"
"log"
"os"
"os/exec"
"path"
"strconv"
"strings"
"time"

"github.com/siddontang/chaos/pkg/core"
"github.com/siddontang/chaos/pkg/util"
"github.com/siddontang/chaos/pkg/util/ssh"
)

const (
Expand Down Expand Up @@ -41,20 +40,21 @@ type db struct {
// SetUp initializes the database.
func (db *db) SetUp(ctx context.Context, nodes []string, node string) error {
// Try kill all old servers
exec.CommandContext(ctx, "killall", "-9", "tidb-server").Run()
exec.CommandContext(ctx, "killall", "-9", "tikv-server").Run()
exec.CommandContext(ctx, "killall", "-9", "pd-server").Run()
ssh.Exec(ctx, node, "killall", "-9", "tidb-server")
ssh.Exec(ctx, node, "killall", "-9", "tikv-server")
ssh.Exec(ctx, node, "killall", "-9", "pd-server")

db.nodes = nodes

if err := util.InstallArchive(ctx, archiveURL, deployDir); err != nil {
log.Printf("install archieve on node %s", node)
if err := util.InstallArchive(ctx, node, archiveURL, deployDir); err != nil {
return err
}

os.MkdirAll(path.Join(deployDir, "conf"), 0755)
os.MkdirAll(path.Join(deployDir, "log"), 0755)
util.Mkdir(ctx, node, path.Join(deployDir, "conf"))
util.Mkdir(ctx, node, path.Join(deployDir, "log"))

if err := ioutil.WriteFile(pdConfig, []byte("[replication]\nmax-replicas=5"), 0644); err != nil {
if err := util.WriteFile(ctx, node, pdConfig, strconv.Quote("[replication]\nmax-replicas=5")); err != nil {
return err
}

Expand All @@ -68,7 +68,7 @@ func (db *db) SetUp(ctx context.Context, nodes []string, node string) error {
"raft_election_timeout_ticks=10",
}

if err := ioutil.WriteFile(tikvConfig, []byte(strings.Join(tikvCfs, "\n")), 0644); err != nil {
if err := util.WriteFile(ctx, node, tikvConfig, strconv.Quote(strings.Join(tikvCfs, "\n"))); err != nil {
return err
}

Expand All @@ -86,6 +86,8 @@ func (db *db) Start(ctx context.Context, node string) error {
}

func (db *db) start(ctx context.Context, node string, inSetUp bool) error {
log.Printf("start database on node %s", node)

initialClusterArgs := make([]string, len(db.nodes))
for i, n := range db.nodes {
initialClusterArgs[i] = fmt.Sprintf("%s=http://%s:2380", n, n)
Expand All @@ -105,15 +107,15 @@ func (db *db) start(ctx context.Context, node string, inSetUp bool) error {
log.Printf("start pd-server on node %s", node)
pdPID := path.Join(deployDir, "pd.pid")
opts := util.NewDaemonOptions(deployDir, pdPID)
if err := util.StartDaemon(ctx, opts, pdBinary, pdArgs...); err != nil {
if err := util.StartDaemon(ctx, node, opts, pdBinary, pdArgs...); err != nil {
return err
}

if inSetUp {
time.Sleep(5 * time.Second)
}

if !util.IsDaemonRunning(ctx, pdBinary, pdPID) {
if !util.IsDaemonRunning(ctx, node, pdBinary, pdPID) {
return fmt.Errorf("fail to start pd on node %s", node)
}

Expand All @@ -134,15 +136,15 @@ func (db *db) start(ctx context.Context, node string, inSetUp bool) error {
log.Printf("start tikv-server on node %s", node)
tikvPID := path.Join(deployDir, "tikv.pid")
opts = util.NewDaemonOptions(deployDir, tikvPID)
if err := util.StartDaemon(ctx, opts, tikvBinary, tikvArgs...); err != nil {
if err := util.StartDaemon(ctx, node, opts, tikvBinary, tikvArgs...); err != nil {
return err
}

if inSetUp {
time.Sleep(30 * time.Second)
}

if !util.IsDaemonRunning(ctx, tikvBinary, tikvPID) {
if !util.IsDaemonRunning(ctx, node, tikvBinary, tikvPID) {
return fmt.Errorf("fail to start tikv on node %s", node)
}

Expand All @@ -155,15 +157,15 @@ func (db *db) start(ctx context.Context, node string, inSetUp bool) error {
log.Printf("start tidb-erver on node %s", node)
tidbPID := path.Join(deployDir, "tidb.pid")
opts = util.NewDaemonOptions(deployDir, tidbPID)
if err := util.StartDaemon(ctx, opts, tidbBinary, tidbArgs...); err != nil {
if err := util.StartDaemon(ctx, node, opts, tidbBinary, tidbArgs...); err != nil {
return err
}

if inSetUp {
time.Sleep(30 * time.Second)
}

if !util.IsDaemonRunning(ctx, tidbBinary, tidbPID) {
if !util.IsDaemonRunning(ctx, node, tidbBinary, tidbPID) {
return fmt.Errorf("fail to start tidb on node %s", node)
}

Expand All @@ -172,41 +174,33 @@ func (db *db) start(ctx context.Context, node string, inSetUp bool) error {

// Stop stops the database
func (db *db) Stop(ctx context.Context, node string) error {
if err := util.StopDaemon(ctx, tidbBinary, path.Join(deployDir, "tidb.pid")); err != nil {
if err := util.StopDaemon(ctx, node, tidbBinary, path.Join(deployDir, "tidb.pid")); err != nil {
return err
}

if err := util.StopDaemon(ctx, tikvBinary, path.Join(deployDir, "tikv.pid")); err != nil {
if err := util.StopDaemon(ctx, node, tikvBinary, path.Join(deployDir, "tikv.pid")); err != nil {
return err
}

if err := util.StopDaemon(ctx, pdBinary, path.Join(deployDir, "pd.pid")); err != nil {
return err
}

return nil
return util.StopDaemon(ctx, node, pdBinary, path.Join(deployDir, "pd.pid"))
}

// Kill kills the database
func (db *db) Kill(ctx context.Context, node string) error {
if err := util.KillDaemon(ctx, tidbBinary, path.Join(deployDir, "tidb.pid")); err != nil {
if err := util.KillDaemon(ctx, node, tidbBinary, path.Join(deployDir, "tidb.pid")); err != nil {
return err
}

if err := util.KillDaemon(ctx, tikvBinary, path.Join(deployDir, "tikv.pid")); err != nil {
if err := util.KillDaemon(ctx, node, tikvBinary, path.Join(deployDir, "tikv.pid")); err != nil {
return err
}

if err := util.KillDaemon(ctx, pdBinary, path.Join(deployDir, "pd.pid")); err != nil {
return err
}

return nil
return util.KillDaemon(ctx, node, pdBinary, path.Join(deployDir, "pd.pid"))
}

// IsRunning checks whether the database is running or not
func (db *db) IsRunning(ctx context.Context, node string) bool {
return util.IsDaemonRunning(ctx, tidbBinary, path.Join(deployDir, "tidb.pid"))
return util.IsDaemonRunning(ctx, node, tidbBinary, path.Join(deployDir, "tidb.pid"))
}

// Name returns the unique name for the database
Expand Down
2 changes: 0 additions & 2 deletions docker/control/bashrc
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@ You are currently in the base dir of the git repo for Chaos.
To run a test:
make
./script/deploy_agent.sh
./script/start_agent.sh
./bin/chaos-tidb
EOF

Expand Down
6 changes: 2 additions & 4 deletions pkg/control/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,15 @@ import (

// Config is the configuration for the controller.
type Config struct {
// NodePort is used to communicate with the node server.
NodePort int
// DB is the name which we want to run, you must register the db in the node before.
// DB is the name which we want to run.
DB string
// RequestCount controls how many requests a client sends to the db
RequestCount int
// RunTime controls how long the controller takes.
RunTime time.Duration

// History file
History string
History string
}

func (c *Config) adjust() {
Expand Down
Loading

0 comments on commit 0b153f4

Please sign in to comment.