Skip to content

Commit

Permalink
PWX-20808: etcd3 members list fix (portworx#108)
Browse files Browse the repository at this point in the history
* etcd3's `ListMembers()` modified to use configured KVDB endpoints first
  - the client URLs from MemberList() are used as a fallback, to get status for members that were not explicitly registered as endpoints

Signed-off-by: Zoran Rajic <[email protected]>
  • Loading branch information
zoxpx authored Jul 27, 2022
1 parent ffffa7f commit ecdffdc
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 35 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ services:
- docker
language: go
go:
- 1.10.7
- 1.14.15
install:
- curl -s -L https://github.com/golang/dep/releases/download/v0.5.0/dep-linux-amd64 -o $GOPATH/bin/dep
- chmod +x $GOPATH/bin/dep
Expand Down
97 changes: 64 additions & 33 deletions etcd/v3/kv_etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -1497,53 +1497,84 @@ func (et *etcdKV) RemoveMember(
}

func (et *etcdKV) ListMembers() (map[string]*kvdb.MemberInfo, error) {
var (
// fnMemberStatus queries the maintenance Status endpoint at cliURL and
// converts the reply into a kvdb.MemberInfo.
//
// It returns the member info, the numeric ID of the member that answered
// (taken from the response header), and an error if cliURL is empty, the
// Status call fails, or the reply carries no header. PeerUrls is left
// unset; the caller is expected to fill it in afterwards.
fnMemberStatus = func(cliURL string) (*kvdb.MemberInfo, uint64, error) {
	if cliURL == "" {
		return nil, 0, fmt.Errorf("Must provide client URL")
	}
	// Use the context with no leader requirement as we might be hitting
	// an endpoint which is down
	ctx, cancel := et.MaintenanceContext()
	// Deferred so the context is released on every exit path, including
	// a panic inside the Status call.
	defer cancel()

	endpointStatus, err := et.maintenanceClient.Status(ctx, cliURL)
	if err != nil {
		return nil, 0, err
	}
	// Guard against a reply without a header: dereferencing it would
	// panic, whereas callers already handle an error by logging and
	// skipping this endpoint.
	if endpointStatus == nil || endpointStatus.Header == nil {
		return nil, 0, fmt.Errorf("status response from %s has no header", cliURL)
	}

	mid := endpointStatus.Header.MemberId
	return &kvdb.MemberInfo{
		// PeerUrls: .. must fill later
		ClientUrls: []string{cliURL},
		// The responder is the leader when the leader ID it reports is its own.
		Leader:    endpointStatus.Leader == mid,
		DbSize:    endpointStatus.DbSize,
		IsHealthy: true,
		ID:        strconv.FormatUint(mid, 16),
	}, mid, nil
}
)
ctx, cancel := et.MaintenanceContextWithLeader()
memberListResponse, err := et.kvClient.MemberList(ctx)
cancel()

if err != nil {
return nil, err
}
resp := make(map[string]*kvdb.MemberInfo)

membersMap := make(map[uint64]*kvdb.MemberInfo)
mLock.Lock()
defer mLock.Unlock()

// Get status from Endpoints
for _, ep := range et.GetEndpoints() {
mi, mid, err := fnMemberStatus(ep)
if err != nil || mi == nil {
logrus.WithError(err).Warnf("kvClient.Status(%s) returned error", ep)
continue
}
membersMap[mid] = mi
}

// Get status from MemberList() nodes, that were missing from GetEndpoints() list
for _, member := range memberListResponse.Members {
var (
leader bool
dbSize int64
isHealthy bool
clientURLs []string
)
// etcd versions < v3.2.15 will return empty ClientURLs if
// the node is unhealthy. For versions >= v3.2.15 they populate
// ClientURLs but return an error status
if len(member.ClientURLs) != 0 {
// Use the context with no leader requirement as we might be hitting
// an endpoint which is down
ctx, cancel := et.MaintenanceContext()
endpointStatus, err := et.maintenanceClient.Status(
ctx,
member.ClientURLs[0],
)
cancel()
if err == nil {
if member.ID == endpointStatus.Leader {
leader = true
for _, cu := range member.ClientURLs {
if _, has := membersMap[member.ID]; !has {

mi, mid, err := fnMemberStatus(cu)
if err != nil || mi == nil {
logrus.WithError(err).Warnf("kvClient.Status(%s) returned error", cu)
continue
}
dbSize = endpointStatus.DbSize
isHealthy = true
// Only set the urls if status is healthy
clientURLs = member.ClientURLs
membersMap[mid] = mi
}
}
resp[member.Name] = &kvdb.MemberInfo{
PeerUrls: member.PeerURLs,
ClientUrls: clientURLs,
Leader: leader,
DbSize: dbSize,
IsHealthy: isHealthy,
ID: strconv.FormatUint(member.ID, 16),
}

// Fill PeerURLs; also, remap with "Name" as a key
resp := make(map[string]*kvdb.MemberInfo)
for _, member := range memberListResponse.Members {
if mi, has := membersMap[member.ID]; has {
mi.PeerUrls = member.PeerURLs
resp[member.Name] = mi
} else {
// no status -- add "blank" MemberInfo
resp[member.Name] = &kvdb.MemberInfo{
PeerUrls: member.PeerURLs,
ID: strconv.FormatUint(member.ID, 16),
}
}
}

return resp, nil
}

Expand Down
6 changes: 5 additions & 1 deletion test/kv_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ func testMemberStatus(kv kvdb.Kvdb, t *testing.T) {
select {
case <-c:
return
case <-time.After(5 * time.Minute):
case <-time.After(10 * time.Minute):
t.Fatalf("testMemberStatus timeout")
}
}
Expand Down Expand Up @@ -307,6 +307,10 @@ func startEtcd(index int, initCluster map[string][]string, initState string) (*e
initState,
}

// unset env that can prevent etcd startup
os.Unsetenv("ETCD_LISTEN_CLIENT_URLS")
os.Unsetenv("ETCDCTL_API")

cmd := exec.Command("/tmp/test-etcd/etcd", etcdArgs...)
cmd.Stdout = ioutil.Discard
cmd.Stderr = ioutil.Discard
Expand Down

0 comments on commit ecdffdc

Please sign in to comment.