Skip to content

Commit

Permalink
collectinfo, collectinfo-analyzer and info command improvements (#22)
Browse files Browse the repository at this point in the history
* TOOLS-666: [asadm] Fix empty aggregation columns to avoid error foreground.

* TOOLS-753: [asadm-loganalyzer] Fix to avoid negative value in latency output.

* TOOLS-804 : [asadm] Fix to work with 127 nodes.

* TOOLS-874: [asadm] Modify 'show pmap' output.

* TOOLS-900: [asadm] Modify to display new namespace statistics (rack-id, non_replica).

* TOOLS-912: [asadm] Fix command history to work after failed attempt to connect to cluster.

* TOOLS-917: [asadm] Fix remote system statistics collection by using ssh key.

* TOOLS-918: [asadm] Capture storage device partitions sizes in collectinfo.

* TOOLS-923: [asadm-healthcheck] Fix anomaly function to handle strings.

* TOOLS-924: [asadm] Fix collectinfo AWS data collection.

* TOOLS-936: [asadm] Add new 'asinfo' mode to get faster response.

* TOOLS-939: [asadm-healthcheck] Fix memory size configuration check queries.

* TOOLS-941: [asadm-healthcheck] Flag out 'services' discrepancy.

* TOOLS-944: [asadm] Modify to run health/summary as part of collectinfo and write the output of each to a separate file.

* TOOLS-952: [asadm] Fix dependency installation scripts for Amazon Linux.

* TOOLS-962: [asadm] Fix pmap error during migrations.

* TOOLS-666 : [asadm] Fix empty aggregation columns to avoid error foreground.

* Added health query to check non-default value of defray-sleep.

* Fixed byte conversion for top output parsing for KiB format (Ubuntu).

* Summary computation modified to handle empty statistics/config data.

* Collectinfo-analyzer modified to use collectinfo_parser for all commands.
  • Loading branch information
hbpatre authored Sep 5, 2017
1 parent 8f3bfed commit 361adf0
Show file tree
Hide file tree
Showing 28 changed files with 2,338 additions and 1,735 deletions.
2 changes: 2 additions & 0 deletions asadm-deps/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ elif [ "$OS" == "linux" ]; then
DISTRO="rpm"
elif [ -f /etc/debian_version ] ; then
DISTRO="deb"
elif [ -f /etc/system-release ] ; then
DISTRO="rpm"
fi
else
echo "No support to OS {$OS}"
Expand Down
157 changes: 124 additions & 33 deletions asadm.py

Large diffs are not rendered by default.

579 changes: 297 additions & 282 deletions lib/basiccontroller.py

Large diffs are not rendered by default.

14 changes: 9 additions & 5 deletions lib/client/assocket.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,15 @@

class ASSocket:

def __init__(self, node, ip, port, pool_size=3, timeout=5):
def __init__(self, ip, port, tls_name, user, password, ssl_context, pool_size=3, timeout=5):
self.sock = None
self.node = node

self.ip = ip
self.port = port
self.tls_name = tls_name
self.user = user
self.password = password
self.ssl_context = ssl_context
self.pool_size = pool_size
self._timeout = timeout

Expand Down Expand Up @@ -99,9 +103,9 @@ def _create_socket(self, host, port, tls_name=None, user=None,
def connect(self):
try:
self.sock = self._create_socket(self.ip, self.port,
tls_name=self.node.tls_name, user=self.node.user,
password=self.node.password,
ssl_context=self.node.ssl_context)
tls_name=self.tls_name, user=self.user,
password=self.password,
ssl_context=self.ssl_context)

if not self.sock:
return False
Expand Down
132 changes: 91 additions & 41 deletions lib/client/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,6 @@
except ImportError:
PEXPECT_VERSION = NO_MODULE

COMMAND_PROMPT = '[#$] '

def getfqdn(address, timeout=0.5):
# note: cannot use timeout lib because signal must be run from the
# main thread
Expand Down Expand Up @@ -92,6 +90,7 @@ def __init__(self, address, port=3000, tls_name=None, timeout=5, user=None,
ALSO NOTE: May be better to just use telnet instead?
"""
self.logger = logging.getLogger('asadm')
self.remote_system_command_prompt = '[#$] '
self._update_IP(address, port)
self.port = port
self.xdr_port = 3004 # TODO: Find the xdr port
Expand Down Expand Up @@ -308,7 +307,7 @@ def _get_connection(self, ip, port):
pass
if sock:
return sock
sock = ASSocket(self, ip, port, timeout=self._timeout)
sock = ASSocket(ip, port, self.tls_name, self.user, self.password, self.ssl_context, timeout=self._timeout)
if sock.connect():
return sock
return None
Expand Down Expand Up @@ -402,6 +401,17 @@ def info_node(self):

return self.info("node")

@return_exceptions
def info_ip_port(self):
"""
Get this nodes ip:port.
Returns:
string -- this node's ip:port.
"""

return self.create_key(self.ip, self.port)

@return_exceptions
def _info_peers_list_helper(self, peers):
"""
Expand Down Expand Up @@ -546,15 +556,22 @@ def info_services_alt(self):
return self._info_services_helper(self.info("services-alternate"))

@return_exceptions
def info_service(self, address, return_None=False):
def info_service(self, address="", return_None=False):
try:
service = self.info("service")
s = map(util.info_to_tuple, util.info_to_list(service))

return map(lambda v: (v[0], int(v[1]), self.tls_name), s)

except Exception:
pass

if return_None:
return None

if not address:
address = self.ip

return [(address, self.port, self.tls_name)]

@return_exceptions
Expand Down Expand Up @@ -959,22 +976,26 @@ def info_XDR_get_config(self):
return xdr_configs

@return_exceptions
def info_histogram(self, histogram):
def info_histogram(self, histogram, raw_output=False):
namespaces = self.info_namespaces()

data = {}
for namespace in namespaces:
try:
datum = self.info("hist-dump:ns=%s;hist=%s" %
(namespace, histogram))
datum = datum.split(',')
datum.pop(0) # don't care about ns, hist_name, or length
width = int(datum.pop(0))
datum[-1] = datum[-1].split(';')[0]
datum = map(int, datum)

data[namespace] = {
'histogram': histogram, 'width': width, 'data': datum}
if raw_output:
data[namespace] = datum

else:
datum = datum.split(',')
datum.pop(0) # don't care about ns, hist_name, or length
width = int(datum.pop(0))
datum[-1] = datum[-1].split(';')[0]
datum = map(int, datum)

data[namespace] = {'histogram': histogram, 'width': width, 'data': datum}

except Exception:
pass
return data
Expand Down Expand Up @@ -1164,11 +1185,14 @@ def _login_remote_system(self, ip, user, pwd, ssh_key=None, port=None):
@return_exceptions
def _spawn_remote_system(self, ip, user, pwd, ssh_key=None, port=None):

global COMMAND_PROMPT
terminal_prompt = '(?i)terminal type\?'
terminal_prompt_msg = '(?i)terminal type'
ssh_newkey_msg = '(?i)are you sure you want to continue connecting'
connection_closed_msg = "(?i)connection closed by remote host"
permission_denied_msg = "(?i)permission denied"
pwd_passphrase_msg = "(?i)(?:password)|(?:passphrase for key)"

terminal_type = 'vt100'

ssh_newkey = '(?i)are you sure you want to continue connecting'
ssh_options = "-o 'NumberOfPasswordPrompts=1' "

if port:
Expand All @@ -1183,43 +1207,67 @@ def _spawn_remote_system(self, ip, user, pwd, ssh_key=None, port=None):
ssh_options += ' -i %s' % (ssh_key)

s = pexpect.spawn('ssh %s -l %s %s'%(ssh_options, str(user), str(ip)))

i = s.expect([pexpect.TIMEOUT, ssh_newkey, COMMAND_PROMPT, '(?i)(?:password)|(?:passphrase for key)'])
i = s.expect([ssh_newkey_msg, self.remote_system_command_prompt, pwd_passphrase_msg, permission_denied_msg, terminal_prompt_msg, pexpect.TIMEOUT, connection_closed_msg, pexpect.EOF], timeout=10)

if i == 0:
# Timeout
return None

enter_pwd = False

if i == 1:
# In this case SSH does not have the public key cached.
s.sendline ('yes')
s.expect ('(?i)(?:password)|(?:passphrase for key)')
enter_pwd = True
s.sendline("yes")
i = s.expect([ssh_newkey_msg, self.remote_system_command_prompt, pwd_passphrase_msg, permission_denied_msg, terminal_prompt_msg, pexpect.TIMEOUT])
if i == 2:
# password or passphrase
s.sendline(pwd)
i = s.expect([ssh_newkey_msg, self.remote_system_command_prompt, pwd_passphrase_msg, permission_denied_msg, terminal_prompt_msg, pexpect.TIMEOUT])
if i == 4:
s.sendline(terminal_type)
i = s.expect([ssh_newkey_msg, self.remote_system_command_prompt, pwd_passphrase_msg, permission_denied_msg, terminal_prompt_msg, pexpect.TIMEOUT])
if i == 7:
s.close()
return None

elif i == 2:
if i == 0:
# twice not expected
s.close()
return None
elif i == 1:
pass

elif i == 2:
# password prompt again means input password is wrong
s.close()
return None
elif i == 3:
enter_pwd = True
# permission denied means input password is wrong
s.close()
return None
elif i == 4:
# twice not expected
s.close()
return None
elif i == 5:
# timeout
# Two possibilities
# 1. couldn't login
# 2. couldn't match shell prompt
# safe option is to pass
pass
elif i == 6:
# connection closed by remote host
s.close()
return None
else:
# unexpected
s.close()
return None

if enter_pwd:
s.sendline(pwd)
i = s.expect ([COMMAND_PROMPT, terminal_prompt])
if i == 1:
s.sendline (terminal_type)
s.expect (COMMAND_PROMPT)
self.remote_system_command_prompt = "\[PEXPECT\][\$\#] "
s.sendline("unset PROMPT_COMMAND")

COMMAND_PROMPT = "\[PEXPECT\][\$\#] "
# sh style
s.sendline ("PS1='[PEXPECT]\$ '")
i = s.expect ([pexpect.TIMEOUT, COMMAND_PROMPT], timeout=10)

i = s.expect ([pexpect.TIMEOUT, self.remote_system_command_prompt], timeout=10)
if i == 0:
# csh-style.
s.sendline ("set prompt='[PEXPECT]\$ '")
i = s.expect ([pexpect.TIMEOUT, COMMAND_PROMPT], timeout=10)
i = s.expect ([pexpect.TIMEOUT, self.remote_system_command_prompt], timeout=10)

if i == 0:
return None
Expand All @@ -1245,7 +1293,7 @@ def _execute_remote_system_command(self, conn, cmd):
if PEXPECT_VERSION == NEW_MODULE:
conn.prompt()
elif PEXPECT_VERSION == OLD_MODULE:
conn.expect (COMMAND_PROMPT)
conn.expect (self.remote_system_command_prompt)
else:
return None
return conn.before
Expand Down Expand Up @@ -1281,6 +1329,8 @@ def _stop_ssh_connection(self, conn):
if conn:
conn.close()

self.remote_system_command_prompt = '[#$] '

@return_exceptions
def _get_remote_host_system_statistics(self, commands):
sys_stats = {}
Expand Down
Loading

0 comments on commit 361adf0

Please sign in to comment.