Skip to content

Commit

Permalink
Merge pull request #17 from sassoftware/jowood-pod-log-bug-fix
Browse files Browse the repository at this point in the history
fix trace printed by download-pod-logs if pod is still initializing
  • Loading branch information
sasjowood authored Sep 11, 2020
2 parents e01702f + 5b53594 commit c5b3d2d
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 23 deletions.
23 changes: 15 additions & 8 deletions download_pod_logs/download_pod_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,22 +138,29 @@ def main(argv: List):
try:
print()
with LRPIndicator(enter_message="Downloading pod logs"):
log_dir, timeout_pods = log_downloader.download_logs(selected_components=args.selected_components,
tail=args.tail)
log_dir, timeout_pods, error_pods = log_downloader.download_logs(
selected_components=args.selected_components, tail=args.tail)

# print any containers that encountered errors, if present
if len(error_pods) > 0:
print("\nERROR: Log content for the following containers could not be downloaded:\n", file=sys.stderr)

# print the containers that had an error
for err_info in error_pods:
print(f" [{err_info[0]}] in pod [{err_info[1]}]", file=sys.stderr)

print("\nContainer status information is available in the log file.", file=sys.stderr)

# print any pods that timed out, if present
if len(timeout_pods) > 0:
print()
print("WARNING: Logs for the following pods were not downloaded because they exceeded the configured wait "
f"time ({args.wait}s):")
print()
print("\nWARNING: Log content for the following pods was not downloaded because the configured wait time "
f"was exceeded ({args.wait}s):\n")

# print the pods that timed out
for pod_name in timeout_pods:
print(f" {pod_name}")

print()
print("The wait time can be increased using the \"--wait=\" option.")
print("\nThe wait time can be increased using the \"--wait=\" option.")

# print output directory
print()
Expand Down
41 changes: 28 additions & 13 deletions download_pod_logs/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,11 @@
####################################################################
import datetime
import os
import sys

from multiprocessing import Pool, TimeoutError
from multiprocessing.pool import ApplyResult
from subprocess import CalledProcessError
from typing import Any, Dict, List, Optional, Text, Tuple
from typing import Any, AnyStr, Dict, List, Optional, Text, Tuple

from viya_ark_library.k8s.sas_kubectl_interface import KubectlInterface
from viya_ark_library.k8s.sas_k8s_errors import KubectlRequestForbiddenError
Expand Down Expand Up @@ -64,7 +63,7 @@ def __init__(self, kubectl: KubectlInterface, output_dir: Text = DEFAULT_OUTPUT_
self._wait = wait

def download_logs(self, selected_components: Optional[List[Text]] = None, tail: int = DEFAULT_TAIL) \
-> Tuple[Text, List[Text]]:
-> Tuple[Text, List[Text], List[Tuple[Text, Text]]]:
"""
Downloads the log files for all or a select group of pods and their containers. A status summary is prepended
to the downloaded log file.
Expand All @@ -76,8 +75,9 @@ def download_logs(self, selected_components: Optional[List[Text]] = None, tail:
:raises NoPodsError: If pods can be listed but no pods are found in the given namespace.
:raises NoMatchingPodsError: If no available pods match a component in the selected_components
:returns: A tuple containing the absolute output directory and a list of names for any pods for which the
timeout was reached when gathering the log.
:returns: A tuple containing the absolute output directory, a list of names for any pods for which the
timeout was reached when gathering the log, and a list of tuples representing any errors encountered
in gathering the logs for a (container_name, pod_name).
"""
# get the current namespace
namespace = self._kubectl.get_namespace()
Expand Down Expand Up @@ -142,16 +142,22 @@ def download_logs(self, selected_components: Optional[List[Text]] = None, tail:

# run the processes
timeout_pods: List[Text] = list()
error_pods: List[Tuple[Text, Text]] = list()
for process in write_log_processes:
try:
process.get_process().get(timeout=self._wait)
err_info: Optional[Tuple[Text, Text]] = process.get_process().get(timeout=self._wait)

# add the error message, if returned
if err_info:
error_pods.append(err_info)
except TimeoutError:
timeout_pods.append(process.get_pod_name())

return os.path.abspath(self._output_dir), timeout_pods
return os.path.abspath(self._output_dir), timeout_pods, error_pods

@staticmethod
def _write_log(kubectl: KubectlInterface, pod: KubernetesResource, tail: int, output_dir: Text) -> None:
def _write_log(kubectl: KubectlInterface, pod: KubernetesResource, tail: int, output_dir: Text) -> \
Optional[Tuple[Text, Text]]:
"""
Internal method used for gathering the status and log for each container in the provided pod and writing
the gathered information to an on-disk log file.
Expand All @@ -160,6 +166,7 @@ def _write_log(kubectl: KubectlInterface, pod: KubernetesResource, tail: int, ou
:param pod: KubernetesResource representation of the pod whose logs will be retrieved.
:param tail: Lines of recent log file to retrieve.
:param output_dir: The directory where log files should be written.
:returns: A printable error message if the log could not be retrieved.
"""
# if this pod is not a SAS-provided resource, skip it
if not pod.is_sas_resource():
Expand Down Expand Up @@ -201,14 +208,20 @@ def _write_log(kubectl: KubectlInterface, pod: KubernetesResource, tail: int, ou
# close the status header
output_file.writelines(("#" * 50) + "\n\n")

# create tuple to hold information about failed containers/pods
err_info: Optional[Tuple[Text, Text]] = None

# call kubectl to get the log for this container
try:
log = kubectl.logs(pod_name=f"{pod.get_name()} {container_status.get_name()}",
all_containers=False, prefix=False, tail=tail)
log: List[AnyStr] = kubectl.logs(pod_name=f"{pod.get_name()} {container_status.get_name()}",
all_containers=False, prefix=False, tail=tail)

except CalledProcessError:
# print a message to stderr if the log file could not be retrieved for this container
print(f"\nERROR: A log could not be retrieved for the container [{container_status.get_name()}] "
f"in pod [{pod.get_name()}] in namespace [{kubectl.get_namespace()}]", file=sys.stderr)
err_msg = (f"ERROR: A log could not be retrieved for the container [{container_status.get_name()}] "
f"in pod [{pod.get_name()}] in namespace [{kubectl.get_namespace()}]")
log: List[AnyStr] = [err_msg]

err_info = (container_status.get_name(), pod.get_name())

# parse any structured logging
file_content = SASStructuredLoggingParser.parse_log(log)
Expand All @@ -217,6 +230,8 @@ def _write_log(kubectl: KubectlInterface, pod: KubernetesResource, tail: int, ou
output_file.write("Beginning log...\n\n")
output_file.write("\n".join(file_content))

return err_info


####################################################################
# Class: NoMatchingPodsError ###
Expand Down
4 changes: 2 additions & 2 deletions download_pod_logs/test/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def test_download_logs() -> None:
this: PodLogDownloader = PodLogDownloader(kubectl=KubectlTest(), output_dir="./test_download_logs")

# run the method being tested
logs_dir, timeout_pods = this.download_logs()
logs_dir, timeout_pods, error_pods = this.download_logs()

# verify the expected number of logs were written to disk
assert len([name for name in os.listdir(logs_dir) if os.path.isfile(os.path.join(logs_dir, name))]) == 6
Expand All @@ -120,7 +120,7 @@ def test_download_logs_with_valid_selected_component() -> None:
this: PodLogDownloader = PodLogDownloader(kubectl=KubectlTest(), output_dir="./test_download_logs_selected")

# run the method being tested
logs_dir, timeout_pods = this.download_logs(selected_components=["sas-annotations"])
logs_dir, timeout_pods, error_pods = this.download_logs(selected_components=["sas-annotations"])

# verify the expected number of logs were written to disk
assert len([name for name in os.listdir(logs_dir) if os.path.isfile(os.path.join(logs_dir, name))]) == 1
Expand Down

0 comments on commit c5b3d2d

Please sign in to comment.