From 1d5d128eb90ff670fa2d5192fc6c3dae5a8b120d Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Wed, 15 Apr 2020 23:36:47 +0200
Subject: [PATCH 01/31] Custom profiling commands & flamegraphs

Allows running custom profiling commands on the nodes via bcc-tools
and perf, and obtains flamegraphs based on their output.

See the updated README.md in perf/benchmark for examples and more
information.

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/benchmark/README.md                     | 24 ++++++
 perf/benchmark/runner/runner.py              | 90 +++++++++++++++++++-
 perf/benchmark/templates/fortio.yaml         | 54 +++++++++++-
 perf/benchmark/values.yaml                   |  4 +-
 perf/docker/Dockerfile.profiling             | 21 +++++
 perf/docker/perf/setup-node-for-profiling.sh | 42 +++++++++
 6 files changed, 231 insertions(+), 4 deletions(-)
 create mode 100644 perf/docker/Dockerfile.profiling
 create mode 100644 perf/docker/perf/setup-node-for-profiling.sh

diff --git a/perf/benchmark/README.md b/perf/benchmark/README.md
index 411cb91acd..22f18cad10 100644
--- a/perf/benchmark/README.md
+++ b/perf/benchmark/README.md
@@ -141,12 +141,36 @@ optional arguments:
   --no_clientsidecar    do not run clientsidecar-only for all
   --bothsidecar         run both clientsidecar and serversidecar
   --no_sidecar          do not run clientsidecar and serversidecar
+  --custom_profiling_command
+                        runs a custom profiling command on the nodes for the client and server,
+                        and produces a flamegraph based on that.
+                        Example on-cpu profile using bcc tools for the envoy sidecar proxy:
+                        --custom_profiling_command=\"profile-bpfcc -df {duration} -p {sidecar_pid}\"
+                        - runner.py will replace {duration} with whatever was specified for --duration.
+                        - runner.py will replace {sidecar_pid} with the actual process id of the envoy
+                          sidecar process.
+  --custom_profiling_name
+                        filename prefix for the result of any --custom_profiling_command
 ```
 
 Note:
 - `runner.py` will run all combinations of the parameters given. However, in order to reduce ambiguity when generating the graph, it would be
  better to change one parameter at a time and fix other parameters
 - if you want to run with `--perf` flag to generate a flame graph, please make sure you have the permission to gather perf data, please refer to step 2 of this [README](https://github.com/istio/tools/tree/master/perf/benchmark/flame#setup-perf-tool)
+- if you want to run with `--custom_profiling_command`, `profilingMode` must be set to `true` in `values.yaml`. Doing so will set up the client and server pods to run the perf/profiling container. It's worth noting that this container runs `--privileged`, and that `hostIPC` and `hostPID` will also be enabled,
+weakening security. Resulting flamegraphs will be written to `flame/flameoutput`.
+- sample sidecar profiling commands for `--custom_profiling_command`:
+  - "profile-bpfcc -df {duration} -p {sidecar_pid}" sidecar on-cpu profile
+  - "offcputime-bpfcc -df {duration} -p {sidecar_pid}" sidecar off-cpu profile
+  - "offwaketime-bpfcc -df {duration} -p {sidecar_pid}" sidecar offwaketime profile
+  - "wakeuptime-bpfcc -f -p {sidecar_pid} {duration}" sidecar wakeuptime profile
+  - "perf record -F 99 -a -g -p {sidecar_pid} -- sleep {duration} && perf script | ~/FlameGraph/stackcollapse-perf.pl | c++filt -n" on-cpu perf-generated profile
+  - "stackcount-bpfcc c:*alloc* -df -D {duration} -p {sidecar_pid}" profile calls to `*alloc*`
+- It's also possible to run machine-wide profiling, for example:
+  - "profile-bpfcc -df {duration}" for obtaining a machine-wide on-cpu flamegraph.
+  - See http://www.brendangregg.com/FlameGraphs/ for more examples and information.
+- Enabling `profilingMode` in `values.yaml` will also bring up and expose Prometheus's `node_exporter` at the configured port (default: 9100),
+  accessible over HTTP via `/metrics`.
 
 For example:
 
diff --git a/perf/benchmark/runner/runner.py b/perf/benchmark/runner/runner.py
index 36680b9b77..13fb9f5978 100644
--- a/perf/benchmark/runner/runner.py
+++ b/perf/benchmark/runner/runner.py
@@ -22,8 +22,11 @@
 import shlex
 import uuid
 import sys
+
 from subprocess import getoutput
 from urllib.parse import urlparse
+from threading import Thread
+from time import sleep
 import yaml
 from fortio import METRICS_START_SKIP_DURATION, METRICS_END_SKIP_DURATION
 
@@ -114,7 +117,9 @@ def __init__(
             ingress=None,
             mesh="istio",
             cacert=None,
-            load_gen_type="fortio"):
+            load_gen_type="fortio",
+            custom_profiling_command=None,
+            custom_profiling_name="default-profile"):
         self.run_id = str(uuid.uuid4()).partition('-')[0]
         self.headers = headers
         self.conn = conn
@@ -127,6 +132,8 @@ def __init__(
         self.r = "0.00005"
         self.telemetry_mode = telemetry_mode
         self.perf_record = perf_record
+        self.custom_profiling_command = custom_profiling_command
+        self.custom_profiling_name = custom_profiling_name
         self.server = pod_info("-lapp=" + server, namespace=self.ns)
         self.client = pod_info("-lapp=" + client, namespace=self.ns)
         self.additional_args = additional_args
@@ -245,6 +252,70 @@ def run(self, headers, conn, qps, size, duration):
         headers_cmd = self.generate_headers_cmd(headers)
         fortio_cmd = self.generate_fortio_cmd(headers_cmd, conn, qps, duration, grpc, cacert_arg, labels)
 
+        def run_profiling_in_background(exec_cmd, podname, filename_prefix, profiling_command):
+            filename = "{filename_prefix}-{podname}".format(
+                filename_prefix=filename_prefix, podname=podname)
+            profiler_cmd = "{exec_cmd} \"{profiling_command} > {filename}.profile\"".format(
+                profiling_command=profiling_command,
+                exec_cmd=exec_cmd,
+                filename=filename
+            )
+            # Run the profile collection tool, and wait for it to finish.
+            process = subprocess.Popen(shlex.split(profiler_cmd))
+            process.wait()
+            # Next we feed the profiling data to the flamegraphing script.
+            flamegraph_cmd = "{exec_cmd} \"./FlameGraph/flamegraph.pl --title='{profiling_command} Flame Graph'  < {filename}.profile > {filename}.svg\"".format(
+                exec_cmd=exec_cmd,
+                profiling_command=profiling_command,
+                filename=filename
+            )
+            process = subprocess.Popen(shlex.split(flamegraph_cmd))
+            process.wait()
+            # Lastly copy the resulting flamegraph out of the container
+            kubectl_cp(podname + ":{filename}.svg".format(filename=filename),
+                       "flame/flameoutput/{filename}.svg".format(filename=filename), "perf")
+
+        threads = []
+
+        if self.custom_profiling_command:
+            # We run any custom profiling command on both pods, as one runs on each node we're interested in.
+            for pod in [self.client.name, self.server.name]:
+                exec_cmd_on_pod = "kubectl exec -n {namespace} {podname} -c perf -it -- bash -c ".format(
+                    namespace=os.environ.get("NAMESPACE", "twopods"),
+                    podname=pod
+                )
+                
+                # Wait for node_exporter to run, which indicates the profiling initialization container has finished initializing.
+                # once the init probe is supported, move this to a http probe instead in fortio.yaml
+                ne_pid = ""
+                attempts = 0
+                while ne_pid == "" and attempts < 60:
+                    ne_pid = getoutput("{exec_cmd} \"pgrep 'node_exporter'\"".format(exec_cmd=exec_cmd_on_pod)).strip()
+                    attempts = attempts + 1
+                    print(".")
+                    sleep(1)
+
+                # Find side car process id's in case the profiling command needs it.
+                sidecar_ppid = getoutput("{exec_cmd} \"pgrep -f 'pilot-agent proxy sidecar'\"".format(exec_cmd=exec_cmd_on_pod)).strip()
+                sidecar_pid = getoutput("{exec_cmd} \"pgrep -P {sidecar_ppid}\"".format(exec_cmd=exec_cmd_on_pod, sidecar_ppid=sidecar_ppid)).strip()
+                profiling_command = self.custom_profiling_command.format(
+                    duration=self.duration, sidecar_pid=sidecar_pid)
+                threads.append(Thread(target=run_profiling_in_background, args=[
+                    exec_cmd_on_pod, pod, self.custom_profiling_name, profiling_command]))
+
+        for thread in threads:
+            thread.start()
+
+        if self.run_ingress:
+            print('-------------- Running in ingress mode --------------')
+            kubectl_exec(self.client.name, self.ingress(fortio_cmd))
+            if self.perf_record:
+                run_perf(
+                    self.mesh,
+                    self.server.name,
+                    labels + "_srv_ingress",
+                    duration=40)
+
         if self.run_baseline:
             self.execute_sidecar_mode("baseline", self.load_gen_type, fortio_cmd, self.nosidecar, labels, "")
 
@@ -267,6 +338,11 @@ def run(self, headers, conn, qps, size, duration):
                     labels + "_srv_ingress",
                     duration=40)
 
+        if len(threads) > 0:
+            if self.custom_profiling_command:
+                for thread in threads:
+                    thread.join()
+            print("background profiler thread finished - flamegraphs are available in flame/flameoutput")
 
 PERFCMD = "/usr/lib/linux-tools/4.4.0-131-generic/perf"
 FLAMESH = "flame.sh"
@@ -365,7 +441,9 @@ def run_perf_test(args):
             mesh=args.mesh,
             telemetry_mode=args.telemetry_mode,
             cacert=args.cacert,
-            load_gen_type=args.load_gen_type)
+            load_gen_type=args.load_gen_type,
+            custom_profiling_command=args.custom_profiling_command,
+            custom_profiling_name=args.custom_profiling_name)
 
     if fortio.duration <= min_duration:
         print("Duration must be greater than {min_duration}".format(
@@ -425,6 +503,14 @@ def get_parser():
         "--perf",
         help="also run perf and produce flame graph",
         default=False)
+    parser.add_argument(
+        "--custom_profiling_command",
+        help="Run custom profiling commands on the nodes for the client and server, and produce a flamegraph based on their outputs. E.g. --custom_profiling_command=\"/usr/share/bcc/tools/profile -df 40\"",
+        default=False)
+    parser.add_argument(
+        "--custom_profiling_name",
+        help="Name to be added to the flamegraph resulting from --custom_profiling_command",
+        default="default-profile")
     parser.add_argument(
         "--ingress",
         help="run traffic through ingress, should be a valid URL",
diff --git a/perf/benchmark/templates/fortio.yaml b/perf/benchmark/templates/fortio.yaml
index 342b32932b..851e07d575 100644
--- a/perf/benchmark/templates/fortio.yaml
+++ b/perf/benchmark/templates/fortio.yaml
@@ -40,7 +40,11 @@ spec:
     protocol: TCP
   - name: grpc-pinga
     port: 8076
+{{- if $.Values.profilingMode }}
+  - name: node-exporter
+    port: 9100
     protocol: TCP
+{{- end }}
   selector:
     app: {{ $.name }}
 {{- if $.V.expose }}
@@ -98,7 +102,7 @@ spec:
         config.linkerd.io/skip-inbound-ports: "8077"
 {{- end }}
         # exclude inbound ports of the uncaptured container
-        traffic.sidecar.istio.io/excludeInboundPorts: "8076,8077,8078"
+        traffic.sidecar.istio.io/excludeInboundPorts: "8076,8077,8078,{{ $.Values.nodeExporterPort }}"
         sidecar.istio.io/proxyCPU: {{ $.Values.proxy.cpu }}
         sidecar.istio.io/proxyMemory: {{ $.Values.proxy.memory }}
       labels:
@@ -118,9 +122,30 @@ spec:
                 - "fortioclient"
 {{- end }}
             topologyKey: "kubernetes.io/hostname"
+{{- if $.Values.profilingMode }}
+      hostIPC: true
+      hostPID: true   
+{{- end }}
       volumes:
       - name: shared-data
         emptyDir: {}
+{{- if $.Values.profilingMode }}
+      - name: sys
+        hostPath:
+          path: /sys
+      - name: lsb-release
+        hostPath:
+          path: /etc/lsb-release
+      - name: modules-generated
+        hostPath:
+          path: /var/cache/kernel/modules
+      - name: headers-generated
+        hostPath:
+          path: /var/cache/kernel/headers
+      - name: usr-host
+        hostPath:
+          path: /usr
+{{- end }}
       containers:
       - name: captured
         securityContext:
@@ -152,6 +177,33 @@ spec:
         args:
         - /bin/sleep
         - infinity
+{{- if $.Values.profilingMode }}
+      - name: perf
+        image: {{ $.Values.perfImage }}
+        imagePullPolicy: Always
+        securityContext:
+          privileged: true
+          capabilities:
+            add:
+              - SYS_ADMIN
+              - SYS_PTRACE
+        command: ["/bin/bash"]
+        args: ["-c", "./setup-node-for-profiling.sh :{{ $.Values.nodeExporterPort }}"]
+        ports:
+        - containerPort: {{ $.Values.nodeExporterPort }}
+          protocol: TCP
+        volumeMounts:
+          - mountPath: /sys
+            name: sys
+          - mountPath: /etc/lsb-release.host
+            name: lsb-release
+          - mountPath: /lib/modules
+            name: modules-generated
+          - mountPath: /usr/src
+            name: headers-generated
+          - mountPath: /usr-host
+            name: usr-host
+{{- end }}
       - name: uncaptured
         securityContext:
           runAsUser: 1
diff --git a/perf/benchmark/values.yaml b/perf/benchmark/values.yaml
index 4fa9b98b37..d65562e7bd 100644
--- a/perf/benchmark/values.yaml
+++ b/perf/benchmark/values.yaml
@@ -43,5 +43,7 @@ client: # client overrides
 
 cert: false
 interceptionMode: REDIRECT
-
+profilingMode: true
+perfImage: oschaaf/istio-tools:profiling
+nodeExporterPort: 9100
 namespace: ""
diff --git a/perf/docker/Dockerfile.profiling b/perf/docker/Dockerfile.profiling
new file mode 100644
index 0000000000..5ff46b75cc
--- /dev/null
+++ b/perf/docker/Dockerfile.profiling
@@ -0,0 +1,21 @@
+FROM ubuntu:18.04
+
+WORKDIR /root
+
+COPY perf/setup-node-for-profiling.sh setup-node-for-profiling.sh
+
+RUN apt update && \
+  apt install -y git gcc make curl wget libelf-dev bc bpfcc-tools \
+    bison flex \
+    libdw-dev systemtap-sdt-dev libunwind-dev  libaudit-dev \
+    libssl-dev libslang2-dev libgtk2.0-dev libperl-dev python-dev && \
+  chmod +x setup-node-for-profiling.sh && \
+  wget -qO- https://github.com/prometheus/node_exporter/releases/download/v0.18.1/node_exporter-0.18.1.linux-amd64.tar.gz | tar -C . -xvzf - && \
+  cp node_exporter-*/node_exporter /usr/bin/ && \
+  rm -rf node_exporter-* && \
+  git clone --depth=1 https://github.com/BrendanGregg/FlameGraph && \
+  rm -rf /var/lib/apt/lists/* && \
+  rm -rf /tmp/*
+ 
+CMD ["setup-node-for-profiling.sh"]
+
diff --git a/perf/docker/perf/setup-node-for-profiling.sh b/perf/docker/perf/setup-node-for-profiling.sh
new file mode 100644
index 0000000000..62c9fad986
--- /dev/null
+++ b/perf/docker/perf/setup-node-for-profiling.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+set -ex
+
+USR_SRC="/usr/src"
+KERNEL_VERSION="$(uname -r)"
+CHROMEOS_RELEASE_VERSION="$(grep 'CHROMEOS_RELEASE_VERSION' /etc/lsb-release.host | cut -d '=' -f 2)"
+
+build_kernel()
+{
+  # Build the headers
+  cd "${WORKING_DIR}"
+  zcat /proc/config.gz > .config
+  make ARCH=x86 oldconfig > /dev/null
+  make ARCH=x86 prepare > /dev/null
+
+  # Build perf
+  cd tools/perf/
+  make ARCH=x86  > /dev/null
+  mv perf /usr/sbin/
+}
+
+prepare_node()
+{
+  WORKING_DIR="/linux-lakitu-${CHROMEOS_RELEASE_VERSION}"
+  SOURCES_DIR="${USR_SRC}/linux-lakitu-${CHROMEOS_RELEASE_VERSION}"
+  mkdir -p "${WORKING_DIR}"
+  curl -s "https://storage.googleapis.com/cos-tools/${CHROMEOS_RELEASE_VERSION}/kernel-src.tar.gz" \
+    | tar -xzf - -C "${WORKING_DIR}"
+  build_kernel
+  rm -rf "${USR_SRC}${WORKING_DIR}"
+  mv "${WORKING_DIR}" "${USR_SRC}"
+}
+
+prepare_node
+mkdir -p "/lib/modules/${KERNEL_VERSION}"
+ln -sf "${SOURCES_DIR}" "/lib/modules/${KERNEL_VERSION}/source"
+ln -sf "${SOURCES_DIR}" "/lib/modules/${KERNEL_VERSION}/build"
+
+# fire up the node exporter process, listening at the passed in address:port
+node_exporter --web.listen-address $1
+

From 70b6e52c321daf12de787cbcd4cb6fa153ab8cca Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Thu, 16 Apr 2020 11:05:32 +0200
Subject: [PATCH 02/31] Docker linting, fix TODO

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/benchmark/runner/runner.py  | 18 ++++--------------
 perf/docker/Dockerfile.profiling |  4 +++-
 2 files changed, 7 insertions(+), 15 deletions(-)

diff --git a/perf/benchmark/runner/runner.py b/perf/benchmark/runner/runner.py
index 13fb9f5978..edde72c7fb 100644
--- a/perf/benchmark/runner/runner.py
+++ b/perf/benchmark/runner/runner.py
@@ -252,9 +252,9 @@ def run(self, headers, conn, qps, size, duration):
         headers_cmd = self.generate_headers_cmd(headers)
         fortio_cmd = self.generate_fortio_cmd(headers_cmd, conn, qps, duration, grpc, cacert_arg, labels)
 
-        def run_profiling_in_background(exec_cmd, podname, filename_prefix, profiling_command):
-            filename = "{filename_prefix}-{podname}".format(
-                filename_prefix=filename_prefix, podname=podname)
+        def run_profiling_in_background(exec_cmd, podname, filename_prefix, profiling_command, labels):
+            filename = "{filename_prefix}-{labels}-{podname}".format(
+                filename_prefix=filename_prefix, labels=labels, podname=podname)
             profiler_cmd = "{exec_cmd} \"{profiling_command} > {filename}.profile\"".format(
                 profiling_command=profiling_command,
                 exec_cmd=exec_cmd,
@@ -301,21 +301,11 @@ def run_profiling_in_background(exec_cmd, podname, filename_prefix, profiling_co
                 profiling_command = self.custom_profiling_command.format(
                     duration=self.duration, sidecar_pid=sidecar_pid)
                 threads.append(Thread(target=run_profiling_in_background, args=[
-                    exec_cmd_on_pod, pod, self.custom_profiling_name, profiling_command]))
+                    exec_cmd_on_pod, pod, self.custom_profiling_name, profiling_command, labels]))
 
         for thread in threads:
             thread.start()
 
-        if self.run_ingress:
-            print('-------------- Running in ingress mode --------------')
-            kubectl_exec(self.client.name, self.ingress(fortio_cmd))
-            if self.perf_record:
-                run_perf(
-                    self.mesh,
-                    self.server.name,
-                    labels + "_srv_ingress",
-                    duration=40)
-
         if self.run_baseline:
             self.execute_sidecar_mode("baseline", self.load_gen_type, fortio_cmd, self.nosidecar, labels, "")
 
diff --git a/perf/docker/Dockerfile.profiling b/perf/docker/Dockerfile.profiling
index 5ff46b75cc..8c234dafc9 100644
--- a/perf/docker/Dockerfile.profiling
+++ b/perf/docker/Dockerfile.profiling
@@ -4,8 +4,10 @@ WORKDIR /root
 
 COPY perf/setup-node-for-profiling.sh setup-node-for-profiling.sh
 
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
 RUN apt update && \
-  apt install -y git gcc make curl wget libelf-dev bc bpfcc-tools \
+  apt-get install -y git gcc make curl wget libelf-dev bc bpfcc-tools \
     bison flex \
     libdw-dev systemtap-sdt-dev libunwind-dev  libaudit-dev \
     libssl-dev libslang2-dev libgtk2.0-dev libperl-dev python-dev && \

From 81d4d7a1de5430ff67b8478b5de934959014db5b Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Thu, 16 Apr 2020 11:22:41 +0200
Subject: [PATCH 03/31] Docker lint tweak

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/docker/Dockerfile.profiling | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/perf/docker/Dockerfile.profiling b/perf/docker/Dockerfile.profiling
index 8c234dafc9..b93f35828a 100644
--- a/perf/docker/Dockerfile.profiling
+++ b/perf/docker/Dockerfile.profiling
@@ -6,8 +6,8 @@ COPY perf/setup-node-for-profiling.sh setup-node-for-profiling.sh
 
 SHELL ["/bin/bash", "-o", "pipefail", "-c"]
 
-RUN apt update && \
-  apt-get install -y git gcc make curl wget libelf-dev bc bpfcc-tools \
+RUN apt-get update && \
+  apt-get install -y --no-install-recommends git gcc make curl wget libelf-dev bc bpfcc-tools \
     bison flex \
     libdw-dev systemtap-sdt-dev libunwind-dev  libaudit-dev \
     libssl-dev libslang2-dev libgtk2.0-dev libperl-dev python-dev && \

From ee62a802603bafeb468815b7cff8ac26f01896dd Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Thu, 16 Apr 2020 13:43:10 +0200
Subject: [PATCH 04/31] Add perf label to flamegraph filename

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/benchmark/runner/runner.py               | 90 +++++++++++--------
 perf/docker/Dockerfile.profiling              |  5 +-
 .../setup-node-for-profiling.sh               |  3 +
 3 files changed, 59 insertions(+), 39 deletions(-)
 rename perf/docker/{perf => profiling}/setup-node-for-profiling.sh (94%)

diff --git a/perf/benchmark/runner/runner.py b/perf/benchmark/runner/runner.py
index edde72c7fb..d1478b5eea 100644
--- a/perf/benchmark/runner/runner.py
+++ b/perf/benchmark/runner/runner.py
@@ -238,6 +238,29 @@ def generate_fortio_cmd(self, headers_cmd, conn, qps, duration, grpc, cacert_arg
 
         return fortio_cmd
 
+    def run_profiling_in_background(self, exec_cmd, podname, filename_prefix, profiling_command, labels):
+        filename = "{filename_prefix}-{labels}-{podname}".format(
+            filename_prefix=filename_prefix, labels=labels, podname=podname)
+        profiler_cmd = "{exec_cmd} \"{profiling_command} > {filename}.profile\"".format(
+            profiling_command=profiling_command,
+            exec_cmd=exec_cmd,
+            filename=filename
+        )
+        # Run the profile collection tool, and wait for it to finish.
+        process = subprocess.Popen(shlex.split(profiler_cmd))
+        process.wait()
+        # Next we feed the profiling data to the flamegraphing script.
+        flamegraph_cmd = "{exec_cmd} \"./FlameGraph/flamegraph.pl --title='{profiling_command} Flame Graph'  < {filename}.profile > {filename}.svg\"".format(
+            exec_cmd=exec_cmd,
+            profiling_command=profiling_command,
+            filename=filename
+        )
+        process = subprocess.Popen(shlex.split(flamegraph_cmd))
+        process.wait()
+        # Lastly copy the resulting flamegraph out of the container
+        kubectl_cp(podname + ":{filename}.svg".format(filename=filename),
+                    "flame/flameoutput/{filename}.svg".format(filename=filename), "perf")
+
     def run(self, headers, conn, qps, size, duration):
         labels = self.generate_test_labels(conn, qps, size)
 
@@ -251,29 +274,31 @@ def run(self, headers, conn, qps, size, duration):
 
         headers_cmd = self.generate_headers_cmd(headers)
         fortio_cmd = self.generate_fortio_cmd(headers_cmd, conn, qps, duration, grpc, cacert_arg, labels)
+        perf_label = ""
+        sidecar_mode = ""
+        sidecar_mode_func = None
+
+        if self.run_baseline:
+            sidecar_mode = "baseline"
+            sidecar_mode_func = self.nosidecar
+
+        if self.run_serversidecar:
+            perf_label = "_srv_serveronly"
+            sidecar_mode = "server sidecar"
+            sidecar_mode_func = self.serversidecar
 
-        def run_profiling_in_background(exec_cmd, podname, filename_prefix, profiling_command, labels):
-            filename = "{filename_prefix}-{labels}-{podname}".format(
-                filename_prefix=filename_prefix, labels=labels, podname=podname)
-            profiler_cmd = "{exec_cmd} \"{profiling_command} > {filename}.profile\"".format(
-                profiling_command=profiling_command,
-                exec_cmd=exec_cmd,
-                filename=filename
-            )
-            # Run the profile collection tool, and wait for it to finish.
-            process = subprocess.Popen(shlex.split(profiler_cmd))
-            process.wait()
-            # Next we feed the profiling data to the flamegraphing script.
-            flamegraph_cmd = "{exec_cmd} \"./FlameGraph/flamegraph.pl --title='{profiling_command} Flame Graph'  < {filename}.profile > {filename}.svg\"".format(
-                exec_cmd=exec_cmd,
-                profiling_command=profiling_command,
-                filename=filename
-            )
-            process = subprocess.Popen(shlex.split(flamegraph_cmd))
-            process.wait()
-            # Lastly copy the resulting flamegraph out of the container
-            kubectl_cp(podname + ":{filename}.svg".format(filename=filename),
-                       "flame/flameoutput/{filename}.svg".format(filename=filename), "perf")
+        if self.run_clientsidecar:
+            perf_label = "_srv_clientonly"
+            sidecar_mode = "client sidecar"
+            sidecar_mode_func = self.clientsidecar
+
+        if self.run_bothsidecar:
+            perf_label = "_srv_bothsidecars"
+            sidecar_mode = "both sidecar"
+            sidecar_mode_func = self.bothsidecar
+
+        if self.run_ingress:
+            perf_label = "_srv_ingress"
 
         threads = []
 
@@ -300,33 +325,24 @@ def run_profiling_in_background(exec_cmd, podname, filename_prefix, profiling_co
                 sidecar_pid = getoutput("{exec_cmd} \"pgrep -P {sidecar_ppid}\"".format(exec_cmd=exec_cmd_on_pod, sidecar_ppid=sidecar_ppid)).strip()
                 profiling_command = self.custom_profiling_command.format(
                     duration=self.duration, sidecar_pid=sidecar_pid)
-                threads.append(Thread(target=run_profiling_in_background, args=[
-                    exec_cmd_on_pod, pod, self.custom_profiling_name, profiling_command, labels]))
+                threads.append(Thread(target=self.run_profiling_in_background, args=[
+                    exec_cmd_on_pod, pod, self.custom_profiling_name, profiling_command, labels + perf_label]))
 
         for thread in threads:
             thread.start()
 
-        if self.run_baseline:
-            self.execute_sidecar_mode("baseline", self.load_gen_type, fortio_cmd, self.nosidecar, labels, "")
-
-        if self.run_serversidecar:
-            self.execute_sidecar_mode("server sidecar", self.load_gen_type, fortio_cmd, self.serversidecar, labels, "_srv_serveronly")
-
-        if self.run_clientsidecar:
-            self.execute_sidecar_mode("client sidecar", self.load_gen_type, fortio_cmd, self.clientsidecar, labels, "_srv_clientonly")
-
-        if self.run_bothsidecar:
-            self.execute_sidecar_mode("both sidecar", self.load_gen_type, fortio_cmd, self.bothsidecar, labels, "_srv_bothsidecars")
-
         if self.run_ingress:
             print('-------------- Running in ingress mode --------------')
             kubectl_exec(self.client.name, self.ingress(fortio_cmd))
+
             if self.perf_record:
                 run_perf(
                     self.mesh,
                     self.server.name,
-                    labels + "_srv_ingress",
+                    labels + perf_label,
                     duration=40)
+        else:
+            self.execute_sidecar_mode(sidecar_mode, self.load_gen_type, fortio_cmd, sidecar_mode_func, labels, perf_label)
 
         if len(threads) > 0:
             if self.custom_profiling_command:
diff --git a/perf/docker/Dockerfile.profiling b/perf/docker/Dockerfile.profiling
index b93f35828a..6f361bba29 100644
--- a/perf/docker/Dockerfile.profiling
+++ b/perf/docker/Dockerfile.profiling
@@ -2,16 +2,17 @@ FROM ubuntu:18.04
 
 WORKDIR /root
 
-COPY perf/setup-node-for-profiling.sh setup-node-for-profiling.sh
+COPY profiling/setup-node-for-profiling.sh setup-node-for-profiling.sh
 
 SHELL ["/bin/bash", "-o", "pipefail", "-c"]
 
 RUN apt-get update && \
   apt-get install -y --no-install-recommends git gcc make curl wget libelf-dev bc bpfcc-tools \
-    bison flex \
+    bison flex ca-certificates \
     libdw-dev systemtap-sdt-dev libunwind-dev  libaudit-dev \
     libssl-dev libslang2-dev libgtk2.0-dev libperl-dev python-dev && \
   chmod +x setup-node-for-profiling.sh && \
+  update-ca-certificates && \
   wget -qO- https://github.com/prometheus/node_exporter/releases/download/v0.18.1/node_exporter-0.18.1.linux-amd64.tar.gz | tar -C . -xvzf - && \
   cp node_exporter-*/node_exporter /usr/bin/ && \
   rm -rf node_exporter-* && \
diff --git a/perf/docker/perf/setup-node-for-profiling.sh b/perf/docker/profiling/setup-node-for-profiling.sh
similarity index 94%
rename from perf/docker/perf/setup-node-for-profiling.sh
rename to perf/docker/profiling/setup-node-for-profiling.sh
index 62c9fad986..df98357142 100644
--- a/perf/docker/perf/setup-node-for-profiling.sh
+++ b/perf/docker/profiling/setup-node-for-profiling.sh
@@ -37,6 +37,9 @@ mkdir -p "/lib/modules/${KERNEL_VERSION}"
 ln -sf "${SOURCES_DIR}" "/lib/modules/${KERNEL_VERSION}/source"
 ln -sf "${SOURCES_DIR}" "/lib/modules/${KERNEL_VERSION}/build"
 
+sysctl kernel.perf_event_paranoid=-1
+sysctl kernel.kptr_restrict=0
+
 # fire up the node exporter process, listening at the passed in address:port
 node_exporter --web.listen-address $1
 

From 689021b4cea4ab3a0886141cbe2c0650b45cd771 Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Thu, 16 Apr 2020 23:35:51 +0200
Subject: [PATCH 05/31] Deduplicate redundant functionality

As --custom_profiling_command offers a superset of --perf,
reimplement the old --perf flag using that and eliminate the
redundant code.

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/benchmark/flame/README.md         |  63 ++++----------
 perf/benchmark/flame/flame.sh          |  48 -----------
 perf/benchmark/flame/get_perfdata.sh   |  42 ---------
 perf/benchmark/flame/get_proxy_perf.sh |  77 -----------------
 perf/benchmark/runner/runner.py        | 115 +++++++++----------------
 5 files changed, 60 insertions(+), 285 deletions(-)
 delete mode 100755 perf/benchmark/flame/flame.sh
 delete mode 100755 perf/benchmark/flame/get_perfdata.sh
 delete mode 100755 perf/benchmark/flame/get_proxy_perf.sh

diff --git a/perf/benchmark/flame/README.md b/perf/benchmark/flame/README.md
index 0d7f6e3120..2ad0e8a3a2 100644
--- a/perf/benchmark/flame/README.md
+++ b/perf/benchmark/flame/README.md
@@ -8,61 +8,36 @@
 1. Colors are arbitrary.
 1. Function names are sorted left to right.
 
-This document shows how to gather performance data from within the `istio-proxy` container.
+This document shows how to gather performance data via the `perf` container.
 
-## Setup Perf tool
+## Setup the perf container
 
-Flame graphs are created from data collected using linux `perf_events` by the `perf` tool.
+Enable `profilingMode` in [values.yaml](../values.yaml). This will end up adding the perf
+container to the server and client pods, which both will be running on separate nodes.
 
-1. Ensure that `perf` is installed within the container.
-   Since `istio-proxy` container does not allow installation of new packages, build a new docker image.
+Flame graphs are created from data collected using linux `perf_events` by the `perf` and [BCC tools](https://github.com/iovisor/bcc).
 
-    ```plain
-    FROM gcr.io/istio-release/proxyv2:release-1.0-20180810-09-15
-    # Install fpm tool
-    RUN  sudo apt-get update && \
-        sudo apt-get -qqy install linux-tools-generic
-    ```
+## Obtaining flame graphs
 
-    Build image and push docker image and use it in your deployment by adding the following annotation.
+Flame graphs can be produced via `runner.py`, and will be stored in `flame/flameoutput`.
 
-    ```plain
-    "sidecar.istio.io/proxyImag" : <name of your image>
-    ```
+A few sample command lines follow. `{duration}` will be replaced by
+whatever was passed for `--duration` to runner.py. `{sidecar_pid}` will
+be replaced by `runner.py` with the process id of the Envoy sidecar.
 
-    This step will go away once the default debug image contains `perf` and related tools.
+It is valid to omit `{sidecar_pid}` in `--custom_profiling_command`.
+This may be useful for machine-wide profiling or arbitrary processes.
 
-1. Ensure that you can run `perf record`
 
-    Running `perf record` from container requires the host to permit this activity. This is done by running the following command on the vm host.
-    For example, if you are running on a GKE cluster, you should `ssh` to the node using the command:
+```bash
+runner/runner.py --conn 20 --qps 10000 --duration 100 --custom_profiling_command="profile-bpfcc -df {duration} -p {sidecar_pid}" --custom_profiling_name="bcc-oncputime-sidecar"
 
-    ```bash
-    gcloud compute ssh gke-perf-test-default-pool-xxxxxx
-    ```
+runner/runner.py --conn 20 --qps 10000 --duration 100 --serversidecar --custom_profiling_command="offcputime-bpfcc -df {duration} -p {sidecar_pid}" --custom_profiling_name="bcc-offcputime-sidecar"
 
-    Then run the following command:
+runner/runner.py --conn 20 --qps 10000 --duration 100 --serversidecar --custom_profiling_command="offwaketime-bpfcc -df {duration} -p {sidecar_pid}" --custom_profiling_name="bcc-offwaketime-sidecar"
 
-    ```bash
-    sudo sysctl kernel.perf_event_paranoid=-1
-    sudo sysctl kernel.kptr_restrict=0
-    ```
+runner/runner.py --conn 20 --qps 10000 --duration 100 --serversidecar --custom_profiling_command="wakeuptime-bpfcc -f -p {sidecar_pid} {duration}" --custom_profiling_name="bcc-wakeuptime-sidecar"
 
-    This setting is very permissive so it must be used with care.
+runner/runner.py --conn 20 --qps 10000 --duration 100 --serversidecar --custom_profiling_command="perf record -F 99 -g -p {sidecar_pid} -- sleep {duration} && perf script | ~/FlameGraph/stackcollapse-perf.pl | c++filt -n" --custom_profiling_name="perf-oncputime-sidecar"
+```
 
-    If running perf still gives error:```You may not have permission to collect stats. Consider tweaking /proc/sys/kernel/perf_event_paranoid:```
-    after running above commands, try ssh into node and run the container with --privileged flag.
-
-1. Run [`get_proxy_perf.sh`](get_proxy_perf.sh) to get the profiling svg. The following command collects samples at `177Hz` for `20s`. The svg file should be created under `flameoutput` dir
-
-    ```plain
-    ./get_proxy_perf.sh -p svc05-0-7-564865d756-pvjhn -n service-graph05 -s 177 -t 20
-    ...
-    [ perf record: Woken up 1 times to write data ]
-    [ perf record: Captured and wrote 0.061 MB /etc/istio/proxy/perf.data (74 samples) ]
-
-    Wrote /etc/istio/proxy/perf.data.perf
-    ...
-    generating svg file svc05-0-7-564865d756-pvjhn-2020-01-29-22-34-19.perf
-    ...
-    ```
diff --git a/perf/benchmark/flame/flame.sh b/perf/benchmark/flame/flame.sh
deleted file mode 100755
index ad09de7263..0000000000
--- a/perf/benchmark/flame/flame.sh
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/bash
-
-# Copyright Istio Authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set -e
-
-WD=$(dirname "${0}")
-WD=$(cd "${WD}" && pwd)
-
-FLAMEDIR="${WD}/FlameGraph"
-
-if ! command -v c++filt > /dev/null; then
-    echo "Install c++filt to demangle symbols"
-    exit 1
-fi
-
-cd "${WD}" || exit 1
-
-if [[ ! -d ${FLAMEDIR} ]]; then
-    echo "Cloning FlameGraph repo in ${WD}"
-    git clone https://github.com/brendangregg/FlameGraph
-fi
-
-# Given output of `perf script` produce a flamegraph
-FILE=${1:?"perf script output"}
-FILENAME=$(basename "${FILE}")
-BASE=$(echo "${FILENAME}" | cut -d '.' -f 1)
-SVGNAME="${BASE}.svg"
-
-mkdir -p "${WD}/flameoutput"
-"${FLAMEDIR}/stackcollapse-perf.pl" "${FILE}" | c++filt -n | "${FLAMEDIR}/flamegraph.pl" --cp > "./flameoutput/${SVGNAME}"
-
-echo "Wrote ${SVGNAME}"
-if [[ -n "${BUCKET}" ]];then
-    gsutil cp "${SVGNAME}" "${BUCKET}"
-fi
diff --git a/perf/benchmark/flame/get_perfdata.sh b/perf/benchmark/flame/get_perfdata.sh
deleted file mode 100755
index 4881935ffd..0000000000
--- a/perf/benchmark/flame/get_perfdata.sh
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/bin/bash
-
-# Copyright Istio Authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set -e
-
-WD=$(dirname "${0}")
-WD=$(cd "${WD}" && pwd)
-
-FILENAME=${1:?"perffilename"}
-DURATION=${2:?"duration"}
-FREQ=${3:-"99"}
-
-PID=$(pgrep envoy)
-
-# This is specific to the kernel version
-# example: /usr/lib/linux-tools-4.4.0-131/perf
-# provided by `linux-tools-generic`
-PERFDIR=$(find /usr/lib -name 'linux-tools-*' -type d | head -n 1)
-if [[ -z "${PERFDIR}" ]]; then
-    echo "Missing perf tool. Install apt-get install linux-tools-generic"
-    exit 1
-fi
-
-PERF="${PERFDIR}/perf"
-
-"${PERF}" record -o "${WD}/${FILENAME}" -F "${FREQ}" -p "${PID}" -g -- sleep "${DURATION}"
-"${PERF}" script -i "${WD}/${FILENAME}" --demangle > "${WD}/${FILENAME}.perf"
-
-echo "Wrote ${WD}/${FILENAME}.perf"
diff --git a/perf/benchmark/flame/get_proxy_perf.sh b/perf/benchmark/flame/get_proxy_perf.sh
deleted file mode 100755
index dd0f10e2b2..0000000000
--- a/perf/benchmark/flame/get_proxy_perf.sh
+++ /dev/null
@@ -1,77 +0,0 @@
-#!/bin/bash
-
-# Copyright Istio Authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set -e
-
-function usage() {
-  echo "usage:
-        ./get_proxy_perf.sh -p <pod_name> -n <pod_namespace> -s <sample_frequency> -t <time>
-    
-    e.g.
-      ./get_proxy_perf.sh -p svc05-0-4-0-67bff5dbbf-grl94 -n service-graph05 -s 177 -t 20
-
-    -p name of pod.
-    -n namespace of the given pod.
-    -s sample frequence in Hz.
-    -t time of profiling in second."
-  exit 1
-}
-
-POD_NAME=""
-POD_NAMESPACE=""
-SAMPLE_FREQUENCY="177"
-PERF_TIME="20"
-
-while getopts p:n:s:t: arg ; do
-  case "${arg}" in
-    p) POD_NAME="${OPTARG}";;
-    n) POD_NAMESPACE="${OPTARG}";;
-    t) SAMPLE_FREQUENCY="${OPTARG}";;
-    s) PERF_TIME="${OPTARG}";;
-    *) usage;;
-  esac
-done
-
-if [ -z "${POD_NAME}" ]; then
-    echo "pod name must be provided."
-    usage
-    exit 1
-fi
-
-if [ -z "${POD_NAMESPACE}" ]; then
-    echo "pod namespace must be provided."
-    usage
-    exit 1
-fi
-
-WD=$(dirname "${0}")
-WD=$(cd "${WD}" && pwd)
-
-echo "copy profiling script to proxy..."
-kubectl cp "${WD}"/get_perfdata.sh "${POD_NAME}":/etc/istio/proxy/get_perfdata.sh -n "${POD_NAMESPACE}" -c istio-proxy
-
-echo "start profiling..."
-kubectl exec "${POD_NAME}" -n "${POD_NAMESPACE}" -c istio-proxy -- /etc/istio/proxy/get_perfdata.sh perf.data "${SAMPLE_FREQUENCY}" "${PERF_TIME}"
-
-TMP_DIR=$(mktemp -d -t proxy-perf-XXXXXXXXXX)
-trap 'rm -rf "${TMP_DIR}"' EXIT
-TIME="$(date '+%Y-%m-%d-%H-%M-%S')"
-PERF_FILE_NAME="${POD_NAME}"-"${TIME}".perf
-PERF_FILE="${TMP_DIR}"/"${PERF_FILE_NAME}"
-kubectl cp "${POD_NAME}":/etc/istio/proxy/perf.data.perf "${PERF_FILE}" -n "${POD_NAMESPACE}" -c istio-proxy
-
-echo "generating svg file ${PERF_FILE_NAME}"
-"${WD}"/flame.sh "${PERF_FILE}"
diff --git a/perf/benchmark/runner/runner.py b/perf/benchmark/runner/runner.py
index d1478b5eea..3f548bcc99 100644
--- a/perf/benchmark/runner/runner.py
+++ b/perf/benchmark/runner/runner.py
@@ -147,6 +147,11 @@ def __init__(
         self.cacert = cacert
         self.load_gen_type = load_gen_type
 
+        if self.perf_record != False:
+            if not self.custom_profiling_command is None:
+                sys.exit("--perf and --custom_profiling_command are mutually exclusive")
+            self.custom_profiling_command = "perf record -F 99 -g -p {sidecar_pid} -- sleep {duration} && perf script | ~/FlameGraph/stackcollapse-perf.pl | c++filt -n"
+
         if mesh == "linkerd":
             self.mesh = "linkerd"
         elif mesh == "istio":
@@ -186,13 +191,6 @@ def execute_sidecar_mode(self, sidecar_mode, load_gen_type, load_gen_cmd, sideca
         if load_gen_type == "fortio":
             kubectl_exec(self.client.name, sidecar_mode_func(load_gen_cmd))
 
-        if self.perf_record and len(perf_label_suffix) > 0:
-            run_perf(
-                self.mesh,
-                self.server.name,
-                labels + perf_label_suffix,
-                duration=40)
-
     def generate_test_labels(self, conn, qps, size):
         size = size or self.size
         labels = self.run_id
@@ -238,7 +236,7 @@ def generate_fortio_cmd(self, headers_cmd, conn, qps, duration, grpc, cacert_arg
 
         return fortio_cmd
 
-    def run_profiling_in_background(self, exec_cmd, podname, filename_prefix, profiling_command, labels):
+    def run_profiler(self, exec_cmd, podname, filename_prefix, profiling_command, labels):
         filename = "{filename_prefix}-{labels}-{podname}".format(
             filename_prefix=filename_prefix, labels=labels, podname=podname)
         profiler_cmd = "{exec_cmd} \"{profiling_command} > {filename}.profile\"".format(
@@ -261,6 +259,39 @@ def run_profiling_in_background(self, exec_cmd, podname, filename_prefix, profil
         kubectl_cp(podname + ":{filename}.svg".format(filename=filename),
                     "flame/flameoutput/{filename}.svg".format(filename=filename), "perf")
 
+    def maybe_start_profiling_threads(self, labels, perf_label):
+        threads = []
+
+        if self.custom_profiling_command:
+            # We run any custom profiling command on both pods, as one runs on each node we're interested in.
+            for pod in [self.client.name, self.server.name]:
+                exec_cmd_on_pod = "kubectl exec -n {namespace} {podname} -c perf -it -- bash -c ".format(
+                    namespace=os.environ.get("NAMESPACE", "twopods"),
+                    podname=pod
+                )
+
+                # Wait for node_exporter to run, which indicates the profiling initialization container has finished initializing.
+                # once the init probe is supported, move this to a http probe instead in fortio.yaml
+                ready_cmd = "{exec_cmd} \"pgrep 'node_exporter'\"".format(exec_cmd=exec_cmd_on_pod)
+                ne_pid = getoutput(ready_cmd).strip()
+                attempts = 1
+                while ne_pid == "" and attempts < 60:
+                    sleep(1)
+                    ne_pid = getoutput(ready_cmd).strip()
+                    print(".")
+                    attempts = attempts + 1
+
+                # Find side car process id's in case the profiling command needs it.
+                sidecar_ppid = getoutput("{exec_cmd} \"pgrep -f 'pilot-agent proxy sidecar'\"".format(exec_cmd=exec_cmd_on_pod)).strip()
+                sidecar_pid = getoutput("{exec_cmd} \"pgrep -P {sidecar_ppid}\"".format(exec_cmd=exec_cmd_on_pod, sidecar_ppid=sidecar_ppid)).strip()
+                profiling_command = self.custom_profiling_command.format(
+                    duration=self.duration, sidecar_pid=sidecar_pid)
+                threads.append(Thread(target=self.run_profiler, args=[
+                    exec_cmd_on_pod, pod, self.custom_profiling_name, profiling_command, labels + perf_label]))
+
+        return threads
+
+
     def run(self, headers, conn, qps, size, duration):
         labels = self.generate_test_labels(conn, qps, size)
 
@@ -300,33 +331,7 @@ def run(self, headers, conn, qps, size, duration):
         if self.run_ingress:
             perf_label = "_srv_ingress"
 
-        threads = []
-
-        if self.custom_profiling_command:
-            # We run any custom profiling command on both pods, as one runs on each node we're interested in.
-            for pod in [self.client.name, self.server.name]:
-                exec_cmd_on_pod = "kubectl exec -n {namespace} {podname} -c perf -it -- bash -c ".format(
-                    namespace=os.environ.get("NAMESPACE", "twopods"),
-                    podname=pod
-                )
-                
-                # Wait for node_exporter to run, which indicates the profiling initialization container has finished initializing.
-                # once the init probe is supported, move this to a http probe instead in fortio.yaml
-                ne_pid = ""
-                attempts = 0
-                while ne_pid == "" and attempts < 60:
-                    ne_pid = getoutput("{exec_cmd} \"pgrep 'node_exporter'\"".format(exec_cmd=exec_cmd_on_pod)).strip()
-                    attempts = attempts + 1
-                    print(".")
-                    sleep(1)
-
-                # Find side car process id's in case the profiling command needs it.
-                sidecar_ppid = getoutput("{exec_cmd} \"pgrep -f 'pilot-agent proxy sidecar'\"".format(exec_cmd=exec_cmd_on_pod)).strip()
-                sidecar_pid = getoutput("{exec_cmd} \"pgrep -P {sidecar_ppid}\"".format(exec_cmd=exec_cmd_on_pod, sidecar_ppid=sidecar_ppid)).strip()
-                profiling_command = self.custom_profiling_command.format(
-                    duration=self.duration, sidecar_pid=sidecar_pid)
-                threads.append(Thread(target=self.run_profiling_in_background, args=[
-                    exec_cmd_on_pod, pod, self.custom_profiling_name, profiling_command, labels + perf_label]))
+        threads = self.maybe_start_profiling_threads(labels, perf_label)
 
         for thread in threads:
             thread.start()
@@ -334,13 +339,6 @@ def run(self, headers, conn, qps, size, duration):
         if self.run_ingress:
             print('-------------- Running in ingress mode --------------')
             kubectl_exec(self.client.name, self.ingress(fortio_cmd))
-
-            if self.perf_record:
-                run_perf(
-                    self.mesh,
-                    self.server.name,
-                    labels + perf_label,
-                    duration=40)
         else:
             self.execute_sidecar_mode(sidecar_mode, self.load_gen_type, fortio_cmd, sidecar_mode_func, labels, perf_label)
 
@@ -350,37 +348,6 @@ def run(self, headers, conn, qps, size, duration):
                     thread.join()
             print("background profiler thread finished - flamegraphs are available in flame/flameoutput")
 
-PERFCMD = "/usr/lib/linux-tools/4.4.0-131-generic/perf"
-FLAMESH = "flame.sh"
-PERFSH = "get_perfdata.sh"
-PERFWD = "/etc/istio/proxy/"
-
-WD = os.getcwd()
-LOCAL_FLAMEDIR = os.path.join(WD, "../flame/")
-LOCAL_FLAMEPATH = LOCAL_FLAMEDIR + FLAMESH
-LOCAL_PERFPATH = LOCAL_FLAMEDIR + PERFSH
-LOCAL_FLAMEOUTPUT = LOCAL_FLAMEDIR + "flameoutput/"
-
-
-def run_perf(mesh, pod, labels, duration=20):
-    filename = labels + "_perf.data"
-    filepath = PERFWD + filename
-    perfpath = PERFWD + PERFSH
-
-    # copy executable over
-    kubectl_cp(LOCAL_PERFPATH, pod + ":" + perfpath, mesh + "-proxy")
-
-    kubectl_exec(
-        pod,
-        "{perf_cmd} {filename} {duration}".format(
-            perf_cmd=perfpath,
-            filename=filename,
-            duration=duration),
-        container=mesh + "-proxy")
-
-    kubectl_cp(pod + ":" + filepath + ".perf", LOCAL_FLAMEOUTPUT + filename + ".perf", mesh + "-proxy")
-    run_command_sync(LOCAL_FLAMEPATH + " " + filename + ".perf")
-
 
 def validate_job_config(job_config):
     required_fields = {"conn": list, "qps": list, "duration": int}
@@ -512,7 +479,7 @@ def get_parser():
     parser.add_argument(
         "--custom_profiling_command",
         help="Run custom profiling commands on the nodes for the client and server, and produce a flamegraph based on their outputs. E.g. --custom_profiling_command=\"/usr/share/bcc/tools/profile -df 40\"",
-        default=False)
+        default=None)
     parser.add_argument(
         "--custom_profiling_name",
         help="Name to be added to the flamegraph resulting from --custom_profiling_command",

From bffad091ff4859066f8cd0969f8c390b78554295 Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Fri, 17 Apr 2020 10:47:45 +0200
Subject: [PATCH 06/31] Dockerfile.perf linting: pin package versions

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/docker/Dockerfile.profiling | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/perf/docker/Dockerfile.profiling b/perf/docker/Dockerfile.profiling
index 6f361bba29..ad3429de12 100644
--- a/perf/docker/Dockerfile.profiling
+++ b/perf/docker/Dockerfile.profiling
@@ -7,10 +7,27 @@ COPY profiling/setup-node-for-profiling.sh setup-node-for-profiling.sh
 SHELL ["/bin/bash", "-o", "pipefail", "-c"]
 
 RUN apt-get update && \
-  apt-get install -y --no-install-recommends git gcc make curl wget libelf-dev bc bpfcc-tools \
-    bison flex ca-certificates \
-    libdw-dev systemtap-sdt-dev libunwind-dev  libaudit-dev \
-    libssl-dev libslang2-dev libgtk2.0-dev libperl-dev python-dev && \
+  apt-get install -y --no-install-recommends \
+    git=1:2.17.1-1ubuntu0.6 \
+    gcc=4:7.4.0-1ubuntu2.3 \
+    make=4.1-9.1ubuntu1 \
+    curl=7.58.0-2ubuntu3.8 \
+    wget=1.19.4-1ubuntu2.2 \
+    libelf-dev=0.170-0.4ubuntu0.1 \
+    bc=1.07.1-2 \
+    bpfcc-tools=0.5.0-5ubuntu1 \
+    bison=2:3.0.4.dfsg-1build1 \
+    flex=2.6.4-6 \
+    ca-certificates=20180409 \
+    libdw-dev=0.170-0.4ubuntu0.1 \
+    systemtap-sdt-dev=3.1-3ubuntu0.1 \
+    libunwind-dev=1.2.1-8 \
+    libaudit-dev=1:2.8.2-1ubuntu1 \
+    libssl-dev=1.1.1-1ubuntu2.1~18.04.5 \
+    libslang2-dev=2.3.1a-3ubuntu1 \
+    libgtk2.0-dev=2.24.32-1ubuntu1 \
+    libperl-dev=5.26.1-6ubuntu0.3 \
+    python-dev=2.7.15~rc1-1 && \
   chmod +x setup-node-for-profiling.sh && \
   update-ca-certificates && \
   wget -qO- https://github.com/prometheus/node_exporter/releases/download/v0.18.1/node_exporter-0.18.1.linux-amd64.tar.gz | tar -C . -xvzf - && \

From 6425935e8b33162c571066bedf8713c73bc144ec Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Fri, 17 Apr 2020 10:51:50 +0200
Subject: [PATCH 07/31] bash linting: double quote arg

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/docker/profiling/setup-node-for-profiling.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perf/docker/profiling/setup-node-for-profiling.sh b/perf/docker/profiling/setup-node-for-profiling.sh
index df98357142..4ce3bbe8c4 100644
--- a/perf/docker/profiling/setup-node-for-profiling.sh
+++ b/perf/docker/profiling/setup-node-for-profiling.sh
@@ -41,5 +41,5 @@ sysctl kernel.perf_event_paranoid=-1
 sysctl kernel.kptr_restrict=0
 
 # fire up the node exporter process, listening at the passed in address:port
-node_exporter --web.listen-address $1
+node_exporter --web.listen-address "$1"
 

From 81f4d4d8edb3e59f55b7413c47ecc3245be07df8 Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Fri, 17 Apr 2020 10:56:06 +0200
Subject: [PATCH 08/31] Add licence / copyright banner

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/docker/profiling/setup-node-for-profiling.sh | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/perf/docker/profiling/setup-node-for-profiling.sh b/perf/docker/profiling/setup-node-for-profiling.sh
index 4ce3bbe8c4..18f9f8d8a9 100644
--- a/perf/docker/profiling/setup-node-for-profiling.sh
+++ b/perf/docker/profiling/setup-node-for-profiling.sh
@@ -1,5 +1,19 @@
 #!/bin/bash
 
+# Copyright Istio Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 set -ex
 
 USR_SRC="/usr/src"

From b37cea872c866fd8cfa4e15bf3873e9361cadf37 Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Fri, 17 Apr 2020 11:13:37 +0200
Subject: [PATCH 09/31] Python whitespace linting fix

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/benchmark/runner/runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perf/benchmark/runner/runner.py b/perf/benchmark/runner/runner.py
index a9aa930d38..3ba462f580 100644
--- a/perf/benchmark/runner/runner.py
+++ b/perf/benchmark/runner/runner.py
@@ -271,7 +271,7 @@ def run_profiler(self, exec_cmd, podname, filename_prefix, profiling_command, la
         process.wait()
         # Lastly copy the resulting flamegraph out of the container
         kubectl_cp(podname + ":{filename}.svg".format(filename=filename),
-                    "flame/flameoutput/{filename}.svg".format(filename=filename), "perf")
+                   "flame/flameoutput/{filename}.svg".format(filename=filename), "perf")
 
     def maybe_start_profiling_threads(self, labels, perf_label):
         threads = []

From d5d5d59880b56d5eac70ea061acb5670b960146b Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Fri, 17 Apr 2020 11:19:24 +0200
Subject: [PATCH 10/31] Markdown linting fixes

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/benchmark/README.md       | 2 +-
 perf/benchmark/flame/README.md | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/perf/benchmark/README.md b/perf/benchmark/README.md
index 22f18cad10..e7b4ca06e4 100644
--- a/perf/benchmark/README.md
+++ b/perf/benchmark/README.md
@@ -168,7 +168,7 @@ weakening security. Resulting flamegraphs will be written to `flame/flameoutput`
   - "stackcount-bpfcc c:*alloc* -df -D {duration} -p {sidecar_pid}" profile calls to `*alloc*`
 - It's also possible to run machine-wide profiling, for example:
   - "profile-bpfcc -df {duration}" for obtaining a machine-wide on-cpu flamegraph.
-  - See http://www.brendangregg.com/FlameGraphs/ for more examples and information.
+  - See [here](http://www.brendangregg.com/FlameGraphs/) for more examples and information.
 - Enabling `profilingMode` in `values.yaml` will also bring up and expose Prometheus's `node_exporter` at the configured port (default: 9100),
   accessible over http via `/metrics.
 
diff --git a/perf/benchmark/flame/README.md b/perf/benchmark/flame/README.md
index 2ad0e8a3a2..55f54bdbdb 100644
--- a/perf/benchmark/flame/README.md
+++ b/perf/benchmark/flame/README.md
@@ -28,7 +28,6 @@ be replaced by `runner.py` with the process id of the Envoy sidecar.
 It is valid to omit `{sidecar_pid}` in `--custom_profiling_command`.
 This may be useful for machine-wide profiling or arbitrary processes.
 
-
 ```bash
 runner/runner.py --conn 20 --qps 10000 --duration 100 --custom_profiling_command="profile-bpfcc -df {duration} -p {sidecar_pid}" --custom_profiling_name="bcc-oncputime-sidecar"
 
@@ -40,4 +39,3 @@ runner/runner.py --conn 20 --qps 10000 --duration 100 --serversidecar --custom_p
 
 runner/runner.py --conn 20 --qps 10000 --duration 100 --serversidecar --custom_profiling_command="perf record -F 99 -g -p {sidecar_pid} -- sleep {duration} && perf script | ~/FlameGraph/stackcollapse-perf.pl | c++filt -n" --custom_profiling_name="perf-oncputime-sidecar"
 ```
-

From eb6090ffdd8e5b5fef90c66d78d07edef5ca1ba9 Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Mon, 20 Apr 2020 16:27:56 +0200
Subject: [PATCH 11/31] Move profiling thread start into function

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/benchmark/runner/runner.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/perf/benchmark/runner/runner.py b/perf/benchmark/runner/runner.py
index e759edceb0..fc89f73c31 100644
--- a/perf/benchmark/runner/runner.py
+++ b/perf/benchmark/runner/runner.py
@@ -301,6 +301,8 @@ def maybe_start_profiling_threads(self, labels, perf_label):
                     duration=self.duration, sidecar_pid=sidecar_pid)
                 threads.append(Thread(target=self.run_profiler, args=[
                     exec_cmd_on_pod, pod, self.custom_profiling_name, profiling_command, labels + perf_label]))
+        for thread in threads:
+            thread.start()
 
         return threads
 
@@ -405,10 +407,7 @@ def run(self, headers, conn, qps, size, duration):
             perf_label = "_srv_ingress"
 
         threads = self.maybe_start_profiling_threads(labels, perf_label)
-
-        for thread in threads:
-            thread.start()
-
+    
         if self.run_ingress:
             print('-------------- Running in ingress mode --------------')
             kubectl_exec(self.client.name, self.ingress(load_gen_cmd))

From 67308809385e790d64e6c901ef322419a3712ca7 Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Mon, 20 Apr 2020 16:38:35 +0200
Subject: [PATCH 12/31] Python linting fix

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/benchmark/runner/runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perf/benchmark/runner/runner.py b/perf/benchmark/runner/runner.py
index fc89f73c31..b6a13096ab 100644
--- a/perf/benchmark/runner/runner.py
+++ b/perf/benchmark/runner/runner.py
@@ -407,7 +407,7 @@ def run(self, headers, conn, qps, size, duration):
             perf_label = "_srv_ingress"
 
         threads = self.maybe_start_profiling_threads(labels, perf_label)
-    
+
         if self.run_ingress:
             print('-------------- Running in ingress mode --------------')
             kubectl_exec(self.client.name, self.ingress(load_gen_cmd))

From 5e5ce41316d9ec063cc02a8971e3198dbd229b40 Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Mon, 27 Apr 2020 16:28:08 +0200
Subject: [PATCH 13/31] Small fixes

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/benchmark/runner/runner.py | 45 ++++++++++++++++++---------------
 1 file changed, 24 insertions(+), 21 deletions(-)

diff --git a/perf/benchmark/runner/runner.py b/perf/benchmark/runner/runner.py
index b6a13096ab..2b4ca37ceb 100644
--- a/perf/benchmark/runner/runner.py
+++ b/perf/benchmark/runner/runner.py
@@ -500,31 +500,34 @@ def run_perf_test(args):
             min_duration=min_duration))
         exit(1)
 
-        # Create a port_forward for accessing nighthawk_service.
-        if not can_connect_to_nighthawk_service():
-            popen_cmd = "kubectl -n \"{ns}\" port-forward svc/fortioclient {port}:9999".format(
-                ns=NAMESPACE,
-                port=NIGHTHAWK_GRPC_SERVICE_PORT_FORWARD)
-            process = subprocess.Popen(shlex.split(
-                popen_cmd), stdout=subprocess.PIPE)
-            max_tries = 10
-            while max_tries > 0 and not can_connect_to_nighthawk_service():
-                time.sleep(0.5)
-                max_tries = max_tries - 1
-
-        if not can_connect_to_nighthawk_service():
-            print("Failure connecting to nighthawk_service")
-            sys.exit(-1)
-        else:
-            print("Able to connect to nighthawk_service, proceeding")
+    port_forward_process = None
+
+    # Create a port_forward for accessing nighthawk_service.
+    if not can_connect_to_nighthawk_service():
+        popen_cmd = "kubectl -n \"{ns}\" port-forward svc/fortioclient {port}:9999".format(
+            ns=NAMESPACE,
+            port=NIGHTHAWK_GRPC_SERVICE_PORT_FORWARD)
+        port_forward_process = subprocess.Popen(shlex.split(
+            popen_cmd), stdout=subprocess.PIPE)
+        max_tries = 10
+        while max_tries > 0 and not can_connect_to_nighthawk_service():
+            time.sleep(0.5)
+            max_tries = max_tries - 1
+
+    if not can_connect_to_nighthawk_service():
+        print("Failure connecting to nighthawk_service")
+        sys.exit(-1)
+    else:
+        print("Able to connect to nighthawk_service, proceeding")
+
     try:
         for conn in fortio.conn:
             for qps in fortio.qps:
                 fortio.run(headers=fortio.headers, conn=conn, qps=qps,
-                           duration=fortio.duration, size=fortio.size)
+                        duration=fortio.duration, size=fortio.size)
     finally:
-        process.kill()
-
+        if not port_forward_process is None:
+            port_forward_process.kill()
 
 def run_nighthawk(pod, remote_cmd, labels):
     # Use a local docker instance of Nighthawk to control nighthawk_service running in the pod
@@ -615,7 +618,7 @@ def get_parser():
         default=False)
     parser.add_argument(
         "--custom_profiling_command",
-        help="Run custom profiling commands on the nodes for the client and server, and produce a flamegraph based on their outputs. E.g. --custom_profiling_command=\"/usr/share/bcc/tools/profile -df 40\"",
+        help="Run custom profiling commands on the nodes for the client and server, and produce a flamegraph based on their outputs. E.g. --custom_profiling_command=\"profile-bpfcc -df {duration} -p {sidecar_pid}\"",
         default=None)
     parser.add_argument(
         "--custom_profiling_name",

From 9e9320af7dcfc7ea27f7246961d60c010a8873ab Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Mon, 27 Apr 2020 16:32:20 +0200
Subject: [PATCH 14/31] lint whitespace

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/benchmark/runner/runner.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/perf/benchmark/runner/runner.py b/perf/benchmark/runner/runner.py
index 2b4ca37ceb..91a2d4a7f9 100644
--- a/perf/benchmark/runner/runner.py
+++ b/perf/benchmark/runner/runner.py
@@ -524,11 +524,12 @@ def run_perf_test(args):
         for conn in fortio.conn:
             for qps in fortio.qps:
                 fortio.run(headers=fortio.headers, conn=conn, qps=qps,
-                        duration=fortio.duration, size=fortio.size)
+                           duration=fortio.duration, size=fortio.size)
     finally:
         if not port_forward_process is None:
             port_forward_process.kill()
 
+
 def run_nighthawk(pod, remote_cmd, labels):
     # Use a local docker instance of Nighthawk to control nighthawk_service running in the pod
     # and run transforms on the output we get.

From fa42cab612c7b8c5aa54bd5e00203232bbfa1b4d Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Wed, 29 Apr 2020 12:07:34 +0200
Subject: [PATCH 15/31] Sync up with the latest changes

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/benchmark/runner/runner.py | 68 ++++++++++++++++-----------------
 1 file changed, 32 insertions(+), 36 deletions(-)

diff --git a/perf/benchmark/runner/runner.py b/perf/benchmark/runner/runner.py
index 94b1d6ae36..6627bff8b1 100644
--- a/perf/benchmark/runner/runner.py
+++ b/perf/benchmark/runner/runner.py
@@ -73,7 +73,6 @@ def kubectl_cp(from_file, to_file, container):
         from_file=from_file,
         to_file=to_file,
         container=container)
-    print(cmd, flush=True)
     run_command_sync(cmd)
 
 
@@ -188,13 +187,13 @@ def clientsidecar(self, load_gen_cmd, sidecar_mode):
     def bothsidecar(self, load_gen_cmd, sidecar_mode):
         return load_gen_cmd + sidecar_mode + " " + self.compute_uri(self.server.labels["app"], "port")
 
-    def ingress(self, load_gen_cmd):
+    def ingress(self, load_gen_cmd, sidecar_mode):
         url = urlparse(self.run_ingress)
         # If scheme is not defined fallback to http
         if url.scheme == "":
             url = urlparse("http://{svc}".format(svc=self.run_ingress))
 
-        return load_gen_cmd + "_ingress {url}/echo?size={size}".format(
+        return load_gen_cmd + sidecar_mode + " {url}/echo?size={size}".format(
             url=url.geturl(), size=self.size)
 
     def execute_sidecar_mode(self, sidecar_mode, load_gen_type, load_gen_cmd, sidecar_mode_func, labels, perf_label_suffix):
@@ -257,9 +256,11 @@ def run_profiler(self, exec_cmd, podname, filename_prefix, profiling_command, la
             exec_cmd=exec_cmd,
             filename=filename
         )
+        ok = True
         # Run the profile collection tool, and wait for it to finish.
         process = subprocess.Popen(shlex.split(profiler_cmd))
-        process.wait()
+        ok = ok and process.wait() == 0
+
         # Next we feed the profiling data to the flamegraphing script.
         flamegraph_cmd = "{exec_cmd} \"./FlameGraph/flamegraph.pl --title='{profiling_command} Flame Graph'  < {filename}.profile > {filename}.svg\"".format(
             exec_cmd=exec_cmd,
@@ -267,10 +268,15 @@ def run_profiler(self, exec_cmd, podname, filename_prefix, profiling_command, la
             filename=filename
         )
         process = subprocess.Popen(shlex.split(flamegraph_cmd))
-        process.wait()
+        ok = ok and process.wait() == 0
+
         # Lastly copy the resulting flamegraph out of the container
         kubectl_cp(podname + ":{filename}.svg".format(filename=filename),
-                   "flame/flameoutput/{filename}.svg".format(filename=filename), "perf")
+                "flame/flameoutput/{filename}.svg".format(filename=filename), "perf")
+
+        if ok == False:
+            print("warning - profiling and or flamegraph generation may have failed")
+
 
     def maybe_start_profiling_threads(self, labels, perf_label):
         threads = []
@@ -291,7 +297,6 @@ def maybe_start_profiling_threads(self, labels, perf_label):
                 while ne_pid == "" and attempts < 60:
                     sleep(1)
                     ne_pid = getoutput(ready_cmd).strip()
-                    print(".")
                     attempts = attempts + 1
 
                 # Find side car process id's in case the profiling command needs it.
@@ -349,6 +354,18 @@ def generate_nighthawk_cmd(self, cpus, conn, qps, duration, labels):
 
         return nighthawk_cmd
 
+    def create_execution_delegate(self, perf_label, sidecar_mode, sidecar_mode_func, load_gen_cmd, labels):
+        def execution_delegate():
+            threads = self.maybe_start_profiling_threads(labels, perf_label)
+            self.execute_sidecar_mode(sidecar_mode, self.load_gen_type, load_gen_cmd, sidecar_mode_func, labels, perf_label)
+            if len(threads) > 0:
+                if self.custom_profiling_command:
+                    for thread in threads:
+                        thread.join()
+                print("background profiler thread finished - flamegraphs are available in flame/flameoutput")
+        return execution_delegate
+
+
     def run(self, headers, conn, qps, size, duration):
         labels = self.generate_test_labels(conn, qps, size)
 
@@ -378,46 +395,25 @@ def run(self, headers, conn, qps, size, duration):
             workers = 1
             load_gen_cmd = self.generate_nighthawk_cmd(workers, conn, qps, duration, labels)
 
-        perf_label = ""
-        sidecar_mode = ""
-        sidecar_mode_func = None
+        executions = []
 
         if self.run_baseline:
-            sidecar_mode = "baseline"
-            sidecar_mode_func = self.nosidecar
-            self.execute_sidecar_mode(sidecar_mode, self.load_gen_type, load_gen_cmd, sidecar_mode_func, labels, perf_label)
+            executions.append(self.create_execution_delegate("", "baseline", self.nosidecar, load_gen_cmd, labels))
 
         if self.run_serversidecar:
-            perf_label = "_srv_serveronly"
-            sidecar_mode = "server sidecar"
-            sidecar_mode_func = self.serversidecar
-            self.execute_sidecar_mode(sidecar_mode, self.load_gen_type, load_gen_cmd, sidecar_mode_func, labels, perf_label)
+            executions.append(self.create_execution_delegate("_srv_serveronly", "server sidecar", self.serversidecar, load_gen_cmd, labels))
 
         if self.run_clientsidecar:
-            perf_label = "_srv_clientonly"
-            sidecar_mode = "client sidecar"
-            sidecar_mode_func = self.clientsidecar
-            self.execute_sidecar_mode(sidecar_mode, self.load_gen_type, load_gen_cmd, sidecar_mode_func, labels, perf_label)
+            executions.append(self.create_execution_delegate("_srv_clientonly", "client sidecar", self.clientsidecar, load_gen_cmd, labels))
 
         if self.run_bothsidecar:
-            perf_label = "_srv_bothsidecars"
-            sidecar_mode = "both sidecar"
-            sidecar_mode_func = self.bothsidecar
-            self.execute_sidecar_mode(sidecar_mode, self.load_gen_type, load_gen_cmd, sidecar_mode_func, labels, perf_label)
+            executions.append(self.create_execution_delegate("_srv_bothsidecars", "both sidecar", self.bothsidecar, load_gen_cmd, labels))
 
         if self.run_ingress:
-            perf_label = "_srv_ingress"
-            print('-------------- Running in ingress mode --------------')
-            kubectl_exec(self.client.name, self.ingress(load_gen_cmd))
-
-        threads = self.maybe_start_profiling_threads(labels, perf_label)
-
-        if len(threads) > 0:
-            if self.custom_profiling_command:
-                for thread in threads:
-                    thread.join()
-            print("background profiler thread finished - flamegraphs are available in flame/flameoutput")
+            executions.append(self.create_execution_delegate("_srv_ingress", "ingress", self.ingress, load_gen_cmd, labels))
 
+        for execution in executions:
+            execution()
 
 def validate_job_config(job_config):
     required_fields = {"conn": list, "qps": list, "duration": int}

From 680b10cf41f1a23c58d1c61e006a7ad95c5fd0ef Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Wed, 29 Apr 2020 12:27:21 +0200
Subject: [PATCH 16/31] linting fixes + fix in fortio.yaml

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/benchmark/runner/runner.py      | 45 +++++++++++++++++-----------
 perf/benchmark/templates/fortio.yaml |  3 +-
 2 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/perf/benchmark/runner/runner.py b/perf/benchmark/runner/runner.py
index 6627bff8b1..8cb2185b5f 100644
--- a/perf/benchmark/runner/runner.py
+++ b/perf/benchmark/runner/runner.py
@@ -272,12 +272,11 @@ def run_profiler(self, exec_cmd, podname, filename_prefix, profiling_command, la
 
         # Lastly copy the resulting flamegraph out of the container
         kubectl_cp(podname + ":{filename}.svg".format(filename=filename),
-                "flame/flameoutput/{filename}.svg".format(filename=filename), "perf")
+                   "flame/flameoutput/{filename}.svg".format(filename=filename), "perf")
 
         if ok == False:
             print("warning - profiling and or flamegraph generation may have failed")
 
-
     def maybe_start_profiling_threads(self, labels, perf_label):
         threads = []
 
@@ -291,7 +290,8 @@ def maybe_start_profiling_threads(self, labels, perf_label):
 
                 # Wait for node_exporter to run, which indicates the profiling initialization container has finished initializing.
                 # once the init probe is supported, move this to a http probe instead in fortio.yaml
-                ready_cmd = "{exec_cmd} \"pgrep 'node_exporter'\"".format(exec_cmd=exec_cmd_on_pod)
+                ready_cmd = "{exec_cmd} \"pgrep 'node_exporter'\"".format(
+                    exec_cmd=exec_cmd_on_pod)
                 ne_pid = getoutput(ready_cmd).strip()
                 attempts = 1
                 while ne_pid == "" and attempts < 60:
@@ -300,8 +300,10 @@ def maybe_start_profiling_threads(self, labels, perf_label):
                     attempts = attempts + 1
 
                 # Find side car process id's in case the profiling command needs it.
-                sidecar_ppid = getoutput("{exec_cmd} \"pgrep -f 'pilot-agent proxy sidecar'\"".format(exec_cmd=exec_cmd_on_pod)).strip()
-                sidecar_pid = getoutput("{exec_cmd} \"pgrep -P {sidecar_ppid}\"".format(exec_cmd=exec_cmd_on_pod, sidecar_ppid=sidecar_ppid)).strip()
+                sidecar_ppid = getoutput(
+                    "{exec_cmd} \"pgrep -f 'pilot-agent proxy sidecar'\"".format(exec_cmd=exec_cmd_on_pod)).strip()
+                sidecar_pid = getoutput("{exec_cmd} \"pgrep -P {sidecar_ppid}\"".format(
+                    exec_cmd=exec_cmd_on_pod, sidecar_ppid=sidecar_ppid)).strip()
                 profiling_command = self.custom_profiling_command.format(
                     duration=self.duration, sidecar_pid=sidecar_pid)
                 threads.append(Thread(target=self.run_profiler, args=[
@@ -357,15 +359,16 @@ def generate_nighthawk_cmd(self, cpus, conn, qps, duration, labels):
     def create_execution_delegate(self, perf_label, sidecar_mode, sidecar_mode_func, load_gen_cmd, labels):
         def execution_delegate():
             threads = self.maybe_start_profiling_threads(labels, perf_label)
-            self.execute_sidecar_mode(sidecar_mode, self.load_gen_type, load_gen_cmd, sidecar_mode_func, labels, perf_label)
+            self.execute_sidecar_mode(
+                sidecar_mode, self.load_gen_type, load_gen_cmd, sidecar_mode_func, labels, perf_label)
             if len(threads) > 0:
                 if self.custom_profiling_command:
                     for thread in threads:
                         thread.join()
-                print("background profiler thread finished - flamegraphs are available in flame/flameoutput")
+                print(
+                    "background profiler thread finished - flamegraphs are available in flame/flameoutput")
         return execution_delegate
 
-
     def run(self, headers, conn, qps, size, duration):
         labels = self.generate_test_labels(conn, qps, size)
 
@@ -375,12 +378,14 @@ def run(self, headers, conn, qps, size, duration):
 
         cacert_arg = ""
         if self.cacert is not None:
-            cacert_arg = "-cacert {cacert_path}".format(cacert_path=self.cacert)
+            cacert_arg = "-cacert {cacert_path}".format(
+                cacert_path=self.cacert)
 
         headers_cmd = self.generate_headers_cmd(headers)
 
         if self.load_gen_type == "fortio":
-            load_gen_cmd = self.generate_fortio_cmd(headers_cmd, conn, qps, duration, grpc, cacert_arg, labels)
+            load_gen_cmd = self.generate_fortio_cmd(
+                headers_cmd, conn, qps, duration, grpc, cacert_arg, labels)
         elif self.load_gen_type == "nighthawk":
             # TODO(oschaaf): Figure out how to best determine the right concurrency for Nighthawk.
             # Results seem to get very noisy as the number of workers increases, are the clients
@@ -393,28 +398,35 @@ def run(self, headers, conn, qps, size, duration):
             # See the comment above, we restrict execution to a single nighthawk worker for
             # now to avoid noise.
             workers = 1
-            load_gen_cmd = self.generate_nighthawk_cmd(workers, conn, qps, duration, labels)
+            load_gen_cmd = self.generate_nighthawk_cmd(
+                workers, conn, qps, duration, labels)
 
         executions = []
 
         if self.run_baseline:
-            executions.append(self.create_execution_delegate("", "baseline", self.nosidecar, load_gen_cmd, labels))
+            executions.append(self.create_execution_delegate(
+                "", "baseline", self.nosidecar, load_gen_cmd, labels))
 
         if self.run_serversidecar:
-            executions.append(self.create_execution_delegate("_srv_serveronly", "server sidecar", self.serversidecar, load_gen_cmd, labels))
+            executions.append(self.create_execution_delegate(
+                "_srv_serveronly", "server sidecar", self.serversidecar, load_gen_cmd, labels))
 
         if self.run_clientsidecar:
-            executions.append(self.create_execution_delegate("_srv_clientonly", "client sidecar", self.clientsidecar, load_gen_cmd, labels))
+            executions.append(self.create_execution_delegate(
+                "_srv_clientonly", "client sidecar", self.clientsidecar, load_gen_cmd, labels))
 
         if self.run_bothsidecar:
-            executions.append(self.create_execution_delegate("_srv_bothsidecars", "both sidecar", self.bothsidecar, load_gen_cmd, labels))
+            executions.append(self.create_execution_delegate(
+                "_srv_bothsidecars", "both sidecar", self.bothsidecar, load_gen_cmd, labels))
 
         if self.run_ingress:
-            executions.append(self.create_execution_delegate("_srv_ingress", "ingress", self.ingress, load_gen_cmd, labels))
+            executions.append(self.create_execution_delegate(
+                "_srv_ingress", "ingress", self.ingress, load_gen_cmd, labels))
 
         for execution in executions:
             execution()
 
+
 def validate_job_config(job_config):
     required_fields = {"conn": list, "qps": list, "duration": int}
     for k in required_fields:
@@ -515,7 +527,6 @@ def run_perf_test(args):
         sys.exit(-1)
     else:
         print("Able to connect to nighthawk_service, proceeding")
-
     try:
         for conn in fortio.conn:
             for qps in fortio.qps:
diff --git a/perf/benchmark/templates/fortio.yaml b/perf/benchmark/templates/fortio.yaml
index e720fb6196..80fea2047b 100644
--- a/perf/benchmark/templates/fortio.yaml
+++ b/perf/benchmark/templates/fortio.yaml
@@ -369,8 +369,7 @@ spec:
         config.linkerd.io/skip-inbound-ports: "8077"
 {{- end }}
         # exclude inbound ports of the uncaptured container
-        traffic.sidecar.istio.io/excludeInboundPorts: "8076,8077,8078,{{ $.Values.nodeExporterPort }}"
-        traffic.sidecar.istio.io/excludeInboundPorts: "8076,8077,8078,9999"
+        traffic.sidecar.istio.io/excludeInboundPorts: "8076,8077,8078,9999,{{ $.Values.nodeExporterPort }}"
         traffic.sidecar.istio.io/excludeOutboundPorts: "80,8076,8077,8078"
         sidecar.istio.io/proxyCPU: {{ $.Values.proxy.cpu }}
         sidecar.istio.io/proxyMemory: {{ $.Values.proxy.memory }}

From 30342ff7e264059f15fde69eff0ad9a654d1eb32 Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Wed, 29 Apr 2020 13:29:56 +0200
Subject: [PATCH 17/31] Changes to minimize the diff

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/benchmark/runner/runner.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/perf/benchmark/runner/runner.py b/perf/benchmark/runner/runner.py
index 8cb2185b5f..20122f2902 100644
--- a/perf/benchmark/runner/runner.py
+++ b/perf/benchmark/runner/runner.py
@@ -378,14 +378,12 @@ def run(self, headers, conn, qps, size, duration):
 
         cacert_arg = ""
         if self.cacert is not None:
-            cacert_arg = "-cacert {cacert_path}".format(
-                cacert_path=self.cacert)
+            cacert_arg = "-cacert {cacert_path}".format(cacert_path=self.cacert)
 
         headers_cmd = self.generate_headers_cmd(headers)
 
         if self.load_gen_type == "fortio":
-            load_gen_cmd = self.generate_fortio_cmd(
-                headers_cmd, conn, qps, duration, grpc, cacert_arg, labels)
+            load_gen_cmd = self.generate_fortio_cmd(headers_cmd, conn, qps, duration, grpc, cacert_arg, labels)
         elif self.load_gen_type == "nighthawk":
             # TODO(oschaaf): Figure out how to best determine the right concurrency for Nighthawk.
             # Results seem to get very noisy as the number of workers increases, are the clients

From 83a8e40597deaffe1f8fb1e49e39876e76658cac Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Sun, 3 May 2020 13:54:02 +0200
Subject: [PATCH 18/31] Tweak flamegraph file naming

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/benchmark/runner/runner.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/perf/benchmark/runner/runner.py b/perf/benchmark/runner/runner.py
index e44ac0f9b0..0b47d9abe9 100644
--- a/perf/benchmark/runner/runner.py
+++ b/perf/benchmark/runner/runner.py
@@ -36,7 +36,7 @@
 NIGHTHAWK_GRPC_SERVICE_PORT_FORWARD = 9999
 POD = collections.namedtuple('Pod', ['name', 'namespace', 'ip', 'labels'])
 NIGHTHAWK_DOCKER_IMAGE = "envoyproxy/nighthawk-dev:59683b759eb8f8bd8cce282795c08f9e2b3313d4"
-
+SCRIPT_START = time.strftime("%Y-%m-%d-%H%M%S")
 
 def pod_info(filterstr="", namespace=NAMESPACE, multi_ok=True):
     cmd = "kubectl -n {namespace} get pod {filterstr}  -o json".format(
@@ -249,9 +249,9 @@ def generate_fortio_cmd(self, headers_cmd, conn, qps, duration, grpc, cacert_arg
 
         return fortio_cmd
 
-    def run_profiler(self, exec_cmd, podname, filename_prefix, profiling_command, labels):
-        filename = "{filename_prefix}-{labels}-{podname}".format(
-            filename_prefix=filename_prefix, labels=labels, podname=podname)
+    def run_profiler(self, exec_cmd, podname, profile_name, profiling_command, labels):
+        filename = "{datetime}_{labels}-{profile_name}-{podname}".format(
+            datetime=SCRIPT_START, profile_name=profile_name, labels=labels, podname=podname)
         profiler_cmd = "{exec_cmd} \"{profiling_command} > {filename}.profile\"".format(
             profiling_command=profiling_command,
             exec_cmd=exec_cmd,
@@ -271,7 +271,7 @@ def run_profiler(self, exec_cmd, podname, filename_prefix, profiling_command, la
         process = subprocess.Popen(shlex.split(flamegraph_cmd))
         ok = ok and process.wait() == 0
 
-        # Lastly copy the resulting flamegraph out of the container
+        # Lastly copy the resulting flamegraph out of the container into flame/flameoutput/
         kubectl_cp(podname + ":{filename}.svg".format(filename=filename),
                    "flame/flameoutput/{filename}.svg".format(filename=filename), "perf")
 
@@ -568,7 +568,7 @@ def run_nighthawk(pod, remote_cmd, labels):
             # - initiation time to completion (spanning the complete lifetime of a request/reply, including queue/connect time)
             # - per worker output may sometimes help interpret plots that don't have a nice knee-shaped shape.
             kubectl_cp("{dest}.fortio.json".format(
-                dest=dest), "{pod}:/var/lib/fortio/{datetime}_nighthawk_{labels}.json".format(pod=pod, labels=labels, datetime=time.strftime("%Y-%m-%d-%H%M%S")), "shell")
+                dest=dest), "{pod}:/var/lib/fortio/{datetime}_{labels}.json".format(pod=pod, labels=labels, datetime=SCRIPT_START), "shell")
     else:
         print("nighthawk remote execution error: %s" % exit_code)
         if output:

From 94f61206f8a3038b218912de5b24a9a55258bb52 Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Sun, 3 May 2020 14:11:24 +0200
Subject: [PATCH 19/31] Fix NH-mode --ingress option

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/benchmark/runner/runner.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/perf/benchmark/runner/runner.py b/perf/benchmark/runner/runner.py
index 0b47d9abe9..50c0e45666 100644
--- a/perf/benchmark/runner/runner.py
+++ b/perf/benchmark/runner/runner.py
@@ -193,9 +193,12 @@ def ingress(self, load_gen_cmd, sidecar_mode):
         # If scheme is not defined fallback to http
         if url.scheme == "":
             url = urlparse("http://{svc}".format(svc=self.run_ingress))
-
-        return load_gen_cmd + sidecar_mode + " {url}/echo?size={size}".format(
-            url=url.geturl(), size=self.size)
+        if self.load_gen_type == "fortio":
+            return load_gen_cmd + sidecar_mode + " {url}/echo?size={size}".format(url=url.geturl(), size=self.size)
+        elif self.load_gen_type == "nighthawk":
+            return load_gen_cmd + sidecar_mode + " {url}/".format(url=url.geturl())
+        else:
+            sys.exit("invalid load generator %s, must be fortio or nighthawk", self.load_gen_type)
 
     def execute_sidecar_mode(self, sidecar_mode, load_gen_type, load_gen_cmd, sidecar_mode_func, labels, perf_label_suffix):
         print('-------------- Running in {sidecar_mode} mode --------------'.format(sidecar_mode=sidecar_mode))

From 7f28c4c21861b2b74a691b1f1651b1b1ba35f1fd Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Mon, 4 May 2020 10:02:03 +0200
Subject: [PATCH 20/31] Lint fix

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/benchmark/runner/runner.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/perf/benchmark/runner/runner.py b/perf/benchmark/runner/runner.py
index 50c0e45666..e7e21f47da 100644
--- a/perf/benchmark/runner/runner.py
+++ b/perf/benchmark/runner/runner.py
@@ -38,6 +38,7 @@
 NIGHTHAWK_DOCKER_IMAGE = "envoyproxy/nighthawk-dev:59683b759eb8f8bd8cce282795c08f9e2b3313d4"
 SCRIPT_START = time.strftime("%Y-%m-%d-%H%M%S")
 
+
 def pod_info(filterstr="", namespace=NAMESPACE, multi_ok=True):
     cmd = "kubectl -n {namespace} get pod {filterstr}  -o json".format(
         namespace=namespace, filterstr=filterstr)

From e2746658462eb4d7dcb49589d1ed893a6fd3d125 Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Mon, 4 May 2020 21:02:53 +0200
Subject: [PATCH 21/31] Small enhancements/fixes

- HTML-encode the title we hand to flamegraph.pl
- Fix a typo in README.md
- Avoid writing to stdout/stderr from threaded code

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/benchmark/README.md        | 2 +-
 perf/benchmark/runner/runner.py | 8 +++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/perf/benchmark/README.md b/perf/benchmark/README.md
index ee2b4dd5af..f9f10dbf13 100644
--- a/perf/benchmark/README.md
+++ b/perf/benchmark/README.md
@@ -166,7 +166,7 @@ weakening security. Resulting flamegraphs will be written to `flame/flameoutput`
 - sample sidecar profiling commands for `--custom_profiling_command`:
   - "profile-bpfcc -df {duration} -p {sidecar_pid}" sidecar on-cpu profile
   - "offcputime-bpfcc -df {duration} -p {sidecar_pid}" sidecar off-cpu profile
-  - "offwaketime-bpfcc -df {duration} -p {sidecar_pid}" sidecar offwaktime profile
+  - "offwaketime-bpfcc -df {duration} -p {sidecar_pid}" sidecar offwaketime profile
   - "wakeuptime-bpfcc -f -p {sidecar_pid} {duration}" sidecar wakeuptime profile
   - "perf record -F 99 -a -g -p {sidecar_pid} -- sleep {duration} && perf script | ~/FlameGraph/stackcollapse-perf.pl | c++filt -n" on-cpu perf-generated profile
   - "stackcount-bpfcc c:*alloc* -df -D {duration} -p {sidecar_pid}" profile calls to `*alloc*`
diff --git a/perf/benchmark/runner/runner.py b/perf/benchmark/runner/runner.py
index e7e21f47da..b1813240cd 100644
--- a/perf/benchmark/runner/runner.py
+++ b/perf/benchmark/runner/runner.py
@@ -16,6 +16,7 @@
 
 import collections
 import os
+import html
 import json
 import socket
 import argparse
@@ -263,16 +264,17 @@ def run_profiler(self, exec_cmd, podname, profile_name, profiling_command, label
         )
         ok = True
         # Run the profile collection tool, and wait for it to finish.
-        process = subprocess.Popen(shlex.split(profiler_cmd))
+        process = subprocess.Popen(shlex.split(profiler_cmd), stdin=subprocess.PIPE, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)
         ok = ok and process.wait() == 0
 
         # Next we feed the profiling data to the flamegraphing script.
+        html_escaped_command = html.escape(profiling_command)
         flamegraph_cmd = "{exec_cmd} \"./FlameGraph/flamegraph.pl --title='{profiling_command} Flame Graph'  < {filename}.profile > {filename}.svg\"".format(
             exec_cmd=exec_cmd,
-            profiling_command=profiling_command,
+            profiling_command=html_escaped_command,
             filename=filename
         )
-        process = subprocess.Popen(shlex.split(flamegraph_cmd))
+        process = subprocess.Popen(shlex.split(flamegraph_cmd), stdin=subprocess.PIPE, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)
         ok = ok and process.wait() == 0
 
         # Lastly copy the resulting flamegraph out of the container into flame/flameoutput/

From b96687e7df0fed3c1edccf17cc92fa3b8877a2e8 Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Tue, 5 May 2020 11:54:55 +0200
Subject: [PATCH 22/31] Fix hang, improve error handling. Doc enhancements.

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/benchmark/README.md        |  4 +--
 perf/benchmark/flame/README.md  |  3 ++
 perf/benchmark/runner/runner.py | 50 +++++++++++++++++++--------------
 3 files changed, 34 insertions(+), 23 deletions(-)

diff --git a/perf/benchmark/README.md b/perf/benchmark/README.md
index f9f10dbf13..ee5a0bd1b4 100644
--- a/perf/benchmark/README.md
+++ b/perf/benchmark/README.md
@@ -168,8 +168,8 @@ weakening security. Resulting flamegraphs will be written to `flame/flameoutput`
   - "offcputime-bpfcc -df {duration} -p {sidecar_pid}" sidecar off-cpu profile
   - "offwaketime-bpfcc -df {duration} -p {sidecar_pid}" sidecar offwaketime profile
   - "wakeuptime-bpfcc -f -p {sidecar_pid} {duration}" sidecar wakeuptime profile
-  - "perf record -F 99 -a -g -p {sidecar_pid} -- sleep {duration} && perf script | ~/FlameGraph/stackcollapse-perf.pl | c++filt -n" on-cpu perf-generated profile
-  - "stackcount-bpfcc c:*alloc* -df -D {duration} -p {sidecar_pid}" profile calls to `*alloc*`
+  - "perf record -F 99 -g -p {sidecar_pid} -- sleep {duration} && perf script | ~/FlameGraph/stackcollapse-perf.pl | c++filt -n" on-cpu perf-generated profile
+  - "stackcount-bpfcc -U *alloc* -df -D {duration} -p {sidecar_pid}" profile calls to `*alloc*`
 - It's also possible to run machine-wide profiling, for example:
   - "profile-bpfcc -df {duration}" for obtaining a machine-wide on-cpu flamegraph.
   - See [here](http://www.brendangregg.com/FlameGraphs/) for more examples and information.
diff --git a/perf/benchmark/flame/README.md b/perf/benchmark/flame/README.md
index 55f54bdbdb..4a85b18fcd 100644
--- a/perf/benchmark/flame/README.md
+++ b/perf/benchmark/flame/README.md
@@ -37,5 +37,8 @@ runner/runner.py --conn 20 --qps 10000 --duration 100 --serversidecar --custom_p
 
 runner/runner.py --conn 20 --qps 10000 --duration 100 --serversidecar --custom_profiling_command="wakeuptime-bpfcc -f -p {sidecar_pid} {duration}" --custom_profiling_name="bcc-wakeuptime-sidecar"
 
+runner/runner.py --conn 20 --qps 10000 --duration 100 --serversidecar --custom_profiling_command="stackcount-bpfcc -p {sidecar_pid} *alloc* -fD {duration}" --custom_profiling_name="bcc-stackcount-alloc"
+
 runner/runner.py --conn 20 --qps 10000 --duration 100 --serversidecar --custom_profiling_command="perf record -F 99 -g -p {sidecar_pid} -- sleep {duration} && perf script | ~/FlameGraph/stackcollapse-perf.pl | c++filt -n" --custom_profiling_name="perf-oncputime-sidecar"
+
 ```
diff --git a/perf/benchmark/runner/runner.py b/perf/benchmark/runner/runner.py
index b1813240cd..5e3e16c2c3 100644
--- a/perf/benchmark/runner/runner.py
+++ b/perf/benchmark/runner/runner.py
@@ -23,6 +23,7 @@
 import subprocess
 import shlex
 import uuid
+import stat
 import sys
 import tempfile
 import time
@@ -257,32 +258,41 @@ def generate_fortio_cmd(self, headers_cmd, conn, qps, duration, grpc, cacert_arg
     def run_profiler(self, exec_cmd, podname, profile_name, profiling_command, labels):
         filename = "{datetime}_{labels}-{profile_name}-{podname}".format(
             datetime=SCRIPT_START, profile_name=profile_name, labels=labels, podname=podname)
-        profiler_cmd = "{exec_cmd} \"{profiling_command} > {filename}.profile\"".format(
+        profiler_cmd = "{profiling_command} > {filename}.profile".format(
             profiling_command=profiling_command,
-            exec_cmd=exec_cmd,
             filename=filename
         )
-        ok = True
-        # Run the profile collection tool, and wait for it to finish.
-        process = subprocess.Popen(shlex.split(profiler_cmd), stdin=subprocess.PIPE, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)
-        ok = ok and process.wait() == 0
-
-        # Next we feed the profiling data to the flamegraphing script.
         html_escaped_command = html.escape(profiling_command)
-        flamegraph_cmd = "{exec_cmd} \"./FlameGraph/flamegraph.pl --title='{profiling_command} Flame Graph'  < {filename}.profile > {filename}.svg\"".format(
-            exec_cmd=exec_cmd,
+        flamegraph_cmd = "./FlameGraph/flamegraph.pl --title='{profiling_command} Flame Graph'  < {filename}.profile > {filename}.svg".format(
             profiling_command=html_escaped_command,
             filename=filename
         )
-        process = subprocess.Popen(shlex.split(flamegraph_cmd), stdin=subprocess.PIPE, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)
-        ok = ok and process.wait() == 0
-
-        # Lastly copy the resulting flamegraph out of the container into flame/flameoutput/
-        kubectl_cp(podname + ":{filename}.svg".format(filename=filename),
-                   "flame/flameoutput/{filename}.svg".format(filename=filename), "perf")
 
-        if ok == False:
-            print("warning - profiling and or flamegraph generation may have failed")
+        # We build a small bash script which will run the profiler & produce a flame graph
+        # We then copy this script into the container, and run it
+        with tempfile.NamedTemporaryFile(dir='/tmp', delete=True) as tmpfile:
+            dest = tmpfile.name + ".sh"
+            with open(dest, 'w') as f:
+                s = """#!/bin/bash
+set -euo pipefail
+set +x
+({profiler_cmd}) >& /tmp/{filename}_profiler_cmd.log
+({flamegraph_cmd}) >& /tmp/{filename}_flamegraph_cmd.log
+                """.format(profiler_cmd=profiler_cmd, flamegraph_cmd=flamegraph_cmd, filename=filename)
+                f.write(s)
+            st = os.stat(dest)
+            os.chmod(dest, st.st_mode | stat.S_IEXEC)
+            kubectl_cp(dest, podname + ":" + dest, "perf")
+
+        process = subprocess.Popen(shlex.split("{exec_cmd} \"{dest}\"".format(exec_cmd=exec_cmd, dest=dest)))
+
+        if process.wait() == 0:
+            # Copy the resulting flamegraph out of the container into flame/flameoutput/
+            kubectl_cp(podname + ":{filename}.svg".format(filename=filename),
+                    "flame/flameoutput/{filename}.svg".format(filename=filename), "perf")
+            print("Wrote flame/flameoutput/{filename}.svg".format(filename=filename))
+        else:
+            print("WARNING: Did not obtain a flamegraph. See /tmp/{filename}_*.log".format(filename=filename))
 
     def maybe_start_profiling_threads(self, labels, perf_label):
         threads = []
@@ -290,7 +300,7 @@ def maybe_start_profiling_threads(self, labels, perf_label):
         if self.custom_profiling_command:
             # We run any custom profiling command on both pods, as one runs on each node we're interested in.
             for pod in [self.client.name, self.server.name]:
-                exec_cmd_on_pod = "kubectl exec -n {namespace} {podname} -c perf -it -- bash -c ".format(
+                exec_cmd_on_pod = "kubectl exec -n {namespace} {podname} -c perf -- bash -c ".format(
                     namespace=os.environ.get("NAMESPACE", "twopods"),
                     podname=pod
                 )
@@ -373,8 +383,6 @@ def execution_delegate():
                 if self.custom_profiling_command:
                     for thread in threads:
                         thread.join()
-                print(
-                    "background profiler thread finished - flamegraphs are available in flame/flameoutput")
         return execution_delegate
 
     def run(self, headers, conn, qps, size, duration):

From aae1f19c2313c63b2b510912b4e102b833173e76 Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Tue, 5 May 2020 15:32:23 +0200
Subject: [PATCH 23/31] Lint change in runner.py

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/benchmark/runner/runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perf/benchmark/runner/runner.py b/perf/benchmark/runner/runner.py
index 2316e52d52..3a9312d5ff 100644
--- a/perf/benchmark/runner/runner.py
+++ b/perf/benchmark/runner/runner.py
@@ -289,7 +289,7 @@ def run_profiler(self, exec_cmd, podname, profile_name, profiling_command, label
         if process.wait() == 0:
             # Copy the resulting flamegraph out of the container into flame/flameoutput/
             kubectl_cp(podname + ":{filename}.svg".format(filename=filename),
-                    "flame/flameoutput/{filename}.svg".format(filename=filename), "perf")
+                       "flame/flameoutput/{filename}.svg".format(filename=filename), "perf")
             print("Wrote flame/flameoutput/{filename}.svg".format(filename=filename))
         else:
             print("WARNING: Did not obtain a flamegraph. See /tmp/{filename}_*.log".format(filename=filename))

From fdfe91073133df810829876dd8a8865d9df7326e Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Tue, 5 May 2020 18:20:46 +0200
Subject: [PATCH 24/31] Flag for allowing short runs. Doc pagefault
 flamegraphing.

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/benchmark/README.md        |  1 +
 perf/benchmark/flame/README.md  |  2 ++
 perf/benchmark/runner/runner.py | 12 ++++++++++--
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/perf/benchmark/README.md b/perf/benchmark/README.md
index ee5a0bd1b4..69643173fb 100644
--- a/perf/benchmark/README.md
+++ b/perf/benchmark/README.md
@@ -170,6 +170,7 @@ weakening security. Resulting flamegraphs will be written to `flame/flameoutput`
   - "wakeuptime-bpfcc -f -p {sidecar_pid} {duration}" sidecar wakeuptime profile
   - "perf record -F 99 -g -p {sidecar_pid} -- sleep {duration} && perf script | ~/FlameGraph/stackcollapse-perf.pl | c++filt -n" on-cpu perf-generated profile
   - "stackcount-bpfcc -U *alloc* -df -D {duration} -p {sidecar_pid}" profile calls to `*alloc*`
+  - "perf record -e page-faults -g -p {sidecar_pid} -F 99 -- sleep {duration} && perf script | ~/FlameGraph/stackcollapse-perf.pl | c++filt -n" page faults
 - It's also possible to run machine-wide profiling, for example:
   - "profile-bpfcc -df {duration}" for obtaining a machine-wide on-cpu flamegraph.
   - See [here](http://www.brendangregg.com/FlameGraphs/) for more examples and information.
diff --git a/perf/benchmark/flame/README.md b/perf/benchmark/flame/README.md
index 4a85b18fcd..ada02d0aaa 100644
--- a/perf/benchmark/flame/README.md
+++ b/perf/benchmark/flame/README.md
@@ -41,4 +41,6 @@ runner/runner.py --conn 20 --qps 10000 --duration 100 --serversidecar --custom_p
 
 runner/runner.py --conn 20 --qps 10000 --duration 100 --serversidecar --custom_profiling_command="perf record -F 99 -g -p {sidecar_pid} -- sleep {duration} && perf script | ~/FlameGraph/stackcollapse-perf.pl | c++filt -n" --custom_profiling_name="perf-oncputime-sidecar"
 
+runner/runner.py --conn 20 --qps 10000 --duration 100 --serversidecar --custom_profiling_command="perf record -e page-faults -g -p {sidecar_pid} -F 99 -- sleep {duration} && perf script | ~/FlameGraph/stackcollapse-perf.pl | c++filt -n" --custom_profiling_name="perf-pagefaults-sidecar"
+
 ```
diff --git a/perf/benchmark/runner/runner.py b/perf/benchmark/runner/runner.py
index 3a9312d5ff..0be6c48e66 100644
--- a/perf/benchmark/runner/runner.py
+++ b/perf/benchmark/runner/runner.py
@@ -123,7 +123,8 @@ def __init__(
             cacert=None,
             load_gen_type="fortio",
             custom_profiling_command=None,
-            custom_profiling_name="default-profile"):
+            custom_profiling_name="default-profile",
+            devmode=False):
         self.run_id = str(uuid.uuid4()).partition('-')[0]
         self.headers = headers
         self.conn = conn
@@ -150,6 +151,7 @@ def __init__(
         self.run_ingress = ingress
         self.cacert = cacert
         self.load_gen_type = load_gen_type
+        self.devmode = devmode
 
         if self.perf_record != False:
             if not self.custom_profiling_command is None:
@@ -520,7 +522,8 @@ def run_perf_test(args):
     if fortio.duration <= min_duration:
         print("Duration must be greater than {min_duration}".format(
             min_duration=min_duration))
-        exit(1)
+        if not args.devmode:
+            exit(1)
 
     port_forward_process = None
 
@@ -673,6 +676,11 @@ def get_parser():
         help="fortio or nighthawk",
         default="fortio",
     )
+    parser.add_argument(
+        "--devmode",
+        help="In development mode, very short duration argument values are allowed.",
+        default=False,
+    )
 
     define_bool(parser, "baseline", "run baseline for all", False)
     define_bool(parser, "serversidecar",

From 47c63f5c1469655e1064632b53397df7a1dccb85 Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Fri, 8 May 2020 23:42:04 +0200
Subject: [PATCH 25/31] runner.py: add --envoy_profiler option

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/benchmark/README.md        |  4 ++
 perf/benchmark/flame/README.md  | 42 +++++++++++++++-----
 perf/benchmark/runner/runner.py | 69 +++++++++++++++++++++++++++++----
 3 files changed, 97 insertions(+), 18 deletions(-)

diff --git a/perf/benchmark/README.md b/perf/benchmark/README.md
index 69643173fb..30ff4139c1 100644
--- a/perf/benchmark/README.md
+++ b/perf/benchmark/README.md
@@ -155,6 +155,10 @@ optional arguments:
                           sidecar process.
   --custom_profiling_name
                         filename prefix for the result of any --custom_profiling_command
+  --envoy_profiler [heapprofiler|cpuprofiler]
+                       yields visualizations using pprof over profiles collected via the built-in profiler
+                       of the side cars. 
+                       NOTE: requires global.proxy.privileged=true,values.global.proxy.enableCoreDump=true
 ```
 
 Note:
diff --git a/perf/benchmark/flame/README.md b/perf/benchmark/flame/README.md
index ada02d0aaa..bebaed06a9 100644
--- a/perf/benchmark/flame/README.md
+++ b/perf/benchmark/flame/README.md
@@ -15,13 +15,14 @@ This document shows how to gather performance data from via the `perf` container
 Enable `profilingMode` in [values.yaml](../values.yaml). This will end up adding the perf
 container to the server and client pods, which both will be running on separate nodes.
 
-Flame graphs are created from data collected using linux `perf_events` by the `perf` and [BCC tools](https://github.com/iovisor/bcc).
+Flame graphs and visualizations are created from data collected using linux `perf_events`
+by the `perf` and [BCC tools](https://github.com/iovisor/bcc), as well as Envoy's built-in profiler.
 
-## Obtaining flame graphs
+## Obtaining flame graphs 
 
 Flame graphs can be produced via `runner.py`, and will be stored in `flame/flameoutput`.
 
-A few sample command lines. `{duration}` will be replaced by
+A few sample command line arguments. `{duration}` will be replaced by
 whatever was passed for `--duration` to runner.py. `{sidecar_pid}` will
 be replaced by `runner.py` with the process id of the Envoy sidecar.
 
@@ -29,18 +30,39 @@ It is valid to omit `{sidecar_pid}` in `--custom_profiling_command`.
 This may be useful for machine-wide profiling or arbitrary processes.
 
 ```bash
-runner/runner.py --conn 20 --qps 10000 --duration 100 --custom_profiling_command="profile-bpfcc -df {duration} -p {sidecar_pid}" --custom_profiling_name="bcc-oncputime-sidecar"
+runner/runner.py ... --custom_profiling_command="profile-bpfcc -df {duration} -p {sidecar_pid}" --custom_profiling_name="bcc-oncputime-sidecar"
 
-runner/runner.py --conn 20 --qps 10000 --duration 100 --serversidecar --custom_profiling_command="offcputime-bpfcc -df {duration} -p {sidecar_pid}" --custom_profiling_name="bcc-offcputime-sidecar"
+runner/runner.py ... --custom_profiling_command="offcputime-bpfcc -df {duration} -p {sidecar_pid}" --custom_profiling_name="bcc-offcputime-sidecar"
 
-runner/runner.py --conn 20 --qps 10000 --duration 100 --serversidecar --custom_profiling_command="offwaketime-bpfcc -df {duration} -p {sidecar_pid}" --custom_profiling_name="bcc-offwaketime-sidecar"
+runner/runner.py ... --custom_profiling_command="offwaketime-bpfcc -df {duration} -p {sidecar_pid}" --custom_profiling_name="bcc-offwaketime-sidecar"
 
-runner/runner.py --conn 20 --qps 10000 --duration 100 --serversidecar --custom_profiling_command="wakeuptime-bpfcc -f -p {sidecar_pid} {duration}" --custom_profiling_name="bcc-wakeuptime-sidecar"
+runner/runner.py ... --custom_profiling_command="wakeuptime-bpfcc -f -p {sidecar_pid} {duration}" --custom_profiling_name="bcc-wakeuptime-sidecar"
 
-runner/runner.py --conn 20 --qps 10000 --duration 100 --serversidecar --custom_profiling_command="stackcount-bpfcc -p {sidecar_pid} *alloc* -fD {duration}" --custom_profiling_name="bcc-stackcount-alloc"
+runner/runner.py ... --custom_profiling_command="stackcount-bpfcc -p {sidecar_pid} *alloc* -fD {duration}" --custom_profiling_name="bcc-stackcount-alloc"
 
-runner/runner.py --conn 20 --qps 10000 --duration 100 --serversidecar --custom_profiling_command="perf record -F 99 -g -p {sidecar_pid} -- sleep {duration} && perf script | ~/FlameGraph/stackcollapse-perf.pl | c++filt -n" --custom_profiling_name="perf-oncputime-sidecar"
+runner/runner.py ... --custom_profiling_command="perf record -F 99 -g -p {sidecar_pid} -- sleep {duration} && perf script | ~/FlameGraph/stackcollapse-perf.pl | c++filt -n" \
+--custom_profiling_name="perf-oncputime-sidecar"
 
-runner/runner.py --conn 20 --qps 10000 --duration 100 --serversidecar --custom_profiling_command="perf record -e page-faults -g -p {sidecar_pid} -F 99 -- sleep {duration} && perf script | ~/FlameGraph/stackcollapse-perf.pl | c++filt -n" --custom_profiling_name="perf-pagefaults-sidecar"
+runner/runner.py ... --custom_profiling_command="perf record -e page-faults -g -p {sidecar_pid} -- sleep {duration} && perf script | ~/FlameGraph/stackcollapse-perf.pl | c++filt -n" --custom_profiling_name="perf-pagefaults-sidecar"
 
 ```
+
+## Leveraging Istio's sidecar built-in profiling 
+
+Istio's sidecar proxy (Envoy) is usually build with `tcmalloc`, and as such traditional memory profiling
+methods may give unsatisfactory results. Fortunately, the proxy provides a built-in means to collect
+profiling data, and the benchmark tool is able to leverage that. Doing so, however, requires a writeable
+file system as well as privileges to install new packages for the sidecar containers. The following
+command (re)configures istio to satisfy these requirements:
+
+```bash
+istioctl manifest apply --set "values.global.proxy.privileged=true,values.global.proxy.enableCoreDump=true"
+```
+
+After doing so `runner.py` can be run with `--envoy_profiler [heapprofiler|cpuprofiler]`. This will start/stop
+the built-in profilers of the sidecars, obtain the collected profiles, and visualize them via `pprof`.
+The resulting output will end up in `flame/flameoutput` just like in the other flows:
+
+```
+runner/runner.py --envoy_profiler cpuprofiler|heapprofiler ...
+```
diff --git a/perf/benchmark/runner/runner.py b/perf/benchmark/runner/runner.py
index 0be6c48e66..a10ffa522c 100644
--- a/perf/benchmark/runner/runner.py
+++ b/perf/benchmark/runner/runner.py
@@ -124,7 +124,8 @@ def __init__(
             load_gen_type="fortio",
             custom_profiling_command=None,
             custom_profiling_name="default-profile",
-            devmode=False):
+            devmode=False,
+            envoy_profiler=None):
         self.run_id = str(uuid.uuid4()).partition('-')[0]
         self.headers = headers
         self.conn = conn
@@ -152,6 +153,7 @@ def __init__(
         self.cacert = cacert
         self.load_gen_type = load_gen_type
         self.devmode = devmode
+        self.envoy_profiler = envoy_profiler
 
         if self.perf_record != False:
             if not self.custom_profiling_command is None:
@@ -257,6 +259,43 @@ def generate_fortio_cmd(self, headers_cmd, conn, qps, duration, grpc, cacert_arg
 
         return fortio_cmd
 
+    def run_envoy_profiler(self, exec_cmd, podname, profile_name, envoy_profiler, labels):
+        filename = "{datetime}_{labels}-{profile_name}-{podname}".format(
+            datetime=SCRIPT_START, profile_name=profile_name, labels=labels, podname=podname)
+        exec_cmd_on_pod = "kubectl exec -n {namespace} {podname} -c istio-proxy -- bash -c ".format(
+            namespace=os.environ.get("NAMESPACE", "twopods"),
+            podname=podname
+        )
+        profile_url ="curl -X POST -s http://localhost:15000/{envoy_profiler}?enable".format(envoy_profiler=envoy_profiler)
+        script = "{profile_url}=y; sleep {duration}; {profile_url}=n;".format(profile_url=profile_url, duration=self.duration)
+        print(getoutput("{exec_cmd} \"{script}\"".format(exec_cmd=exec_cmd_on_pod, script=script)))
+
+        # When we get here, the heap profile has been written.
+        # We install pprof & some necessities for generating the visual into the istio-proxy container the first
+        # time we get here, so we can get the visualization of the process out.
+        script = "test ! -f ~/go/bin/pprof && echo 1"
+        if getoutput("{exec_cmd} \"{script}\"".format(exec_cmd=exec_cmd_on_pod, script=script)) == "1":
+            script = "sudo apt-get update && sudo apt-get install -y --no-install-recommends wget git binutils graphviz &&"
+            script = script + " cd /tmp/ &&"
+            script = script + " curl https://dl.google.com/go/go1.14.2.linux-amd64.tar.gz --output go1.14.2.linux-amd64.tar.gz &&"
+            script = script + " sudo tar -C /usr/local -xzf go1.14.2.linux-amd64.tar.gz &&"
+            script = script + " export PATH=$PATH:/usr/local/go/bin &&"
+            script = script + " go get -u github.com/google/pprof"
+            print(getoutput("{exec_cmd} \"{script}\"".format(exec_cmd=exec_cmd_on_pod, script=script)))
+
+        script = "rm -r /tmp/envoy; cp -r /var/log/envoy/ /tmp/envoy; cp -r /lib/x86_64-linux-gnu /tmp/envoy/lib; cp /usr/local/bin/envoy /tmp/envoy/lib/envoy"
+        print(getoutput("{exec_cmd} \"{script}\"".format(exec_cmd=exec_cmd_on_pod, script=script)))
+        output_name = "tmp.svg"
+        
+        visualization_arg = ""
+        if envoy_profiler == "heapprofiler":
+            visualization_arg = "-alloc_space"
+        print(getoutput("{exec_cmd} \"cd /tmp/envoy;  PPROF_BINARY_PATH=/tmp/envoy/lib/ ~/go/bin/pprof {visualization_arg} -svg -output '{output_name}' /tmp/envoy/lib/envoy envoy.*\"".format(
+            exec_cmd=exec_cmd_on_pod, output_name=output_name, visualization_arg=visualization_arg)))
+        # Copy the visualization into flame/output.
+        kubectl_cp(podname + ":/tmp/envoy/{output_name}".format(output_name=output_name),
+                    "flame/flameoutput/{filename}.svg".format(filename=filename), "istio-proxy")
+
     def run_profiler(self, exec_cmd, podname, profile_name, profiling_command, labels):
         filename = "{datetime}_{labels}-{profile_name}-{podname}".format(
             datetime=SCRIPT_START, profile_name=profile_name, labels=labels, podname=podname)
@@ -277,7 +316,6 @@ def run_profiler(self, exec_cmd, podname, profile_name, profiling_command, label
             with open(dest, 'w') as f:
                 s = """#!/bin/bash
 set -euo pipefail
-set +x
 ({profiler_cmd}) >& /tmp/{filename}_profiler_cmd.log
 ({flamegraph_cmd}) >& /tmp/{filename}_flamegraph_cmd.log
                 """.format(profiler_cmd=profiler_cmd, flamegraph_cmd=flamegraph_cmd, filename=filename)
@@ -298,7 +336,16 @@ def run_profiler(self, exec_cmd, podname, profile_name, profiling_command, label
 
     def maybe_start_profiling_threads(self, labels, perf_label):
         threads = []
-
+        if self.envoy_profiler:
+            for pod in [self.client.name, self.server.name]:
+                exec_cmd_on_pod = "kubectl exec -n {namespace} {podname} -c istio-proxy -- bash -c ".format(
+                    namespace=os.environ.get("NAMESPACE", "twopods"),
+                    podname=pod
+                )
+                script = "set -euo pipefail; sudo rm -rf {dir} || true; sudo mkdir -p {dir}; sudo chmod 777 {dir};".format(dir="/var/log/envoy")
+                print(getoutput("{exec_cmd} \"{script}\"".format(exec_cmd=exec_cmd_on_pod, script=script)))
+                threads.append(Thread(target=self.run_envoy_profiler, args=[
+                    exec_cmd_on_pod, pod, "envoy-" + self.envoy_profiler, self.envoy_profiler, labels + perf_label]))
         if self.custom_profiling_command:
             # We run any custom profiling command on both pods, as one runs on each node we're interested in.
             for pod in [self.client.name, self.server.name]:
@@ -309,13 +356,13 @@ def maybe_start_profiling_threads(self, labels, perf_label):
 
                 # Wait for node_exporter to run, which indicates the profiling initialization container has finished initializing.
                 # once the init probe is supported, move this to a http probe instead in fortio.yaml
-                ready_cmd = "{exec_cmd} \"pgrep 'node_exporter'\"".format(
+                ready_cmd = "{exec_cmd} \"which perf\"".format(
                     exec_cmd=exec_cmd_on_pod)
-                ne_pid = getoutput(ready_cmd).strip()
+                perf_path = getoutput(ready_cmd).strip()
                 attempts = 1
-                while ne_pid == "" and attempts < 60:
+                while perf_path != "/usr/sbin/perf" and attempts < 60:
                     sleep(1)
-                    ne_pid = getoutput(ready_cmd).strip()
+                    perf_path = getoutput(ready_cmd).strip()
                     attempts = attempts + 1
 
                 # Find side car process id's in case the profiling command needs it.
@@ -517,7 +564,8 @@ def run_perf_test(args):
             cacert=args.cacert,
             load_gen_type=args.load_gen_type,
             custom_profiling_command=args.custom_profiling_command,
-            custom_profiling_name=args.custom_profiling_name)
+            custom_profiling_name=args.custom_profiling_name,
+            envoy_profiler=args.envoy_profiler)
 
     if fortio.duration <= min_duration:
         print("Duration must be greater than {min_duration}".format(
@@ -681,6 +729,11 @@ def get_parser():
         help="In development mode, very short duration argument values are allowed.",
         default=False,
     )
+    parser.add_argument(
+        "--envoy_profiler",
+        help="Obtains perf visualization based on Envoy's built-in profiling. Valid values are 'heapprofiler' or 'cpuprofiler'.",
+        default=None,
+    )
 
     define_bool(parser, "baseline", "run baseline for all", False)
     define_bool(parser, "serversidecar",

From b2aa8f01f3e9efe1f807a7789db178ce6e7412fd Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Fri, 8 May 2020 23:48:58 +0200
Subject: [PATCH 26/31] Lint fixes

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/benchmark/runner/runner.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/perf/benchmark/runner/runner.py b/perf/benchmark/runner/runner.py
index a10ffa522c..8a92e5f043 100644
--- a/perf/benchmark/runner/runner.py
+++ b/perf/benchmark/runner/runner.py
@@ -266,7 +266,7 @@ def run_envoy_profiler(self, exec_cmd, podname, profile_name, envoy_profiler, la
             namespace=os.environ.get("NAMESPACE", "twopods"),
             podname=podname
         )
-        profile_url ="curl -X POST -s http://localhost:15000/{envoy_profiler}?enable".format(envoy_profiler=envoy_profiler)
+        profile_url = "curl -X POST -s http://localhost:15000/{envoy_profiler}?enable".format(envoy_profiler=envoy_profiler)
         script = "{profile_url}=y; sleep {duration}; {profile_url}=n;".format(profile_url=profile_url, duration=self.duration)
         print(getoutput("{exec_cmd} \"{script}\"".format(exec_cmd=exec_cmd_on_pod, script=script)))
 
@@ -286,7 +286,7 @@ def run_envoy_profiler(self, exec_cmd, podname, profile_name, envoy_profiler, la
         script = "rm -r /tmp/envoy; cp -r /var/log/envoy/ /tmp/envoy; cp -r /lib/x86_64-linux-gnu /tmp/envoy/lib; cp /usr/local/bin/envoy /tmp/envoy/lib/envoy"
         print(getoutput("{exec_cmd} \"{script}\"".format(exec_cmd=exec_cmd_on_pod, script=script)))
         output_name = "tmp.svg"
-        
+
         visualization_arg = ""
         if envoy_profiler == "heapprofiler":
             visualization_arg = "-alloc_space"
@@ -294,7 +294,7 @@ def run_envoy_profiler(self, exec_cmd, podname, profile_name, envoy_profiler, la
             exec_cmd=exec_cmd_on_pod, output_name=output_name, visualization_arg=visualization_arg)))
         # Copy the visualization into flame/output.
         kubectl_cp(podname + ":/tmp/envoy/{output_name}".format(output_name=output_name),
-                    "flame/flameoutput/{filename}.svg".format(filename=filename), "istio-proxy")
+                   "flame/flameoutput/{filename}.svg".format(filename=filename), "istio-proxy")
 
     def run_profiler(self, exec_cmd, podname, profile_name, profiling_command, labels):
         filename = "{datetime}_{labels}-{profile_name}-{podname}".format(

From b19423bf5305860b6dd37037ce7fc8eb45a0fe6a Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Fri, 8 May 2020 23:55:48 +0200
Subject: [PATCH 27/31] Markdown lint fixes

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/benchmark/README.md       | 2 +-
 perf/benchmark/flame/README.md | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/perf/benchmark/README.md b/perf/benchmark/README.md
index 30ff4139c1..8dfcf6a3cb 100644
--- a/perf/benchmark/README.md
+++ b/perf/benchmark/README.md
@@ -157,7 +157,7 @@ optional arguments:
                         filename prefix for the result of any --custom_profiling_command
   --envoy_profiler [heapprofiler|cpuprofiler]
                        yields visualizations using pprof over profiles collected via the built-in profiler
-                       of the side cars. 
+                       of the side cars.
                        NOTE: requires global.proxy.privileged=true,values.global.proxy.enableCoreDump=true
 ```
 
diff --git a/perf/benchmark/flame/README.md b/perf/benchmark/flame/README.md
index bebaed06a9..f6a91bfb73 100644
--- a/perf/benchmark/flame/README.md
+++ b/perf/benchmark/flame/README.md
@@ -18,7 +18,7 @@ container to the server and client pods, which both will be running on separate
 Flame graphs and visualizations are created from data collected using linux `perf_events`
 by the `perf` and [BCC tools](https://github.com/iovisor/bcc), as well as Envoy's built-in profiler.
 
-## Obtaining flame graphs 
+## Obtaining flame graphs
 
 Flame graphs can be produced via `runner.py`, and will be stored in `flame/flameoutput`.
 
@@ -47,7 +47,7 @@ runner/runner.py ... --custom_profiling_command="perf record -e page-faults -g -
 
 ```
 
-## Leveraging Istio's sidecar built-in profiling 
+## Leveraging Istio's sidecar built-in profiling
 
 Istio's sidecar proxy (Envoy) is usually build with `tcmalloc`, and as such traditional memory profiling
 methods may give unsatisfactory results. Fortunately, the proxy provides a built-in means to collect
@@ -63,6 +63,6 @@ After doing so `runner.py` can be run with `--envoy_profiler [heapprofiler|cpupr
 the built-in profilers of the sidecars, obtain the collected profiles, and visualize them via `pprof`.
 The resulting output will end up in `flame/flameoutput` just like in the other flows:
 
-```
+```bash
 runner/runner.py --envoy_profiler cpuprofiler|heapprofiler ...
 ```

From 70e8f5a0b73d696246badf6d34582e4de7397eaa Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Wed, 13 May 2020 12:35:13 +0200
Subject: [PATCH 28/31] Add scrape annotations for prom. node exporter

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/benchmark/templates/fortio.yaml | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/perf/benchmark/templates/fortio.yaml b/perf/benchmark/templates/fortio.yaml
index 160a347a92..49963d1363 100644
--- a/perf/benchmark/templates/fortio.yaml
+++ b/perf/benchmark/templates/fortio.yaml
@@ -27,6 +27,12 @@ apiVersion: v1
 kind: Service
 metadata:
   name: {{ $.name }}
+{{- if $.Values.profilingMode }}
+  annotations:
+    prometheus.io/scrape: 'true'
+    prometheus.io/scheme: 'http'
+    prometheus.io/port: '{{ $.Values.nodeExporterPort }}'
+{{- end }}
 spec:
   ports:
   - name: http-echo
@@ -41,8 +47,8 @@ spec:
   - name: grpc-pinga
     port: 8076
 {{- if $.Values.profilingMode }}
-  - name: node-exporter
-    port: 9100
+  - name: prometheus-node-exporter
+    port: {{ $.Values.nodeExporterPort }}
     protocol: TCP
 {{- end }}
   - name: nighthawk-service

From e6413092fc6088aba2f3544519865a8eb945ff09 Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Wed, 13 May 2020 20:04:54 +0200
Subject: [PATCH 29/31] network flakes in Ci: Add hard coded single retry per
 test execution

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/benchmark/runner/runner.py | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/perf/benchmark/runner/runner.py b/perf/benchmark/runner/runner.py
index 8a92e5f043..6457efa648 100644
--- a/perf/benchmark/runner/runner.py
+++ b/perf/benchmark/runner/runner.py
@@ -62,6 +62,7 @@ def pod_info(filterstr="", namespace=NAMESPACE, multi_ok=True):
 def run_command(command):
     process = subprocess.Popen(shlex.split(command))
     process.wait()
+    return process.returncode
 
 
 def run_command_sync(command):
@@ -89,7 +90,7 @@ def kubectl_exec(pod, remote_cmd, runfn=run_command, container=None):
         c=c,
         namespace=NAMESPACE)
     print(cmd, flush=True)
-    runfn(cmd)
+    return runfn(cmd) == 0
 
 
 class Fortio:
@@ -210,9 +211,9 @@ def ingress(self, load_gen_cmd, sidecar_mode):
     def execute_sidecar_mode(self, sidecar_mode, load_gen_type, load_gen_cmd, sidecar_mode_func, labels, perf_label_suffix):
         print('-------------- Running in {sidecar_mode} mode --------------'.format(sidecar_mode=sidecar_mode))
         if load_gen_type == "fortio":
-            kubectl_exec(self.client.name, sidecar_mode_func(load_gen_cmd, sidecar_mode))
+            return kubectl_exec(self.client.name, sidecar_mode_func(load_gen_cmd, sidecar_mode))
         elif load_gen_type == "nighthawk":
-            run_nighthawk(self.client.name, sidecar_mode_func(load_gen_cmd, sidecar_mode), labels + "_" + sidecar_mode)
+            return run_nighthawk(self.client.name, sidecar_mode_func(load_gen_cmd, sidecar_mode), labels + "_" + sidecar_mode)
 
     def generate_test_labels(self, conn, qps, size):
         size = size or self.size
@@ -426,12 +427,11 @@ def generate_nighthawk_cmd(self, cpus, conn, qps, duration, labels):
     def create_execution_delegate(self, perf_label, sidecar_mode, sidecar_mode_func, load_gen_cmd, labels):
         def execution_delegate():
             threads = self.maybe_start_profiling_threads(labels, perf_label)
-            self.execute_sidecar_mode(
+            ok = self.execute_sidecar_mode(
                 sidecar_mode, self.load_gen_type, load_gen_cmd, sidecar_mode_func, labels, perf_label)
-            if len(threads) > 0:
-                if self.custom_profiling_command:
-                    for thread in threads:
-                        thread.join()
+            for thread in threads:
+                thread.join()
+            return ok
         return execution_delegate
 
     def run(self, headers, conn, qps, size, duration):
@@ -487,7 +487,12 @@ def run(self, headers, conn, qps, size, duration):
                 "_srv_ingress", "ingress", self.ingress, load_gen_cmd, labels))
 
         for execution in executions:
-            execution()
+            if not execution():
+                print("WARNING: execution failed. Performing a single retry.")
+                # TODO(oschaaf): make the retry count configurable, e.g. via a --max-retries-per-test flag.
+                if not execution():
+                    print("ERROR: retry failed. Aborting.")
+                    sys.exit(1)
 
 
 def validate_job_config(job_config):
@@ -605,6 +610,7 @@ def run_perf_test(args):
 
 
 def run_nighthawk(pod, remote_cmd, labels):
+    return False
     kube_cmd = "kubectl --namespace {namespace} exec {pod} -c captured -- {remote_cmd}".format(
         pod=pod,
         remote_cmd=remote_cmd,
@@ -636,12 +642,14 @@ def run_nighthawk(pod, remote_cmd, labels):
             # - per worker output may sometimes help interpret plots that don't have a nice knee-shaped shape.
             kubectl_cp("{dest}.fortio.json".format(
                 dest=dest), "{pod}:/var/lib/fortio/{datetime}_{labels}.json".format(pod=pod, labels=labels, datetime=SCRIPT_START), "shell")
+            return True
     else:
         print("nighthawk remote execution error: %s" % exit_code)
         if output:
             print("--> stdout: %s" % output.decode("utf-8"))
         if err:
             print("--> stderr: %s" % err.decode("utf-8"))
+        return False
 
 
 def csv_to_int(s):

From 2d71dd1893acd842a3163ba6b73a2e8f610e4a9c Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Wed, 13 May 2020 20:12:39 +0200
Subject: [PATCH 30/31] Remove line of code for debugging

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/benchmark/runner/runner.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/perf/benchmark/runner/runner.py b/perf/benchmark/runner/runner.py
index 6457efa648..074237ba04 100644
--- a/perf/benchmark/runner/runner.py
+++ b/perf/benchmark/runner/runner.py
@@ -610,7 +610,6 @@ def run_perf_test(args):
 
 
 def run_nighthawk(pod, remote_cmd, labels):
-    return False
     kube_cmd = "kubectl --namespace {namespace} exec {pod} -c captured -- {remote_cmd}".format(
         pod=pod,
         remote_cmd=remote_cmd,

From bce6a4f97bc98ced31ff89e482d8b338feb57d8b Mon Sep 17 00:00:00 2001
From: Otto van der Schaaf <oschaaf@we-amp.com>
Date: Wed, 13 May 2020 21:26:19 +0200
Subject: [PATCH 31/31] Tweaks for bleeding edge istio

Signed-off-by: Otto van der Schaaf <oschaaf@we-amp.com>
---
 perf/benchmark/flame/README.md  | 2 +-
 perf/benchmark/runner/runner.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/perf/benchmark/flame/README.md b/perf/benchmark/flame/README.md
index f6a91bfb73..bfa99298dd 100644
--- a/perf/benchmark/flame/README.md
+++ b/perf/benchmark/flame/README.md
@@ -56,7 +56,7 @@ file system as well as priviliges to install new packages for the sidecar contai
 command (re)configures istio to satisfy these requirements:
 
 ```bash
-istioctl manifest apply --set "values.global.proxy.privileged=true,values.global.proxy.enableCoreDump=true"
+istioctl manifest apply --set "values.global.proxy.enableCoreDump=true" --set "values.global.proxy.privileged=true"
 ```
 
 After doing so `runner.py` can be run with `--envoy_profiler [heapprofiler|cpuprofiler]`. This will start/stop
diff --git a/perf/benchmark/runner/runner.py b/perf/benchmark/runner/runner.py
index 074237ba04..11b953f2ea 100644
--- a/perf/benchmark/runner/runner.py
+++ b/perf/benchmark/runner/runner.py
@@ -284,7 +284,7 @@ def run_envoy_profiler(self, exec_cmd, podname, profile_name, envoy_profiler, la
             script = script + " go get -u github.com/google/pprof"
             print(getoutput("{exec_cmd} \"{script}\"".format(exec_cmd=exec_cmd_on_pod, script=script)))
 
-        script = "rm -r /tmp/envoy; cp -r /var/log/envoy/ /tmp/envoy; cp -r /lib/x86_64-linux-gnu /tmp/envoy/lib; cp /usr/local/bin/envoy /tmp/envoy/lib/envoy"
+        script = "rm -r /tmp/envoy; cp -r /var/lib/istio/data/ /tmp/envoy; cp -r /lib/x86_64-linux-gnu /tmp/envoy/lib; cp /usr/local/bin/envoy /tmp/envoy/lib/envoy"
         print(getoutput("{exec_cmd} \"{script}\"".format(exec_cmd=exec_cmd_on_pod, script=script)))
         output_name = "tmp.svg"
 
@@ -343,7 +343,7 @@ def maybe_start_profiling_threads(self, labels, perf_label):
                     namespace=os.environ.get("NAMESPACE", "twopods"),
                     podname=pod
                 )
-                script = "set -euo pipefail; sudo rm -rf {dir} || true; sudo mkdir -p {dir}; sudo chmod 777 {dir};".format(dir="/var/log/envoy")
+                script = "set -euo pipefail; sudo rm -rf {dir}/* || true; sudo mkdir -p {dir}; sudo chmod 777 {dir};".format(dir="/var/lib/istio/data/")
                 print(getoutput("{exec_cmd} \"{script}\"".format(exec_cmd=exec_cmd_on_pod, script=script)))
                 threads.append(Thread(target=self.run_envoy_profiler, args=[
                     exec_cmd_on_pod, pod, "envoy-" + self.envoy_profiler, self.envoy_profiler, labels + perf_label]))