Skip to content

Commit

Permalink
python: Upgrade PyPerf to v1.0.3 (#70)
Browse files Browse the repository at this point in the history
* Upgrade PyPerf to v1.0.3 (c568fdd751304ecf7907ebd27f8039ea3c492a4c)
* Add tests for kernel stacks from PyPerf
* Use larger events buffer & symbols map
* Increase MAX_FREQUENCY to 1000
* Log PyPerf's stdout/stderr every interval
* Bump gProfiler version to 1.0.4
  • Loading branch information
Jongy authored May 13, 2021
1 parent 7a60f2c commit 6e201e2
Show file tree
Hide file tree
Showing 13 changed files with 89 additions and 45 deletions.
6 changes: 2 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,8 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get install -y git build-essential iperf

WORKDIR /bcc

RUN git clone --depth 1 -b v1.0.1 https://github.com/Granulate/bcc.git && cd bcc && git reset --hard 92b61ade89f554859950695b067288f60cb1f3e5
RUN mkdir bcc/build && cd bcc/build && \
cmake -DPYTHON_CMD=python3 -DINSTALL_CPP_EXAMPLES=y -DCMAKE_INSTALL_PREFIX=/bcc/root .. && \
make -C examples/cpp/pyperf -j -l VERBOSE=1 install
COPY ./scripts/pyperf_build.sh .
RUN ./pyperf_build.sh


FROM ubuntu:20.04
Expand Down
2 changes: 1 addition & 1 deletion gprofiler/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.0.3"
__version__ = "1.0.4"
22 changes: 20 additions & 2 deletions gprofiler/python.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@


class PythonProfilerBase(ProfilerBase):
MAX_FREQUENCY = 100
MAX_FREQUENCY: Optional[int] = None # set by subclasses

def __init__(
self,
Expand All @@ -35,6 +35,7 @@ def __init__(
storage_dir: str,
):
super().__init__()
assert isinstance(self.MAX_FREQUENCY, int)
self._frequency = min(frequency, self.MAX_FREQUENCY)
self._duration = duration
self._stop_event = stop_event or Event()
Expand Down Expand Up @@ -119,7 +120,11 @@ def snapshot(self) -> Mapping[int, Mapping[str, int]]:


class PythonEbpfProfiler(PythonProfilerBase):
MAX_FREQUENCY = 1000
PYPERF_RESOURCE = "python/pyperf/PyPerf"
events_buffer_pages = 256 # 1mb and needs to be physically contiguous
# 28mb (each symbol is 224 bytes), but needn't be physically contiguous so don't care
symbols_map_size = 131072
dump_signal = signal.SIGUSR2
dump_timeout = 5 # seconds
poll_timeout = 10 # seconds
Expand Down Expand Up @@ -195,6 +200,10 @@ def start(self):
str(self.output_path),
"-F",
str(self._frequency),
"--events-buffer-pages",
str(self.events_buffer_pages),
"--symbols-map-size",
str(self.symbols_map_size),
# Duration is irrelevant here, we want to run continuously.
]
process = start_process(cmd, via_staticx=True)
Expand All @@ -204,6 +213,7 @@ def start(self):
wait_event(self.poll_timeout, self._stop_event, lambda: os.path.exists(self.output_path))
except TimeoutError:
process.kill()
logger.error(f"PyPerf failed to start. stdout {process.stdout.read()!r} stderr {process.stderr.read()!r}")
raise
else:
self.process = process
Expand All @@ -225,7 +235,15 @@ def _dump(self) -> Path:
self.process.send_signal(self.dump_signal)

try:
return self._wait_for_output_file(self.dump_timeout)
output = self._wait_for_output_file(self.dump_timeout)
# PyPerf outputs sampling & error counters every interval (after writing the output file), print them.
# also, makes sure its output pipe doesn't fill up.
# using read1() which performs just a single read() call and doesn't read until EOF
# (unlike Popen.communicate())
assert self.process is not None
# Python 3.6 doesn't have read1() without size argument :/
logger.debug(f"PyPerf output: {self.process.stderr.read1(4096)}")
return output
except TimeoutError:
# error flow :(
try:
Expand Down
8 changes: 2 additions & 6 deletions pyi.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ RUN yum install -y \

WORKDIR /bcc

RUN git clone --depth 1 -b v1.0.1 https://github.com/Granulate/bcc.git && cd bcc && git reset --hard 92b61ade89f554859950695b067288f60cb1f3e5

RUN yum install -y centos-release-scl-rh
# mostly taken from https://github.com/iovisor/bcc/blob/master/INSTALL.md#install-and-compile-llvm
RUN yum install -y devtoolset-8 \
Expand All @@ -37,10 +35,8 @@ RUN yum install -y devtoolset-8 \
llvm-toolset-7-clang-devel \
devtoolset-8-elfutils-libelf-devel

RUN mkdir bcc/build && cd bcc/build && \
source scl_source enable devtoolset-8 llvm-toolset-7 && \
cmake -DPYTHON_CMD=python3 -DINSTALL_CPP_EXAMPLES=y -DCMAKE_INSTALL_PREFIX=/bcc/root .. && \
make -C examples/cpp/pyperf -j -l VERBOSE=1 install
COPY ./scripts/pyperf_build.sh .
RUN source scl_source enable devtoolset-8 llvm-toolset-7 && source ./pyperf_build.sh

# gProfiler part

Expand Down
2 changes: 1 addition & 1 deletion scripts/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ curl -fL https://github.com/Granulate/async-profiler/releases/download/v2.0g1/as
-z build/async-profiler-2.0-linux-x64.tar.gz -o build/async-profiler-2.0-linux-x64.tar.gz
tar -xzf build/async-profiler-2.0-linux-x64.tar.gz -C gprofiler/resources/java --strip-components=2 async-profiler-2.0-linux-x64/build

# pyperf - just create the directory for it, it will be built/downloaded later
# pyperf - just create the directory for it, it will be built later
mkdir -p gprofiler/resources/python/pyperf

# perf
Expand Down
12 changes: 12 additions & 0 deletions scripts/pyperf_build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/usr/bin/env bash
#
# Copyright (c) Granulate. All rights reserved.
# Licensed under the AGPL3 License. See LICENSE.md in the project root for license information.
#
# Abort on the first failing command.
set -e

# Fetch the pinned PyPerf sources (Granulate's bcc fork).
# Each step is its own statement instead of an `a && b && c` chain: with `set -e`, a failure of any
# command in an && list other than the last one does NOT abort the script, so a failed clone or cd
# would otherwise let the build continue in the wrong directory.
git clone --depth 1 -b v1.0.3 https://github.com/Granulate/bcc.git
cd bcc
git reset --hard c568fdd751304ecf7907ebd27f8039ea3c492a4c

# Out-of-tree build; builds and installs the examples/cpp/pyperf target under /bcc/root.
mkdir build
cd build
cmake -DPYTHON_CMD=python3 -DINSTALL_CPP_EXAMPLES=y -DCMAKE_INSTALL_PREFIX=/bcc/root ..
make -C examples/cpp/pyperf -j -l VERBOSE=1 install
18 changes: 7 additions & 11 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import os
import stat
from contextlib import contextmanager
from functools import partial
from pathlib import Path
from subprocess import Popen, TimeoutExpired, run
from time import sleep
Expand All @@ -17,7 +18,7 @@
from pytest import fixture # type: ignore

from tests import CONTAINERS_DIRECTORY, PARENT
from tests.utils import chmod_path_parts
from tests.utils import assert_function_in_collapsed, chmod_path_parts


@fixture
Expand Down Expand Up @@ -56,9 +57,9 @@ def java_command_line(class_path: Path) -> List:
def command_line(tmp_path: Path, runtime: str) -> List:
return {
"java": java_command_line(tmp_path / "java"),
# note: here we run "python /path/to/fibonacci.py" while in the container test we have
# "CMD /path/to/fibonacci.py", to test processes with non-python /proc/pid/comm
"python": ["python3", CONTAINERS_DIRECTORY / "python/fibonacci.py"],
# note: here we run "python /path/to/lister.py" while in the container test we have
# "CMD /path/to/lister.py", to test processes with non-python /proc/pid/comm
"python": ["python3", CONTAINERS_DIRECTORY / "python/lister.py"],
}[runtime]


Expand Down Expand Up @@ -160,15 +161,10 @@ def application_pid(in_container: bool, application_process: Popen, application_
def assert_collapsed(runtime: str) -> Callable[[Mapping[str, int]], None]:
function_name = {
"java": "Fibonacci.main",
"python": "fibonacci",
"python": "burner",
}[runtime]

def assert_collapsed(collapsed: Mapping[str, int]) -> None:
print(f"collapsed: {collapsed}")
assert collapsed is not None
assert any((function_name in record) for record in collapsed.keys())

return assert_collapsed
return partial(assert_function_in_collapsed, function_name)


@fixture
Expand Down
4 changes: 2 additions & 2 deletions tests/containers/python/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
FROM python:3.6-alpine

WORKDIR /app
ADD fibonacci.py /app
ADD lister.py /app

CMD ["/app/fibonacci.py"]
CMD ["/app/lister.py"]
4 changes: 2 additions & 2 deletions tests/containers/python/Dockerfile.libpython
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
FROM python:3.6-alpine

WORKDIR /app
ADD fibonacci.py /app
ADD lister.py /app
# this is used to test that we identify Python processes to profile based on "libpython" in their "/proc/pid/maps".
# so we'll run a Python script using non-"python" executable ("shmython" instead) but it'll have "libpython"
# loaded.
RUN ln /usr/local/bin/python3.6 /usr/local/bin/shmython && ! test -L /usr/local/bin/shmython && ldd /usr/local/bin/shmython | grep libpython > /dev/null

CMD ["/usr/local/bin/shmython", "/app/fibonacci.py"]
CMD ["/usr/local/bin/shmython", "/app/lister.py"]
12 changes: 0 additions & 12 deletions tests/containers/python/fibonacci.py

This file was deleted.

22 changes: 22 additions & 0 deletions tests/containers/python/lister.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/usr/local/bin/python
#
# Copyright (c) Granulate. All rights reserved.
# Licensed under the AGPL3 License. See LICENSE.md in the project root for license information.
#
import os
from threading import Thread


def lister():
    """List the root directory once and discard the result."""
    # The listing itself is irrelevant; the call exists so the process spends time in the kernel
    # and a profiler can capture kernel stacks for it.
    os.listdir("/")  # have some kernel stacks


def burner():
    """Busy-loop forever in pure Python; this function never returns."""
    # Keeps a frame named "burner" on a Python stack at all times.
    while True:  # have some Python stacks
        pass


if __name__ == "__main__":
    # Run the pure-Python busy loop on a second thread...
    Thread(target=burner).start()
    # ...while the main thread keeps calling lister() forever.
    while True:
        lister()
13 changes: 10 additions & 3 deletions tests/test_sanity.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,12 @@
from gprofiler.merge import parse_one_collapsed
from gprofiler.python import PySpyProfiler, PythonEbpfProfiler
from gprofiler.utils import resource_path
from tests.utils import copy_file_from_image, copy_pyspy_from_image, run_privileged_container
from tests.utils import (
assert_function_in_collapsed,
copy_file_from_image,
copy_pyspy_from_image,
run_privileged_container,
)


@pytest.mark.parametrize("runtime", ["java"])
Expand Down Expand Up @@ -57,9 +62,11 @@ def test_python_ebpf(
pyperf_path,
)

with PythonEbpfProfiler(1000, 1, Event(), str(tmp_path)) as profiler:
with PythonEbpfProfiler(1000, 5, Event(), str(tmp_path)) as profiler:
process_collapsed = profiler.snapshot()
assert_collapsed(process_collapsed.get(application_pid))
collapsed = process_collapsed.get(application_pid)
assert_collapsed(collapsed)
assert_function_in_collapsed("sys_getdents64", collapsed) # ensure kernel stacks exist


@pytest.mark.parametrize("runtime", ["java", "python"])
Expand Down
9 changes: 8 additions & 1 deletion tests/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
import subprocess
from pathlib import Path
from typing import Dict, List
from typing import Dict, List, Mapping

from docker import DockerClient
from docker.models.images import Image
Expand Down Expand Up @@ -59,3 +59,10 @@ def copy_pyspy_from_image(gprofiler_docker_image: Image):
os.path.join("/app", "gprofiler", "resources", "python", "py-spy"),
resource_path("python/py-spy"),
)


def assert_function_in_collapsed(function_name: str, collapsed: Mapping[str, int]) -> None:
    """Assert that at least one record of ``collapsed`` mentions ``function_name``.

    :param function_name: substring searched for in every key of ``collapsed``.
    :param collapsed: mapping whose keys are the collapsed stack records of one process.
        Callers may obtain it with ``dict.get(pid)``, so ``None`` (no data for that
        process) is reported as an explicit assertion failure rather than an AttributeError.
    :raises AssertionError: if ``collapsed`` is ``None`` or no key contains ``function_name``.
    """
    # Printed unconditionally so the data shows up in the captured output of a failing test.
    print(f"collapsed: {collapsed}")
    assert collapsed is not None, f"no collapsed data at all (looking for function {function_name!r})!"
    assert any(
        function_name in record for record in collapsed
    ), f"function {function_name!r} missing in collapsed data!"

0 comments on commit 6e201e2

Please sign in to comment.