Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RHOAIENG-9707: chore(tests/containers): check shared objects with ldd #871

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 101 additions & 1 deletion tests/containers/base_image_test.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
from __future__ import annotations

import binascii
import inspect
import json
import logging
import pathlib
import re
import tempfile
from typing import TYPE_CHECKING
import textwrap
from typing import TYPE_CHECKING, Any, Callable

import pytest
import testcontainers.core.container
import testcontainers.core.waiting_utils

Expand All @@ -20,6 +26,84 @@
class TestBaseImage:
"""Tests that are applicable for all images we have in this repository."""

def test_elf_files_can_link_runtime_libs(self, subtests: pytest_subtests.SubTests, image):
    """Check that executable ELF files in the image can resolve their shared libraries.

    Starts the image as user 0 with a sleeping command, runs ``check_elf_file`` inside
    it through ``/usr/bin/python3`` and parses the ``OUTPUT> <json>`` lines it prints
    (one per scanned directory). Every ``ldd`` "not found" line that is not covered by
    the allowlist below is reported as a failed subtest.
    """
    container = testcontainers.core.container.DockerContainer(image=image, user=0, group_add=[0])
    container.with_command("/bin/sh -c 'sleep infinity'")

    def check_elf_file():
        """This python function will be executed on the image itself.
        That's why it has to have here all imports it needs."""
        import glob
        import os
        import json
        import subprocess
        import stat

        dirs = [
            "/bin",
            "/lib",
            "/lib64",
            "/opt/app-root"
        ]
        for path in dirs:
            count_scanned = 0
            unsatisfied_deps: list[tuple[str, str]] = []
            for dlib in glob.glob(os.path.join(path, "**"), recursive=True):
                # we will visit all files eventually, no need to bother with symlinks
                s = os.stat(dlib, follow_symlinks=False)
                isdirectory = stat.S_ISDIR(s.st_mode)
                isfile = stat.S_ISREG(s.st_mode)
                executable = bool(s.st_mode & (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH))
                if isdirectory or not executable or not isfile:
                    # NOTE(review): skipped files are not logged; logging them was raised in
                    # review, but for /opt/app-root it would produce a lot of output - still open
                    continue
                with open(dlib, mode='rb') as fp:
                    magic = fp.read(4)
                if magic != b'\x7fELF':
                    # NOTE(review): same open question as above about logging non-ELF files
                    continue

                count_scanned += 1
                ld_library_path = os.environ.get("LD_LIBRARY_PATH", "") + os.path.pathsep + os.path.dirname(dlib)
                output = subprocess.check_output(["ldd", dlib],
                                                 # search the $ORIGIN, essentially; most python libs expect this
                                                 env={**os.environ, "LD_LIBRARY_PATH": ld_library_path},
                                                 text=True)
                for line in output.splitlines():
                    if "not found" in line:
                        unsatisfied_deps.append((dlib, line.strip()))
                assert output
            print("OUTPUT>", json.dumps({"dir": path, "count_scanned": count_scanned, "unsatisfied": unsatisfied_deps}))

    try:
        container.start()
        ecode, output = container.exec(
            encode_python_function_execution_command_interpreter("/usr/bin/python3", check_elf_file))
    finally:
        docker_utils.NotebookContainer(container).stop(timeout=0)

    # A crash of the in-container script (e.g. `check_output` raising because `ldd` exited
    # non-zero, or `assert output` failing) must fail the test: directories that were never
    # reported print no "OUTPUT>" line and would otherwise be skipped without any assertion.
    assert ecode == 0, output.decode()

    # here goes the allowlist
    # files (matched by path) whose unresolved dependencies are expected
    allowed_files = (
        # this is some kind of self test or what
        re.compile(r"^/lib64/python3\.\d+/site-packages/hawkey/test/_hawkey_test\.so"),
        # this is expected and we don't use systemd anyway
        re.compile(r"^/lib64/systemd/libsystemd-core-\d+\.so"),
    )
    # missing libraries (matched by the start of the ldd line) that are expected
    allowed_deps = (
        "libodbc.so.2",  # todo(jdanek): known issue RHOAIENG-18904
        "libcuda.so.1",  # cuda magic will mount this into /usr/lib64/libcuda.so.1 and it will be found
        "libjvm.so",  # it's in ../server
        "libtracker-extract.so",  # it's in ../
    )

    marker = "OUTPUT> "
    for line in output.decode().splitlines():
        logging.debug(line)
        if not line.startswith(marker):
            continue
        data = json.loads(line[len(marker):])
        # an empty scan means the directory layout changed and the check became a no-op
        assert data['count_scanned'] > 0
        for dlib, deps in data["unsatisfied"]:
            if any(pattern.search(dlib) is not None for pattern in allowed_files):
                continue
            if deps.startswith(allowed_deps):
                continue

            with subtests.test(f"{dlib=}"):
                pytest.fail(f"{dlib=} has unsatisfied dependencies {deps=}")

def test_oc_command_runs(self, image: str):
container = testcontainers.core.container.DockerContainer(image=image, user=23456, group_add=[0])
container.with_command("/bin/sh -c 'sleep infinity'")
Expand Down Expand Up @@ -78,3 +162,19 @@ def test_oc_command_runs_fake_fips(self, image: str, subtests: pytest_subtests.S
assert ecode == 0, output.decode()
finally:
docker_utils.NotebookContainer(container).stop(timeout=0)


def encode_python_function_execution_command_interpreter(python: str, function: Callable[..., Any], *args: Any) -> list[str]:
    """Returns a cli command that will run the given Python function encoded inline.

    The source of ``function`` is read with ``inspect.getsource``, base64-encoded and
    embedded into a ``python -c`` program which decodes it, ``exec``s it, calls the
    function by its ``__name__`` with ``args`` and prints the returned value.

    All dependencies (imports, ...) must be part of function body.

    :param python: interpreter executable placed first in the command
    :param function: function whose source code is shipped in the command
    :param args: positional arguments for the call; each one is embedded as ``repr(arg)``,
        so its repr has to be a valid Python expression
    :return: the command as an argv list ``[python, "-c", program]``
    """
    code = textwrap.dedent(inspect.getsource(function))
    # base64 keeps the quotes and newlines of the source out of the generated program text
    ccode = binascii.b2a_base64(code.encode(), newline=False).decode('ascii')
    name = function.__name__
    parameters = ', '.join(repr(arg) for arg in args)
    # assembled line by line so that interpolated values can never influence dedenting
    program = "\n".join([
        "",
        "import binascii;",
        f's=binascii.a2b_base64("{ccode}");',
        "exec(s.decode());",
        f"print({name}({parameters}));",
    ])
    return [python, "-c", program]