From f4a6ebe9a65e1eab169dde02e23f934268644c33 Mon Sep 17 00:00:00 2001
From: Nikolay Yurin
Date: Mon, 31 Jul 2023 13:47:23 -0700
Subject: [PATCH] src/patchset.py: Implement Patchset service

The patchset service processes patchset nodes:
- Wait for the parent checkout node to become available
- Download the checkout node tarball
- Apply patches and calculate the patchset hash
- Upload the new tarball

Signed-off-by: Nikolay Yurin
---
 config/kernelci.toml |   5 +
 docker-compose.yaml  |  14 +++
 src/patchset.py      | 250 +++++++++++++++++++++++++++++++++++++++++++
 src/tarball.py       |  47 ++++----
 4 files changed, 297 insertions(+), 19 deletions(-)
 create mode 100755 src/patchset.py

diff --git a/config/kernelci.toml b/config/kernelci.toml
index 404e309c8..822f6300b 100644
--- a/config/kernelci.toml
+++ b/config/kernelci.toml
@@ -13,6 +13,11 @@ kdir = "/home/kernelci/data/src/linux"
 output = "/home/kernelci/data/output"
 storage_config = "docker-host"
 
+[patchset]
+kdir = "/home/kernelci/data/src/linux-patchset"
+output = "/home/kernelci/data/output"
+storage_config = "docker-host"
+
 [scheduler]
 output = "/home/kernelci/output"
 
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 68a6d0af5..6e76b352f 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -149,3 +149,17 @@ services:
       - '--settings=${KCI_SETTINGS:-/home/kernelci/config/kernelci.toml}'
       - 'run'
       - '--mode=holdoff'
+
+  patchset:
+    <<: *base-service
+    container_name: 'kernelci-pipeline-patchset'
+    command:
+      - './pipeline/patchset.py'
+      - '--settings=${KCI_SETTINGS:-/home/kernelci/config/kernelci.toml}'
+      - 'run'
+    volumes:
+      - './src:/home/kernelci/pipeline'
+      - './config:/home/kernelci/config'
+      - './data/ssh:/home/kernelci/data/ssh'
+      - './data/src:/home/kernelci/data/src'
+      - './data/output:/home/kernelci/data/output'
diff --git a/src/patchset.py b/src/patchset.py
new file mode 100755
index 000000000..2e1e76d79
--- /dev/null
+++ b/src/patchset.py
@@ -0,0 +1,250 @@
+#!/usr/bin/env python3
+#
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# Copyright (C) 2022 Collabora Limited
+# Author: Nikolay Yurin
+
+import os
+import sys
+import json
+import requests
+import time
+import tempfile
+import hashlib
+from datetime import datetime, timedelta
+from urllib.parse import urlparse
+from urllib.request import urlopen
+
+import kernelci
+import kernelci.build
+import kernelci.config
+from kernelci.legacy.cli import Args, Command, parse_opts
+import kernelci.storage
+
+from tarball import Tarball
+
+# FIXME: make this a patchset service configuration option
+ALLOWED_DOMAINS = {"patchwork.kernel.org"}
+PATCHSET_SHORT_HASH_LEN = 13
+
+
+class Patchset(Tarball):
+    TAR_CREATE_CMD = """\
+set -e
+cd {target_dir}/
+tar --create --transform "s/^/{prefix}\\//" * | gzip > {tarball_path}
+"""
+
+    APPLY_PATCH_SHELL_CMD = """\
+set -e
+cd {kdir}
+patch -p1 < {patch_file}
+"""
+
+    # FIXME: this hashing scheme is a first attempt and probably needs
+    # rework; it should eventually move into kernelci.patch
+    def _hash_patch(self, patch_file):
+        allowed_prefixes = {
+            "-",  # This covers both removed lines and the source file
+            "+",  # This covers both added lines and the target file
+            # "@" is left out: handling of hunk headers is still undecided
+        }
+        norm_patch_lines = []
+        for line in patch_file.readlines():
+            if not line:
+                continue
+
+            if line[0] in allowed_prefixes:
+                norm_patch_lines.append(line)
+
+        norm_patch_str = "\n".join(norm_patch_lines)
+        return hashlib.sha256(norm_patch_str.encode("utf-8")).hexdigest()
+
+    # FIXME: move into kernelci.patch
+    def _apply_patch(self, kdir, patch_url):
+        encoding = urlopen(patch_url).headers.get_charsets()[0]
+        with tempfile.NamedTemporaryFile(
+            prefix="kernel-patch-",
+            mode="w+", encoding=encoding  # text mode, so encoding applies
+        ) as tmp_f:
+            if not kernelci.build._download_file(patch_url, tmp_f.name):
+                raise FileNotFoundError(
+                    f"Error downloading patch from {patch_url}"
+                )
+
+            kernelci.shell_cmd(self.APPLY_PATCH_SHELL_CMD.format(
+                kdir=kdir,
+                patch_file=tmp_f.name,
+            ))
+
+            return self._hash_patch(tmp_f)
+
+    # FIXME: move into kernelci.patch
+    def _apply_patches(self, kdir, patch_artifacts):
+        patchset_hash = hashlib.sha256()
+        for patch_name, patch_url in patch_artifacts.items():
+            self.log.info(
+                f"Applying patch {patch_name}, url: {patch_url}"
+            )
+            patch_hash = self._apply_patch(kdir, patch_url)
+            patchset_hash.update(patch_hash.encode("utf-8"))
+
+        return patchset_hash.hexdigest()
+
+    def _download_checkout_archive(self, tarball_url, retries=3):
+        self.log.info(f"Downloading checkout tarball, url: {tarball_url}")
+        urlpath = urlparse(tarball_url).path
+        tar_filename = os.path.basename(urlpath)
+        kernelci.build.pull_tarball(
+            kdir=self._kdir,
+            url=tarball_url,
+            dest_filename=tar_filename,
+            retries=retries,
+            delete=True
+        )
+
+    def _update_node(
+        self,
+        patchset_node,
+        revision,
+        tarball_url,
+    ):
+        node = patchset_node.copy()
+        node.update({
+            'revision': revision,
+            'state': 'available',
+            'artifacts': {
+                'tarball': tarball_url,
+            },
+            'holdoff': str(datetime.utcnow() + timedelta(minutes=10))
+        })
+        try:
+            self._api.update_node(node)
+        except requests.exceptions.HTTPError as err:
+            err_msg = json.loads(err.response.content).get("detail", [])
+            self.log.error(err_msg)
+
+    def _setup(self, *args):
+        return self._api_helper.subscribe_filters({
+            'op': 'created',
+            'name': 'patchset',
+            'state': 'running',
+        })
+
+    def _has_approved_domain(self, url):
+        return urlparse(url).hostname in ALLOWED_DOMAINS
+
+    def _process_patchset(self, checkout_node, patchset_node):
+        build_config = self._find_build_config(checkout_node)
+        if not build_config:
+            raise RuntimeError(
+                "No build config found for checkout node "
+                f"{checkout_node['id']}"
+            )
+
+        patch_artifacts = patchset_node.get("artifacts")
+        if not patch_artifacts:
+            raise ValueError(
+                f"No patch artifacts available for node {patchset_node['id']}"
+            )
+
+        if not all(
+            self._has_approved_domain(patch_mbox_url)
+            for patch_mbox_url in patch_artifacts.values()
+        ):
+            raise RuntimeError("Forbidden patch URL domain")
+
+        patchset_build_config = self._find_build_config(patchset_node)
+        if build_config != patchset_build_config:
+            raise ValueError(
+                f"Patchset node {patchset_node['id']} build config "
+                f"doesn't match the parent node {checkout_node['id']}"
+            )
+
+        self._download_checkout_archive(checkout_node["artifacts"]["tarball"])
+
+        checkout_name = '-'.join([
+            'linux',
+            checkout_node["revision"]["tree"],
+            checkout_node["revision"]["branch"],
+            checkout_node["revision"]["describe"],
+        ])
+        checkout_path = os.path.join(self._kdir, checkout_name)
+        patchset_hash = self._apply_patches(checkout_path, patch_artifacts)
+        patchset_hash_short = patchset_hash[:PATCHSET_SHORT_HASH_LEN]
+        tarball_path = self._make_tarball(
+            target_dir=checkout_path,
+            tarball_name=f"{checkout_name}-{patchset_hash_short}"
+        )
+        tarball_url = self._push_tarball(tarball_path)
+
+        patchset_revision = checkout_node["revision"].copy()
+        patchset_revision['patchset'] = patchset_hash
+
+        self._update_node(
+            patchset_node,
+            patchset_revision,
+            tarball_url
+        )
+
+    def _mark_failed(self, patchset_node):
+        node = patchset_node.copy()
+        node.update({
+            'state': 'done',
+            'result': 'fail',
+        })
+        try:
+            self._api.update_node(node)
+        except requests.exceptions.HTTPError as err:
+            err_msg = json.loads(err.response.content).get("detail", [])
+            self.log.error(err_msg)
+
+    def _run(self, sub_id):
+        self.log.info("Listening for new patchset nodes")
+        self.log.info("Press Ctrl-C to stop.")
+
+        while True:
+            patchset_nodes = self._api.get_nodes({
+                "name": "patchset",
+                "state": "running",
+            })
+
+            for patchset_node in patchset_nodes:
+                if not patchset_node["parent"]:
+                    continue
+
+                checkout_node = self._api.get_node(patchset_node["parent"])
+                if checkout_node["state"] == 'running':
+                    continue
+
+                try:
+                    self._process_patchset(checkout_node, patchset_node)
+                except Exception as e:
+                    self._mark_failed(patchset_node)
+                    self.log.error(e)
+
+            time.sleep(10)
+
+
+class cmd_run(Command):
+    help = (
+        "Wait for a checkout node to be available "
+        "and push a source+patchset tarball"
+    )
+    args = [
+        Args.kdir, Args.output, Args.api_config, Args.storage_config,
+    ]
+    opt_args = [
+        Args.verbose, Args.storage_cred,
+    ]
+
+    def __call__(self, configs, args):
+        return Patchset(configs, args).run(args)
+
+
+if __name__ == '__main__':
+    opts = parse_opts('patchset', globals())
+    configs = kernelci.config.load('config/pipeline.yaml')
+    status = opts.command(configs, opts)
+    sys.exit(0 if status is True else 1)
diff --git a/src/tarball.py b/src/tarball.py
index a81f7e10e..ce42ac402 100755
--- a/src/tarball.py
+++ b/src/tarball.py
@@ -5,13 +5,12 @@
 # Copyright (C) 2022 Collabora Limited
 # Author: Guillaume Tucker
 # Author: Jeny Sadadia
+# Author: Nikolay Yurin
 
 from datetime import datetime, timedelta
-import logging
 import os
 import re
 import sys
-import urllib.parse
 import json
 import requests
 
@@ -32,6 +31,11 @@
 
 
 class Tarball(Service):
+    TAR_CREATE_CMD = """\
+set -e
+cd {target_dir}
+git archive --format=tar --prefix={prefix}/ HEAD | gzip > {tarball_path}
+"""
 
     def __init__(self, configs, args):
         super().__init__(configs, args, 'tarball')
@@ -50,7 +54,7 @@ def _find_build_config(self, node):
         revision = node['revision']
         tree = revision['tree']
         branch = revision['branch']
-        for name, config in self._build_configs.items():
+        for config in self._build_configs.values():
             if config.tree.name == tree and config.branch == branch:
                 return config
 
@@ -59,25 +63,22 @@ def _update_repo(self, config):
         kernelci.build.update_repo(config, self._kdir)
         self.log.info("Repo updated")
 
-    def _make_tarball(self, config, describe):
-        name = '-'.join(['linux', config.tree.name, config.branch, describe])
-        tarball = f"{name}.tar.gz"
-        self.log.info(f"Making tarball {tarball}")
-        output_path = os.path.relpath(self._output, self._kdir)
-        cmd = """\
-set -e
-cd {kdir}
-git archive --format=tar --prefix={name}/ HEAD | gzip > {output}/{tarball}
-""".format(kdir=self._kdir, name=name, output=output_path, tarball=tarball)
+    def _make_tarball(self, target_dir, tarball_name):
+        self.log.info(f"Making tarball {tarball_name}")
+        tarball_path = os.path.join(self._output, f"{tarball_name}.tar.gz")
+        cmd = self.TAR_CREATE_CMD.format(
+            target_dir=target_dir,
+            prefix=tarball_name,
+            tarball_path=tarball_path
+        )
         self.log.info(cmd)
         kernelci.shell_cmd(cmd)
         self.log.info("Tarball created")
-        return tarball
+        return tarball_path
 
-    def _push_tarball(self, config, describe):
-        tarball_name = self._make_tarball(config, describe)
-        tarball_path = os.path.join(self._output, tarball_name)
-        self.log.info(f"Uploading {tarball_path}")
+    def _push_tarball(self, tarball_path):
+        tarball_name = os.path.basename(tarball_path)
+        self.log.info(f"Uploading {tarball_name}")
self.log.info(f"Uploading {tarball_name}") tarball_url = self._storage.upload_single((tarball_path, tarball_name)) self.log.info(f"Upload complete: {tarball_url}") os.unlink(tarball_path) @@ -134,11 +135,19 @@ def _run(self, sub_id): continue self._update_repo(build_config) + describe = kernelci.build.git_describe( build_config.tree.name, self._kdir ) version = self._get_version_from_describe() - tarball_url = self._push_tarball(build_config, describe) + tarball_name = '-'.join([ + 'linux', + build_config.tree.name, + build_config.branch, + describe + ]) + tarball_path = self._make_tarball(self._kdir, tarball_name) + tarball_url = self._push_tarball(tarball_path) self._update_node(checkout_node, describe, version, tarball_url) return True