From b595f257bfa0251ef64b6028ba4c94b500176f61 Mon Sep 17 00:00:00 2001 From: Stephen Herr Date: Thu, 2 Feb 2023 16:29:14 -0500 Subject: [PATCH] Allow for public key id checking when adding packages to repos closes #2258 --- CHANGES/2258.feature | 1 + docs/workflows/manage.rst | 5 ++ .../app/management/commands/rpm-datarepair.py | 13 +++++ pulp_rpm/app/migrations/0046_pub_key.py | 23 +++++++++ pulp_rpm/app/models/package.py | 9 +++- pulp_rpm/app/models/repository.py | 23 +++++++++ pulp_rpm/app/serializers/package.py | 9 +++- pulp_rpm/app/serializers/repository.py | 26 ++++++++++ pulp_rpm/app/shared_utils.py | 48 ++++++++++++++++++- pulp_rpm/app/tasks/synchronizing.py | 13 +++++ requirements.txt | 2 + 11 files changed, 169 insertions(+), 3 deletions(-) create mode 100644 CHANGES/2258.feature create mode 100644 pulp_rpm/app/migrations/0046_pub_key.py diff --git a/CHANGES/2258.feature b/CHANGES/2258.feature new file mode 100644 index 0000000000..e4f1c18970 --- /dev/null +++ b/CHANGES/2258.feature @@ -0,0 +1 @@ +Pulp is able to reject packages that are unsigned or incorrectly signed, if desired. \ No newline at end of file diff --git a/docs/workflows/manage.rst b/docs/workflows/manage.rst index 055af6f647..8d35ec495a 100644 --- a/docs/workflows/manage.rst +++ b/docs/workflows/manage.rst @@ -7,6 +7,11 @@ Package Within a repository version there can be only one package (RPM or SRPM) with the same NEVRA. NEVRA stands for name, epoch, version, release, architecture. +Repositories can be set to only allow packages signed by specific key(s) to be added to them by setting the `allowed_pub_keys` field on the repo. +That field is a list of Key IDs for the acceptable signing keys. +A Key ID is the last 16 digits of the hex fingerprint of the public keys (import the key to gpg, do `gpg --list-keys`, take the last 16 digits). +For example, a repo that had set `allowed_pub_keys: ["ABCDEF0123456789", "0987654321FEDCBA"]` would only allow packages signed by those two keys to be added, and any update that contained an RPM *not* signed by one of them would fail. + Advisory -------- diff --git a/pulp_rpm/app/management/commands/rpm-datarepair.py b/pulp_rpm/app/management/commands/rpm-datarepair.py index 184f14dde0..ed81540717 100644 --- a/pulp_rpm/app/management/commands/rpm-datarepair.py +++ b/pulp_rpm/app/management/commands/rpm-datarepair.py @@ -3,6 +3,7 @@ from django.core.management import BaseCommand, CommandError from pulp_rpm.app.models import Package # noqa +from pulp_rpm.app.shared_utils import read_crpackage_from_artifact class Command(BaseCommand): @@ -22,9 +23,21 @@ def handle(self, *args, **options): if issue == "2460": self.repair_2460() + elif issue == "2258": + self.repair_2258() else: raise CommandError(_("Unknown issue: '{}'").format(issue)) + def repair_2258(self): + """Re-examine package header and store signing key for issue #2258.""" + packages = Package.objects.filter(signer_key_id__isnull=True) + for package in packages: + _, pub_key_id = read_crpackage_from_artifact(package._artifacts.first()) # Only one. + if pub_key_id is not None: + print(f"Fixing stored signature for {package.nevra}") + package.signer_key_id = pub_key_id + package.save() + def repair_2460(self): """Perform data repair for issue #2460.""" diff --git a/pulp_rpm/app/migrations/0046_pub_key.py b/pulp_rpm/app/migrations/0046_pub_key.py new file mode 100644 index 0000000000..7158461ffd --- /dev/null +++ b/pulp_rpm/app/migrations/0046_pub_key.py @@ -0,0 +1,23 @@ +# Generated by Django 3.2.17 on 2023-02-02 21:22 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("rpm", "0045_modulemd_fields"), + ] + + operations = [ + migrations.AddField( + model_name="package", + name="signer_key_id", + field=models.CharField(max_length=16, null=True), + ), + migrations.AddField( + model_name="rpmrepository", + name="allowed_pub_keys", + field=models.JSONField(default=list), + ), + ] diff --git a/pulp_rpm/app/models/package.py b/pulp_rpm/app/models/package.py index 3f82a89851..7422b88fd8 100644 --- a/pulp_rpm/app/models/package.py +++ b/pulp_rpm/app/models/package.py @@ -1,4 +1,5 @@ from logging import getLogger +import traceback import createrepo_c as cr @@ -134,6 +135,8 @@ class Package(Content): First byte of the header rpm_header_end (BigInteger): Last byte of the header + signer_key_id (Text): + 16-digit hex key id of the public key that signed this rpm (if any) is_modular (Bool): Flag to identify if the package is modular @@ -220,6 +223,8 @@ class Package(Content): rpm_vendor = models.TextField() rpm_header_start = models.BigIntegerField(null=True) rpm_header_end = models.BigIntegerField(null=True) + # According to rfc4880 is always 8 octets, aka a 16-digit hex number + signer_key_id = models.CharField(null=True, max_length=16) size_archive = models.BigIntegerField(null=True) size_installed = models.BigIntegerField(null=True) @@ -289,12 +294,13 @@ class ReadonlyMeta: readonly = ["evr"] @classmethod - def createrepo_to_dict(cls, package): + def createrepo_to_dict(cls, package, signer_key_id=None): """ Convert createrepo_c package object to dict for instantiating Package object. Args: package(createrepo_c.Package): a RPM/SRPM package to convert + signer_key_id(str): the key_id of the signer, if any Returns: dict: all data for RPM/SRPM content creation @@ -367,6 +373,7 @@ def createrepo_to_dict(cls, package): PULP_PACKAGE_ATTRS.TIME_FILE: getattr(package, CR_PACKAGE_ATTRS.TIME_FILE), PULP_PACKAGE_ATTRS.URL: getattr(package, CR_PACKAGE_ATTRS.URL) or "", PULP_PACKAGE_ATTRS.VERSION: getattr(package, CR_PACKAGE_ATTRS.VERSION), + "signer_key_id": signer_key_id, } def to_createrepo_c(self): diff --git a/pulp_rpm/app/models/repository.py b/pulp_rpm/app/models/repository.py index 85ee6da3f7..3f4e84cbaf 100644 --- a/pulp_rpm/app/models/repository.py +++ b/pulp_rpm/app/models/repository.py @@ -202,6 +202,12 @@ class RpmRepository(Repository, AutoAddObjPermsMixin): 1 or 0 corresponding to whether repo_gpgcheck should be enabled in the generated .repo file. sqlite_metadata (Boolean): Whether to generate sqlite metadata files on publish. + allowed_pub_keys (JSON): + A list of Public Key IDs (16-digit hex strings) if you want to require that all rpms + added to the Repo must be signed by one of them. This is a Pulp-side *data-sanity* + check, whereas gpgcheck triggers a client-side *security* restriction. You can find the + Key ID by importing the public key and then taking the last 16 digits from the hex + string in the output of `gpg --list-keys`. """ TYPE = "rpm" @@ -234,6 +240,7 @@ class RpmRepository(Repository, AutoAddObjPermsMixin): gpgcheck = models.IntegerField(default=0, choices=GPGCHECK_CHOICES) repo_gpgcheck = models.IntegerField(default=0, choices=GPGCHECK_CHOICES) sqlite_metadata = models.BooleanField(default=False) + allowed_pub_keys = models.JSONField(default=list) def on_new_version(self, version): """ @@ -299,6 +306,7 @@ def finalize_new_version(self, new_version): """ Ensure there are no duplicates in a repo version and content is not broken. + If set, remove Packages that are not signed by one of the allowed_pub_keys public keys. Remove duplicates based on repo_key_fields. Ensure that modulemd is added with all its RPMs. Ensure that modulemd is removed with all its RPMs. @@ -316,6 +324,21 @@ def finalize_new_version(self, new_version): except RepositoryVersion.DoesNotExist: previous_version = None + # If requested, validate that the added rpms are signed appropriately + if self.allowed_pub_keys: + rejected_nevras = [] + if added := new_version.added().filter(pulp_type=Package.get_pulp_type()): + for package in added: + package = package.cast() # Transform it from Content into Package + if str(package.signer_key_id) not in self.allowed_pub_keys: + rejected_nevras.append(package.nevra) + if rejected_nevras: + raise Exception( + f"Repo {self.name} has specified that packages must be signed by one of the " + f"following keys: \n{self.allowed_pub_keys} \nThe following packages are not " + f"signed appropriately and the update has been rejected: \n{rejected_nevras}" + ) + remove_duplicates(new_version) self._resolve_distribution_trees(new_version, previous_version) diff --git a/pulp_rpm/app/serializers/package.py b/pulp_rpm/app/serializers/package.py index a3b2bd2310..662a9ba1c1 100644 --- a/pulp_rpm/app/serializers/package.py +++ b/pulp_rpm/app/serializers/package.py @@ -196,6 +196,12 @@ class PackageSerializer(SingleArtifactContentUploadSerializer, ContentChecksumSe help_text=_("Last byte of the header"), read_only=True, ) + signer_key_id = serializers.CharField( + help_text=_("16-digit hex key id of the public key that signed this rpm (if any)"), + allow_blank=True, + required=False, + read_only=True, + ) is_modular = serializers.BooleanField( help_text=_("Flag to identify if the package is modular"), required=False, @@ -247,7 +253,7 @@ def deferred_validate(self, data): data = super().deferred_validate(data) # export META from rpm and prepare dict as saveable format try: - new_pkg = Package.createrepo_to_dict(read_crpackage_from_artifact(data["artifact"])) + new_pkg = Package.createrepo_to_dict(*read_crpackage_from_artifact(data["artifact"])) except OSError: log.info(traceback.format_exc()) raise NotAcceptable(detail="RPM file cannot be parsed for metadata") @@ -314,6 +320,7 @@ class Meta: "rpm_vendor", "rpm_header_start", "rpm_header_end", + "signer_key_id", "is_modular", "size_archive", "size_installed", diff --git a/pulp_rpm/app/serializers/repository.py b/pulp_rpm/app/serializers/repository.py index aa19ec033b..534d419a24 100644 --- a/pulp_rpm/app/serializers/repository.py +++ b/pulp_rpm/app/serializers/repository.py @@ -1,4 +1,6 @@ from gettext import gettext as _ +import json +import re from django.conf import settings from jsonschema import Draft7Validator @@ -105,6 +107,17 @@ class RpmRepositorySerializer(RepositorySerializer): "DEPRECATED: An option specifying whether Pulp should generate SQLite metadata." ), ) + allowed_pub_keys = serializers.JSONField( + default=list, + required=False, + help_text=_( + "A list of Public Key IDs (16-digit hex strings) if you want to require that all rpms " + "added to the Repo must be signed by one of them. This is a Pulp-side *data-sanity* " + "check, whereas gpgcheck triggers a client-side *security* restriction. You can find " + "the Key ID by importing the public key and then taking the last 16 digits from the " + "hex string in the output of `gpg --list-keys`." + ), + ) def validate(self, data): """Validate data.""" @@ -116,6 +129,18 @@ def validate(self, data): ): raise serializers.ValidationError({field: _(ALLOWED_CHECKSUM_ERROR_MSG)}) + signed_by = "allowed_pub_keys" + if signed_by in data: + keys = json.loads(data[signed_by]) + uppercase_keys = [] + for key in keys: + key = key.upper() # key ids in rpm headers are always uppercase + if not re.match("^[A-F0-9]{16}$", key): + raise serializers.ValidationError( + {field: _("Public Key IDs must be 16-digit hex strings.")} + ) + uppercase_keys.append(key) + data[signed_by] = uppercase_keys validated_data = super().validate(data) return validated_data @@ -129,6 +154,7 @@ class Meta: "gpgcheck", "repo_gpgcheck", "sqlite_metadata", + "allowed_pub_keys", ) model = RpmRepository diff --git a/pulp_rpm/app/shared_utils.py b/pulp_rpm/app/shared_utils.py index 9efb47bcd0..6c94a536ed 100644 --- a/pulp_rpm/app/shared_utils.py +++ b/pulp_rpm/app/shared_utils.py @@ -1,12 +1,18 @@ import createrepo_c as cr +from logging import getLogger import tempfile +import traceback import shutil from hashlib import sha256 +from pgpy.pgp import PGPSignature +import rpmfile from django.conf import settings from django.core.files.storage import default_storage as storage from django.utils.dateparse import parse_datetime +log = getLogger(__name__) + def format_nevra(name=None, epoch=0, version=None, release=None, arch=None): """Generate Name-Epoch-Version-Release-Arch string.""" @@ -51,9 +57,10 @@ def read_crpackage_from_artifact(artifact): cr_pkginfo = cr.package_from_rpm( temp_file.name, changelog_limit=settings.KEEP_CHANGELOG_LIMIT ) + signer_key_id = parse_signer_id(temp_file.name) artifact_file.close() - return cr_pkginfo + return cr_pkginfo, signer_key_id def urlpath_sanitize(*args): @@ -159,3 +166,42 @@ def parse_time(value): int | datetime | None: formatted time value """ return int(value) if value.isdigit() else parse_datetime(value) + + +def parse_signer_id(rpm_path): + """ + Parse the key_id of the signing key from the RPM header, given a locally-available RPM. + + Args: + rpm_path(str): Path to the local RPM file. + + Returns: + str: 16-digit hex key_id of signing key, or None. + """ + # I have filed an Issue with createrepo_c requesting the ability to access the signature + # through the python bindings. Until that is available we'll have to read the header + # a second time with a utility that actually makes it available. + # https://github.com/rpm-software-management/createrepo_c/issues/346 + # TODO: When the above is resolved re-evaluate and potentially drop extra dependencies. + signature = "" + try: + with rpmfile.open(rpm_path) as rpm: + + def hdr(header): + return rpm.headers.get(header, "") + + # What the `rpm -qi` command does. See "--info" definition in /usr/lib/rpm/rpmpopt-* + signature = hdr("dsaheader") or hdr("rsaheader") or hdr("siggpg") or hdr("sigpgp") + except: + log.info(f"Could not extract signature from RPM file: {rpm_path}") + log.info(traceback.format_exc()) + + signer_key_id = "" # If package is unsigned, store empty str + if signature: + try: + signer_key_id = PGPSignature.from_blob(signature).signer + except: + signer_key_id = None # If error (or never examined), store None + log.info(f"Could not parse PGP signature for {getattr(package, CR_PACKAGE_ATTRS.NAME)}") + log.info(traceback.format_exc()) + return signer_key_id diff --git a/pulp_rpm/app/tasks/synchronizing.py b/pulp_rpm/app/tasks/synchronizing.py index 18dba4a9ef..543a879d03 100644 --- a/pulp_rpm/app/tasks/synchronizing.py +++ b/pulp_rpm/app/tasks/synchronizing.py @@ -90,6 +90,7 @@ from pulp_rpm.app.shared_utils import ( is_previous_version, get_sha256, + parse_signer_id, urlpath_sanitize, ) from pulp_rpm.app.rpm_version import RpmVersion @@ -1435,6 +1436,8 @@ def _post_save(self, batch): When it has a treeinfo file, save a batch of Addon, Checksum, Image, Variant objects. + Examine Package headers and save signer_key_id. + Args: batch (list of :class:`~pulpcore.plugin.stages.DeclarativeContent`): The batch of :class:`~pulpcore.plugin.stages.DeclarativeContent` objects to be saved. @@ -1492,6 +1495,7 @@ def _handle_distribution_tree(declarative_content): update_collection_to_save = [] update_references_to_save = [] update_collection_packages_to_save = [] + packages_to_update = [] seen_updaterecords = [] for declarative_content in batch: @@ -1534,6 +1538,12 @@ def _handle_distribution_tree(declarative_content): for update_reference in update_references: update_reference.update_record = update_record update_references_to_save.append(update_reference) + elif isinstance(declarative_content.content, Package): + artifact = declarative_content.d_artifacts[0] # Packages have 1 artifact + signer_key_id = parse_signer_id(artifact.artifact.file.path) + if signer_key_id is not None: + declarative_content.content.signer_key_id = signer_key_id + packages_to_update.append(declarative_content.content) if update_collection_to_save: UpdateCollection.objects.bulk_create(update_collection_to_save, ignore_conflicts=True) @@ -1546,6 +1556,9 @@ def _handle_distribution_tree(declarative_content): if update_references_to_save: UpdateReference.objects.bulk_create(update_references_to_save, ignore_conflicts=True) + if packages_to_update: + Package.objects.bulk_update(packages_to_update, ["signer_key_id"]) + class RpmQueryExistingContents(Stage): """ diff --git a/requirements.txt b/requirements.txt index 0259783d4c..2aa66e51f9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,5 @@ productmd~=1.33.0 pulpcore>=3.21,<3.25 solv~=0.7.21 aiohttp_xmlrpc~=1.5.0 +PGPy~=0.6.0 +rpmfile~=1.0.8 \ No newline at end of file