Skip to content

Commit

Permalink
Allow for public key id checking when adding packages to repos
Browse files Browse the repository at this point in the history
closes #2258
  • Loading branch information
sdherr committed Feb 14, 2023
1 parent 596ce74 commit 99414aa
Show file tree
Hide file tree
Showing 11 changed files with 182 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGES/2258.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Pulp is able to reject packages that are unsigned or incorrectly signed, if desired.
5 changes: 5 additions & 0 deletions docs/workflows/manage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ Package
Within a repository version there can be only one package (RPM or SRPM) with the same NEVRA.
NEVRA stands for name, epoch, version, release, architecture.

Repositories can be set to only allow packages signed by specific key(s) to be added to them by setting the `allowed_pub_keys` field on the repo.
That field is a list of Key IDs for the acceptable signing keys.
A Key ID is the last 16 digits of the hex fingerprint of the public keys (import the key to gpg, do `gpg --list-keys`, take the last 16 digits).
For example, a repo that had set `allowed_pub_keys: ["ABCDEF0123456789", "0987654321FEDCBA"]` would only allow packages signed by those two keys to be added, and any update that contained an RPM *not* signed by one of them would fail.


Advisory
--------
Expand Down
22 changes: 22 additions & 0 deletions pulp_rpm/app/management/commands/rpm-datarepair.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from django.core.management import BaseCommand, CommandError

from pulp_rpm.app.models import Package # noqa
from pulp_rpm.app.shared_utils import read_crpackage_from_artifact


class Command(BaseCommand):
Expand All @@ -15,16 +16,37 @@ class Command(BaseCommand):
def add_arguments(self, parser):
"""Set up arguments."""
parser.add_argument("issue", help=_("The github issue # of the issue to be fixed."))
parser.add_argument("-y", "--yes", action='store_true', default=False,
help=_('Execute change with no "Are you sure?" prompt.'))

def handle(self, *args, **options):
"""Implement the command."""
issue = options["issue"]

if issue == "2460":
self.repair_2460()
elif issue == "2258":
self.repair_2258(options["yes"])
else:
raise CommandError(_("Unknown issue: '{}'").format(issue))

def repair_2258(self, execute=False):
"""Re-examine package header and store signing key for issue #2258."""
if not execute:
count = Package.objects.filter(signer_key_id__isnull=True).count()
print(f"This will require downloading {count} packages out of storage for examination, "
"which may be time-and-cost expensive. Are you sure you want to do that? Run "
'again with "--yes" to proceed.')
return

packages = Package.objects.filter(signer_key_id__isnull=True).iterator()
for package in packages:
_, pub_key_id = read_crpackage_from_artifact(package._artifacts.first()) # Only one.
if pub_key_id is not None:
print(f"Fixing stored signature for {package.nevra}")
package.signer_key_id = pub_key_id
package.save()

def repair_2460(self):
"""Perform data repair for issue #2460."""

Expand Down
23 changes: 23 additions & 0 deletions pulp_rpm/app/migrations/0046_pub_key.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Generated by Django 3.2.17 on 2023-02-02 21:22

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
("rpm", "0045_modulemd_fields"),
]

operations = [
migrations.AddField(
model_name="package",
name="signer_key_id",
field=models.CharField(max_length=16, null=True),
),
migrations.AddField(
model_name="rpmrepository",
name="allowed_pub_keys",
field=models.JSONField(default=list),
),
]
9 changes: 8 additions & 1 deletion pulp_rpm/app/models/package.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from logging import getLogger
import traceback

import createrepo_c as cr

Expand Down Expand Up @@ -134,6 +135,8 @@ class Package(Content):
First byte of the header
rpm_header_end (BigInteger):
Last byte of the header
signer_key_id (Text):
16-digit hex key id of the public key that signed this rpm (if any)
is_modular (Bool):
Flag to identify if the package is modular
Expand Down Expand Up @@ -220,6 +223,8 @@ class Package(Content):
rpm_vendor = models.TextField()
rpm_header_start = models.BigIntegerField(null=True)
rpm_header_end = models.BigIntegerField(null=True)
# According to rfc4880 is always 8 octets, aka a 16-digit hex number
signer_key_id = models.CharField(null=True, max_length=16)

size_archive = models.BigIntegerField(null=True)
size_installed = models.BigIntegerField(null=True)
Expand Down Expand Up @@ -289,12 +294,13 @@ class ReadonlyMeta:
readonly = ["evr"]

@classmethod
def createrepo_to_dict(cls, package):
def createrepo_to_dict(cls, package, signer_key_id=None):
"""
Convert createrepo_c package object to dict for instantiating Package object.
Args:
package(createrepo_c.Package): a RPM/SRPM package to convert
signer_key_id(str): the key_id of the signer, if any
Returns:
dict: all data for RPM/SRPM content creation
Expand Down Expand Up @@ -367,6 +373,7 @@ def createrepo_to_dict(cls, package):
PULP_PACKAGE_ATTRS.TIME_FILE: getattr(package, CR_PACKAGE_ATTRS.TIME_FILE),
PULP_PACKAGE_ATTRS.URL: getattr(package, CR_PACKAGE_ATTRS.URL) or "",
PULP_PACKAGE_ATTRS.VERSION: getattr(package, CR_PACKAGE_ATTRS.VERSION),
"signer_key_id": signer_key_id,
}

def to_createrepo_c(self):
Expand Down
23 changes: 23 additions & 0 deletions pulp_rpm/app/models/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,12 @@ class RpmRepository(Repository, AutoAddObjPermsMixin):
1 or 0 corresponding to whether repo_gpgcheck should be enabled in the generated
.repo file.
sqlite_metadata (Boolean): Whether to generate sqlite metadata files on publish.
allowed_pub_keys (JSON):
A list of Public Key IDs (16-digit hex strings) if you want to require that all rpms
added to the Repo must be signed by one of them. This is a Pulp-side *data-sanity*
check, whereas gpgcheck triggers a client-side *security* restriction. You can find the
Key ID by importing the public key and then taking the last 16 digits from the hex
string in the output of `gpg --list-keys`.
"""

TYPE = "rpm"
Expand Down Expand Up @@ -234,6 +240,7 @@ class RpmRepository(Repository, AutoAddObjPermsMixin):
gpgcheck = models.IntegerField(default=0, choices=GPGCHECK_CHOICES)
repo_gpgcheck = models.IntegerField(default=0, choices=GPGCHECK_CHOICES)
sqlite_metadata = models.BooleanField(default=False)
allowed_pub_keys = models.JSONField(default=list)

def on_new_version(self, version):
"""
Expand Down Expand Up @@ -299,6 +306,7 @@ def finalize_new_version(self, new_version):
"""
Ensure there are no duplicates in a repo version and content is not broken.
If set, remove Packages that are not signed by one of the allowed_pub_keys public keys.
Remove duplicates based on repo_key_fields.
Ensure that modulemd is added with all its RPMs.
Ensure that modulemd is removed with all its RPMs.
Expand All @@ -316,6 +324,7 @@ def finalize_new_version(self, new_version):
except RepositoryVersion.DoesNotExist:
previous_version = None

self._check_signatures(new_version)
remove_duplicates(new_version)
self._resolve_distribution_trees(new_version, previous_version)

Expand Down Expand Up @@ -350,6 +359,20 @@ def finalize_new_version(self, new_version):
"{value_errors}"
).format(repo=new_version.repository.name, value_errors=str(ve))
)

def _check_signatures(self, new_version):
"""If requested, validate that the added rpms are signed appropriately."""
if self.allowed_pub_keys:
rejected_nevras = []
for package in Package.objects.filter(pk__in=new_version.added()).iterator():
if str(package.signer_key_id) not in self.allowed_pub_keys:
rejected_nevras.append(package.nevra)
if rejected_nevras:
raise Exception(
f"Repo {self.name} has specified that packages must be signed by one of the "
f"following keys: \n{self.allowed_pub_keys} \nThe following packages are not "
f"signed appropriately and the update has been rejected: \n{rejected_nevras}"
)

def _apply_retention_policy(self, new_version):
"""Apply the repository's "retain_package_versions" settings to the new version.
Expand Down
9 changes: 8 additions & 1 deletion pulp_rpm/app/serializers/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,12 @@ class PackageSerializer(SingleArtifactContentUploadSerializer, ContentChecksumSe
help_text=_("Last byte of the header"),
read_only=True,
)
signer_key_id = serializers.CharField(
help_text=_("16-digit hex key id of the public key that signed this rpm (if any)"),
allow_blank=True,
required=False,
read_only=True,
)
is_modular = serializers.BooleanField(
help_text=_("Flag to identify if the package is modular"),
required=False,
Expand Down Expand Up @@ -247,7 +253,7 @@ def deferred_validate(self, data):
data = super().deferred_validate(data)
# export META from rpm and prepare dict as saveable format
try:
new_pkg = Package.createrepo_to_dict(read_crpackage_from_artifact(data["artifact"]))
new_pkg = Package.createrepo_to_dict(*read_crpackage_from_artifact(data["artifact"]))
except OSError:
log.info(traceback.format_exc())
raise NotAcceptable(detail="RPM file cannot be parsed for metadata")
Expand Down Expand Up @@ -314,6 +320,7 @@ class Meta:
"rpm_vendor",
"rpm_header_start",
"rpm_header_end",
"signer_key_id",
"is_modular",
"size_archive",
"size_installed",
Expand Down
29 changes: 29 additions & 0 deletions pulp_rpm/app/serializers/repository.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from gettext import gettext as _
import json
import re

from django.conf import settings
from jsonschema import Draft7Validator
Expand Down Expand Up @@ -105,6 +107,17 @@ class RpmRepositorySerializer(RepositorySerializer):
"DEPRECATED: An option specifying whether Pulp should generate SQLite metadata."
),
)
allowed_pub_keys = serializers.JSONField(
default=list,
required=False,
help_text=_(
"A list of Public Key IDs (16-digit hex strings) if you want to require that all rpms "
"added to the Repo must be signed by one of them. This is a Pulp-side *data-sanity* "
"check, whereas gpgcheck triggers a client-side *security* restriction. You can find "
"the Key ID by importing the public key and then taking the last 16 digits from the "
"hex string in the output of `gpg --list-keys`."
),
)

def validate(self, data):
"""Validate data."""
Expand All @@ -116,6 +129,21 @@ def validate(self, data):
):
raise serializers.ValidationError({field: _(ALLOWED_CHECKSUM_ERROR_MSG)})

signed_by = "allowed_pub_keys"
if signed_by in data:
if type(data[signed_by]) == list:
keys = data[signed_by] # framework parses empty list automatically
else:
keys = json.loads(data[signed_by])
uppercase_keys = []
for key in keys:
key = key.upper() # key ids in rpm headers are always uppercase
if not re.match("^[A-F0-9]{16}$", key):
raise serializers.ValidationError(
{field: _("Public Key IDs must be 16-digit hex strings.")}
)
uppercase_keys.append(key)
data[signed_by] = uppercase_keys
validated_data = super().validate(data)
return validated_data

Expand All @@ -129,6 +157,7 @@ class Meta:
"gpgcheck",
"repo_gpgcheck",
"sqlite_metadata",
"allowed_pub_keys",
)
model = RpmRepository

Expand Down
49 changes: 48 additions & 1 deletion pulp_rpm/app/shared_utils.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
import createrepo_c as cr
from logging import getLogger
import tempfile
import traceback
import shutil
from hashlib import sha256
from pgpy.pgp import PGPSignature
import rpmfile

from django.conf import settings
from django.core.files.storage import default_storage as storage
from django.utils.dateparse import parse_datetime

log = getLogger(__name__)


def format_nevra(name=None, epoch=0, version=None, release=None, arch=None):
"""Generate Name-Epoch-Version-Release-Arch string."""
Expand Down Expand Up @@ -51,9 +57,10 @@ def read_crpackage_from_artifact(artifact):
cr_pkginfo = cr.package_from_rpm(
temp_file.name, changelog_limit=settings.KEEP_CHANGELOG_LIMIT
)
signer_key_id = parse_signer_id(temp_file.name)

artifact_file.close()
return cr_pkginfo
return cr_pkginfo, signer_key_id


def urlpath_sanitize(*args):
Expand Down Expand Up @@ -159,3 +166,43 @@ def parse_time(value):
int | datetime | None: formatted time value
"""
return int(value) if value.isdigit() else parse_datetime(value)


def parse_signer_id(rpm_path):
"""
Parse the key_id of the signing key from the RPM header, given a locally-available RPM.
Args:
rpm_path(str): Path to the local RPM file.
Returns:
str: 16-digit hex key_id of signing key, or None.
"""
# I have filed an Issue with createrepo_c requesting the ability to access the signature
# through the python bindings. Until that is available we'll have to read the header
# a second time with a utility that actually makes it available.
# https://github.com/rpm-software-management/createrepo_c/issues/346
# TODO: When the above is resolved re-evaluate and potentially drop extra dependencies.
signature = ""
try:
with rpmfile.open(rpm_path) as rpm:

def hdr(header):
return rpm.headers.get(header, "")

# What the `rpm -qi` command does. See "--info" definition in /usr/lib/rpm/rpmpopt-*
signature = hdr("dsaheader") or hdr("rsaheader") or hdr("siggpg") or hdr("sigpgp")
except:
log.info(f"Could not extract signature from RPM file: {rpm_path}")
log.info(traceback.format_exc())
return None

signer_key_id = "" # If package is unsigned, store empty str
if signature:
try:
signer_key_id = PGPSignature.from_blob(signature).signer
except:
signer_key_id = None # If error (or never examined), store None
log.info(f"Could not parse PGP signature for {getattr(package, CR_PACKAGE_ATTRS.NAME)}")
log.info(traceback.format_exc())
return signer_key_id
13 changes: 13 additions & 0 deletions pulp_rpm/app/tasks/synchronizing.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@
from pulp_rpm.app.shared_utils import (
is_previous_version,
get_sha256,
parse_signer_id,
urlpath_sanitize,
)
from pulp_rpm.app.rpm_version import RpmVersion
Expand Down Expand Up @@ -1435,6 +1436,8 @@ def _post_save(self, batch):
When it has a treeinfo file, save a batch of Addon, Checksum, Image, Variant objects.
Examine Package headers and save signer_key_id.
Args:
batch (list of :class:`~pulpcore.plugin.stages.DeclarativeContent`): The batch of
:class:`~pulpcore.plugin.stages.DeclarativeContent` objects to be saved.
Expand Down Expand Up @@ -1492,6 +1495,7 @@ def _handle_distribution_tree(declarative_content):
update_collection_to_save = []
update_references_to_save = []
update_collection_packages_to_save = []
packages_to_update = []
seen_updaterecords = []

for declarative_content in batch:
Expand Down Expand Up @@ -1534,6 +1538,12 @@ def _handle_distribution_tree(declarative_content):
for update_reference in update_references:
update_reference.update_record = update_record
update_references_to_save.append(update_reference)
elif isinstance(declarative_content.content, Package):
artifact = declarative_content.d_artifacts[0] # Packages have 1 artifact
signer_key_id = parse_signer_id(artifact.artifact.file.path)
if signer_key_id is not None:
declarative_content.content.signer_key_id = signer_key_id
packages_to_update.append(declarative_content.content)

if update_collection_to_save:
UpdateCollection.objects.bulk_create(update_collection_to_save, ignore_conflicts=True)
Expand All @@ -1546,6 +1556,9 @@ def _handle_distribution_tree(declarative_content):
if update_references_to_save:
UpdateReference.objects.bulk_create(update_references_to_save, ignore_conflicts=True)

if packages_to_update:
Package.objects.bulk_update(packages_to_update, ["signer_key_id"])


class RpmQueryExistingContents(Stage):
"""
Expand Down
Loading

0 comments on commit 99414aa

Please sign in to comment.