Skip to content

Commit

Permalink
CVE-2024-6345 Cherry Pick fix 88807c7
Browse files Browse the repository at this point in the history
Merge changes from:
pypa@88807c7

Then fixup problems.
  • Loading branch information
jaraco authored and rickprice committed Nov 15, 2024
1 parent 49fec9f commit 403f606
Show file tree
Hide file tree
Showing 5 changed files with 141 additions and 130 deletions.
1 change: 1 addition & 0 deletions changelog.d/4332.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Modernized and refactored VCS handling in package_index.
14 changes: 13 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,19 @@ testing =
ini2toml[lite]>=0.9
tomli-w>=1.0.0
pytest-timeout
pytest-perf
pytest-perf; \
# workaround for jaraco/inflect#195, pydantic/pydantic-core#773 (see #3986)
sys_platform != "cygwin"
# for tools/finalize.py
jaraco.develop >= 7.21; python_version >= "3.9" and sys_platform != "cygwin"
; pytest-home >= 0.5
; mypy==1.9 # pin mypy version so a new version doesn't suddenly cause the CI to fail
mypy
# No Python 3.11 dependencies require tomli, but needed for type-checking since we import it directly
tomli
# No Python 3.12 dependencies require importlib_metadata, but needed for type-checking since we import it directly
importlib_metadata
pytest-subprocess

testing-integration =
pytest
Expand Down
197 changes: 99 additions & 98 deletions setuptools/package_index.py
Original file line number Diff line number Diff line change
@@ -1,45 +1,33 @@
"""PyPI and direct package downloading."""

import sys
import os
import re
import io
import shutil
import socket
import base64
import hashlib
import itertools
import configparser
import hashlib
import html
import http.client
import io
import itertools
import os
import re
import shutil
import socket
import subprocess
import sys
import urllib.error
import urllib.parse
import urllib.request
import urllib.error
from functools import wraps

import setuptools
from pkg_resources import (
CHECKOUT_DIST,
Distribution,
BINARY_DIST,
normalize_path,
SOURCE_DIST,
Environment,
find_distributions,
safe_name,
safe_version,
to_filename,
Requirement,
DEVELOP_DIST,
EGG_DIST,
parse_version,
)
from distutils import log
from distutils.errors import DistutilsError
from fnmatch import translate
from setuptools.wheel import Wheel
from setuptools.extern.more_itertools import unique_everseen
from functools import wraps

import setuptools
from pkg_resources import (BINARY_DIST, CHECKOUT_DIST, DEVELOP_DIST, EGG_DIST,
SOURCE_DIST, Distribution, Environment, Requirement,
find_distributions, normalize_path, parse_version, safe_name,
safe_version, to_filename)
from setuptools.extern.more_itertools import unique_everseen
from setuptools.wheel import Wheel

EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.+!]+)$')
HREF = re.compile(r"""href\s*=\s*['"]?([^'"> ]+)""", re.I)
Expand Down Expand Up @@ -195,7 +183,7 @@ def interpret_distro_name(
'-'.join(parts[p:]),
py_version=py_version,
precedence=precedence,
platform=platform
platform=platform,
)


Expand Down Expand Up @@ -305,7 +293,7 @@ def __init__(
ca_bundle=None,
verify_ssl=True,
*args,
**kw
**kw,
):
super().__init__(*args, **kw)
self.index_url = index_url + "/"[: not index_url.endswith('/')]
Expand Down Expand Up @@ -586,7 +574,7 @@ def download(self, spec, tmpdir):
scheme = URL_SCHEME(spec)
if scheme:
# It's a url, download it to tmpdir
found = self._download_url(scheme.group(1), spec, tmpdir)
found = self._download_url(spec, tmpdir)
base, fragment = egg_info_for_url(spec)
if base.endswith('.py'):
found = self.gen_setup(found, fragment, tmpdir)
Expand Down Expand Up @@ -813,7 +801,7 @@ def open_url(self, url, warning=None): # noqa: C901 # is too complex (12)
else:
raise DistutilsError("Download error for %s: %s" % (url, v)) from v

def _download_url(self, scheme, url, tmpdir):
def _download_url(self, url, tmpdir):
# Determine download filename
#
name, fragment = egg_info_for_url(url)
Expand All @@ -828,19 +816,59 @@ def _download_url(self, scheme, url, tmpdir):

filename = os.path.join(tmpdir, name)

# Download the file
#
if scheme == 'svn' or scheme.startswith('svn+'):
return self._download_svn(url, filename)
elif scheme == 'git' or scheme.startswith('git+'):
return self._download_git(url, filename)
elif scheme.startswith('hg+'):
return self._download_hg(url, filename)
elif scheme == 'file':
return urllib.request.url2pathname(urllib.parse.urlparse(url)[2])
else:
self.url_ok(url, True) # raises error if not allowed
return self._attempt_download(url, filename)
return self._download_vcs(url, filename) or self._download_other(url, filename)

@staticmethod
def _resolve_vcs(url):
"""
>>> rvcs = PackageIndex._resolve_vcs
>>> rvcs('git+http://foo/bar')
'git'
>>> rvcs('hg+https://foo/bar')
'hg'
>>> rvcs('git:myhost')
'git'
>>> rvcs('hg:myhost')
>>> rvcs('http://foo/bar')
"""
scheme = urllib.parse.urlsplit(url).scheme
pre, sep, post = scheme.partition('+')
# svn and git have their own protocol; hg does not
allowed = set(['svn', 'git'] + ['hg'] * bool(sep))
return next(iter({pre} & allowed), None)

def _download_vcs(self, url, spec_filename):
vcs = self._resolve_vcs(url)
if not vcs:
return
if vcs == 'svn':
raise DistutilsError(
f"Invalid config, SVN download is not supported: {url}"
)

filename, _, _ = spec_filename.partition('#')
url, rev = self._vcs_split_rev_from_url(url)

self.info(f"Doing {vcs} clone from {url} to {filename}")
subprocess.check_call([vcs, 'clone', '--quiet', url, filename])

co_commands = dict(
git=[vcs, '-C', filename, 'checkout', '--quiet', rev],
hg=[vcs, '--cwd', filename, 'up', '-C', '-r', rev, '-q'],
)
if rev is not None:
self.info(f"Checking out {rev}")
subprocess.check_call(co_commands[vcs])

return filename

def _download_other(self, url, filename):
scheme = urllib.parse.urlsplit(url).scheme
if scheme == 'file': # pragma: no cover
return urllib.request.url2pathname(urllib.parse.urlparse(url).path)
# raise error if not allowed
self.url_ok(url, True)
return self._attempt_download(url, filename)

def scan_url(self, url):
self.process_url(url, True)
Expand All @@ -856,64 +884,37 @@ def _invalid_download_html(self, url, headers, filename):
os.unlink(filename)
raise DistutilsError(f"Unexpected HTML page found at {url}")

def _download_svn(self, url, _filename):
raise DistutilsError(f"Invalid config, SVN download is not supported: {url}")

@staticmethod
def _vcs_split_rev_from_url(url, pop_prefix=False):
scheme, netloc, path, query, frag = urllib.parse.urlsplit(url)
def _vcs_split_rev_from_url(url):
"""
Given a possible VCS URL, return a clean URL and resolved revision if any.
>>> vsrfu = PackageIndex._vcs_split_rev_from_url
>>> vsrfu('git+https://github.com/pypa/[email protected]#egg-info=setuptools')
('https://github.com/pypa/setuptools', 'v69.0.0')
>>> vsrfu('git+https://github.com/pypa/setuptools#egg-info=setuptools')
('https://github.com/pypa/setuptools', None)
>>> vsrfu('http://foo/bar')
('http://foo/bar', None)
"""
parts = urllib.parse.urlsplit(url)

scheme = scheme.split('+', 1)[-1]
clean_scheme = parts.scheme.split('+', 1)[-1]

# Some fragment identification fails
path = path.split('#', 1)[0]
no_fragment_path, _, _ = parts.path.partition('#')

rev = None
if '@' in path:
path, rev = path.rsplit('@', 1)
pre, sep, post = no_fragment_path.rpartition('@')
clean_path, rev = (pre, post) if sep else (post, None)

# Also, discard fragment
url = urllib.parse.urlunsplit((scheme, netloc, path, query, ''))
resolved = parts._replace(
scheme=clean_scheme,
path=clean_path,
# discard the fragment
fragment='',
).geturl()

return url, rev

def _download_git(self, url, filename):
filename = filename.split('#', 1)[0]
url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)

self.info("Doing git clone from %s to %s", url, filename)
os.system("git clone --quiet %s %s" % (url, filename))

if rev is not None:
self.info("Checking out %s", rev)
os.system(
"git -C %s checkout --quiet %s"
% (
filename,
rev,
)
)

return filename

def _download_hg(self, url, filename):
filename = filename.split('#', 1)[0]
url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)

self.info("Doing hg clone from %s to %s", url, filename)
os.system("hg clone --quiet %s %s" % (url, filename))

if rev is not None:
self.info("Updating to %s", rev)
os.system(
"hg --cwd %s up -C -r %s -q"
% (
filename,
rev,
)
)

return filename
return resolved, rev

def debug(self, msg, *args):
log.debug(msg, *args)
Expand Down
56 changes: 26 additions & 30 deletions setuptools/tests/test_packageindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import urllib.request
import urllib.error
import http.client
from unittest import mock

import pytest

Expand Down Expand Up @@ -186,49 +185,46 @@ def test_egg_fragment(self):
assert dists[0].version == ''
assert dists[1].version == vc

def test_download_git_with_rev(self, tmpdir):
def test_download_git_with_rev(self, tmp_path, fp):
url = 'git+https://github.example/group/project@master#egg=foo'
index = setuptools.package_index.PackageIndex()

with mock.patch("os.system") as os_system_mock:
result = index.download(url, str(tmpdir))
expected_dir = tmp_path / 'project@master'
fp.register([
'git',
'clone',
'--quiet',
'https://github.example/group/project',
expected_dir,
])
fp.register(['git', '-C', expected_dir, 'checkout', '--quiet', 'master'])

os_system_mock.assert_called()
result = index.download(url, tmp_path)

expected_dir = str(tmpdir / 'project@master')
expected = (
'git clone --quiet ' 'https://github.example/group/project {expected_dir}'
).format(**locals())
first_call_args = os_system_mock.call_args_list[0][0]
assert first_call_args == (expected,)
assert result == str(expected_dir)
assert len(fp.calls) == 2

tmpl = 'git -C {expected_dir} checkout --quiet master'
expected = tmpl.format(**locals())
assert os_system_mock.call_args_list[1][0] == (expected,)
assert result == expected_dir

def test_download_git_no_rev(self, tmpdir):
def test_download_git_no_rev(self, tmp_path, fp):
url = 'git+https://github.example/group/project#egg=foo'
index = setuptools.package_index.PackageIndex()

with mock.patch("os.system") as os_system_mock:
result = index.download(url, str(tmpdir))

os_system_mock.assert_called()

expected_dir = str(tmpdir / 'project')
expected = (
'git clone --quiet ' 'https://github.example/group/project {expected_dir}'
).format(**locals())
os_system_mock.assert_called_once_with(expected)

def test_download_svn(self, tmpdir):
expected_dir = tmp_path / 'project'
fp.register([
'git',
'clone',
'--quiet',
'https://github.example/group/project',
expected_dir,
])
index.download(url, tmp_path)

def test_download_svn(self, tmp_path):
url = 'svn+https://svn.example/project#egg=foo'
index = setuptools.package_index.PackageIndex()

msg = r".*SVN download is not supported.*"
with pytest.raises(distutils.errors.DistutilsError, match=msg):
index.download(url, str(tmpdir))
index.download(url, tmp_path)


class TestContentCheckers:
Expand Down
3 changes: 2 additions & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ deps =
# Ideally all the dependencies should be set as "extras"
setenv =
PYTHONWARNDEFAULTENCODING = 1
SETUPTOOLS_ENFORCE_DEPRECATION = 1
; SETUPTOOLS_ENFORCE_DEPRECATION = 1
SETUPTOOLS_ENFORCE_DEPRECATION = false
commands =
pytest {posargs}
usedevelop = True
Expand Down

0 comments on commit 403f606

Please sign in to comment.