Commit ec021b3 (1 parent: 71d7349)
Showing 1 changed file with 4 additions and 175 deletions.
@@ -69,28 +69,6 @@ def run_command(cmd: str, outfile=None) -> Tuple[bool, str]:

    return proc.returncode == 0, output.strip()


# @cached
# def npm_repo_url(npm_spec: str) -> Optional[str]:
#     """Given 'name@version', return a repo url."""
#     pkg, _, ver = npm_spec.rpartition("@")
#     url = f"https://registry.npmjs.org/{pkg}/{ver}"
#     try:
#         resp = requests.get(url, timeout=60)
#         if resp.status_code != 200:
#             print(f"{npm_spec}: {url} -> {resp.status_code}")
#             return None
#         jdata = resp.json()
#     except requests.RequestException as exc:
#         print(f"Couldn't fetch npm data for {npm_spec}: {exc}")
#         return None
#     repo = jdata.get("repository")
#     if repo is None:
#         return None
#     if isinstance(repo, dict):
#         repo = repo["url"]
#     return repo


def canonical_url(url: str) -> str:
    """Canonicalize a repo URL, probably on GitHub."""
    for pat, repl in [
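The deleted npm_repo_url helper handled the two shapes the npm registry uses for its "repository" field: a dict carrying a "url" key, or a plain string. A minimal sketch of just that handling, with made-up sample payloads (the package values below are illustrative, not from the commit):

# Illustrative registry payloads ("repository" may be a dict or a plain string).
jdata_as_dict = {"repository": {"type": "git", "url": "git+https://github.com/example/example.git"}}
jdata_as_str = {"repository": "github:example/example"}

# The deleted logic returned repo["url"] for the dict form and the string as-is:
for jdata in (jdata_as_dict, jdata_as_str):
    repo = jdata.get("repository")
    if isinstance(repo, dict):
        repo = repo["url"]
    print(repo)
# -> git+https://github.com/example/example.git
# -> github:example/example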
@@ -106,27 +84,6 @@ def canonical_url(url: str) -> str:
        url = f"https://github.com/{url}"
    return url


# @cached
# def find_real_url(url: str) -> Optional[str]:
#     """Find the eventual real url for a redirected url."""
#     while True:
#         try:
#             resp = requests.head(url, timeout=60, allow_redirects=True)
#         except requests.RequestException as exc:
#             print(f"Couldn't fetch {url}: {exc}")
#             return None
#         if resp.status_code == 429:
#             # I didn't know you could get 429 from https://github.com, but you can...
#             wait = int(resp.headers.get("Retry-After", 10))
#             time.sleep(wait + 1)
#         else:
#             break
#
#     if resp.status_code == 200:
#         return resp.url
#     return None


WORK_DIR = Path("/tmp/unpack_reqs")


def parallel_map(func, data, description):
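Only the last two lines of parallel_map appear as context in the next hunk. A plausible reconstruction, assuming a thread pool plus a rich progress bar; the worker count and progress setup are assumptions, only the update/yield lines come from the diff:

import concurrent.futures
from rich.progress import Progress

def parallel_map_sketch(func, data, description):
    """Sketch only: run func over data in threads, yielding results under a progress bar."""
    data = list(data)
    with Progress() as progress:
        pbar = progress.add_task(description, total=len(data))
        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
            for result in executor.map(func, data):
                progress.update(pbar, advance=1)  # these two lines match the hunk's context
                yield result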
@@ -140,106 +97,6 @@ def parallel_map(func, data, description):
                progress.update(pbar, advance=1)
                yield result


# def write_list(path: str, lines: Iterable[str]):
#     """Write a list of strings to a file."""
#     with Path(path).open("w") as flist:
#         for line in lines:
#             print(line, file=flist)


# def check_js_dependencies() -> Iterable[str]:
#     """Check the JS dependencies in package-lock.json, returning a set of repo URLs."""
#     print("Checking JavaScript dependencies")
#     with Path("package-lock.json").open() as lockf:
#         lock_data = json.load(lockf)
#
#     deps = set()
#     for name, pkg in lock_data["packages"].items():
#         name = pkg.get("name") or name
#         name = name.rpartition("node_modules/")[-1]
#         version = pkg.get("version")
#         if version is None:
#             continue
#         deps.add(f"{name}@{version}")
#     write_list("deps.txt", sorted(deps))
#
#     urls = set()
#     for url in parallel_map(npm_repo_url, deps, "Getting npm URLs"):
#         if url:
#             urls.add(canonical_url(url))
#
#     real_urls = set()
#     for url in parallel_map(find_real_url, urls, "Getting real URLs"):
#         if url:
#             real_urls.add(url)
#
#     print(f"{len(deps)} deps, {len(urls)} urls, {len(real_urls)} real urls")
#     write_list("repo_urls.txt", sorted(real_urls))
#     return real_urls


# def check_py_dependencies() -> Iterable[str]:
#     """Check the Python dependencies in base.txt, returning a set of repo URLs."""
#     print("Checking Python dependencies")
#
#     print("Creating venv")
#     run_command("python3 -m venv .venv", "make_venv.log")
#     run_command(".venv/bin/python3 -m pip install -U pip", "pip_upgrade.log")
#     print("Downloading packages")
#     run_command(".venv/bin/python3 -m pip download --dest files -r base.txt", "pip_download.log")
#
#     urls = set()
#     for url in parallel_map(repo_url_from_wheel, Path("files").glob("*.whl"), "Examining wheels"):
#         if url:
#             urls.add(canonical_url(url))
#
#     for url in parallel_map(repo_url_from_tgz, Path("files").glob("*.tar.gz"), "Examining tar.gz"):
#         if url:
#             urls.add(canonical_url(url))
#
#     with open("base.txt") as fbase:
#         for line in fbase:
#             if match := re.search(r"https://github.com[^@ #]*(\.git)?", line):
#                 urls.add(canonical_url(match[0]))
#
#     real_urls = set()
#     for url in parallel_map(find_real_url, urls, "Getting real URLs"):
#         if url:
#             real_urls.add(url)
#
#     write_list("repo_urls.txt", sorted(real_urls))
#     return real_urls


# def matching_text(text, regexes):
#     """Find a line in text matching a regex, and return the first regex group."""
#     for regex in regexes:
#         for line in text.splitlines():
#             if match := re.search(regex, line):
#                 return match[1]
#     return None


# @cached
# def repo_url_from_wheel(wheel_path: str) -> Optional[str]:
#     """Read metadata from a .whl file, returning the repo URL."""
#     with zipfile.ZipFile(wheel_path) as whl_file:
#         fmetadata = next((f for f in whl_file.namelist() if f.endswith("/METADATA")), None)
#         if fmetadata is None:
#             print(f"No metadata in {wheel_path}")
#             return None
#         with whl_file.open(fmetadata) as inner_file:
#             metadata = inner_file.read().decode("utf-8")
#         return repo_url_from_metadata(wheel_path, metadata)


# @cached
# def repo_url_from_tgz(tgz_path: str) -> Optional[str]:
#     """Read metadata from a .tar.gz file, returning the repo URL."""
#     with tarfile.open(tgz_path) as tgz_file:
#         fmetadata = next((f for f in tgz_file.getnames() if f.endswith("/PKG-INFO")), None)
#         if fmetadata is None:
#             print(f"No metadata in {tgz_path}")
#             return None
#         metadata = tgz_file.extractfile(fmetadata).read().decode("utf-8")
#         return repo_url_from_metadata(tgz_path, metadata)


SOURCE_URL_REGEXES = [
    # These regexes are tried in order. The first group is the extracted URL.
    r"(?i)^Project-URL: Source.*,\s*(.*)$",
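The comment inside SOURCE_URL_REGEXES says the regexes are tried in order and that the first group is the extracted URL; the (now deleted) matching_text helper above is what applied them to wheel METADATA or sdist PKG-INFO text. A small, self-contained illustration against made-up metadata (only the first regex from the diff is reproduced here):

import re

regexes = [
    r"(?i)^Project-URL: Source.*,\s*(.*)$",  # first entry from the diff; the others are elided
]

def matching_text(text, regexes):
    """Find a line in text matching a regex, and return the first regex group."""
    for regex in regexes:
        for line in text.splitlines():
            if match := re.search(regex, line):
                return match[1]
    return None

metadata = "Name: example\nProject-URL: Source, https://github.com/example/example\n"  # made-up
print(matching_text(metadata, regexes))
# -> https://github.com/example/example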
@@ -339,19 +196,11 @@ def process_directory():
    Also copies the considered dependencies file into the temp work directory,
    for later analysis.
    """
    # repo_name = Path.cwd().name
    # repo_work = WORK_DIR / repo_name
    # repo_work.mkdir(parents=True, exist_ok=True)

    repo_urls = set()
    package_names = []
    openedx_packages = []
    # if (js_reqs := Path("package-lock.json")).exists():
    #     shutil.copyfile(js_reqs, repo_work / "package-lock.json")
    #     with change_dir(repo_work):
    #         repo_urls.update(check_js_dependencies())
    # if (py_reqs := find_py_reqs()):
    #     shutil.copyfile(py_reqs, repo_work / "base.txt")

    with open("/tmp/unpack_reqs/openedx/edx-platform/base.txt") as fbase:
        # Read each line (package name) in the file
        file_data = fbase.read()
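The lines that turn file_data into package_names fall outside this hunk. One way to pull names out of a pip requirements file, assuming simple name==version pins; this parsing is an assumption for illustration, not the commit's code:

import re

def package_names_from_requirements(file_data):
    # Assumption: base.txt holds one pin per line, e.g. "django==4.2" (example value).
    names = []
    for line in file_data.splitlines():
        line = line.strip()
        if not line or line.startswith("#") or line.startswith("-"):
            continue  # skip blanks, comments, and pip options such as -c / -r
        # Cut at the first version/extras/marker delimiter.
        name = re.split(r"[=<>!~\[; ]", line, maxsplit=1)[0]
        names.append(name.lower())
    return names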
@@ -391,40 +240,20 @@ def urls_in_orgs(urls, orgs):
        if any(f"/{org}/" in url for org in orgs)
    )

# def urls_in_orgs(urls, org):
#     """
#     Find urls that are in any of the `orgs`.
#     """
#     return sorted(
#         url for url in urls
#         if f"/{org}/" in url
#     )


def main(dirs=None, org=None):
    """
    Analyze the requirements in all of the directories mentioned on the command line.
    If arguments have newlines, treat each line as a separate directory.
    """
    # if dirs is None:
    #     repo_dir = sys.argv[1]
    #     org_flag_index = sys.argv.index("--org")
    #     org = sys.argv[org_flag_index + 1]
    #print(f"Creating new work directory: {WORK_DIR}")
    #shutil.rmtree(WORK_DIR, ignore_errors=True)
    repo_urls = set()

    #with change_dir(repo_dir):
    repo_urls.update(process_directory())

    print("== DONE ==============")
    print("Second-party:")
    print("\n".join(repo_urls))
    # if repo_urls:
    #     sys.exit(1)
    if repo_urls:
        sys.exit(1)


if __name__ == "__main__":
    main()
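The last hunk begins inside the kept, multi-org urls_in_orgs. Assuming its full body is sorted(url for url in urls if any(f"/{org}/" in url for org in orgs)), which matches the context lines shown, a small usage illustration with made-up inputs:

urls = {
    "https://github.com/openedx/edx-platform",  # example inputs, not from the commit
    "https://github.com/pallets/flask",
}
print(urls_in_orgs(urls, ["openedx"]))
# -> ['https://github.com/openedx/edx-platform']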