From ec021b3949ab81d0c9b47e682fbc45173f08e0e3 Mon Sep 17 00:00:00 2001
From: salman2013
Date: Thu, 18 Apr 2024 16:36:10 +0500
Subject: [PATCH] chore: testing workflow

---
 scripts/find_dependencies.py | 179 +----------------------------------
 1 file changed, 4 insertions(+), 175 deletions(-)

diff --git a/scripts/find_dependencies.py b/scripts/find_dependencies.py
index c39d48b161c6..8c7037add3c4 100644
--- a/scripts/find_dependencies.py
+++ b/scripts/find_dependencies.py
@@ -69,28 +69,6 @@ def run_command(cmd: str, outfile=None) -> Tuple[bool, str]:
     return proc.returncode == 0, output.strip()
 
 
-
-# @cached
-# def npm_repo_url(npm_spec: str) -> Optional[str]:
-#     """Given 'jspkg@0.1.0', return a repo url."""
-#     pkg, _, ver = npm_spec.rpartition("@")
-#     url = f"https://registry.npmjs.org/{pkg}/{ver}"
-#     try:
-#         resp = requests.get(url, timeout=60)
-#         if resp.status_code != 200:
-#             print(f"{npm_spec}: {url} -> {resp.status_code}")
-#             return None
-#         jdata = resp.json()
-#     except requests.RequestException as exc:
-#         print(f"Couldn't fetch npm data for {npm_spec}: {exc}")
-#         return None
-#     repo = jdata.get("repository")
-#     if repo is None:
-#         return None
-#     if isinstance(repo, dict):
-#         repo = repo["url"]
-#     return repo
-
 def canonical_url(url: str) -> str:
     """Canonicalize a repo URL, probably on GitHub."""
     for pat, repl in [
@@ -106,27 +84,6 @@ def canonical_url(url: str) -> str:
         url = f"https://github.com/{url}"
     return url
 
-# @cached
-# def find_real_url(url: str) -> Optional[str]:
-#     """Find the eventual real url for a redirected url."""
-#     while True:
-#         try:
-#             resp = requests.head(url, timeout=60, allow_redirects=True)
-#         except requests.RequestException as exc:
-#             print(f"Couldn't fetch {url}: {exc}")
-#             return None
-#         if resp.status_code == 429:
-#             # I didn't know you could get 429 from https://github.com, but you can...
-#             wait = int(resp.headers.get("Retry-After", 10))
-#             time.sleep(wait + 1)
-#         else:
-#             break
-
-#     if resp.status_code == 200:
-#         return resp.url
-#     return None
-
-
 WORK_DIR = Path("/tmp/unpack_reqs")
 
 def parallel_map(func, data, description):
@@ -140,106 +97,6 @@ def parallel_map(func, data, description):
                 progress.update(pbar, advance=1)
                 yield result
 
-# def write_list(path: str, lines: Iterable[str]):
-#     """Write a list of strings to a file."""
-#     with Path(path).open("w") as flist:
-#         for line in lines:
-#             print(line, file=flist)
-
-# def check_js_dependencies() -> Iterable[str]:
-#     """Check the JS dependencies in package-lock.json, returning a set of repo URLs."""
-#     print("Checking JavaScript dependencies")
-#     with Path("package-lock.json").open() as lockf:
-#         lock_data = json.load(lockf)
-
-#     deps = set()
-#     for name, pkg in lock_data["packages"].items():
-#         name = pkg.get("name") or name
-#         name = name.rpartition("node_modules/")[-1]
-#         version = pkg.get("version")
-#         if version is None:
-#             continue
-#         deps.add(f"{name}@{version}")
-#     write_list("deps.txt", sorted(deps))
-
-#     urls = set()
-#     for url in parallel_map(npm_repo_url, deps, "Getting npm URLs"):
-#         if url:
-#             urls.add(canonical_url(url))
-
-#     real_urls = set()
-#     for url in parallel_map(find_real_url, urls, "Getting real URLs"):
-#         if url:
-#             real_urls.add(url)
-
-#     print(f"{len(deps)} deps, {len(urls)} urls, {len(real_urls)} real urls")
-#     write_list("repo_urls.txt", sorted(real_urls))
-#     return real_urls
-
-# def check_py_dependencies() -> Iterable[str]:
-#     """Check the Python dependencies in base.txt, returning a set of repo URLs."""
-#     print("Checking Python dependencies")
-
-#     print("Creating venv")
-#     run_command("python3 -m venv .venv", "make_venv.log")
-#     run_command(".venv/bin/python3 -m pip install -U pip", "pip_upgrade.log")
-#     print("Downloading packages")
-#     run_command(".venv/bin/python3 -m pip download --dest files -r base.txt", "pip_download.log")
-
-#     urls = set()
-#     for url in parallel_map(repo_url_from_wheel, Path("files").glob("*.whl"), "Examining wheels"):
-#         if url:
-#             urls.add(canonical_url(url))
-
-#     for url in parallel_map(repo_url_from_tgz, Path("files").glob("*.tar.gz"), "Examining tar.gz"):
-#         if url:
-#             urls.add(canonical_url(url))
-
-#     with open("base.txt") as fbase:
-#         for line in fbase:
-#             if match := re.search(r"https://github.com[^@ #]*(\.git)?", line):
-#                 urls.add(canonical_url(match[0]))
-
-#     real_urls = set()
-#     for url in parallel_map(find_real_url, urls, "Getting real URLs"):
-#         if url:
-#             real_urls.add(url)
-
-#     write_list("repo_urls.txt", sorted(real_urls))
-#     return real_urls
-
-# def matching_text(text, regexes):
-#     """Find a line in text matching a regex, and return the first regex group."""
-#     for regex in regexes:
-#         for line in text.splitlines():
-#             if match := re.search(regex, line):
-#                 return match[1]
-#     return None
-
-# @cached
-# def repo_url_from_wheel(wheel_path: str) -> Optional[str]:
-#     """Read metadata from a .whl file, returning the repo URL."""
-#     with zipfile.ZipFile(wheel_path) as whl_file:
-#         fmetadata = next((f for f in whl_file.namelist() if f.endswith("/METADATA")), None)
-#         if fmetadata is None:
-#             print(f"No metadata in {wheel_path}")
-#             return None
-#         with whl_file.open(fmetadata) as inner_file:
-#             metadata = inner_file.read().decode("utf-8")
-#     return repo_url_from_metadata(wheel_path, metadata)
-
-# @cached
-# def repo_url_from_tgz(tgz_path: str) -> Optional[str]:
-#     """Read metadata from a .tar.gz file, returning the repo URL."""
-#     with tarfile.open(tgz_path) as tgz_file:
-#         fmetadata = next((f for f in tgz_file.getnames() if f.endswith("/PKG-INFO")), None)
-#         if fmetadata is None:
-#             print(f"No metadata in {tgz_path}")
-#             return None
-#         metadata = tgz_file.extractfile(fmetadata).read().decode("utf-8")
-#     return repo_url_from_metadata(tgz_path, metadata)
-
-
 SOURCE_URL_REGEXES = [
     # These regexes are tried in order. The first group is the extracted URL.
     r"(?i)^Project-URL: Source.*,\s*(.*)$",
@@ -339,19 +196,11 @@ def process_directory():
     Also copies the considered dependencies file into the temp work directory,
     for later analysis.
     """
-    # repo_name = Path.cwd().name
-    # repo_work = WORK_DIR / repo_name
-    # repo_work.mkdir(parents=True, exist_ok=True)
+
     repo_urls = set()
     package_names = []
    openedx_packages = []
-    # if (js_reqs := Path("package-lock.json")).exists():
-    #     shutil.copyfile(js_reqs, repo_work / "package-lock.json")
-    #     with change_dir(repo_work):
-    #         repo_urls.update(check_js_dependencies())
-    # if (py_reqs := find_py_reqs()):
-    #     shutil.copyfile(py_reqs, repo_work / "base.txt")
-
+
     with open("/tmp/unpack_reqs/openedx/edx-platform/base.txt") as fbase:
         # Read each line (package name) in the file
         file_data = fbase.read()
@@ -391,40 +240,20 @@ def urls_in_orgs(urls, orgs):
         if any(f"/{org}/" in url for org in orgs)
     )
 
-# def urls_in_orgs(urls, org):
-#     """
-#     Find urls that are in any of the `orgs`.
-#     """
-#     return sorted(
-#         url for url in urls
-#         if f"/{org}/" in url
-#     )
-
 def main(dirs=None, org=None):
     """
     Analyze the requirements in all of the directories mentioned on the command line.
 
     If arguments have newlines, treat each line as a separate directory.
     """
-    # if dirs is None:
-    #     repo_dir = sys.argv[1]
-    #     org_flag_index = sys.argv.index("--org")
-    #     org = sys.argv[org_flag_index + 1]
-    #print(f"Creating new work directory: {WORK_DIR}")
-    #shutil.rmtree(WORK_DIR, ignore_errors=True)
     repo_urls = set()
-
-    #with change_dir(repo_dir):
     repo_urls.update(process_directory())
-
-
-
     print("== DONE ==============")
     print("Second-party:")
     print("\n".join(repo_urls))
-    # if repo_urls:
-    #     sys.exit(1)
+    if repo_urls:
+        sys.exit(1)
 
 
 if __name__ == "__main__":
     main()