Skip to content

Commit

Permalink
chore: testing workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
salman2013 committed Apr 18, 2024
1 parent 71d7349 commit ec021b3
Showing 1 changed file with 4 additions and 175 deletions.
179 changes: 4 additions & 175 deletions scripts/find_dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,28 +69,6 @@ def run_command(cmd: str, outfile=None) -> Tuple[bool, str]:

return proc.returncode == 0, output.strip()


# @cached
# def npm_repo_url(npm_spec: str) -> Optional[str]:
# """Given '[email protected]', return a repo url."""
# pkg, _, ver = npm_spec.rpartition("@")
# url = f"https://registry.npmjs.org/{pkg}/{ver}"
# try:
# resp = requests.get(url, timeout=60)
# if resp.status_code != 200:
# print(f"{npm_spec}: {url} -> {resp.status_code}")
# return None
# jdata = resp.json()
# except requests.RequestException as exc:
# print(f"Couldn't fetch npm data for {npm_spec}: {exc}")
# return None
# repo = jdata.get("repository")
# if repo is None:
# return None
# if isinstance(repo, dict):
# repo = repo["url"]
# return repo

def canonical_url(url: str) -> str:
"""Canonicalize a repo URL, probably on GitHub."""
for pat, repl in [
Expand All @@ -106,27 +84,6 @@ def canonical_url(url: str) -> str:
url = f"https://github.com/{url}"
return url

# @cached
# def find_real_url(url: str) -> Optional[str]:
# """Find the eventual real url for a redirected url."""
# while True:
# try:
# resp = requests.head(url, timeout=60, allow_redirects=True)
# except requests.RequestException as exc:
# print(f"Couldn't fetch {url}: {exc}")
# return None
# if resp.status_code == 429:
# # I didn't know you could get 429 from https://github.com, but you can...
# wait = int(resp.headers.get("Retry-After", 10))
# time.sleep(wait + 1)
# else:
# break

# if resp.status_code == 200:
# return resp.url
# return None


# Root of the scratch area under /tmp.  The requirements file that
# process_directory() opens lives beneath this directory.
WORK_DIR = Path("/tmp/unpack_reqs")

def parallel_map(func, data, description):
Expand All @@ -140,106 +97,6 @@ def parallel_map(func, data, description):
progress.update(pbar, advance=1)
yield result

# def write_list(path: str, lines: Iterable[str]):
# """Write a list of strings to a file."""
# with Path(path).open("w") as flist:
# for line in lines:
# print(line, file=flist)

# def check_js_dependencies() -> Iterable[str]:
# """Check the JS dependencies in package-lock.json, returning a set of repo URLs."""
# print("Checking JavaScript dependencies")
# with Path("package-lock.json").open() as lockf:
# lock_data = json.load(lockf)

# deps = set()
# for name, pkg in lock_data["packages"].items():
# name = pkg.get("name") or name
# name = name.rpartition("node_modules/")[-1]
# version = pkg.get("version")
# if version is None:
# continue
# deps.add(f"{name}@{version}")
# write_list("deps.txt", sorted(deps))

# urls = set()
# for url in parallel_map(npm_repo_url, deps, "Getting npm URLs"):
# if url:
# urls.add(canonical_url(url))

# real_urls = set()
# for url in parallel_map(find_real_url, urls, "Getting real URLs"):
# if url:
# real_urls.add(url)

# print(f"{len(deps)} deps, {len(urls)} urls, {len(real_urls)} real urls")
# write_list("repo_urls.txt", sorted(real_urls))
# return real_urls

# def check_py_dependencies() -> Iterable[str]:
# """Check the Python dependencies in base.txt, returning a set of repo URLs."""
# print("Checking Python dependencies")

# print("Creating venv")
# run_command("python3 -m venv .venv", "make_venv.log")
# run_command(".venv/bin/python3 -m pip install -U pip", "pip_upgrade.log")
# print("Downloading packages")
# run_command(".venv/bin/python3 -m pip download --dest files -r base.txt", "pip_download.log")

# urls = set()
# for url in parallel_map(repo_url_from_wheel, Path("files").glob("*.whl"), "Examining wheels"):
# if url:
# urls.add(canonical_url(url))

# for url in parallel_map(repo_url_from_tgz, Path("files").glob("*.tar.gz"), "Examining tar.gz"):
# if url:
# urls.add(canonical_url(url))

# with open("base.txt") as fbase:
# for line in fbase:
# if match := re.search(r"https://github.com[^@ #]*(\.git)?", line):
# urls.add(canonical_url(match[0]))

# real_urls = set()
# for url in parallel_map(find_real_url, urls, "Getting real URLs"):
# if url:
# real_urls.add(url)

# write_list("repo_urls.txt", sorted(real_urls))
# return real_urls

# def matching_text(text, regexes):
# """Find a line in text matching a regex, and return the first regex group."""
# for regex in regexes:
# for line in text.splitlines():
# if match := re.search(regex, line):
# return match[1]
# return None

# @cached
# def repo_url_from_wheel(wheel_path: str) -> Optional[str]:
# """Read metadata from a .whl file, returning the repo URL."""
# with zipfile.ZipFile(wheel_path) as whl_file:
# fmetadata = next((f for f in whl_file.namelist() if f.endswith("/METADATA")), None)
# if fmetadata is None:
# print(f"No metadata in {wheel_path}")
# return None
# with whl_file.open(fmetadata) as inner_file:
# metadata = inner_file.read().decode("utf-8")
# return repo_url_from_metadata(wheel_path, metadata)

# @cached
# def repo_url_from_tgz(tgz_path: str) -> Optional[str]:
# """Read metadata from a .tar.gz file, returning the repo URL."""
# with tarfile.open(tgz_path) as tgz_file:
# fmetadata = next((f for f in tgz_file.getnames() if f.endswith("/PKG-INFO")), None)
# if fmetadata is None:
# print(f"No metadata in {tgz_path}")
# return None
# metadata = tgz_file.extractfile(fmetadata).read().decode("utf-8")
# return repo_url_from_metadata(tgz_path, metadata)


SOURCE_URL_REGEXES = [
# These regexes are tried in order. The first group is the extracted URL.
r"(?i)^Project-URL: Source.*,\s*(.*)$",
Expand Down Expand Up @@ -339,19 +196,11 @@ def process_directory():
Also copies the considered dependencies file into the temp work directory,
for later analysis.
"""
# repo_name = Path.cwd().name
# repo_work = WORK_DIR / repo_name
# repo_work.mkdir(parents=True, exist_ok=True)

repo_urls = set()
package_names = []
openedx_packages = []
# if (js_reqs := Path("package-lock.json")).exists():
# shutil.copyfile(js_reqs, repo_work / "package-lock.json")
# with change_dir(repo_work):
# repo_urls.update(check_js_dependencies())
# if (py_reqs := find_py_reqs()):
# shutil.copyfile(py_reqs, repo_work / "base.txt")


with open("/tmp/unpack_reqs/openedx/edx-platform/base.txt") as fbase:
# Read each line (package name) in the file
file_data = fbase.read()
Expand Down Expand Up @@ -391,40 +240,20 @@ def urls_in_orgs(urls, orgs):
if any(f"/{org}/" in url for org in orgs)
)

# def urls_in_orgs(urls, org):
# """
# Find urls that are in any of the `orgs`.
# """
# return sorted(
# url for url in urls
# if f"/{org}/" in url
# )


def main(dirs=None, org=None):
    """
    Report the repo URLs collected by `process_directory`.

    Prints the collected URLs, one per line and in sorted order, under a
    "Second-party:" heading.  If any URLs were collected, the process exits
    with status 1; otherwise the function returns normally.

    `dirs` and `org` are accepted so existing callers keep working, but
    neither is used by this function.
    """
    repo_urls = set(process_directory())

    print("== DONE ==============")
    print("Second-party:")
    # Set iteration order is arbitrary; sort so the report is stable
    # from one run to the next.
    print("\n".join(sorted(repo_urls)))
    if repo_urls:
        # A non-zero exit status signals that at least one URL was found.
        sys.exit(1)


if __name__ == "__main__":
    main()

0 comments on commit ec021b3

Please sign in to comment.