Skip to content

Commit

Permalink
feat: parse current dependencies only once (#359)
Browse files Browse the repository at this point in the history
This change alters the internal Phylum CLI extension to remove the use
of the `parseLockfile` call. Instead, the results of parsing the current
dependency files (which happens during input filtering) are reused by
putting them in a format that can be directly ingested by the `analyze`
extension API call. This takes advantage of the caching that already
happens when parsing dependency files.

The result of this change means current dependencies will only be parsed
once instead of twice, saving execution time. Testing on the private
`isildurs_bane` repository yielded a savings of approximately 40 seconds
when run through a Docker container on my local system:

* `phylum-ci` went from 2m40s to 2m0s
* `phylum-ci --all-deps` went from 1m59s to 1m19s
  • Loading branch information
maxrake authored Nov 30, 2023
1 parent 87897b4 commit a96dccb
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 22 deletions.
26 changes: 21 additions & 5 deletions src/phylum/ci/ci_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ def cli_path(self) -> Path:
# Ensure stdout is piped to DEVNULL, to keep the token from being printed in (CI log) output.
# We want the return code here and don't want to raise when non-zero.
cmd = [str(cli_path), "auth", "token"]
if bool(subprocess.run(cmd, stdout=subprocess.DEVNULL).returncode): # noqa: S603, PLW1510
if bool(subprocess.run(cmd, stdout=subprocess.DEVNULL, check=False).returncode): # noqa: S603
msg = "A Phylum API key is required to continue."
raise SystemExit(msg)

Expand Down Expand Up @@ -591,7 +591,7 @@ def analyze(self) -> None:
if self.phylum_group:
cmd.extend(["--group", self.phylum_group])

current_packages = {pkg for depfile in self.depfiles for pkg in depfile.deps}
current_packages = sorted({pkg for depfile in self.depfiles for pkg in depfile.deps})
LOG.debug("%s unique current dependencies from %s file(s)", len(current_packages), len(self.depfiles))
if self.all_deps:
LOG.info("Considering all current dependencies ...")
Expand All @@ -602,11 +602,26 @@ def analyze(self) -> None:
new_packages = sorted(set(current_packages).difference(set(base_packages)))
LOG.debug("%s new dependencies: %s", len(new_packages), new_packages)

with tempfile.NamedTemporaryFile(mode="w+", encoding="utf-8", prefix="base_", suffix=".json") as base_fd:
# TODO(maxrake): Better formatting with parenthesized context managers is available in Python 3.10+
# https://github.com/phylum-dev/phylum-ci/issues/357
# https://docs.python.org/3.10/whatsnew/3.10.html#parenthesized-context-managers
with tempfile.NamedTemporaryFile(
mode="w+",
encoding="utf-8",
prefix="base_",
suffix=".json",
) as base_fd, tempfile.NamedTemporaryFile(
mode="w+",
encoding="utf-8",
prefix="curr_",
suffix=".json",
) as curr_fd:
json.dump(base_packages, base_fd, cls=DataclassJSONEncoder)
base_fd.flush()
cmd.append(base_fd.name)
cmd.extend(f"{depfile.path}:{depfile.type}" for depfile in self.depfiles)
json.dump(current_packages, curr_fd, cls=DataclassJSONEncoder)
curr_fd.flush()
cmd.append(curr_fd.name)

LOG.info("Performing analysis. This may take a few seconds.")
LOG.debug("Using analysis command: %s", shlex.join(cmd))
Expand Down Expand Up @@ -660,7 +675,8 @@ def _parse_analysis_result(self, analysis_result: str) -> None:

self._analysis_report = analysis.report

# The logic below would make for a good match statement, which was introduced in Python 3.10
# TODO(maxrake): The logic below would make for a good match statement, which was introduced in Python 3.10
# https://github.com/phylum-dev/phylum-ci/issues/357
if analysis.incomplete_count == 0:
if analysis.is_failure:
LOG.error("The analysis is complete and there were failures")
Expand Down
29 changes: 12 additions & 17 deletions src/phylum/exts/ci/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { Package, PhylumApi } from "phylum";
const args = Deno.args.slice(0);
if (args.length < 4) {
console.error(
"Usage: phylum ci <PROJECT> <LABEL> [--group <GROUP>] <BASE> <DEPFILE:TYPE...>",
"Usage: phylum ci <PROJECT> <LABEL> [--group <GROUP>] <BASE> <CURRENT>",
);
Deno.exit(1);
}
Expand All @@ -21,30 +21,25 @@ if (groupArgsIndex != -1) {
const project = args[0];
const label = args[1];
const base = args[2];
const depfiles = args.splice(3);

// Parse new dependency files.
let packages: Package[] = [];
for (const depfile of depfiles) {
const depfile_path = depfile.substring(0, depfile.lastIndexOf(":"));
const depfile_type = depfile.substring(depfile.lastIndexOf(":") + 1, depfile.length);
const depfileDeps = await PhylumApi.parseLockfile(depfile_path, depfile_type);
packages = packages.concat(depfileDeps.packages);
}
const current = args[3];

// Deserialize base dependencies.
const baseDepsJson = await Deno.readTextFile(base);
const baseDeps = JSON.parse(baseDepsJson);
// Deserialize current dependencies.
const currDepsJson = await Deno.readTextFile(current);
const currDeps: Package[] = JSON.parse(currDepsJson);

// Short-circuit if there are no dependencies.
if (packages.length == 0) {
// Short-circuit if there are no current dependencies.
if (currDeps.length == 0) {
console.log("{}");
Deno.exit(0);
}

// Deserialize base dependencies.
const baseDepsJson = await Deno.readTextFile(base);
const baseDeps: Package[] = JSON.parse(baseDepsJson);

// Submit analysis job.
const jobID = await PhylumApi.analyze(
packages,
currDeps,
project,
group,
label,
Expand Down

0 comments on commit a96dccb

Please sign in to comment.