Skip to content

Commit

Permalink
Also add bin prefix to AAE cluster names
Browse files Browse the repository at this point in the history
In commit 905115, the bin files were renamed by adding a prefix in order to
resolve a file name conflict between the Z and Y space clusters.
However, this meant that the bin/cluster names no longer corresponded to the
file names of the bins.
Instead, in this commit, rename the bins themselves.
  • Loading branch information
jakobnissen committed Jan 8, 2025
1 parent f3fe7e0 commit 46bfd91
Showing 1 changed file with 13 additions and 2 deletions.
15 changes: 13 additions & 2 deletions vamb/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1256,13 +1256,16 @@ def write_clusters_and_bins(
if binsplitter.splitter is not None:
split_path = Path(base_clusters_name + "_split.tsv")
clusters = dict(binsplitter.binsplit(clusters.items()))
# Add prefix before writing the clusters to file
clusters = add_bin_prefix(clusters, bin_prefix)
with open(split_path, "w") as file:
(n_split_clusters, _) = vamb.vambtools.write_clusters(
file, clusters.items()
)
msg = f"\tClustered {n_contigs} contigs in {n_split_clusters} split bins ({n_unsplit_clusters} clusters)"
else:
msg = f"\tClustered {n_contigs} contigs in {n_unsplit_clusters} unsplit bins"
clusters = add_bin_prefix(clusters, bin_prefix)

logger.info(msg)
elapsed = round(time.time() - begintime, 2)
Expand All @@ -1276,8 +1279,7 @@ def write_clusters_and_bins(
sizeof = dict(zip(sequence_names, sequence_lens))
for binname, contigs in clusters.items():
if sum(sizeof[c] for c in contigs) >= fasta_output.min_fasta_size:
new_name = binname if bin_prefix is None else bin_prefix + binname
filtered_clusters[new_name] = contigs
filtered_clusters[binname] = contigs

with vamb.vambtools.Reader(fasta_output.existing_fasta_path.path) as file:
vamb.vambtools.write_bins(
Expand All @@ -1294,6 +1296,15 @@ def write_clusters_and_bins(
)


def add_bin_prefix(
    clusters: dict[str, set[str]], prefix: Optional[str]
) -> dict[str, set[str]]:
    """Return `clusters` with `prefix` prepended to every cluster name.

    If `prefix` is None, the input dict itself is returned untouched.
    Otherwise a new dict is built whose keys are `prefix + name`; the values
    are the very same set objects as in the input (they are not copied).
    """
    if prefix is not None:
        return {prefix + name: members for name, members in clusters.items()}
    # No prefix requested: hand back the caller's mapping as-is.
    return clusters


def run_bin_default(opt: BinDefaultOptions):
composition, abundance = load_composition_and_abundance(
vamb_options=opt.common.general,
Expand Down

0 comments on commit 46bfd91

Please sign in to comment.