Skip to content

Commit

Permalink
Fix bug with sample filtering
Browse files Browse the repository at this point in the history
  • Loading branch information
sminot committed Feb 5, 2024
1 parent 57ae1e0 commit 98ff79c
Showing 1 changed file with 21 additions and 4 deletions.
25 changes: 21 additions & 4 deletions bin/bin_metagenomes.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,11 +309,27 @@ def _filter_self(

filt_n = filtered_obs.sum()
tot_n = filtered_obs.shape[0]
logger.info(f"{filt_n:,} / {tot_n:,} samples have {query}")
logger.info(f"{filt_n:,} / {tot_n:,} samples filtered out: {query}")

if filtered_obs.any():
self.data = self.data[~filtered_obs]
self.log_content()
if "filtered_out" not in self.data.uns:
self.data.uns["filtered_out"] = pd.Series(
False,
index=self.data.obs_names
)

self.data.uns["filtered_out"] = (
self.data.uns["filtered_out"] |
filtered_obs
)
filt_n = self.data.uns["filtered_out"].sum()
logger.info(f"Total samples filtered out: {filt_n:,} / {tot_n:,}")

@property
def filtered_samples(self):
    """Names of the samples which pass the filters.

    Reads the boolean mask stored in ``self.data.uns["filtered_out"]``
    (True marks a sample that was filtered out) and returns the index
    labels whose mask value is False.

    If no mask has been recorded yet, no sample has been filtered out,
    so every name in ``self.data.obs_names`` is returned instead of
    raising ``KeyError``.

    Returns:
        Array of the sample names that have not been filtered out.
    """
    # The mask is only created once a filter has been applied.
    if "filtered_out" not in self.data.uns:
        return self.data.obs_names.values

    filtered_out = self.data.uns["filtered_out"]
    # Convert to a plain array so the selection is purely positional.
    keep = (~filtered_out).to_numpy()
    return filtered_out.index.values[keep]

def log_content(self):
for line in str(self.data).split("\n"):
Expand Down Expand Up @@ -341,6 +357,7 @@ def calc_bin_abund(self):
)
.sum()
.T
.reindex(index=self.filtered_samples)
)
)
self.data.mod["bins"].var["n_genes"] = pd.Series(self.data.uns["bin_size"])
Expand Down

0 comments on commit 98ff79c

Please sign in to comment.