Skip to content

Commit

Permalink
Sort features on sample abundances
Browse files Browse the repository at this point in the history
  • Loading branch information
sminot committed Feb 6, 2024
1 parent 89bc8f3 commit f713b7b
Showing 1 changed file with 39 additions and 22 deletions.
61 changes: 39 additions & 22 deletions bin/bin_metagenomes.py
Original file line number Diff line number Diff line change
Expand Up @@ -676,17 +676,21 @@ def log_scale(df: pd.DataFrame):
lowest = df.apply(lambda c: c[c > 0].min()).min()
return df.clip(lower=lowest).apply(np.log10)

def sort_index(self, df: pd.DataFrame, metric="cosine", method="average"):
    """Return the index labels of ``df`` ordered by hierarchical clustering.

    The rows of ``df`` are clustered with ``scipy.cluster.hierarchy.linkage``
    and the index labels are returned in dendrogram leaf order, so that
    similar rows end up next to each other.

    Args:
        df: Table whose rows are clustered; its index supplies the labels.
        metric: Distance metric passed through to ``hierarchy.linkage``.
        method: Linkage method passed through to ``hierarchy.linkage``.

    Returns:
        Array of the index labels of ``df`` in clustered order. A table
        with fewer than two rows is returned in its existing order.

    Raises:
        Exception: Any error raised by the clustering is re-raised after
            the offending table has been logged.
    """
    # Linkage needs at least two observations; with zero or one row the
    # ordering is trivial, so skip clustering instead of failing.
    if df.shape[0] < 2:
        return df.index.values

    try:
        linkage_matrix = hierarchy.linkage(
            df.values,
            metric=metric,
            method=method
        )
        return df.index.values[hierarchy.leaves_list(linkage_matrix)]
    except Exception:
        # Log the table that could not be clustered to help debugging,
        # then propagate the original error with its traceback intact.
        logger.info("Error encountered while sorting table:")
        self.log_df(df)
        raise

def write_image(
self,
Expand Down Expand Up @@ -714,9 +718,32 @@ def write_image(
row_heights = np.array([0.5, heatmap_size, 1, heatmap_size, 1, 1, 1])
row_heights = list(row_heights / row_heights.sum())

# Genomes across samples
genomes_df: pd.DataFrame = (
self.data
.mod["genomes"]
.to_df("prop")
)

# Bins across samples
bins_df: pd.DataFrame = (
self.data
.mod["bins"]
.to_df("prop")
)

# Sort the bins and genomes
bin_order = self.sort_index(self.data.uns["group_profile"])
genome_order = self.sort_index(self.data.uns["group_profile"].T)
bin_order = self.sort_index(
bins_df.T,
metric="euclidean",
method="ward"
)

genome_order = self.sort_index(
genomes_df.T,
metric="euclidean",
method="ward"
)

cols = 6
rows = 7
Expand Down Expand Up @@ -771,11 +798,6 @@ def write_image(
)

# Genomes across samples
genomes_df: pd.DataFrame = (
self.data
.mod["genomes"]
.to_df("prop")
)
sample_order = self.sort_index(genomes_df)
genomes_df = genomes_df.reindex(
columns=genome_order,
Expand Down Expand Up @@ -884,11 +906,6 @@ def write_image(
)

# Bins across samples
bins_df: pd.DataFrame = (
self.data
.mod["bins"]
.to_df("prop")
)
sample_order = self.sort_index(bins_df)
bins_df = bins_df.reindex(
columns=bin_order,
Expand Down

0 comments on commit f713b7b

Please sign in to comment.