Skip to content

Commit

Permalink
Merge branch 'dev' into fix_whitelist
Browse files Browse the repository at this point in the history
  • Loading branch information
rannick authored Feb 5, 2024
2 parents 066993e + 8440a2c commit 2988e8f
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 14 deletions.
5 changes: 3 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Changed

- Update to nf-tools 2.11.1
- Update to nf-tools 2.11.1 [#457](https://github.com/nf-core/rnafusion/pull/457)

### Fixed

- fix VCF_COLLECT handling when a tool is absent from FUSIONREPORT report
- fix bug when using parameter "whitelist" [#466](https://github.com/nf-core/rnafusion/pull/466)
- fix VCF_COLLECT handling when a tool is absent from FUSIONREPORT report [#458](https://github.com/nf-core/rnafusion/pull/458)
- fix VCF_COLLECT when fusioninspector output is empty but fusionreport is not [#465](https://github.com/nf-core/rnafusion/pull/465)

### Removed

Expand Down
44 changes: 32 additions & 12 deletions bin/vcf_collect.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ def vcf_collect(
.reset_index()
)
hgnc_df = build_hgnc_dataframe(hgnc)

df_symbol = merged_df[merged_df["Left_ensembl_gene_id"].isna()]
df_not_symbol = merged_df[merged_df["Left_ensembl_gene_id"].notna()]

Expand Down Expand Up @@ -115,6 +114,7 @@ def vcf_collect(
"annots",
]
].drop_duplicates()
all_df["CDS_RIGHT_ID"] = all_df["CDS_RIGHT_ID"].astype("str")
all_df = all_df.merge(gtf_df, how="left", left_on="CDS_RIGHT_ID", right_on="Transcript_id")
all_df[["PosB", "orig_start", "orig_end"]] = all_df[["PosB", "orig_start", "orig_end"]].fillna(0)
all_df[["PosB", "orig_start", "orig_end"]] = all_df[["PosB", "orig_start", "orig_end"]].astype(int)
Expand Down Expand Up @@ -272,15 +272,35 @@ def build_fusioninspector_dataframe(file: str) -> pd.DataFrame:
"""
df = pd.read_csv(file, sep="\t")
df = df.rename(columns={"#FusionName": "FUSION"})
df[["ChromosomeA", "PosA", "Strand1"]] = df["LeftBreakpoint"].str.split(":", expand=True)
df[["ChromosomeB", "PosB", "Strand2"]] = df["RightBreakpoint"].str.split(":", expand=True)
df[["LeftGeneName", "Left_ensembl_gene_id"]] = df["LeftGene"].str.split("^", expand=True)
df[["RightGeneName", "Right_ensembl_gene_id"]] = df["RightGene"].str.split("^", expand=True)
df["annots"] = (
df["annots"]
.apply(convert_to_list)
.apply(lambda x: ",".join(map(str, x)) if isinstance(x, list) else str(x) if pd.notna(x) else "")
)
if not (df.empty):
df[["ChromosomeA", "PosA", "Strand1"]] = df["LeftBreakpoint"].str.split(":", expand=True)
df[["ChromosomeB", "PosB", "Strand2"]] = df["RightBreakpoint"].str.split(":", expand=True)
df[["LeftGeneName", "Left_ensembl_gene_id"]] = df["LeftGene"].str.split("^", expand=True)
df[["RightGeneName", "Right_ensembl_gene_id"]] = df["RightGene"].str.split("^", expand=True)
df["annots"] = (
df["annots"]
.apply(convert_to_list)
.apply(lambda x: ",".join(map(str, x)) if isinstance(x, list) else str(x) if pd.notna(x) else "")
)
else:
for i in [
"ChromosomeA",
"Strand1",
"ChromosomeB",
"Strand2",
"LeftGeneName",
"Left_ensembl_gene_id",
"RightGeneName",
"Right_ensembl_gene_id",
"annots",
]:
df[i] = ""
for j in [
"PosA",
"PosB",
]:
df[j] = np.nan

return df.set_index(["FUSION"])


Expand Down Expand Up @@ -315,8 +335,8 @@ def read_build_fusionreport(fusionreport_file: str) -> pd.DataFrame:
"""
with open(fusionreport_file) as f:
from_html = [line.split('rows": [')[1] for line in f if 'name="fusion_list' in line]
expression = from_html[0].split('], "tool')[0]
fusion_report = pd.DataFrame.from_dict(ast.literal_eval(expression))
expression = ast.literal_eval(from_html[0].split('], "tool')[0])
fusion_report = pd.DataFrame.from_dict({k: [v] for k, v in expression.items()})
if not "arriba" in fusion_report.columns:
fusion_report["arriba"] = ""
if not "fusioncatcher" in fusion_report.columns:
Expand Down

0 comments on commit 2988e8f

Please sign in to comment.