openvax · iskandr · Jun 10, 2016 · Jun 9, 2016 · Jun 9, 2016 · Jun 9, 2016
diff --git a/requirements.txt b/requirements.txt
@@ -5,3 +5,4 @@ varcode>=0.3.17
 pylint>=1.4.4
 nose>=1.3.6
 gtfparse>=0.0.4
+mhcnames
diff --git a/scripts/topiary b/scripts/topiary
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-# Copyright (c) 2015. Mount Sinai School of Medicine
+# Copyright (c) 2015-2016. Mount Sinai School of Medicine
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -33,32 +33,26 @@ Example usage:
 from topiary.commandline_args import (
     arg_parser,
     rna_gene_expression_dict_from_args,
-    rna_transcript_expression_dict_from_args
+    rna_transcript_expression_dict_from_args,
+    write_outputs,
 )
 from topiary import predict_epitopes_from_args, epitopes_to_dataframe
 
 
-args = arg_parser.parse_args()
-
-def main():
-    print("Topiary commandline arguments:")
-    print(args)
+def main(args):
     epitopes = predict_epitopes_from_args(args)
     gene_expression_dict = rna_gene_expression_dict_from_args(args)
     transcript_expression_dict = rna_transcript_expression_dict_from_args(args)
     df = epitopes_to_dataframe(
         epitopes,
         gene_expression_dict=gene_expression_dict,
         transcript_expression_dict=transcript_expression_dict)
-    print("First 10 epitope predictions:")
-    print(df.ix[:10])
+    write_outputs(df, args)
     print("Total count: %d" % len(df))
-    if args.output_csv:
-        print("Saving %s..." % args.output_csv)
-        df.to_csv(args.output_csv, index=True, index_label="#")
-    if args.output_html:
-        print("Saving %s..." % args.output_html)
-        df.to_html(args.output_html, index=True)
 
 if __name__ == "__main__":
-    main()
+    args = arg_parser.parse_args()
+    print("Topiary commandline arguments:")
+    print(args)
+
+    main(args)
diff --git a/scripts/topiary-analyze-cohort b/scripts/topiary-analyze-cohort
@@ -54,65 +54,77 @@ from .mutation_report import print_mutation_report
 parser = argparse.ArgumentParser()
 
 input_group = parser.add_mutually_exclusive_group(required=True)
-input_group.add_argument("--variant-input-dir",
-   type=str,
-   help="Directory containing MAF or VCF input files")
+input_group.add_argument(
+    "--variant-input-dir",
+    type=str,
+    help="Directory containing MAF or VCF input files")
 
-input_group.add_argument("--variant-input-file",
-   type=str,
-   help="Single MAF or VCF input file")
+input_group.add_argument(
+    "--variant-input-file",
+    type=str,
+    help="Single MAF or VCF input file")
 
-parser.add_argument("--hla-dir",
+parser.add_argument(
+    "--hla-dir",
     type=str,
     default=None,
     help=("Directory containing HLA allele files (with suffix .hla)"))
 
-parser.add_argument("--output-counts-csv",
+parser.add_argument(
+    "--output-counts-csv",
     default="analyze_cohort_results.csv",
     help="Path to output file containing mutation/epitope counts")
 
-parser.add_argument("--quiet",
+parser.add_argument(
+    "--quiet",
     type=str,
     help="Suppress INFO log messages")
 
-parser.add_argument("--binding-threshold",
+parser.add_argument(
+    "--binding-threshold",
     type=int,
     default=500,
     help="Cutoff IC50 score for epitope MHC binding")
 
-parser.add_argument("--combined-maf",
+parser.add_argument(
+    "--combined-maf",
     default=False,
     action="store_true",
     help=("Rather than using filenames to identify patients, "
           "a single MAF file can have multiple tumor barcodes."))
 
-parser.add_argument("--rna-filter-dir",
+parser.add_argument(
+    "--rna-filter-dir",
     type=str,
     default=None,
     help=("Directory containing RNASeq gene expression "
           "levels (one file per patient). If provided, we "
           "filter mutations with no gene expression."))
 
-parser.add_argument("--debug-patient-id",
+parser.add_argument(
+    "--debug-patient-id",
     type=str,
     default=None,
     help=("If we have a directory or a file containing "
           "multiple patient IDs, limit that collection to "
           "one specific patient ID for debugging."))
 
-parser.add_argument("--debug-scored-epitopes-csv",
+parser.add_argument(
+    "--debug-scored-epitopes-csv",
     type=str,
     default=None,
     help=("If we have a CSV file representing scored "
           "epitopes, use that instead of running netMHCpan. "
           "If not, generate that CSV file."))
 
-parser.add_argument("--netmhc-cons",
+parser.add_argument(
+    "--netmhc-cons",
     default=False,
     action="store_true",
     help="Use local NetMHCcons binding predictor (otherwise use NetMHCpan)")
 
-parser.add_argument("--resume",
+parser.add_argument(
+    "--resume",
     default=False,
     action="store_true",
     help="Append to an existing output file")
@@ -168,12 +180,16 @@ def find_mutation_files(
 
 
 def collect_hla_files(input_dir_string):
-    return collect_files(input_dir_string, read_hla_file,
+    return collect_files(
+        input_dir_string,
+        read_hla_file,
         permissive_parsing=True)
 
 
 def collect_gene_exp_files(input_dir_string):
-    return collect_files(input_dir_string, read_gene_exp_file,
+    return collect_files(
+        input_dir_string,
+        read_gene_exp_file,
         permissive_parsing=True)
 
 
@@ -212,7 +228,7 @@ def read_gene_exp_file(path, permissive_parsing):
     count_col = gene_exp_df.columns[1]
     if permissive_parsing:
         gene_exp_df[gene_col] = gene_exp_df[gene_col].str.split('|').map(
-                lambda x: x[0])
+            lambda x: x[0])
     gene_exp_df = gene_exp_df[gene_exp_df[count_col] > 0]
     return set(gene_exp_df[gene_col].tolist())
 
@@ -286,13 +302,15 @@ def generate_mutation_counts(
                 scored_epitopes = pd.read_csv(csv_file)
             else:
                 mhc = make_mhc_predictor()
-                scored_epitopes = mhc.predict(transcripts_df,
-                        mutation_window_size=9)
+                scored_epitopes = mhc.predict(
+                    transcripts_df,
+                    mutation_window_size=9)
                 scored_epitopes.to_csv(csv_file)
         else:
             mhc = make_mhc_predictor()
-            scored_epitopes = mhc.predict(transcripts_df,
-                    mutation_window_size=9)
+            scored_epitopes = mhc.predict(
+                transcripts_df,
+                mutation_window_size=9)
 
         if not args.quiet:
             print scored_epitopes
@@ -331,12 +349,13 @@ def generate_mutation_counts(
             curr_immunogenic_epitopes = immunogenic_epitopes.groupby(['Epitope']).first()
             n_immunogenic_epitopes += len(curr_immunogenic_epitopes)
             n_immunogenic_mutations += len(curr_immunogenic_epitopes) > 0
-            logging.info(("%s %s: epitopes %s, ligands %d, imm %d"),
-                             gene,
-                             mut,
-                             n_curr_epitopes,
-                             n_curr_ligands,
-                             len(curr_immunogenic_epitopes))
+            # Hand the values to logging as arguments so the message is
+            # only formatted when an INFO record is actually emitted.
+            logging.info(
+                "%s %s: epitopes %s, ligands %d, imm %d",
+                gene, mut,
+                n_curr_epitopes, n_curr_ligands,
+                len(curr_immunogenic_epitopes))
         result_tuple = (
             n_coding_mutations,
             n_epitopes,
@@ -441,4 +460,4 @@ if __name__ == "__main__":
                 n_ligand_mutations,
                 n_ligands,
                 n_immunogenic_mutations,
-                n_immunogenic_epitopes))
+                n_immunogenic_epitopes))
diff --git a/setup.py b/setup.py
@@ -71,7 +71,8 @@
             'mhctools >=0.1.8',
             'varcode >=0.3.17',
             'nose >=1.3.6',
-            'gtfparse >=0.0.4'
+            'gtfparse >=0.0.4',
+            'mhcnames',
         ],
         long_description=readme,
         packages=find_packages(exclude="test"),

diff --git a/test/test_args_outputs.py b/test/test_args_outputs.py
@@ -0,0 +1,34 @@
+from topiary.commandline_args import arg_parser, write_outputs
+import tempfile
+import pandas as pd
+from nose.tools import eq_
+
+
+def test_write_outputs():
+    """Check that write_outputs subsets and renames columns in its CSV."""
+    import os  # local import: only needed to clean up the temp file
+    # Only a unique path is needed; close the handle before it is reused.
+    with tempfile.NamedTemporaryFile(mode="r+", delete=False) as f:
+        csv_path = f.name
+    try:
+        df = pd.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]})
+        args = arg_parser.parse_args([
+            "--output-csv", csv_path,
+            "--subset-output-columns", "x",
+            "--rename-output-column", "x", "X",
+            "--mhc-predictor", "random",
+            "--mhc-alleles", "A0201",
+        ])
+        write_outputs(
+            df,
+            args,
+            print_df_before_filtering=True,
+            print_df_after_filtering=True)
+        print("File: %s" % csv_path)
+        df_from_file = pd.read_csv(csv_path, index_col="#")
+    finally:
+        os.remove(csv_path)  # delete=False: nothing else removes the file
+    df_expected = pd.DataFrame({"X": [1, 2, 3]})
+    print(df_from_file)
+    eq_(len(df_expected), len(df_from_file))
+    assert (df_expected == df_from_file).all().all()
diff --git a/topiary/__init__.py b/topiary/__init__.py
@@ -21,7 +21,7 @@
 )
 from . import commandline_args
 
-__version__ = '0.0.19'
+__version__ = '0.0.20'
 
 __all__ = [
     "LazyLigandomeDict",