pachterlab · austinv11 · Dec 11, 2024 · Dec 11, 2024 · Dec 11, 2024 · Dec 11, 2024
diff --git a/gget/gget_mutate.py b/gget/gget_mutate.py
@@ -459,7 +459,7 @@ def mutate(
                                    The identifiers (following the '>') of the mutated sequences in the output fasta will be '>[seq_ID]_[mut_ID]'.
     - verbose                      (True/False) whether to print progress information. Default: True
 
-    Saves mutated sequences in fasta format (or returns a list containing the mutated sequences if out=None).
+    Saves mutated sequences in fasta format. If out=None, instead returns the mutation dataframe (when update_df is True) or a list containing the mutated sequences (otherwise).
     """
 
     global intronic_mutations, posttranslational_region_mutations, unknown_mutations, uncertain_mutations, ambiguous_position_mutations, cosmic_incorrect_wt_base, mut_idx_outside_seq
@@ -471,6 +471,8 @@ def mutate(
         "mutation_type",
         "wt_sequence",
         "mutant_sequence",
+        "start_mutation_position",
+        "end_mutation_position"
     ]
 
     # Load input sequences and their identifiers from fasta file
@@ -672,7 +674,7 @@ def mutate(
 
     if mutations.empty:
         logger.warning("No valid mutations found in the input.")
-        return []
+        return mutations if update_df else []
 
     # Split nucleotide positions into start and end positions
     split_positions = mutations["nucleotide_positions"].str.split("_", expand=True)
@@ -713,7 +715,7 @@ def mutate(
 
     if mutations.empty:
         logger.warning("No valid mutations found in the input.")
-        return []
+        return mutations if update_df else []
 
     # Create masks for each type of mutation
     mutations["wt_nucleotides_ensembl"] = None
@@ -789,7 +791,7 @@ def mutate(
 
     if mutations.empty:
         logger.warning("No valid mutations found in the input.")
-        return []
+        return mutations if update_df else []
 
     # Adjust the start and end positions for insertions
     mutations.loc[
@@ -1310,9 +1312,9 @@ def mutate(
         )
         if not update_df_out:
             if not mutations_path:
-                logger.warning(
-                    "mutations_path must be provided if update_df is True and update_df_out is not provided."
-                )
+                # logger.warning(
+                #     "mutations_path must be provided if update_df is True and update_df_out is not provided."
+                # )
                 saved_updated_df = False
             else:
                 base_name, ext = os.path.splitext(mutations_path)
@@ -1335,13 +1337,17 @@ def mutate(
 
     # When out=None, return list of mutated seqs
     else:
-        all_mut_seqs = []
-        all_mut_seqs.extend(mutations["mutant_sequence"].values)
-
-        # Remove empty strings from final list of mutated sequences
-        # (these are introduced when unknown mutations are encountered)
-        while "" in all_mut_seqs:
-            all_mut_seqs.remove("")
-
-        if len(all_mut_seqs) > 0:
-            return all_mut_seqs
+        if update_df:
+            return mutations[columns_to_keep]
+        else:
+            all_mut_seqs = []
+            all_mut_seqs.extend(mutations["mutant_sequence"].values)
+
+            # Remove empty strings from final list of mutated sequences
+            # (these are introduced when unknown mutations are encountered)
+            while "" in all_mut_seqs:
+                all_mut_seqs.remove("")
+
+            if len(all_mut_seqs) > 0:
+                return all_mut_seqs
+            return []