refactor var names in list comprehensions

oschwengers · Oct 15, 2024 · fe9eece · fe9eece
1 parent befe6ee
commit fe9eece
Show file tree

Hide file tree

Showing 4 changed files with 45 additions and 45 deletions.
diff --git a/bakta/main.py b/bakta/main.py
@@ -516,21 +516,21 @@ def main():
     print(f"\tN ratio: {100 * data['stats']['n_ratio']:.1f} %")
     print(f"\tcoding density: {100 * data['stats']['coding_ratio']:.1f} %")
     print('\nannotation summary:')
-    print(f"\ttRNAs: {len([f for f in features if f['type'] == bc.FEATURE_T_RNA])}")
-    print(f"\ttmRNAs: {len([f for f in features if f['type'] == bc.FEATURE_TM_RNA])}")
-    print(f"\trRNAs: {len([f for f in features if f['type'] == bc.FEATURE_R_RNA])}")
-    print(f"\tncRNAs: {len([f for f in features if f['type'] == bc.FEATURE_NC_RNA])}")
-    print(f"\tncRNA regions: {len([f for f in features if f['type'] == bc.FEATURE_NC_RNA_REGION])}")
-    print(f"\tCRISPR arrays: {len([f for f in features if f['type'] == bc.FEATURE_CRISPR])}")
-    cdss = [f for f in features if f['type'] == bc.FEATURE_CDS]
+    print(f"\ttRNAs: {len([feat for feat in features if feat['type'] == bc.FEATURE_T_RNA])}")
+    print(f"\ttmRNAs: {len([feat for feat in features if feat['type'] == bc.FEATURE_TM_RNA])}")
+    print(f"\trRNAs: {len([feat for feat in features if feat['type'] == bc.FEATURE_R_RNA])}")
+    print(f"\tncRNAs: {len([feat for feat in features if feat['type'] == bc.FEATURE_NC_RNA])}")
+    print(f"\tncRNA regions: {len([feat for feat in features if feat['type'] == bc.FEATURE_NC_RNA_REGION])}")
+    print(f"\tCRISPR arrays: {len([feat for feat in features if feat['type'] == bc.FEATURE_CRISPR])}")
+    cdss = [feat for feat in features if feat['type'] == bc.FEATURE_CDS]
     print(f"\tCDSs: {len(cdss)}")
     print(f"\t\thypotheticals: {len([cds for cds in cdss if 'hypothetical' in cds])}")
     print(f"\t\tpseudogenes: {len([cds for cds in cdss if 'pseudogene' in cds])}")
     print(f"\t\tsignal peptides: {len([cds for cds in cdss if bc.FEATURE_SIGNAL_PEPTIDE in cds])}")
-    print(f"\tsORFs: {len([f for f in features if f['type'] == bc.FEATURE_SORF])}")
-    print(f"\tgaps: {len([f for f in features if f['type'] == bc.FEATURE_GAP])}")
-    print(f"\toriCs/oriVs: {len([f for f in features if (f['type'] == bc.FEATURE_ORIC or f['type'] == bc.FEATURE_ORIV)])}")
-    print(f"\toriTs: {len([f for f in features if f['type'] == bc.FEATURE_ORIT])}")
+    print(f"\tsORFs: {len([feat for feat in features if feat['type'] == bc.FEATURE_SORF])}")
+    print(f"\tgaps: {len([feat for feat in features if feat['type'] == bc.FEATURE_GAP])}")
+    print(f"\toriCs/oriVs: {len([feat for feat in features if (feat['type'] == bc.FEATURE_ORIC or feat['type'] == bc.FEATURE_ORIV)])}")
+    print(f"\toriTs: {len([feat for feat in features if feat['type'] == bc.FEATURE_ORIT])}")
 
     ############################################################################
     # Write output files
@@ -609,21 +609,21 @@ def main():
         fh_out.write(f"N ratio: {100 * data['stats']['n_ratio']:.1f}\n")
         fh_out.write(f"coding density: {100 * data['stats']['coding_ratio']:.1f}\n")
         fh_out.write('\nAnnotation:\n')
-        fh_out.write(f"tRNAs: {len([f for f in features if f['type'] == bc.FEATURE_T_RNA])}\n")
-        fh_out.write(f"tmRNAs: {len([f for f in features if f['type'] == bc.FEATURE_TM_RNA])}\n")
-        fh_out.write(f"rRNAs: {len([f for f in features if f['type'] == bc.FEATURE_R_RNA])}\n")
-        fh_out.write(f"ncRNAs: {len([f for f in features if f['type'] == bc.FEATURE_NC_RNA])}\n")
-        fh_out.write(f"ncRNA regions: {len([f for f in features if f['type'] == bc.FEATURE_NC_RNA_REGION])}\n")
-        fh_out.write(f"CRISPR arrays: {len([f for f in features if f['type'] == bc.FEATURE_CRISPR])}\n")
+        fh_out.write(f"tRNAs: {len([feat for feat in features if feat['type'] == bc.FEATURE_T_RNA])}\n")
+        fh_out.write(f"tmRNAs: {len([feat for feat in features if feat['type'] == bc.FEATURE_TM_RNA])}\n")
+        fh_out.write(f"rRNAs: {len([feat for feat in features if feat['type'] == bc.FEATURE_R_RNA])}\n")
+        fh_out.write(f"ncRNAs: {len([feat for feat in features if feat['type'] == bc.FEATURE_NC_RNA])}\n")
+        fh_out.write(f"ncRNA regions: {len([feat for feat in features if feat['type'] == bc.FEATURE_NC_RNA_REGION])}\n")
+        fh_out.write(f"CRISPR arrays: {len([feat for feat in features if feat['type'] == bc.FEATURE_CRISPR])}\n")
         fh_out.write(f"CDSs: {len(cdss)}\n")
         fh_out.write(f"pseudogenes: {len([cds for cds in cdss if 'pseudogene' in cds])}\n")
         fh_out.write(f"hypotheticals: {len([cds for cds in cdss if 'hypothetical' in cds])}\n")
         fh_out.write(f"signal peptides: {len([cds for cds in cdss if bc.FEATURE_SIGNAL_PEPTIDE in cds])}\n")
-        fh_out.write(f"sORFs: {len([f for f in features if f['type'] == bc.FEATURE_SORF])}\n")
-        fh_out.write(f"gaps: {len([f for f in features if f['type'] == bc.FEATURE_GAP])}\n")
-        fh_out.write(f"oriCs: {len([f for f in features if f['type'] == bc.FEATURE_ORIC])}\n")
-        fh_out.write(f"oriVs: {len([f for f in features if f['type'] == bc.FEATURE_ORIV])}\n")
-        fh_out.write(f"oriTs: {len([f for f in features if f['type'] == bc.FEATURE_ORIT])}\n")
+        fh_out.write(f"sORFs: {len([feat for feat in features if feat['type'] == bc.FEATURE_SORF])}\n")
+        fh_out.write(f"gaps: {len([feat for feat in features if feat['type'] == bc.FEATURE_GAP])}\n")
+        fh_out.write(f"oriCs: {len([feat for feat in features if feat['type'] == bc.FEATURE_ORIC])}\n")
+        fh_out.write(f"oriVs: {len([feat for feat in features if feat['type'] == bc.FEATURE_ORIV])}\n")
+        fh_out.write(f"oriTs: {len([feat for feat in features if feat['type'] == bc.FEATURE_ORIT])}\n")
         fh_out.write('\nBakta:\n')
         fh_out.write(f'Software: v{bakta.__version__}\n')
         fh_out.write(f"Database: v{cfg.db_info['major']}.{cfg.db_info['minor']}, {cfg.db_info['type']}\n")

diff --git a/bakta/proteins.py b/bakta/proteins.py
@@ -199,12 +199,12 @@ def map_aa_columns(feat: dict) -> Sequence[str]:
         str(feat['length']),
         gene,
         feat['product'],
-        ','.join([k.replace('EC:', '') for k in feat['db_xrefs'] if 'EC:' in k]),
-        ','.join([k for k in feat['db_xrefs'] if 'GO:' in k]),
-        ','.join([k.replace('COG:', '') for k in feat['db_xrefs'] if 'COG:' in k]),
-        ','.join([k.replace('RefSeq:', '') for k in feat['db_xrefs'] if 'RefSeq:' in k]),
-        ','.join([k.replace('UniParc:', '') for k in feat['db_xrefs'] if 'UniParc:' in k]),
-        ','.join([k.replace('UniRef:', '') for k in feat['db_xrefs'] if 'UniRef' in k])
+        ','.join([dbxref.replace('EC:', '') for dbxref in feat['db_xrefs'] if 'EC:' in dbxref]),
+        ','.join([dbxref for dbxref in feat['db_xrefs'] if 'GO:' in dbxref]),
+        ','.join([dbxref.replace('COG:', '') for dbxref in feat['db_xrefs'] if 'COG:' in dbxref]),
+        ','.join([dbxref.replace('RefSeq:', '') for dbxref in feat['db_xrefs'] if 'RefSeq:' in dbxref]),
+        ','.join([dbxref.replace('UniParc:', '') for dbxref in feat['db_xrefs'] if 'UniParc:' in dbxref]),
+        ','.join([dbxref.replace('UniRef:', '') for dbxref in feat['db_xrefs'] if 'UniRef' in dbxref])
     ]
 
 
@@ -214,7 +214,7 @@ def map_hypothetical_columns(feat: dict) -> Sequence[str]:
         str(feat['length']),
         f"{(feat['seq_stats']['molecular_weight']/1000):.1f}" if feat['seq_stats']['molecular_weight'] else 'NA'
         f"{feat['seq_stats']['isoelectric_point']:.1f}" if feat['seq_stats']['isoelectric_point'] else 'NA'
-        ','.join([k.replace('PFAM:', '') for k in feat['db_xrefs'] if 'PFAM:' in k])
+        ','.join([dbxref.replace('PFAM:', '') for dbxref in feat['db_xrefs'] if 'PFAM:' in dbxref])
     ]
 
 

diff --git a/scripts/collect-annotation-stats.py b/scripts/collect-annotation-stats.py
@@ -82,20 +82,20 @@
                 f"{100 * data['stats']['n_ratio']:.1f}",
                 f"{data['stats']['n50']}",
                 f"{100 * data['stats']['coding_ratio']:.1f}",
-                f"{len([f for f in data['features'] if f['type'] == bc.FEATURE_T_RNA])}",
-                f"{len([f for f in data['features'] if f['type'] == bc.FEATURE_TM_RNA])}",
-                f"{len([f for f in data['features'] if f['type'] == bc.FEATURE_R_RNA])}",
-                f"{len([f for f in data['features'] if f['type'] == bc.FEATURE_NC_RNA])}",
-                f"{len([f for f in data['features'] if f['type'] == bc.FEATURE_NC_RNA_REGION])}",
-                f"{len([f for f in data['features'] if f['type'] == bc.FEATURE_CRISPR])}",
-                f"{len([f for f in data['features'] if f['type'] == bc.FEATURE_CDS])}",
-                f"{len([f for f in data['features'] if f['type'] == bc.FEATURE_CDS and 'hypothetical' in f])}",
-                f"{len([f for f in data['features'] if f['type'] == bc.FEATURE_CDS and 'pseudogene' in f])}",
-                f"{len([f for f in data['features'] if f['type'] == bc.FEATURE_SORF])}",
-                f"{len([f for f in data['features'] if f['type'] == bc.FEATURE_GAP])}",
-                f"{len([f for f in data['features'] if f['type'] == bc.FEATURE_ORIC])}",
-                f"{len([f for f in data['features'] if f['type'] == bc.FEATURE_ORIV])}",
-                f"{len([f for f in data['features'] if f['type'] == bc.FEATURE_ORIT])}",
+                f"{len([feat for feat in data['features'] if feat['type'] == bc.FEATURE_T_RNA])}",
+                f"{len([feat for feat in data['features'] if feat['type'] == bc.FEATURE_TM_RNA])}",
+                f"{len([feat for feat in data['features'] if feat['type'] == bc.FEATURE_R_RNA])}",
+                f"{len([feat for feat in data['features'] if feat['type'] == bc.FEATURE_NC_RNA])}",
+                f"{len([feat for feat in data['features'] if feat['type'] == bc.FEATURE_NC_RNA_REGION])}",
+                f"{len([feat for feat in data['features'] if feat['type'] == bc.FEATURE_CRISPR])}",
+                f"{len([feat for feat in data['features'] if feat['type'] == bc.FEATURE_CDS])}",
+                f"{len([feat for feat in data['features'] if feat['type'] == bc.FEATURE_CDS and 'hypothetical' in feat])}",
+                f"{len([feat for feat in data['features'] if feat['type'] == bc.FEATURE_CDS and 'pseudogene' in feat])}",
+                f"{len([feat for feat in data['features'] if feat['type'] == bc.FEATURE_SORF])}",
+                f"{len([feat for feat in data['features'] if feat['type'] == bc.FEATURE_GAP])}",
+                f"{len([feat for feat in data['features'] if feat['type'] == bc.FEATURE_ORIC])}",
+                f"{len([feat for feat in data['features'] if feat['type'] == bc.FEATURE_ORIV])}",
+                f"{len([feat for feat in data['features'] if feat['type'] == bc.FEATURE_ORIT])}",
             ]
             output_line = '\t'.join(stats)
             print(output_line)

diff --git a/test/test_bakta.py b/test/test_bakta.py
@@ -81,7 +81,7 @@ def test_bakta_plasmid(tmpdir):
         bc.FEATURE_ORIT: 0
     }
     for type, count in feature_counts_expected.items():
-        assert len([f for f in features if f['type'] == type]) == count
+        assert len([feat for feat in features if feat['type'] == type]) == count
 
 
 @pytest.mark.parametrize(
@@ -142,5 +142,5 @@ def test_bakta_genome(db, tmpdir):
         bc.FEATURE_ORIT: 0
     }
     for type, count in feature_counts_expected.items():
-        assert len([f for f in features if f['type'] == type]) == count
+        assert len([feat for feat in features if feat['type'] == type]) == count