From befe6eec861d141b64bfd82ba9682c4666fe3f4a Mon Sep 17 00:00:00 2001
From: Oliver Schwengers <oliver.schwengers@computational.bio.uni-giessen.de>
Date: Tue, 15 Oct 2024 17:49:37 +0200
Subject: [PATCH] remove io.py

---
 bakta/io.py | 189 ----------------------------------------------------
 1 file changed, 189 deletions(-)
 delete mode 100644 bakta/io.py
diff --git a/bakta/io.py b/bakta/io.py
deleted file mode 100644
index 1002bef..0000000
--- a/bakta/io.py
+++ /dev/null
@@ -1,189 +0,0 @@
-import atexit
-import logging
-import os
-import sys
-
-from pathlib import Path
-
-import bakta
-import bakta.constants as bc
-import bakta.config as cfg
-import bakta.utils as bu
-import bakta.io.fasta as fasta
-import bakta.io.json as json
-import bakta.io.tsv as tsv
-import bakta.io.gff as gff
-import bakta.io.insdc as insdc
-import bakta.plot as plot
-
-
-log = logging.getLogger('IO')
-
-
-def main():
-    # parse options and arguments
-    parser = bu.init_parser(sub_command='_proteins')
-    parser.add_argument('input', metavar='<input>', help='Bakta annotations in JSON format')
-    
-    arg_group_io = parser.add_argument_group('Input / Output')
-    arg_group_io.add_argument('--output', '-o', action='store', default=os.getcwd(), help='Output directory (default = current working directory)')
-    arg_group_io.add_argument('--prefix', '-p', action='store', default=None, help='Prefix for output files')
-    arg_group_io.add_argument('--force', '-f', action='store_true', help='Force overwriting existing output folder')
-    
-    arg_group_general = parser.add_argument_group('General')
-    arg_group_general.add_argument('--help', '-h', action='help', help='Show this help message and exit')
-    arg_group_general.add_argument('--verbose', '-v', action='store_true', help='Print verbose information')
-    arg_group_general.add_argument('--debug', action='store_true', help='Run Bakta in debug mode. Temp data will not be removed.')
-    arg_group_general.add_argument('--version', '-V', action='version', version=f'%(prog)s {bakta.__version__}')
-    args = parser.parse_args()
-
-    ############################################################################
-    # Setup logging
-    ############################################################################
-    cfg.prefix = args.prefix if args.prefix else Path(args.input).stem
-    output_path = cfg.check_output_path(args.output, args.force)
-    cfg.force = args.force
-    log.info('force=%s', args.force)
-    
-    bu.setup_logger(output_path, cfg.prefix, args)
-    log.info('prefix=%s', cfg.prefix)
-    log.info('output=%s', output_path)
-
-    ############################################################################
-    # Checks and configurations
-    # - check parameters and setup global configuration
-    # - test database
-    # - test binary dependencies
-    ############################################################################
-    try:
-        if args.input == '':
-            raise ValueError('File path argument must be non-empty')
-        annotation_path = Path(args.input).resolve()
-        cfg.check_readability('annotation', annotation_path)
-        cfg.check_content_size('annotation', annotation_path)
-    except:
-        log.error('provided annotation file not valid! path=%s', args.input)
-        sys.exit(f'ERROR: annotation file ({args.input}) not valid!')
-    log.info('input-path=%s', annotation_path)
-    
-    cfg.check_tmp_path(args)
-    cfg.debug = args.debug
-    log.info('debug=%s', cfg.debug)
-    cfg.verbose = True if cfg.debug else args.verbose
-    log.info('verbose=%s', cfg.verbose)
-    cfg.user_proteins = cfg.check_user_proteins(args)
-    
-    if(cfg.verbose):
-        print(f'Bakta v{bakta.__version__}')
-        print('Options and arguments:')
-        print(f'\tinput: {annotation_path}')
-        print(f'\toutput: {cfg.output_path}')
-        print(f'\tprefix: {cfg.prefix}')
-        if(cfg.force): print(f'\tforce: {cfg.force}')
-    
-    if(cfg.debug):
-        print(f"\nBakta runs in DEBUG mode! Temporary data will not be destroyed at: {cfg.tmp_path}")
-    else:
-        atexit.register(bu.cleanup, log, cfg.tmp_path)  # register cleanup exit hook
-    
-    ############################################################################
-    # Import annotations from JSON
-    ############################################################################
-    print('Parse genome annotations...')
-    with annotation_path.open('r') as fh:
-        data = json.load(fh)
-    features = data['features']
-    features_by_sequence = {seq['id']: [] for seq in data['sequences']}
-    for feature in data['features']:
-        sequence_features = features_by_sequence.get(feature['sequence'])
-        sequence_features.append(feature)
-
-    ############################################################################
-    # Write output files
-    # - write optional output files in GFF3/GenBank/EMBL formats
-    # - measure runtime
-    # - write comprehensive annotation results as JSON
-    # - remove temp directory
-    ############################################################################
-    print(f'\nExport annotation results to: {cfg.output_path}')
-    print('\thuman readable TSV...')
-    tsv_path = cfg.output_path.joinpath(f'{cfg.prefix}.tsv')
-    tsv.write_features(data['sequences'], features_by_sequence, tsv_path)
-
-    print('\tGFF3...')
-    gff3_path = cfg.output_path.joinpath(f'{cfg.prefix}.gff3')
-    gff.write_features(data, features_by_sequence, gff3_path)
-
-    print('\tINSDC GenBank & EMBL...')
-    genbank_path = cfg.output_path.joinpath(f'{cfg.prefix}.gbff')
-    embl_path = cfg.output_path.joinpath(f'{cfg.prefix}.embl')
-    insdc.write_features(data, features, genbank_path, embl_path)
-
-    print('\tgenome sequences...')
-    fna_path = cfg.output_path.joinpath(f'{cfg.prefix}.fna')
-    fasta.export_sequences(data['sequences'], fna_path, description=True, wrap=True)
-
-    print('\tfeature nucleotide sequences...')
-    ffn_path = cfg.output_path.joinpath(f'{cfg.prefix}.ffn')
-    fasta.write_ffn(features, ffn_path)
-
-    print('\ttranslated CDS sequences...')
-    faa_path = cfg.output_path.joinpath(f'{cfg.prefix}.faa')
-    fasta.write_faa(features, faa_path)
-
-    print('\tfeature inferences...')
-    tsv_path = cfg.output_path.joinpath(f'{cfg.prefix}.inference.tsv')
-    tsv.write_feature_inferences(data['sequences'], features_by_sequence, tsv_path)
-
-    if(cfg.skip_plot  or  cfg.meta):
-        print('\tskip generation of circular genome plot...')
-    else:
-        print('\tcircular genome plot...')
-        plot.write(features, data['sequences'], cfg.output_path)
-
-    if(cfg.skip_cds is False):
-        hypotheticals = [feat for feat in features if feat['type'] == bc.FEATURE_CDS and 'hypothetical' in feat]
-        print('\thypothetical TSV...')
-        tsv_path = cfg.output_path.joinpath(f'{cfg.prefix}.hypotheticals.tsv')
-        tsv.write_hypotheticals(hypotheticals, tsv_path)
-
-        print('\ttranslated hypothetical CDS sequences...')
-        faa_path = cfg.output_path.joinpath(f'{cfg.prefix}.hypotheticals.faa')
-        fasta.write_faa(hypotheticals, faa_path)
-
-    print('\tGenome and annotation summary...')
-    summary_path = cfg.output_path.joinpath(f'{cfg.prefix}.txt')
-    with summary_path.open('w') as fh_out:
-        fh_out.write('Sequence(s):\n')
-        fh_out.write(f"Length: {data['stats']['size']:}\n")
-        fh_out.write(f"Count: {len(data['sequences'])}\n")
-        fh_out.write(f"GC: {100 * data['stats']['gc']:.1f}\n")
-        fh_out.write(f"N50: {data['stats']['n50']:}\n")
-        fh_out.write(f"N ratio: {100 * data['stats']['n_ratio']:.1f}\n")
-        fh_out.write(f"coding density: {100 * data['stats']['coding_ratio']:.1f}\n")
-        fh_out.write('\nAnnotation:\n')
-        fh_out.write(f"tRNAs: {len([f for f in features if f['type'] == bc.FEATURE_T_RNA])}\n")
-        fh_out.write(f"tmRNAs: {len([f for f in features if f['type'] == bc.FEATURE_TM_RNA])}\n")
-        fh_out.write(f"rRNAs: {len([f for f in features if f['type'] == bc.FEATURE_R_RNA])}\n")
-        fh_out.write(f"ncRNAs: {len([f for f in features if f['type'] == bc.FEATURE_NC_RNA])}\n")
-        fh_out.write(f"ncRNA regions: {len([f for f in features if f['type'] == bc.FEATURE_NC_RNA_REGION])}\n")
-        fh_out.write(f"CRISPR arrays: {len([f for f in features if f['type'] == bc.FEATURE_CRISPR])}\n")
-        cdss = [f for f in features if f['type'] == bc.FEATURE_CDS]
-        fh_out.write(f"CDSs: {len(cdss)}\n")
-        fh_out.write(f"pseudogenes: {len([cds for cds in cdss if 'pseudogene' in cds])}\n")
-        fh_out.write(f"hypotheticals: {len([cds for cds in cdss if 'hypothetical' in cds])}\n")
-        fh_out.write(f"signal peptides: {len([cds for cds in cdss if bc.FEATURE_SIGNAL_PEPTIDE in cds])}\n")
-        fh_out.write(f"sORFs: {len([f for f in features if f['type'] == bc.FEATURE_SORF])}\n")
-        fh_out.write(f"gaps: {len([f for f in features if f['type'] == bc.FEATURE_GAP])}\n")
-        fh_out.write(f"oriCs: {len([f for f in features if f['type'] == bc.FEATURE_ORIC])}\n")
-        fh_out.write(f"oriVs: {len([f for f in features if f['type'] == bc.FEATURE_ORIV])}\n")
-        fh_out.write(f"oriTs: {len([f for f in features if f['type'] == bc.FEATURE_ORIT])}\n")
-        fh_out.write('\nBakta:\n')
-        fh_out.write(f'Software: v{bakta.__version__}\n')
-        fh_out.write(f"Database: v{cfg.db_info['major']}.{cfg.db_info['minor']}, {cfg.db_info['type']}\n")
-        fh_out.write('DOI: 10.1099/mgen.0.000685\n')
-        fh_out.write('URL: github.com/oschwengers/bakta\n')
-
-
-if __name__ == '__main__':
-    main()