-
Notifications
You must be signed in to change notification settings - Fork 0
/
fastahandler.py
90 lines (79 loc) · 3.47 KB
/
fastahandler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# A main module for FASTAhandler created by Hyungtaek Jung
#!/usr/bin/env python3
import os
import subprocess
import sys
# Help text printed by the entry point when no module is given or when help
# is requested. Each module row is tab-separated: the full module name, then
# "| " followed by its short alias, then a one-line description. The names
# and aliases listed here are the keys looked up in `module_map`.
usage = '''FASTAhandler: Fasta Manipulation Toolkit
version 1.0.1
Usage: python3 fastahandler.py <module> <parameters>
Modules:
Multi2Single\t| m2s\tConvert a multi-fasta (multiline) into a single-line fasta.
Gfa2Fasta\t| g2a\tConvert a gfa into a single-line fasta.
RenameId\t| rid\tRename prefix IDs and headers.
PrefixRename\t| prn\tRename prefix IDs and headers with a user’s input.
PrefixSelectRename\t| psr\tRename prefix IDs and headers with a user’s input (Only).
PrefixFindReplaceRename\t| pfr\tReplace and rename prefix IDs and headers with a user’s input (Only).
IdExtract\t| idx\tExtract matched IDs and their corresponding sequences.
IdExtractLocation\t| iel\tExtract matched IDs, locations and their corresponding sequences.
IdExtractLocationMultiple\t| iem\tExtract matched IDs, locations and their corresponding sequences (Multiple).
ReverseComplement\t| rcp\tMake a reverse complement sequence.
FindCountDuplication\t| fcd\tFind and count the duplicated IDs and sequences.
RemoveDuplication\t| rvd\tRemove the duplicated IDs and sequences.
SubsetFasta\t| ssf\tMake a subset of data with a sequence length filter.
ExtractPattern\t| xpt\tMake a subset of data with find, filter and extract.
EachFastaStats\t| efs\tGenerate each line fasta statistic for a multi-line fasta.
AllFastaStats\t| afs\tGenerate a summary of multi-line fasta statistics.
MultipleFastaStats\t| mfs\tGenerate a summary of multi-line fasta statistics (Multiple).
ConcatenateFasta\t| ccf\tMake a concatenated fasta file (Multiple).
TranslateSequence\t| tls\tFind the translated sequences as a protein and open reading frames (ORFs).
Use <module> --help for module usage.'''
# Lookup table from a module name given on the command line to the script
# file that implements it. Every module is reachable under two keys -- its
# full name and its three-letter alias -- so each row below is expanded into
# two entries (full name first, alias second) that share one script file.
module_map = {
    key: script_file
    for full_name, alias, script_file in (
        ('Multi2Single', 'm2s', 'multi2single.py'),
        ('Gfa2Fasta', 'g2a', 'gfa2fa.py'),
        ('RenameId', 'rid', 'renameid.py'),
        ('PrefixRename', 'prn', 'prfxrename.py'),
        ('PrefixSelectRename', 'psr', 'prfxselrename.py'),
        ('PrefixFindReplaceRename', 'pfr', 'prfxfindreplace.py'),
        ('IdExtract', 'idx', 'idextract.py'),
        ('IdExtractLocation', 'iel', 'idextloct.py'),
        ('IdExtractLocationMultiple', 'iem', 'idextloctmlt.py'),
        ('ReverseComplement', 'rcp', 'revcomplt.py'),
        ('FindCountDuplication', 'fcd', 'findcntdupl.py'),
        ('RemoveDuplication', 'rvd', 'removedupl.py'),
        ('SubsetFasta', 'ssf', 'subsetfa.py'),
        ('ExtractPattern', 'xpt', 'extractptrn.py'),
        ('EachFastaStats', 'efs', 'eachfastats.py'),
        ('AllFastaStats', 'afs', 'allfastats.py'),
        ('MultipleFastaStats', 'mfs', 'asmstatsunlm.py'),
        ('ConcatenateFasta', 'ccf', 'concatenate.py'),
        ('TranslateSequence', 'tls', 'translatedna.py'),
    )
    for key in (full_name, alias)
}
def main(argv=None):
    """Dispatch a FASTAhandler module name to its script under ``scripts/``.

    Args:
        argv: Full argument vector with the program name at index 0, the
            module name (or a help flag) at index 1, and the module's own
            parameters after that. Defaults to ``sys.argv``.

    Returns:
        The process exit status: 0 after printing the usage text, 1 when the
        module name is not in ``module_map``, otherwise the exit status of
        the module script that was run.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 2 or argv[1] in ('-h', '--h', '--help'):
        print(usage)
        return 0

    script_name = module_map.get(argv[1])
    if script_name is None:
        # Report on stderr with a non-zero status so callers and pipelines
        # can tell a mistyped module apart from a successful run.
        print('Unexpected module. Use --h for help.', file=sys.stderr)
        return 1

    # Locate scripts/ next to this file; sys.path[0] is not guaranteed to be
    # the script directory (e.g. under -P / PYTHONSAFEPATH).
    script_dir = os.path.dirname(os.path.realpath(__file__))
    script_path = os.path.join(script_dir, 'scripts', script_name)

    # Pass an argument list instead of a shell string: parameters containing
    # spaces or quotes reach the module intact and nothing is interpreted by
    # a shell. sys.executable keeps the child on the same interpreter.
    command = [sys.executable, script_path, *argv[2:]]
    return subprocess.run(command).returncode


if __name__ == '__main__':
    sys.exit(main())