Skip to content

Commit

Permalink
Add circlator
Browse files Browse the repository at this point in the history
  • Loading branch information
yamaton committed Dec 13, 2023
1 parent 6a74a4a commit 5d5a7e2
Show file tree
Hide file tree
Showing 7 changed files with 480 additions and 5 deletions.
4 changes: 2 additions & 2 deletions bio.json.gz
Git LFS file not shown
1 change: 1 addition & 0 deletions bio.txt
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ cd-hit-est-2d
cellranger
centrifuge
checkm
circlator
clustalo
cnvkit.py
combineMUMs
Expand Down
2 changes: 1 addition & 1 deletion bio/bash
Submodule bash updated 1 files
+146 −0 completions/circlator
2 changes: 1 addition & 1 deletion bio/fish
Submodule fish updated 1 files
+149 −0 completions/circlator.fish
1 change: 1 addition & 0 deletions bio/json/circlator.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"name":"circlator","description":"A tool to circularize genome assemblies","usage":"circlator <command> [options] <required arguments>","options":[],"subcommands":[{"name":"all","description":"Run mapreads, bam2reads, assemble, merge, clean, fixstart","usage":"circlator all [options] <assembly.fasta> <reads.fasta/q> <output directory>","options":[{"names":["-h","--help"],"argument":"","description":"show this help message and exit"},{"names":["--threads"],"argument":"INT","description":"Number of threads [1]"},{"names":["--verbose"],"argument":"","description":"Be verbose"},{"names":["--unchanged_code"],"argument":"INT","description":"Code to return when the input assembly is not changed [0]"},{"names":["--assembler"],"argument":"{canu,spades}","description":"Assembler to use for reassemblies [spades]"},{"names":["--split_all_reads"],"argument":"","description":"By default, reads mapped to shorter contigs are left unchanged. This option splits them into two, broken at the middle of the contig to try to force circularization. May help if the assembler does not detect circular contigs (eg canu)"},{"names":["--data_type"],"argument":"{pacbio-raw,pacbio-corrected,nanopore-raw,nanopore-corrected}","description":"String representing one of the 4 type of data analysed (only used for Canu) [pacbio-corrected]"},{"names":["--assemble_spades_k"],"argument":"k1,k2,k3,...","description":"Comma separated list of kmers to use when running SPAdes. Max kmer is 127 and each kmer should be an odd integer [127,117,107,97,87,77]"},{"names":["--assemble_spades_use_first"],"argument":"","description":"Use the first successful SPAdes assembly. Default is to try all kmers and use the assembly with the largest N50"},{"names":["--assemble_not_careful"],"argument":"","description":"Do not use the --careful option with SPAdes (used by default)"},{"names":["--assemble_not_only_assembler"],"argument":"","description":"Do not use the --assemble-only option with SPAdes (used by default). Important: with this option, the input reads must be in FASTQ format, otherwise SPAdes will crash because it needs quality scores to correct the reads."},{"names":["--bwa_opts"],"argument":"STRING","description":"BWA options, in quotes [-x pacbio]"},{"names":["--b2r_discard_unmapped"],"argument":"","description":"Use this to not keep unmapped reads"},{"names":["--b2r_only_contigs"],"argument":"FILENAME","description":"File of contig names (one per line). Only reads that map to these contigs are kept (and unmapped reads, unless --b2r_discard_unmapped is used). Note: the whole assembly is still used as a reference when mapping"},{"names":["--b2r_length_cutoff"],"argument":"INT","description":"All reads mapped to contigs shorter than this will be kept [100000]"},{"names":["--b2r_min_read_length"],"argument":"INT","description":"Minimum length of read to output [250]"},{"names":["--merge_diagdiff"],"argument":"INT","description":"Nucmer diagdiff option [25]"},{"names":["--merge_min_id"],"argument":"FLOAT","description":"Nucmer minimum percent identity [95]"},{"names":["--merge_min_length"],"argument":"INT","description":"Minimum length of hit for nucmer to report [500]"},{"names":["--merge_min_length_merge"],"argument":"INT","description":"Minimum length of nucmer hit to use when merging [4000]"},{"names":["--merge_min_spades_circ_pc"],"argument":"FLOAT","description":"Min percent of contigs needed to be covered by nucmer hits to spades circular contigs [95]"},{"names":["--merge_breaklen"],"argument":"INT","description":"breaklen option used by nucmer [500]"},{"names":["--merge_ref_end"],"argument":"INT","description":"max distance allowed between nucmer hit and end of input assembly contig [15000]"},{"names":["--merge_reassemble_end"],"argument":"INT","description":"max distance allowed between nucmer hit and end of reassembly contig [1000]"},{"names":["--no_pair_merge"],"argument":"","description":"Do not merge pairs of contigs when running merge task"},{"names":["--clean_min_contig_length"],"argument":"INT","description":"Contigs shorter than this are discarded (unless specified using --keep) [2000]"},{"names":["--clean_min_contig_percent"],"argument":"FLOAT","description":"If length of nucmer hit is at least this percentage of length of contig, then contig is removed. (unless specified using --keep) [95]"},{"names":["--clean_diagdiff"],"argument":"INT","description":"Nucmer diagdiff option [25]"},{"names":["--clean_min_nucmer_id"],"argument":"FLOAT","description":"Nucmer minimum percent identity [95]"},{"names":["--clean_min_nucmer_length"],"argument":"INT","description":"Minimum length of hit for nucmer to report [500]"},{"names":["--clean_breaklen"],"argument":"INT","description":"breaklen option used by nucmer [500]"},{"names":["--genes_fa"],"argument":"FILENAME","description":"FASTA file of genes to search for to use as start point. If this option is not used, a built-in set of dnaA genes is used"},{"names":["--fixstart_mincluster"],"argument":"INT","description":"The -c|mincluster option of promer. If this option is used, it overrides promer's default value"},{"names":["--fixstart_min_id"],"argument":"FLOAT","description":"Minimum percent identity of promer match between contigs and gene(s) to use as start point [70]"}]},{"name":"mapreads","description":"Map reads to assembly","usage":"circlator mapreads [options] <reference.fasta> <reads.fasta> <out.bam>","options":[{"names":["-h","--help"],"argument":"","description":"show this help message and exit"},{"names":["--bwa_opts"],"argument":"STRING","description":"BWA options, in quotes [-x pacbio]"},{"names":["--threads"],"argument":"INT","description":"Number of threads [1]"},{"names":["--verbose"],"argument":"","description":"Be verbose"}]},{"name":"bam2reads","description":"Make reads from mapping to be reassembled","usage":"circlator bam2reads [options] <in.bam> <outprefix>","options":[{"names":["-h","--help"],"argument":"","description":"show this help message and exit"},{"names":["--discard_unmapped"],"argument":"","description":"Use this to not keep unmapped reads"},{"names":["--fastq"],"argument":"","description":"Write fastq output (if quality scores are present in input BAM file)"},{"names":["--only_contigs"],"argument":"FILENAME","description":"File of contig names (one per line). Only reads that map to these contigs are kept (and unmapped reads, unless --discard_unmapped is used)."},{"names":["--length_cutoff"],"argument":"INT","description":"All reads mapped to contigs shorter than this will be kept [100000]"},{"names":["--min_read_length"],"argument":"INT","description":"Minimum length of read to output [250]"},{"names":["--split_all_reads"],"argument":"","description":"By default, reads mapped to shorter contigs are left unchanged. This option splits them into two, broken at the middle of the contig to try to force circularization. May help if the assembler does not detect circular contigs (eg canu)"},{"names":["--verbose"],"argument":"","description":"Be verbose"}]},{"name":"assemble","description":"Run assembly using reads from bam2reads","usage":"circlator assemble [options] <in.reads.fasta> <out_dir>","options":[{"names":["-h","--help"],"argument":"","description":"show this help message and exit"},{"names":["--not_careful"],"argument":"","description":"Do not use the --careful option with SPAdes (used by default)"},{"names":["--not_only_assembler"],"argument":"","description":"Do not use the --assemble-only option with SPAdes (used by default)"},{"names":["--threads"],"argument":"INT","description":"Number of threads [1]"},{"names":["--verbose"],"argument":"","description":"Be verbose"},{"names":["--spades_k"],"argument":"k1,k2,k3,...","description":"Comma separated list of kmers to use when running SPAdes. Max kmer is 127 and each kmer should be an odd integer [127,117,107,97,87,77]"},{"names":["--spades_use_first"],"argument":"","description":"Use the first successful SPAdes assembly. Default is to try all kmers and use the assembly with the largest N50"},{"names":["--assembler"],"argument":"{canu,spades}","description":"Assembler to use for reassemblies [spades]"},{"names":["--data_type"],"argument":"{pacbio-raw,pacbio-corrected,nanopore-raw,nanopore-corrected}","description":"String representing one of the 4 type of data analysed (only used for Canu) [pacbio-corrected]"}]},{"name":"merge","description":"Merge original assembly and new assembly made by assemble","usage":"circlator merge [options] <original.fasta> <new.fasta> <outprefix>","options":[{"names":["-h","--help"],"argument":"","description":"show this help message and exit"},{"names":["--diagdiff"],"argument":"INT","description":"Nucmer diagdiff option [25]"},{"names":["--min_id"],"argument":"FLOAT","description":"Nucmer minimum percent identity [95]"},{"names":["--min_length"],"argument":"INT","description":"Minimum length of hit for nucmer to report [500]"},{"names":["--min_length_merge"],"argument":"INT","description":"Minimum length of nucmer hit to use when merging [4000]"},{"names":["--breaklen"],"argument":"INT","description":"breaklen option used by nucmer [500]"},{"names":["--min_spades_circ_pc"],"argument":"FLOAT","description":"Min percent of contigs needed to be covered by nucmer hits to spades circular contigs [95]"},{"names":["--assemble_not_careful"],"argument":"","description":"Do not use the --careful option with SPAdes (used by default)"},{"names":["--assemble_not_only_assembler"],"argument":"","description":"Do not use the --assemble-only option with SPAdes (used by default)"},{"names":["--spades_k"],"argument":"k1,k2,k3,...","description":"Comma separated list of kmers to use when running SPAdes. Max kmer is 127 and each kmer should be an odd integer [127,117,107,97,87,77]"},{"names":["--spades_use_first"],"argument":"","description":"Use the first successful SPAdes assembly. Default is to try all kmers and use the assembly with the largest N50"},{"names":["--assembler"],"argument":"{canu,spades}","description":"Assembler to use for reassemblies [spades]"},{"names":["--data_type"],"argument":"{pacbio-raw,pacbio-corrected,nanopore-raw,nanopore-corrected}","description":"String representing one of the 4 type of data analysed (only used for Canu) [pacbio-corrected]"},{"names":["--b2r_length_cutoff"],"argument":"INT","description":"All reads mapped to contigs shorter than this will be kept [100000]"},{"names":["--b2r_split_all_reads"],"argument":"","description":"By default, reads mapped to shorter contigs are left unchanged. This option splits them into two, broken at the middle of the contig to try to force circularization. May help if the assembler does not detect circular contigs (eg canu)"},{"names":["--ref_end"],"argument":"INT","description":"max distance allowed between nucmer hit and end of input assembly contig [15000]"},{"names":["--reassemble_end"],"argument":"INT","description":"max distance allowed between nucmer hit and end of reassembly contig [1000]"},{"names":["--threads"],"argument":"INT","description":"Number of threads for remapping/assembly (only applies if --reads is used) [1]"},{"names":["--reads"],"argument":"FILENAME","description":"FASTA file of corrected reads that made the new assembly. Using this triggers iterative contig pair merging"},{"names":["--verbose"],"argument":"","description":"Be verbose"}]},{"name":"clean","description":"Remove small and completely contained contigs from assembly","usage":"circlator clean [options] <in.fasta> <outprefix>","options":[{"names":["-h","--help"],"argument":"","description":"show this help message and exit"},{"names":["--min_contig_length"],"argument":"INT","description":"Contigs shorter than this are discarded (unless specified using --keep) [2000]"},{"names":["--min_contig_percent"],"argument":"FLOAT","description":"If length of nucmer hit is at least this percentage of length of contig, then contig is removed. (unless specified using --keep) [95]"},{"names":["--diagdiff"],"argument":"INT","description":"Nucmer diagdiff option [25]"},{"names":["--min_nucmer_id"],"argument":"FLOAT","description":"Nucmer minimum percent identity [95]"},{"names":["--min_nucmer_length"],"argument":"INT","description":"Minimum length of hit for nucmer to report [500]"},{"names":["--breaklen"],"argument":"INT","description":"breaklen option used by nucmer [500]"},{"names":["--keep"],"argument":"FILENAME","description":"File of contig names to keep in output file"},{"names":["--verbose"],"argument":"","description":"Be verbose"}]},{"name":"fixstart","description":"Change start position of circular sequences","usage":"circlator fixstart [options] <assembly.fasta> <outprefix>","options":[{"names":["-h","--help"],"argument":"","description":"show this help message and exit"},{"names":["--genes_fa"],"argument":"FILENAME","description":"FASTA file of genes to search for to use as start point. If this option is not used, a built-in set of dnaA genes is used"},{"names":["--ignore"],"argument":"FILENAME","description":"Absolute path to file of IDs of contigs to not change"},{"names":["--mincluster"],"argument":"INT","description":"The -c|mincluster option of promer. If this option is used, it overrides promer's default value"},{"names":["--min_id"],"argument":"FLOAT","description":"Minimum percent identity of promer match between contigs and gene(s) to use as start point [70]"},{"names":["--verbose"],"argument":"","description":"Be verbose"}]},{"name":"minimus2","description":"Run the minimus2 based circularisation pipeline","usage":"circlator minimus2 [options] <assembly.fasta> <output prefix>","options":[{"names":["-h","--help"],"argument":"","description":"show this help message and exit"},{"names":["--no_pre_merge"],"argument":"","description":"Do not do initial minimus2 run before trying to circularise each contig"}]},{"name":"get_dnaa","description":"Download file of dnaA (or other of user's choice) genes","usage":"circlator get_dnaa [options] <output prefix>","options":[{"names":["-h","--help"],"argument":"","description":"show this help message and exit"},{"names":["--min_length"],"argument":"INT","description":"Minimum length in amino acids [333]"},{"names":["--max_length"],"argument":"INT","description":"Maximum length in amino acids [500]"},{"names":["--uniprot_search"],"argument":"STRING","description":"Uniprot search term [dnaa]"},{"names":["--name_re"],"argument":"STRING","description":"Each sequence name must match this regular expression [Chromosomal replication initiat(or|ion) protein.*dnaa]"},{"names":["--name_re_case_sensitive"],"argument":"","description":"Do a case-sensitive match to regular expression given by --name_re. Default is to ignore case."}]},{"name":"progcheck","description":"Checks dependencies are installed","usage":"circlator progcheck","options":[{"names":["-h","--help"],"argument":"","description":"show this help message and exit"},{"names":["--debug"],"argument":"","description":"Debug mode with very verbose output"}]},{"name":"test","description":"Run Circlator on a small test set","usage":"ariba test [options] <outdir>","options":[{"names":["-h","--help"],"argument":"","description":"show this help message and exit"},{"names":["--threads"],"argument":"INT","description":"Number of threads [1]"}]},{"name":"version","description":"Print version and exit","usage":"circlator <command> [options] <required arguments>","options":[]}],"version":"1.5.5"}
Loading

0 comments on commit 5d5a7e2

Please sign in to comment.