CLIFinder.xml

<tool name="CLIFinder" id="CLIFinder" version="0.5.2" profile="16.01">
    <description>Find chimeric transcripts containing LINE sequences</description>
    <macros>
        <!-- Reference selector backed by the bwa_mem_indexes data table: the user
             picks either a built-in index or a FASTA dataset from the history.
             Tokens: ARG (argument shown for the reference), BUILD (build flag
             quoted in the help text), REF (human-readable name used in labels). -->
        <xml name="source_bwa" token_arg="Argument" token_build="Build argument" token_ref="">
            <conditional name="source">
                <param name="source"
                       type="select"
                       label="Will you select the reference database from your history or use a built-in index?">
                    <option value="indexed">Use a built-in index</option>
                    <option value="history">Use one from the history</option>
                </param>
                <when value="indexed">
                    <param name="indices" type="select" argument="@ARG@" label="Select @REF@">
                        <options from_data_table="bwa_mem_indexes">
                            <filter column="2" type="sort_by"/>
                            <validator message="No indexes are available" type="no_options"/>
                        </options>
                    </param>
                </when>
                <when value="history">
                    <param name="file"
                           type="data"
                           format="fasta"
                           argument="@ARG@"
                           label="Select @REF@ from history"
                           help="We will also use @BUILD@"/>
                </when>
            </conditional>
        </xml>
        <!-- Blast database selector: built-in database (blastdb data table), a
             FASTA dataset from the history, or a URL typed by the user.
             Tokens: ARG (argument shown for the database), BUILD (build flag
             quoted in the help text), REF (human-readable name used in labels).
             The built-in list is sorted on column 1 because the standard blastdb
             table is laid out as value, name, path; column 2 would sort on the
             path instead of the displayed name. NOTE(review): confirm the local
             blastdb table uses that standard layout.
             The URL field rejects an empty value so that the command never
             receives an empty database argument. -->
        <xml name="source_blast" token_arg="Argument" token_build="Build argument" token_ref="">
            <conditional name="source">
                <param name="source" type="select" label="Will you select the reference database from your history or use a built-in index?">
                    <option value="indexed">Use a built-in index</option>
                    <option value="history">Generate one from the history</option>
                    <option value="url">Download one from some URL</option>
                </param>
                <when value="indexed">
                    <param name="indices" argument="@ARG@" type="select" label="Select @REF@">
                        <options from_data_table="blastdb">
                            <filter type="sort_by" column="1" />
                            <validator type="no_options" message="No indexes are available" />
                        </options>
                    </param>
                </when>
                <when value="history">
                    <param name="file" argument="@ARG@" type="data" format="fasta" label="Select @REF@ from history"  help="We will also use @BUILD@"/>
                </when>
                <when value="url">
                    <param name="file" argument="@ARG@" type="text" label="Download @REF@ from URL"  help="We will not use @BUILD@: please provide link to tar.gz">
                        <validator type="empty_field" message="Please provide the URL of the database to download" />
                    </param>
                </when>
            </conditional>
        </xml>
    </macros>
    <!-- Packages that must be resolvable for this tool, each pinned to an exact
         version: command-line bioinformatics tools, wget, Perl with its modules,
         and R with its packages.
         NOTE(review): the tool exposes RNA/EST Blast database options, but no
         blast package is listed here; confirm whether one is needed. -->
    <requirements>
        <!-- Sequence, alignment and annotation command-line tools -->
        <requirement type="package" version="1.3">seqtk</requirement>
        <requirement type="package" version="1.9">samtools</requirement>
        <requirement type="package" version="2.26.0gx">bedtools</requirement>
        <requirement type="package" version="4.0.9_p2">repeatmasker</requirement>
        <requirement type="package" version="0.7.17">bwa</requirement>
        <requirement type="package" version="0.0.14">fastx_toolkit</requirement>
        <requirement type="package" version="1.20.1">wget</requirement>
        <!-- Perl interpreter and modules -->
        <requirement type="package" version="5.26.2">perl</requirement>
        <requirement type="package" version="2.50">perl-getopt-long</requirement>
        <requirement type="package" version="0.45">perl-file-copy-recursive</requirement>
        <requirement type="package" version="2.02">perl-parallel-forkmanager</requirement>
        <requirement type="package" version="0.34">perl-statistics-r</requirement>
        <!-- R interpreter and packages -->
        <requirement type="package" version="3.5.1">r-base</requirement>
        <requirement type="package" version="1.8.5">r-plyr</requirement>
        <requirement type="package" version="1.34.0">bioconductor-genomicranges</requirement>
    </requirements>
    <!-- Asks the bundled script for its version and keeps the third
         space-separated field of the first output line, provided that line
         contains the word "version". -->
    <version_command>perl '$__tool_directory__/script/CLIFinder.pl' --version | head -n 1 | grep 'version' | cut -d ' ' -f 3</version_command>
    <command detect_errors="aggressive"><![CDATA[
perl '$__tool_directory__/script/CLIFinder.pl'

    ## Read pairs: one first/name/second triple per pair, always in that order.
    #if str($inputs.type) == 'paired_collection'
        #for $pair in $inputs.fastq
            --first '$pair.forward'
            --name '$pair.name'
            --second '$pair.reverse'
        #end for
    #else
        ## Distinct datasets: the label is either typed by the user or taken
        ## from the name of the first dataset of the pair.
        #set $custom_label = str($inputs.datasets.custom_name) == 'true'
        #for $entry in $inputs.datasets.fastq
            --first '$entry.first'
            #if $custom_label
                --name '$entry.name'
            #else
                --name '$entry.first.name'
            #end if
            --second '$entry.second'
        #end for
    #end if

    ## Reference genome: a history dataset is passed together with the build flag.
    #if str($genome.source.source) == 'history'
        --ref '$genome.source.file'
        --build_ref
    #else
        --ref '$genome.source.indices.fields.path'
    #end if

    ## Transposable elements: same scheme as the reference genome.
    #if str($te.source.source) == 'history'
        --TE '$te.source.file'
        --build_TE
    #else
        --TE '$te.source.indices.fields.path'
    #end if

    ## Optional RNA Blast database: built-in path, history dataset (with the
    ## build flag) or URL text.
    #if str($rnadb.blast.run) == 'true'
        #set $rna_origin = str($rnadb.blast.source.source)
        #if $rna_origin == 'indexed'
            --rnadb '$rnadb.blast.source.indices.fields.path'
        #else
            --rnadb '$rnadb.blast.source.file'
            #if $rna_origin == 'history'
                --build_rnadb
            #end if
        #end if
    #end if

    ## Optional EST Blast database: same scheme as the RNA database.
    #if str($estdb.blast.run) == 'true'
        #set $est_origin = str($estdb.blast.source.source)
        #if $est_origin == 'indexed'
            --estdb '$estdb.blast.source.indices.fields.path'
        #else
            --estdb '$estdb.blast.source.file'
            #if $est_origin == 'history'
                --build_estdb
            #end if
        #end if
    #end if

    ## The species argument is only passed when the field is not empty.
    #if str($species) != ''
        --species '$species'
    #end if

    --rmsk '$rmsk'
    --refseq '$refseq'
    --html '$chimerae'
    --html_path '${chimerae.files_path}'
    --size_insert '$size_insert'
    --size_read '$size'
    --min_unique '$min_unique'
    --BDir '$BDir'
    --min_L1 '$min_L1'
    --mis_L1 '$mis_L1'
    --threads "\${GALAXY_SLOTS:-4}"
        ]]>
    </command>
    <!-- User-facing parameters.
         * "inputs": paired reads given either as repeated pairs of datasets
           (optionally with a custom label) or as a list:paired collection.
         * "genome" / "te": references selected through the source_bwa macro.
         * "rnadb" / "estdb": optional Blast databases selected through the
           source_blast macro.
         * Remaining parameters: annotation tables and numeric thresholds that
           the command template forwards to the script.
         The BDir select carries an explicit label and argument so that it is
         rendered like the other parameters; its name is unchanged. -->
    <inputs>
        <conditional name="inputs">
            <param name="type" type="select" label="Input Type">
                <option value="datasets" selected="true">Distinct datasets</option>
                <option value="paired_collection">Paired collection</option>
            </param>
            <when value="datasets">
                <conditional name="datasets">
                    <param name="custom_name" type="select" label="Use custom name for the input sequence files?">
                        <option value="true">Yes</option>
                        <option value="false" selected="true">No: the names will be extracted automatically</option>
                    </param>
                    <when value="true">
                        <repeat name="fastq" title="Input sequences" min="1">
                            <param argument="--first" type="data" format="fastqsanger" label="First set of paired-end reads"/>
                            <param argument="--name" type="text" value="" label="Label for the input sequences"/>
                            <param argument="--second" type="data" format="fastqsanger" label="Second set of paired-end reads"/>
                        </repeat>
                    </when>
                    <when value="false">
                        <repeat name="fastq" title="Input sequences" min="1">
                            <param argument="--first" type="data" format="fastqsanger" label="First set of paired-end reads"/>
                            <param argument="--second" type="data" format="fastqsanger" label="Second set of paired-end reads"/>
                        </repeat>
                    </when>
                </conditional>
            </when>
            <when value="paired_collection">
                <param name="fastq" format="fastqsanger" type="data_collection" collection_type="list:paired" label="Select paired collection" help="Specify paired dataset collection containing paired reads"/>
            </when>
        </conditional>
        <section name="genome" title="Reference genome" expanded="true">
            <expand macro="source_bwa" arg="--ref" build="--build_ref" ref="a reference genome"/>
        </section>
        <section name="te" title="Transposable Elements" expanded="true">
            <expand macro="source_bwa" arg="--TE" build="--build_TE" ref="reference TE sequences"/>
        </section>
        <section name="rnadb" title="RNA Blast database" expanded="true">
            <conditional name="blast">
                <param name="run" type="select" label="Should blast be run?">
                    <option value="true">Yes</option>
                    <option value="false">No</option>
                </param>
                <when value="true">
                    <expand macro="source_blast" arg="--rnadb" build="--build_rnadb" ref="reference RNA sequences"/>
                </when>
                <when value="false" />
            </conditional>
        </section>
        <section name="estdb" title="EST Blast database" expanded="true">
            <conditional name="blast">
                <param name="run" type="select" label="Should blast be run?">
                    <option value="true">Yes</option>
                    <option value="false">No</option>
                </param>
                <when value="true">
                    <expand macro="source_blast" arg="--estdb" build="--build_estdb" ref="reference EST sequences"/>
                </when>
                <when value="false" />
            </conditional>
        </section>
        <param argument="--rmsk" type="data" format="tabular" label="Tab-delimited text file (with headers) containing reference repeat sequences (e.g. rmsk track from UCSC)"/>
        <param argument="--refseq" type="data" format="tabular" label="Tab-delimited file (with headers) containing reference genes (e.g. RefGene.txt from UCSC)"/>
        <param argument="--BDir" name="BDir" type="select" label="Orientation of reads">
            <option value="0">Undirectional libraries</option>
            <option value="1">TEs sequences in first read in pair</option>
            <option value="2">TEs sequences in second read in pair</option>
        </param>
        <param argument="--size_read" name="size" type="integer" value="100" label="Reads size"/>
        <param argument="--size_insert" type="integer" value="250" label="Maximum insert size (bp)"/>
        <param argument="--min_L1" type="integer" value="50" label="Minimum bp mapping on selected TEs database"/>
        <param argument="--mis_L1" type="integer" value="1" label="Number of mismatches tolerated in TEs mapping sequences"/>
        <param argument="--min_unique" type="integer" value="33" label="Minimum consecutive bp corresponding to a unique sequence"/>
        <param argument="--species" type="text" value="human" label="Species or clade of the input sequence (the species name must be a valid NCBI Taxonomy Database species name and be contained in the RepeatMasker repeat database)"/>
    </inputs>
    <outputs>
        <!-- Single HTML report. The command template hands this dataset to the
             script as the html argument and its files_path as the html_path
             argument. -->
        <data name="chimerae"
              format="html"
              label="${tool.name}_on_${on_string}"/>
    </outputs>
    <tests>
        <!-- One pair of distinct datasets with a custom label; the genome, the
             TE set and both Blast databases are all taken from history files.
             The HTML report and three of its extra files are compared by diff. -->
        <test>
            <conditional name="inputs">
                <param name="type" value="datasets"/>
                <conditional name="datasets">
                    <param name="custom_name" value="true"/>
                    <repeat name="fastq">
                        <param name="first" ftype="fastqsanger" value="one.fastq"/>
                        <param name="name" value="test"/>
                        <param name="second" ftype="fastqsanger" value="two.fastq"/>
                    </repeat>
                </conditional>
            </conditional>
            <section name="genome">
                <conditional name="source">
                    <param name="source" value="history"/>
                    <param name="file" value="genome.fa"/>
                </conditional>
            </section>
            <section name="te">
                <conditional name="source">
                    <param name="source" value="history"/>
                    <param name="file" value="TE.fa"/>
                </conditional>
            </section>
            <section name="rnadb">
                <conditional name="blast">
                    <param name="run" value="true"/>
                    <conditional name="source">
                        <param name="source" value="history"/>
                        <param name="file" value="rna-small.fa.gz"/>
                    </conditional>
                </conditional>
            </section>
            <section name="estdb">
                <conditional name="blast">
                    <param name="run" value="true"/>
                    <conditional name="source">
                        <param name="source" value="history"/>
                        <param name="file" value="est-small.fa.gz"/>
                    </conditional>
                </conditional>
            </section>
            <param name="rmsk" value="rmsk-small.txt"/>
            <param name="refseq" value="refseq-small.txt"/>
            <param name="BDir" value="0"/>
            <param name="size" value="100"/>
            <param name="size_insert" value="500"/>
            <param name="min_L1" value="30"/>
            <param name="mis_L1" value="6"/>
            <param name="min_unique" value="30"/>
            <output name="chimerae" file="res.html" compare="diff" lines_diff="0">
                <extra_files type="file"
                             name="results.txt"
                             value="res_files/results.txt"
                             compare="diff"/>
                <extra_files type="file"
                             name="first_results.txt"
                             value="res_files/first_results.txt"
                             compare="diff"/>
                <extra_files type="file"
                             name="final_result_chimerae.txt"
                             value="res_files/final_result_chimerae.txt"
                             compare="diff"/>
            </output>
        </test>
    </tests>
    <help>
        <![CDATA[
**Usage:**

  `CLIFinder.pl --first <first fastq of paired-end set 1> --name <name 1> --second <second fastq of paired-end set 1> [--first <first fastq of paired-end set 2> --name <name 2> --second <second fastq of paired-end set 2> ...] --ref <reference genome> [--build_ref] --TE <transposable elements> [--build_TE] --html <results.html> --html_path <results directory> [options]`

**Arguments:**
  --first      First fastq file to process from paired-end set
  --name       Name of the content to process
  --second     Second fastq file to process from paired-end set
  --ref        Fasta file containing the reference genome
  --TE         Fasta file containing the transposable elements
  --rmsk       Tab-delimited text file (with headers) containing reference repeat sequences (e.g. rmsk track from UCSC)
  --refseq     Tab-delimited file (with headers) containing reference genes (e.g. RefGene.txt from UCSC)
  --html       Main HTML file where results will be displayed
  --html_path  Folder where results will be stored

For any fasta file, if a bwa index is not provided, you should build it through the corresponding *--build_[element]* argument

**Options:**
  --rnadb        Blast database with RNA sequences (optional)
  --estdb        Blast database with EST sequences (optional)
  --size_read    Size of reads (default: 100)
  --BDir         Orientation of reads (0: undirectional libraries, 1: TEs sequences in first read in pair, 2: TEs sequences in second read in pair)
  --size_insert  Maximum size of insert tolerated between R1 and R2 for alignment on the reference genome (default: 250)
  --min_L1       Minimum number of bp matching for L1 mapping (default: 50)
  --mis_L1       Maximum number of mismatches tolerated for L1 mapping (default: 1)
  --min_unique   Number of consecutive bp not annotated by RepeatMasker (default: 33)
  --species      Species to use in RepeatMasker (default: human)
  --threads      Number of threads (default: 1)

For Blast database files, if a fasta is provided, the database can be built with '--build_[db]'. Otherwise, provide a path or URL. "tar(.gz)" files are acceptable, as well as wild card (rna*) URLs.

        ]]>
    </help>
    <!-- Reference to cite for this tool, given as a DOI. -->
    <citations>
        <citation type="doi">10.1093/bioinformatics/btx671</citation>
    </citations>
</tool>