view blat.xml @ 5:70d7377d5e24 draft

planemo upload commit 7856c637db5bd4ea0b8b4db63e242618421a9cc6-dirty
author yating-l
date Wed, 01 Feb 2017 17:16:02 -0500
parents 9e56efe1c371
children 6f06b6d68c0b
line wrap: on
line source

<?xml version="1.0"?>
<tool id="ucsc_blat" name="UCSC BLAT Alignment Tool" version="1.0">
    <description>Standalone blat sequence search command line tool</description>
    <!-- Tool dependency resolved by Galaxy. Presumably this package supplies the
         blat, pslReps, faPolyASizes and pslCDnaFilter executables invoked by the
         command section (TODO confirm against the package definition). -->
    <requirements>
      <requirement type="package" version="1.0">ucsc_tools_340_for_BLAT</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
    ## Step 1: align the query against the database; raw PSL is written to ./output.
    blat
        #if $noHead
          -noHead
        #end if
        -q=$query_type
        -t=$database_type
        -mask=$mask
        '${database}'
        '${query}'
        output
    ## Step 2: order alignments by query name (PSL column 10), then query start (column 12).
    #if $noHead
      && sort -k 10,10 -k 12,12n output > '${output_sorted}'
    #else
      ## With the header enabled, blat emits a 5-line PSL header. Sorting it together
      ## with the data rows would scatter the header lines through the file, so the
      ## header is copied first and only the alignment rows are sorted.
      && ( head -n 5 output && tail -n +6 output | sort -k 10,10 -k 12,12n ) > '${output_sorted}'
    #end if
    ## Step 3 (optional): the pslReps and faPolyASizes results are only consumed by
    ## pslCDnaFilter, so they are computed only when filtering was requested.
    #if $filter_param.filter == "yes"
      && pslReps -minAli=0.25 '${output_sorted}' output.reps.psl output.reps.psr
      && faPolyASizes '${query}' query.polyA
      && pslCDnaFilter
            ## Thresholds depend on cDNA origin (same species vs. across species)
            ## and on the quality category of the genome assembly.
            #if $filter_param.assembly_type == "native"
                  -localNearBest=0.001
                  #if $filter_param.assembly_category == "finished"
                        -minId=0.95
                        -minCover=0.25
                  #else if $filter_param.assembly_category == "well-ordered"
                        -minId=0.95
                        -minCover=0.15
                  #else
                        -minId=0.94
                        -minAlnSize=80
                  #end if
            #else
                  -localNearBest=0.010
                  #if $filter_param.assembly_category == "finished"
                        -minId=0.35
                        -minCover=0.25
                  #else if $filter_param.assembly_category == "well-ordered"
                        -minId=0.35
                        -minCover=0.15
                  #else
                        -minId=0.33
                        -minAlnSize=80
                  #end if
            #end if
            -minQSize=20
            -ignoreIntrons
            -repsAsMatch
            -ignoreNs
            -bestOverlap
            -polyASizes=query.polyA
            output.reps.psl
            '${output_filtered}'
    #end if
]]></command>
      <inputs>
            <!-- Sequences to align: the database is searched with the query. -->
            <param type="data" name="database" format="fasta" label="Database sequence (FASTA)" />
            <param type="data" name="query" format="fasta" label="Query sequence (FASTA)" />
            <!-- Passed to blat as -t=; the first option is the default selection. -->
            <param type="select" name="database_type" multiple="false" label="database type" help="Choose your database type, the default is dna">
                  <option value="dna">DNA sequence</option>
                  <option value="prot">protein sequence</option>
                  <option value="dnax">DNA sequence translated in six frames to protein</option>
            </param>
            <!-- Passed to blat as -q=; the first option is the default selection. -->
            <param type="select" name="query_type" multiple="false" label="query type" help="Choose your query type, the default is dna">
                  <option value="dna">DNA sequence</option>
                  <option value="rna">RNA sequence</option>
                  <option value="prot">protein sequence</option>
                  <option value="dnax">DNA sequence translated in six frames to protein</option>
                  <option value="rnax">DNA sequence translated in three frames to protein</option>
            </param>
            <!-- Adds -noHead to the blat call when checked; unchecked by default. -->
            <param name="noHead" type="boolean" checked="false" label="Suppresses .psl header (so it's just a tab-separated file)." />
            <!-- Passed to blat as -mask=. -->
            <param name="mask" type="select" label="Mask out repeats" help="Alignments won't be started in masked region but may extend through it in nucleotide searches. Masked areas are ignored entirely in protein or translated searches.">
                  <option value="lower">lower - mask out lower-cased sequence</option>
                  <option value="upper">upper - mask out upper-cased sequence</option>
                  <option value="out">out - mask according to database.out RepeatMasker .out file</option>
                  <option value="file.out">file.out - mask database according to RepeatMasker file.out</option>
            </param>
            <!-- Optional post-filtering; the nested choices select the pslCDnaFilter thresholds. -->
            <conditional name="filter_param">
                  <param name="filter" type="select" label="Filter BLAT results with pslCDnaFilter">
                        <option value="no" selected="true">No</option>
                        <option value="yes">Yes</option>
                  </param>
                  <when value="yes">
                        <param name="assembly_type" type="select" label="Choose your type of cDNA sequence">
                              <option value="native">Same species</option>
                              <option value="xeno">Across species</option>
                        </param>
                        <param name="assembly_category" type="select" label="Choose your genome assembly category">
                              <option value="finished">finished assemblies (high quality)</option>
                              <option value="well-ordered">well-ordered assemblies (well ordered, whole genome shotgun)</option>
                              <option value="low-coverage">low-coverage assemblies (low coverage (&lt; 4x), lots of contigs, N50 scaffold size &lt; 1mb)</option>
                        </param>
                  </when>
            </conditional>
      </inputs>
      <outputs>
            <!-- BLAT alignments sorted by query name and query start; always produced. -->
            <data format="psl" name="output_sorted" label="${tool.name} on ${on_string}: sorted alignments" />
            <!-- pslCDnaFilter result. The command only writes this file when filtering
                 is enabled, so the dataset is only created in that case; otherwise an
                 empty dataset would be left in the history. -->
            <data format="psl" name="output_filtered" label="${tool.name} on ${on_string}: filtered alignments">
                  <filter>filter_param['filter'] == 'yes'</filter>
            </data>
      </outputs>
  <tests>
      <!-- Cross-species, translated search with header suppression and filtering enabled. -->
      <test>
            <param name="database" value="amaVit1.fa" />
            <param name="query" value="Gallus_gallus_RefSeq.fa" />
            <param name="database_type" value="dnax" />
            <param name="query_type" value="rnax" />
            <param name="noHead" value="true" />
            <param name="mask" value="lower" />
            <conditional name="filter_param">
                  <param name="filter" value="yes" />
                  <param name="assembly_type" value="xeno" />
                  <param name="assembly_category" value="well-ordered" />
            </conditional>
            <output name="output_sorted" value="amaVit1_Gallus_gallus.psl" />
            <output name="output_filtered" value="amaVit1_Gallus_gallus_filtered.psl" />
      </test>
  </tests>
  <help>
        <![CDATA[
BLAT
====
BLAT is a bioinformatics tool that performs rapid mRNA/DNA and cross-species protein alignments.

blat (version: v340) - Standalone blat sequence search command line tool
--------------------------------------------------------------------------------
usage:
++++++
   blat database query [-ooc=11.ooc] output.psl
where:
   database and query are each either a .fa, .nib or .2bit file,
      or a list of these files with one file name per line.
   -ooc=11.ooc tells the program to load over-occurring 11-mers from
      an external file.  This will increase the speed
      by a factor of 40 in many cases, but is not required.
   output.psl is the name of the output file.   

documentation:
++++++++++++++
See Blat documentation (http://genome.ucsc.edu/goldenPath/help/blatSpec.html)

Source code:
++++++++++++
http://hgdownload.cse.ucsc.edu/admin/exe/

pslCDnaFilter (version: v340)
--------------------------------------------------------------------------------
Filter cDNA alignments in psl format. Filtering criteria are both comparative (selecting the near-best in-genome alignments for each given cDNA) and non-comparative (based only on the quality of an individual alignment).

usage:
++++++
      pslCDnaFilter [options] inPsl outPsl

Source code:
++++++++++++
http://hgdownload.cse.ucsc.edu/admin/exe/

Licence
=======
Please note that commercial download and installation of the Blat and In-Silico PCR software may be licensed through Kent Informatics (http://www.kentinformatics.com).
]]>
</help>  
<!-- BibTeX entry for the BLAT publication (Kent, 2002, Genome Research). -->
<citations>
      <citation type="bibtex">@article{kent2002blat,
  title={BLAT—the BLAST-like alignment tool},
  author={Kent, W James},
  journal={Genome research},
  volume={12},
  number={4},
  pages={656--664},
  year={2002},
  publisher={Cold Spring Harbor Lab}
      }</citation>
</citations> 
</tool>