diff blat.xml @ 5:70d7377d5e24 draft

planemo upload commit 7856c637db5bd4ea0b8b4db63e242618421a9cc6-dirty
author yating-l
date Wed, 01 Feb 2017 17:16:02 -0500
parents 9e56efe1c371
children 6f06b6d68c0b
line wrap: on
line diff
--- a/blat.xml	Tue Jan 31 18:31:42 2017 -0500
+++ b/blat.xml	Wed Feb 01 17:16:02 2017 -0500
@@ -1,5 +1,6 @@
+<?xml version="1.0"?>
 <tool id="ucsc_blat" name="UCSC BLAT Alignment Tool" version="1.0">
-    <description>Rapidly align sequences to the genome</description>
+    <description>Standalone blat sequence search command line tool</description>
     <requirements>
       <requirement type="package" version="1.0">ucsc_tools_340_for_BLAT</requirement>
     </requirements>
@@ -13,8 +14,46 @@
         -mask=$mask
         '${database}'
         '${query}'
-        '${output}'
-
+        output
+    && sort -k 10,10 -k 12,12n output > '${output_sorted}'
+    && pslReps -minAli=0.25 '${output_sorted}' output.reps.psl output.reps.psr
+    && faPolyASizes '${query}' query.polyA
+    ## Optional post-processing: pslCDnaFilter runs only when the user enables filtering.
+    #if $filter_param.filter == "yes"
+      && pslCDnaFilter
+            ## Thresholds are chosen from the cDNA origin and the assembly category.
+            #if $filter_param.assembly_type == "native"
+                  -localNearBest=0.001
+                  #if $filter_param.assembly_category == "finished"
+                        -minId=0.95 -minCover=0.25
+                  #else if $filter_param.assembly_category == "well-ordered"
+                        -minId=0.95 -minCover=0.15
+                  #else
+                        ## Any other category; the remaining choice in the form is low-coverage.
+                        -minId=0.94 -minAlnSize=80
+                  #end if
+            #else
+                  ## Non-native cDNA; the other choice in the form is xeno.
+                  -localNearBest=0.010
+                  #if $filter_param.assembly_category == "finished"
+                        -minId=0.35 -minCover=0.25
+                  #else if $filter_param.assembly_category == "well-ordered"
+                        -minId=0.35 -minCover=0.15
+                  #else
+                        -minId=0.33 -minAlnSize=80
+                  #end if
+            #end if
+            ## Options applied regardless of the selections above.
+            -minQSize=20
+            -ignoreIntrons
+            -repsAsMatch
+            -ignoreNs
+            -bestOverlap
+            -polyASizes=query.polyA
+            ## Input is the pslReps result; filtered alignments go to the output_filtered dataset.
+            output.reps.psl
+            '${output_filtered}'
+    #end if
 ]]></command>
       <inputs>
             <param type="data" name="database" format="fasta" />
@@ -40,93 +79,27 @@
                   <option value="out">out - mask according to database.out RepeatMasker .out file</option>
                   <option value="file.out">file.out - mask database according to RepeatMasker file.out</option>
             </param>
-            <!--<conditional name="database" format="fasta">
-                  <param type="select" name="database_type" format="text" multiple="false" label="database type" help="Choose your database type, the default is dna">
-                        <option value="dna">DNA sequence</option>
-                        <option value="prot">protein sequence</option>
-                        <option value="dnax">DNA sequence translated in six frames to protein</option>
+            <conditional name="filter_param">
+                  <param name="filter" type="select" label="Filter BLAT results with pslCDnaFilter">
+                        <option value="no" selected="true">No</option>
+                        <option value="yes">Yes</option>
                   </param>
-        <when value="dna">
-             <param type="integer" name="tileSize" value="11" min="1" max="12" label="tileSize" help="Sets the size of match that triggers an alignment. Usually between 8 and 12">tileSize</param>
-             <param name="minMatch" type="integer" value="2" label="Sets the number of tile matches.  Usually set from 2 to 4.
-                  Default is 2 for nucleotide, 1 for protein.">-minMatch</param>
-            <param name="minIdentity" type="integer" value="90" label="Sets minimum sequence identity (in percent).  Default is
-                  90 for nucleotide searches, 25 for protein or translated
-                  protein searches.">-minIdentity</param>
-            
-        </when>
-        <when value="prot">  
-            <param type="integer" name="tileSize" value="5" min="1" max="12" label="tileSize" help="Sets the size of match that triggers an alignment. Usually between 8 and 12">tileSize</param>
-            <param name="minMatch" type="integer" value="1" label="Sets the number of tile matches.  Usually set from 2 to 4.
-                  Default is 2 for nucleotide, 1 for protein.">-minMatch</param>
-            <param name="minIdentity" type="integer" value="25" label="Sets minimum sequence identity (in percent).  Default is
-                  90 for nucleotide searches, 25 for protein or translated
-                  protein searches.">-minIdentity</param>
-        </when> 
-    </conditional>
-    <param type="select" name="query_type" format="text" multiple="false" label="query type" help="Choose your query type, the default is dna">
-        <option value="dna">DNA sequence</option>
-        <option value="rna">RNA sequence</option>
-        <option value="prot">protein sequence</option>
-        <option value="dnax">DNA sequence translated in six frames to protein</option>
-        <option value="rnax">DNA sequence translated in three frames to protein</option>
-    </param>
-    <conditional name="settings">
-        <param name="advanced" type="select" multiple="false" label="Specify advanced parameters">
-            <option value="simple" selected="true">No, use program defaults. </option>
-            <option value="advanced">Yes, see full parameter list.</option>
-        </param>
-        <when value="advanced">
-            <param name="mask" type="select" label="Mask out repeats" help="Alignments won't be started in masked region
-                  but may extend through it in nucleotide searches.  Masked areas
-                  are ignored entirely in protein or translated searches.">
-                  <option value="lower">lower - mask out lower-cased sequence</option>
-                  <option value="upper">upper - mask out upper-cased sequence</option>
-                  <option value="out">out - mask according to database.out RepeatMasker .out file</option>
-                  <option value="file.out">file.out - mask database according to RepeatMasker file.out</option>
-            </param>
-            <param name="qmask" type="select" label="Mask out repeats in query sequence" help="Similar to -mask above, but
-                  for query rather than target sequence.">
-                  <option value="lower">lower - mask out lower-cased sequence</option>
-                  <option value="upper">upper - mask out upper-cased sequence</option>
-                  <option value="out">out - mask according to database.out RepeatMasker .out file</option>
-                  <option value="file.out">file.out - mask database according to RepeatMasker file.out</option>
-            </param>
-            <param name="oneOff" type="integer" value="0" label="If set to 1, this allows one mismatch in tile and still triggers an alignment. Default is 0.">-oneOff</param>
-            <param name="minScore" type="integer" value="30" label="Sets minimum score.  This is the matches minus the
-                  mismatches minus some sort of gap penalty.  Default is 30.">-minScore</param>
-            <param name="maxGap" type="integer" value="2" label="Sets the size of maximum gap between tiles in a clump.  Usually
-                  set from 0 to 3.  Default is 2. Only relevent for minMatch > 1.">-maxGap</param>
-            <param name="minRepDivergence" type="integer" value="15" min="0" max="100" label="Minimum percent divergence of repeats to allow
-                  them to be unmasked.  Default is 15.  Only relevant for
-                  masking using RepeatMasker .out files.">-minRepDivergence</param>
-            <param name="noHead" type="boolean" value="false" label="Suppresses .psl header (so it's just a tab-separated file)." />
-            <param name="dots" type="integer" value="0" label="Output dot every N sequences to show program's progress." />
-            <param name="trimT" type="boolean" value="false" label="Trim leading poly-T." />
-            <param name="trimHardA" type="boolean" value="false" label="Remove poly-A tail from qSize as well as alignments in
-                  psl output." />
-            <param name="fastMap" type="boolean" value="false" label="Run for fast DNA/DNA remapping - not allowing introns,
-                  requiring high %ID. Query sizes must not exceed 5000." />
-            <param name="fine" type="boolean" value="false" label="For high-quality mRNAs, look harder for small initial and
-                  terminal exons.  Not recommended for ESTs." />
-            <param name="out" type="select" label="Output file format">
-                <option value="psl" selected="true">psl - Default.  Tab-separated format, no sequence</option>
-                <option value="pslx">pslx - Tab-separated format with sequence</option>
-                <option value="axt">axt - blastz-associated axt format</option>
-                <option value="maf">maf - multiz-associated maf format</option>
-                <option value="sim4">sim4 - similar to sim4 format</option>
-                <option value="wublast">wublast - similar to wublast format</option>
-                <option value="blast">blast - similar to NCBI blast format</option>
-                <option value="blast8">blast8- NCBI blast tabular format</option>
-                <option value="blast9">blast9 - NCBI blast tabular format with comments</option>
-           </param>
-           <param name="maxIntro" type="integer" value="750000" label="Sets maximum intron size. Default is 750000." />
-           <param name="extendThroughN" type="boolean" value="false" label="Allows extension of alignment through large blocks of Ns." />
-      </when>
-    </conditional>-->
+                  <when value="yes"> <!-- these selections pick the pslCDnaFilter thresholds in the command template -->
+                        <param name="assembly_type" type="select" label="Choose your type of cDNA sequence">
+                              <option value="native">Same species</option>
+                              <option value="xeno">Across species</option>
+                        </param>
+                        <param name="assembly_category" type="select" label="Choose your genome assembly category">
+                              <option value="finished">finished assemblies (high quality)</option>
+                              <option value="well-ordered">well-ordered assemblies (well ordered, whole genome shotgun)</option>
+                              <option value="low-coverage">low-coverage assemblies (low coverage (&lt; 4x), lots of contigs, N50 scaffold size &lt; 1 Mb)</option>
+                        </param>
+                  </when>
+            </conditional>
       </inputs>
       <outputs>
-            <data format="psl" name="output"></data>
+            <data format="psl" name="output_sorted" label="${tool.name} on ${on_string}: sorted alignments"></data> <!-- always written: BLAT hits after sort -->
+            <data format="psl" name="output_filtered" label="${tool.name} on ${on_string}: filtered alignments"><filter>filter_param['filter'] == 'yes'</filter></data> <!-- only created when filtering is enabled, since the command writes it only then -->
       </outputs>
   <tests>
       <test>
@@ -136,9 +109,64 @@
             <param name="query_type" value="rnax" />     
             <param name="noHead" value="true" />
             <param name="mask" value="lower" />
-            <output name="output" value="amaVit1_Gallus_gallus.psl" />
+            <param name="filter" value="yes" />
+            <param name="assembly_type" value="xeno" />
+            <param name="assembly_category" value="well-ordered" />
+            <output name="output_sorted" value="amaVit1_Gallus_gallus.psl" />
+            <output name="output_filtered" value="amaVit1_Gallus_gallus_filtered.psl" />
       </test>
-  </tests>           
+  </tests> 
+  <help>
+        <![CDATA[
+BLAT
+====
+BLAT is a bioinformatics software tool that performs rapid mRNA/DNA and cross-species protein alignments.
+
+blat (version: v340) - Standalone blat sequence search command line tool
+----------------------------------------------------------------------------
+usage:
+++++++
+   blat database query [-ooc=11.ooc] output.psl
+where:
+   database and query are each either a .fa, .nib or .2bit file,
+      or a list of these files with one file name per line.
+   -ooc=11.ooc tells the program to load over-occurring 11-mers from
+      an external file.  This will increase the speed
+      by a factor of 40 in many cases, but is not required.
+   output.psl is the name of the output file.   
+documentation:
+++++++++++++++
+See Blat documentation (http://genome.ucsc.edu/goldenPath/help/blatSpec.html)  
+Source code:
+++++++++++++
+http://hgdownload.cse.ucsc.edu/admin/exe/
+pslCDnaFilter (version: v340)
+------------------------------
+Filter cDNA alignments in psl format. Filtering criteria are comparative, selecting near best in genome alignments for each given cDNA and non-comparative, based only on the quality of an individual alignment.
+usage:
+++++++
+      pslCDnaFilter [options] inPsl outPsl
+Source code:
+++++++++++++
+http://hgdownload.cse.ucsc.edu/admin/exe/
+
+Licence
+=======
+Please note that commercial download and installation of the Blat and In-Silico PCR software may be licensed through Kent Informatics (http://www.kentinformatics.com).
+]]>
+</help>  
+<citations> <!-- Reference for BLAT itself (Kent 2002, Genome Research), given as a BibTeX entry. -->
+      <citation type="bibtex">@article{kent2002blat,
+  title={BLAT—the BLAST-like alignment tool},
+  author={Kent, W James},
+  journal={Genome research},
+  volume={12},
+  number={4},
+  pages={656--664},
+  year={2002},
+  publisher={Cold Spring Harbor Lab}
+      }</citation>
+</citations> 
 </tool>