Mercurial > repos > rnateam > segemehl

diff segemehl.xml @ 3:e1d38fef6dd5 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/segemehl commit 21aaee40723b5341b4236edeb0e72995c2054053
author: bgruening
date: Fri, 16 Dec 2016 06:25:17 -0500
parents: dc63d59e7bf8
children: 9ffdddb42700
--- a/segemehl.xml	Sat Jan 18 05:43:29 2014 -0500
+++ b/segemehl.xml	Fri Dec 16 06:25:17 2016 -0500
@@ -1,26 +1,37 @@
-<tool id="segemehl" name="segemehl" version="0.1.6.0">
+<tool id="segemehl" name="segemehl" version="0.2.0">
     <description>based short read aligner</description>
     <requirements>
-        <requirement type="package" version="0.1.6">segemehl</requirement>
+        <requirement type="package" version="0.2.0">segemehl</requirement>
     </requirements>
+    <stdio>
+        <regex match="Exit forced"
+           source="both"
+           level="fatal"
+           description="Execution halted." />
+    </stdio>
     <command>
+<![CDATA[
         ## prepare segemehl index if no reference genome is supplied
-        temp_index = `mktemp`;
         #if $refGenomeSource.genomeSource == "history":
-            segemehl.x -x $temp_index -d $refGenomeSource.own_reference_genome;
+            mkdir ./temp_index/ &&
+	    #set $temp_index = './temp_index/temp.idx'
+	    segemehl.x -x $temp_index -d $refGenomeSource.own_reference_genome &&
         #else:
-            $temp_index = ${refGenomeSource.index.fields.index_path}
+            #set $temp_index = $refGenomeSource.index.fields.index_path
         #end if
 
 
         ## execute segemehl
         segemehl.x
-        
+
         ## number of threads
         -t "\${GALAXY_SLOTS:-12}"
 
-        ## db file path
-        -d ${refGenomeSource.index.fields.db_path}
+        #if $refGenomeSource.genomeSource == "history":
+	    -d $refGenomeSource.own_reference_genome
+        #else:
+            -d ${refGenomeSource.index.fields.db_path}
+        #end if
 
         -i $temp_index
 
@@ -28,13 +39,13 @@
         #if str( $library.type ) == "single":
             #set $query_list = list()
             ## prepare inputs
-            #for $fastq in $library.reads:
-                $query_list.append('%s' %($fastq.input_query))
+            #for $fastq in $library.input_query:
+                $query_list.append('%s' % $fastq )
             #end for
             -q "#echo ' '.join( $query_list )#"
         #else
             ## prepare inputs
-            
+
             #set $mate1 = list()
             #set $mate2 = list()
             #for $mate_pair in $library.mate_list:
@@ -51,26 +62,30 @@
         -A $accuracy
         -H $hitstrategy
         #if str( $prime5 ).strip():
-            -P $prime5
+            -P "$prime5"
         #end if
         #if str( $prime3 ).strip():
-            -Q $prime3
+            -Q "$prime3"
         #end if
         $polyA
         $autoclip
         $hardclip
         $order
+	$splits
+        #if $maxout:
+            --maxout $maxout
+        #end if
         -s
-        -o $segemehl_out
+
+        --minsplicecover $minsplicecover
+        --minfragscore $minfragscore
+        --minfraglen $minfraglen
+        --splicescorescale $splicescorescale
+
+        -o '$segemehl_out'
+]]>
     </command>
-    <stdio>
-        <regex match="Exit forced" 
-           source="both" 
-           level="fatal" 
-           description="Execution halted." />
-    </stdio>
     <inputs>
-
         <conditional name="refGenomeSource">
           <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
             <option value="indexed">Use a built-in index</option>
@@ -90,22 +105,20 @@
             </param>
           </when>  <!-- built-in -->
           <when value="history">
-            <param name="own_reference_genome" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" />
+              <param name="own_reference_genome" type="data" format="fasta" label="Select the reference genome" />
           </when>  <!-- history -->
         </conditional>  <!-- refGenomeSource -->
 
-
         <conditional name="library">
             <param name="type" type="select" label="Is this library paired-end?">
                 <option value="single">Single-end</option>
                 <option value="paired">Paired-end</option>
             </param>
             <when value="single">
-                <repeat name="reads" title="FASTQ/FASTA files">
-                    <param name="input_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads fasta/fastq file" />
-                </repeat>
+                <param name="input_query" type="data" multiple="True" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads in FASTQ/FASTA files" />
             </when>
             <when value="paired">
+                <!-- ToDo: paired collections -->
                 <repeat name="mate_list" title="Paired End Pairs" min="1">
                     <param name="first_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from first strand" />
                     <param name="second_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from second strand" />
@@ -114,47 +127,63 @@
             </when>
         </conditional>
 
+        <param name="minsplicecover" type="integer" value="80" label="Min coverage for spliced transcripts" help="(--minsplicecover)" />
+        <param name="minfragscore" type="integer" value="18" label="Min score of a spliced fragment" help="(--minfragscore)" />
+        <param name="minfraglen" type="integer" value="20" label="Min length of a spliced fragment" help="(--minfraglen)" />
+        <param name="splicescorescale" type="float" value="1.0" label="Report spliced alignment with score greater than this scale times the score"
+            help="Report only if this value x score is larger than next best spliced alignment (--splicescorescale)" />
 
-        <param name="minsize" type="integer" value="12" size="5" label="Minimum size of queries" help="default: 12 (-m)">
-            <validator type="in_range" min="1"/>
-        </param>
-        <param name="accuracy" type="integer" value="85" size="5" label="Min percentage of matches per read in semi-global alignment" help="default: 85 (-A)" >
-            <validator type="in_range" min="1" max="100"/>
-        </param>
+        <param name="minsize" type="integer" value="12" min="1" label="Minimum size of queries" help="(-m)" />
+
+        <param name="maxout" type="integer" min="0" value="0" optional="True" 
+            label="Maximum number of alignments that will be reported" help="(--maxout)" />
+        <param name="accuracy" type="integer" value="85" min="1" max="100" label="Min percentage of matches per read in semi-global alignment" help="(-A)" />
+
         <param name="hitstrategy" type="select" label="Hits to report?" help="(-H)">
             <option value="1">report only best scoring hits</option>
             <option value="0">report all scoring hits</option>
         </param>
-        <param name="prime5" type="text" size="80" label="add 5' adapter" help="default: none (-Q)" />
-        <param name="prime3" type="text" size="80" label="add 3' adapter" help="default: none (-P)"/>
+        <param name="prime5" type="text" label="add 5' adapter" help="default: none (-P)" />
+        <param name="prime3" type="text" label="add 3' adapter" help="default: none (-Q)"/>
         <param name="polyA" type="boolean" truevalue="--polyA" falsevalue="" checked="false" label="Clip polyA tail" help="(-T)"/>
         <param name="autoclip" type="boolean" truevalue="--autoclip" falsevalue="" checked="false" label="Autoclip unknown 3prime adapter" help="(-Y)"/>
-        <param name="hardclip" type="boolean" truevalue="--hardclip" falsevalue="" checked="false" label="Enable hard clipping" help="-C"/>
+        <param name="hardclip" type="boolean" truevalue="--hardclip" falsevalue="" checked="false" label="Enable hard clipping" help="(-C)"/>
         <param name="order" type="boolean" truevalue="--order" falsevalue="" checked="false" label="Sorts the output by chromosome and position" help="(-O)"/>
+        <param name="splits" type="boolean" truevalue="--splits" falsevalue="" checked="false" label="Detect split/spliced reads" help="(--splits)"/>
     </inputs>
-
     <outputs>
         <data format="sam" name="segemehl_out" label="Read alignments on ${on_string}"/>
     </outputs>
+    <tests>
+      <test>
+	<param name="genomeSource" value="history" />
+        <param name="own_reference_genome" value="chr1.fa" />
+	<param name="type" value="single" />
+	<param name="input_query" value="test.fastq" />	
+	<param name="splits" value="true" />
+        <output name="segemehl_out" file="testmap.sam" lines_diff="2" />
+      </test>
+    </tests>
     <help>
+<![CDATA[
 
 .. class:: infomark
 
-**What it does** 
+**What it does**
 
 Segemehl_ is a short read mapper with gaps.
 
-Segemehl_ is a software to map short sequencer reads to reference genomes. 
-Unlike other methods, segemehl is able to detect not only mismatches but also insertions and deletions. 
-Furthermore, segemehl is not limited to a specific read length and is able to mapprimer- or polyadenylation contaminated reads correctly. 
+Segemehl_ is software for mapping short sequencer reads to reference genomes.
+Unlike other methods, segemehl is able to detect not only mismatches but also insertions and deletions.
+Furthermore, segemehl is not limited to a specific read length and is able to map primer- or polyadenylation-contaminated reads correctly.
 segemehl implements a matching strategy based on enhanced suffix arrays (ESA). Segemehl_ allows bisulfite sequencing mapping and split read mapping.
 
 .. _Segemehl: http://www.bioinf.uni-leipzig.de/Software/segemehl/
 
-**References**
 
-Hoffmann S, Otto C, Kurtz S, Sharma CM, Khaitovich P, Vogel J, Stadler PF, Hackermueller J: "Fast mapping of short sequences with mismatches, insertions and deletions using index structures", PLoS Comput Biol (2009) vol. 5 (9) pp. e1000502
-download latest version: 0.1.6 manual: download here new stuff: faster multiple split read mapping bug fixes: bugfixes: increased sensitivity for strand switches changes: - default accuracy now 90% older segemehl indices are still usable. issues: untraceable errors with gcc compiler gcc-4.5. zlib linker problems with some ubuntu versions complaint department: steve bioinf uni leipzig deshapeimage_1_link_0shapeimage_1_link_1
-
+]]>
     </help>
+    <citations>
+        <citation type="doi">10.1371/journal.pcbi.1000502</citation>
+    </citations>
 </tool>
author	bgruening
date	Fri, 16 Dec 2016 06:25:17 -0500
parents	dc63d59e7bf8
children	9ffdddb42700