Mercurial > repos > rnateam > segemehl
diff segemehl.xml @ 3:e1d38fef6dd5 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/segemehl commit 21aaee40723b5341b4236edeb0e72995c2054053
author | bgruening |
---|---|
date | Fri, 16 Dec 2016 06:25:17 -0500 |
parents | dc63d59e7bf8 |
children | 9ffdddb42700 |
line wrap: on
line diff
--- a/segemehl.xml Sat Jan 18 05:43:29 2014 -0500 +++ b/segemehl.xml Fri Dec 16 06:25:17 2016 -0500 @@ -1,26 +1,37 @@ -<tool id="segemehl" name="segemehl" version="0.1.6.0"> +<tool id="segemehl" name="segemehl" version="0.2.0"> <description>based short read aligner</description> <requirements> - <requirement type="package" version="0.1.6">segemehl</requirement> + <requirement type="package" version="0.2.0">segemehl</requirement> </requirements> + <stdio> + <regex match="Exit forced" + source="both" + level="fatal" + description="Execution halted." /> + </stdio> <command> +<![CDATA[ ## prepare segemehl index if no reference genome is supplied - temp_index = `mktemp`; #if $refGenomeSource.genomeSource == "history": - segemehl.x -x $temp_index -d $refGenomeSource.own_reference_genome; + mkdir ./temp_index/ && + #set $temp_index = './temp_index/temp.idx' + segemehl.x -x $temp_index -d $refGenomeSource.own_reference_genome && #else: - $temp_index = ${refGenomeSource.index.fields.index_path} + #set $temp_index = $refGenomeSource.index.fields.index_path #end if ## execute segemehl segemehl.x - + ## number of threads -t "\${GALAXY_SLOTS:-12}" - ## db file path - -d ${refGenomeSource.index.fields.db_path} + #if $refGenomeSource.genomeSource == "history": + -d $refGenomeSource.own_reference_genome + #else: + -d ${refGenomeSource.index.fields.db_path} + #end if -i $temp_index @@ -28,13 +39,13 @@ #if str( $library.type ) == "single": #set $query_list = list() ## prepare inputs - #for $fastq in $library.reads: - $query_list.append('%s' %($fastq.input_query)) + #for $fastq in $library.input_query: + $query_list.append('%s' % $fastq ) #end for -q "#echo ' '.join( $query_list )#" #else ## prepare inputs - + #set $mate1 = list() #set $mate2 = list() #for $mate_pair in $library.mate_list: @@ -51,26 +62,30 @@ -A $accuracy -H $hitstrategy #if str( $prime5 ).strip(): - -P $prime5 + -P "$prime5" #end if #if str( $prime3 ).strip(): - -Q $prime3 + -Q "$prime3" #end if $polyA $autoclip 
$hardclip $order + $splits + #if $maxout: + --maxout $maxout + #end if -s - -o $segemehl_out + + --minsplicecover $minsplicecover + --minfragscore $minfragscore + --minfraglen $minfraglen + --splicescorescale $splicescorescale + + -o '$segemehl_out' +]]> </command> - <stdio> - <regex match="Exit forced" - source="both" - level="fatal" - description="Execution halted." /> - </stdio> <inputs> - <conditional name="refGenomeSource"> <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options"> <option value="indexed">Use a built-in index</option> @@ -90,22 +105,20 @@ </param> </when> <!-- build-in --> <when value="history"> - <param name="own_reference_genome" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" /> + <param name="own_reference_genome" type="data" format="fasta" label="Select the reference genome" /> </when> <!-- history --> </conditional> <!-- refGenomeSource --> - <conditional name="library"> <param name="type" type="select" label="Is this library paired-end?"> <option value="single">Single-end</option> <option value="paired">Paired-end</option> </param> <when value="single"> - <repeat name="reads" title="FASTQ/FASTA files"> - <param name="input_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads fasta/fastq file" /> - </repeat> + <param name="input_query" type="data" multiple="True" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads in FASTQ/FASTA files" /> </when> <when value="paired"> + <!-- ToDo paired collections --> <repeat name="mate_list" title="Paired End Pairs" min="1"> <param name="first_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from first strand" /> <param name="second_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from second strand" /> @@ -114,47 +127,63 @@ </when> 
</conditional> + <param name="minsplicecover" type="integer" value="80" label="Min coverage for spliced transcripts" help="(--minsplicecover)" /> + <param name="minfragscore" type="integer" value="18" label="Min score of a spliced fragment" help="(--minfragscore)" /> + <param name="minfraglen" type="integer" value="20" label="Min length of a spliced fragment" help="(--minfraglen)" /> + <param name="splicescorescale" type="float" value="1.0" label="Report spliced alignment with score greater than this scale times the score" + help="Report only if this value x score is larger than next best spliced alignment (--splicescorescale)" /> - <param name="minsize" type="integer" value="12" size="5" label="Minimum size of queries" help="default: 12 (-m)"> - <validator type="in_range" min="1"/> - </param> - <param name="accuracy" type="integer" value="85" size="5" label="Min percentage of matches per read in semi-global alignment" help="default: 85 (-A)" > - <validator type="in_range" min="1" max="100"/> - </param> + <param name="minsize" type="integer" value="12" min="1" label="Minimum size of queries" help="(-m)" /> + + <param name="maxout" type="integer" min="0" value="0" optional="True" + label="Maximum number of alignments that will be reported" help="(--maxout)" /> + <param name="accuracy" type="integer" value="85" min="1" max="100" label="Min percentage of matches per read in semi-global alignment" help="(-A)" /> + <param name="hitstrategy" type="select" label="Hits to report?" 
help="(-H)"> <option value="1">report only best scoring hits</option> <option value="0">report all scoring hits</option> </param> - <param name="prime5" type="text" size="80" label="add 5' adapter" help="default: none (-Q)" /> - <param name="prime3" type="text" size="80" label="add 3' adapter" help="default: none (-P)"/> + <param name="prime5" type="text" label="add 5' adapter" help="default: none (-P)" /> + <param name="prime3" type="text" label="add 3' adapter" help="default: none (-Q)"/> <param name="polyA" type="boolean" truevalue="--polyA" falsevalue="" checked="false" label="Clip polyA tail" help="(-T)"/> <param name="autoclip" type="boolean" truevalue="--autoclip" falsevalue="" checked="false" label="Autoclip unknown 3prime adapter" help="(-Y)"/> - <param name="hardclip" type="boolean" truevalue="--hardclip" falsevalue="" checked="false" label="Enable hard clipping" help="-C"/> + <param name="hardclip" type="boolean" truevalue="--hardclip" falsevalue="" checked="false" label="Enable hard clipping" help="(-C)"/> <param name="order" type="boolean" truevalue="--order" falsevalue="" checked="false" label="Sorts the output by chromosome and position" help="(-O)"/> + <param name="splits" type="boolean" truevalue="--splits" falsevalue="" checked="false" label="Detect split/spliced reads" help="(--splits)"/> </inputs> - <outputs> <data format="sam" name="segemehl_out" label="Read alignments on ${on_string}"/> </outputs> + <tests> + <test> + <param name="genomeSource" value="history" /> + <param name="own_reference_genome" value="chr1.fa" /> + <param name="library" value="single" /> + <param name="input_query" value="test.fastq" /> + <param name="splits" value="true" /> + <output name="segemehl_out" file="testmap.sam" lines_diff="2" /> + </test> + </tests> <help> +<![CDATA[ .. class:: infomark -**What it does** +**What it does** Segemehl_ is a short read mapper with gaps. -Segemehl_ is a software to map short sequencer reads to reference genomes. 
-Unlike other methods, segemehl is able to detect not only mismatches but also insertions and deletions. -Furthermore, segemehl is not limited to a specific read length and is able to mapprimer- or polyadenylation contaminated reads correctly. +Segemehl_ is a software to map short sequencer reads to reference genomes. +Unlike other methods, segemehl is able to detect not only mismatches but also insertions and deletions. +Furthermore, segemehl is not limited to a specific read length and is able to map primer- or polyadenylation-contaminated reads correctly. segemehl implements a matching strategy based on enhanced suffix arrays (ESA). Segemehl_ allows bisulfite sequencing mapping and split read mapping. .. _Segemehl: http://www.bioinf.uni-leipzig.de/Software/segemehl/ -**References** -Hoffmann S, Otto C, Kurtz S, Sharma CM, Khaitovich P, Vogel J, Stadler PF, Hackermueller J: "Fast mapping of short sequences with mismatches, insertions and deletions using index structures", PLoS Comput Biol (2009) vol. 5 (9) pp. e1000502 -download latest version: 0.1.6 manual: download here new stuff: faster multiple split read mapping bug fixes: bugfixes: increased sensitivity for strand switches changes: - default accuracy now 90% older segemehl indices are still usable. issues: untraceable errors with gcc compiler gcc-4.5. zlib linker problems with some ubuntu versions complaint department: steve bioinf uni leipzig deshapeimage_1_link_0shapeimage_1_link_1 - +]]> </help> + <citations> + <citation type="doi">10.1371/journal.pcbi.1000502</citation> + </citations> </tool>