Mercurial > repos > iuc > samblaster
comparison samblaster.xml @ 0:cd001ec3c0c6 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/samblaster commit 82097013a9eb5a6161d400e5b6c493113c440687
| author | iuc |
|---|---|
| date | Mon, 19 Dec 2016 15:18:26 -0500 |
| parents | |
| children | de6d84a9bc8f |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:cd001ec3c0c6 |
|---|---|
| 1 <tool id="samblaster" name="samblaster" version="0.1.24"> | |
| 2 <description>marks duplicates, outputs split reads, discordant read pairs and unmapped reads</description> | |
| 3 <requirements> <!-- samblaster does the duplicate marking; sambamba handles the sorting and SAM/BAM conversion around it in the command below. --> | |
| 4 <requirement type="package" version="0.1.24">samblaster</requirement> | |
| 5 <requirement type="package" version="0.6.5">sambamba</requirement> | |
| 6 </requirements> | |
| 7 <version_command>samblaster --version</version_command> | |
| 8 <command detect_errors="exit_code"><![CDATA[ | |
| 9 #if $input.is_of_type('sam'): | |
| 10 #set stream="<(sambamba view -S -f bam -t ${GALAXY_SLOTS:-4} -h '%s')" % $input | |
| 11 #else: | |
| 12 #set stream="'%s'" % $input | |
| 13 #end if | |
| 14 sambamba view -t \${GALAXY_SLOTS:-4} -h <(sambamba sort -t \${GALAXY_SLOTS:-4} -n $stream -o /dev/stdout) | | |
| 15 samblaster | |
| 16 $output | |
| 17 $discordantFile | |
| 18 $splitterFile | |
| 19 $unmappedFile | |
| 20 $acceptDupMarks | |
| 21 $excludeDups | |
| 22 $removeDups | |
| 23 $addMateTags | |
| 24 $compatibility_mode | |
| 25 --maxSplitCount '$maxSplitCount' | |
| 26 --maxUnmappedBases '$maxUnmappedBases' | |
| 27 --minIndelSize '$minIndelSize' | |
| 28 --minNonOverlap '$minNonOverlap' | |
| 29 --minClipSize '$minClipSize' | |
| 30 #if $output != "-o /dev/null": | |
| 31 && sambamba sort -o output.bam -l 6 -t \${GALAXY_SLOTS:-4} <(sambamba view -S -f bam output.sam) | |
| 32 #end if | |
| 33 #if $discordantFile: | |
| 34 && sambamba sort -o discordant.bam -l 6 -t \${GALAXY_SLOTS:-4} <(sambamba view -S -f bam discordant.sam) | |
| 35 #end if | |
| 36 #if $splitterFile: | |
| 37 && sambamba sort -o splitter.bam -l 6 -t \${GALAXY_SLOTS:-4} <(sambamba view -S -f bam splitter.sam) | |
| 38 #end if | |
| 39 ]]></command> | |
| 40 <inputs> <!-- Boolean truevalues are the literal samblaster flags spliced into the command line: -a accepts existing duplicate marks, -e excludes duplicates from the side files, -r removes duplicates from every output. Each flag must be distinct. --> | |
| 41 <param argument="--input" type="data" format="bam,sam"/> | |
| 42 <param argument="--output" label="Output bam file for all input alignments" type="boolean" checked="true" truevalue="-o output.sam" falsevalue="-o /dev/null"/> | |
| 43 <param argument="--discordantFile" label="Output discordant read pairs?" type="boolean" truevalue="-d discordant.sam" falsevalue=""/> | |
| 44 <param argument="--splitterFile" label="Output split reads?" type="boolean" truevalue="-s splitter.sam" falsevalue=""/> | |
| 45 <param argument="--unmappedFile" label="Output unmapped/clipped reads as FASTQ?" type="boolean" truevalue="-u unmapped.fastq" falsevalue=""/> | |
| 46 <param argument="--acceptDupMarks" label="Accept duplicate marks already in input file instead of looking for duplicates in the input?" type="boolean" truevalue="-a" falsevalue=""/> | |
| 47 <param argument="--excludeDups" label="Exclude reads marked as duplicates from discordant, splitter, and/or unmapped file?" type="boolean" truevalue="-e" falsevalue=""/> | |
| 48 <param argument="--removeDups" label="Remove duplicates reads from all output files?" help="(Implies --excludeDups)" type="boolean" truevalue="-r" falsevalue=""/> | |
| 49 <param argument="--addMateTags" label="Add MC and MQ tags?" type="boolean" truevalue="--addMateTags" falsevalue=""/> | |
| 50 <param name="compatibility_mode" argument="-M" label="Run in compatibility mode?" help="Both 0x100 and 0x800 are considered chimeric. Similar to BWA MEM -M option." type="boolean" truevalue="-M" falsevalue="" /> | |
| 51 <param argument="--maxSplitCount" label="Maximum number of split alignments for a read to be included in splitter file." type="integer" value="2"/> | |
| 52 <param argument="--maxUnmappedBases" label="Maximum number of un-aligned bases between two alignments to be included in splitter file." type="integer" value="50" min="1"/> | |
| 53 <param argument="--minIndelSize" label="Minimum structural variant feature size for split alignments to be included in splitter file." type="integer" value="50" min="1"/> | |
| 54 <param argument="--minNonOverlap" label="Minimum non-overlaping base pairs between two alignments for a read to be included in splitter file." type="integer" value="20" min="1"/> | |
| 55 <param argument="--minClipSize" label="Minumum number of bases a mapped read must be clipped to be included in unmapped file." type="integer" value="20" min="1"/> | |
| 56 </inputs> | |
| 57 <outputs> <!-- Each dataset is picked up from the job working directory via from_work_dir and is only created when the boolean input named in its filter is enabled. --> | |
| 58 <data name="output_bam" format="bam" label="samblaster alignments on ${on_string}" from_work_dir="output.bam"> | |
| 59 <filter>output</filter> | |
| 60 </data> | |
| 61 <data name="discordant_bam" format="bam" label="samblaster discordant alignments on ${on_string}" from_work_dir="discordant.bam"> | |
| 62 <filter>discordantFile</filter> | |
| 63 </data> | |
| 64 <data name="splitter_bam" format="bam" label="samblaster split alignments on ${on_string}" from_work_dir="splitter.bam"> | |
| 65 <filter>splitterFile</filter> | |
| 66 </data> | |
| 67 <data name="unmapped_fastq" format="fastqsanger" label="samblaster unmapped fastq on ${on_string}" from_work_dir="unmapped.fastq"> | |
| 68 <filter>unmappedFile</filter> | |
| 69 </data> | |
| 70 </outputs> | |
| 71 <tests> <!-- The two tests use the same options, first on BAM input and then on SAM input; the discordant output stays disabled in both. Test param names must match the input names exactly. --> | |
| 72 <test> | |
| 73 <param name="input" value="sr.input.bam"/> | |
| 74 <param name="output" value="true"/> | |
| 75 <param name="discordantFile" value="false"/> | |
| 76 <param name="splitterFile" value="true"/> | |
| 77 <param name="unmappedFile" value="true"/> | |
| 78 <output name="output_bam" file="output.bam" compare="sim_size"/> | |
| 79 <output name="splitter_bam" file="splitters.bam" compare="sim_size"/> | |
| 80 <output name="unmapped_fastq"> | |
| 81 <assert_contents> | |
| 82 <has_line line="@M00860:26:000000000-A6UGV:1:1101:10000:6072" /> | |
| 83 </assert_contents> | |
| 84 </output> | |
| 85 </test> | |
| 86 <test> | |
| 87 <param name="input" value="sr.input.sam.gz" ftype="sam"/> | |
| 88 <param name="output" value="true"/> | |
| 89 <param name="discordantFile" value="false"/> | |
| 90 <param name="splitterFile" value="true"/> | |
| 91 <param name="unmappedFile" value="true"/> | |
| 92 <output name="output_bam" file="output.bam" compare="sim_size"/> | |
| 93 <output name="splitter_bam" file="splitters.bam" compare="sim_size"/> | |
| 94 <output name="unmapped_fastq"> | |
| 95 <assert_contents> | |
| 96 <has_line line="@M00860:26:000000000-A6UGV:1:1101:10000:6072" /> | |
| 97 </assert_contents> | |
| 98 </output> | |
| 99 </test> | |
| 100 </tests> | |
| 101 <help><![CDATA[ | |
| 102 | |
| 103 *samblaster* | |
| 104 ============ | |
| 105 | |
| 106 Summary | |
| 107 ------- | |
| 108 | |
| 109 *samblaster* is a fast and flexible program for marking duplicates in | |
| 110 **read-id grouped** paired-end SAM files. It can also optionally output | |
| 111 discordant read pairs and/or split read mappings to separate SAM files, | |
| 112 and/or unmapped/clipped reads to a separate FASTQ file. When marking | |
| 113 duplicates, *samblaster* will require approximately 20MB of memory per | |
| 114 1M read pairs. | |
| 115 | |
| 116 Usage | |
| 117 ----- | |
| 118 | |
| 119 See the `SAM File Format | |
| 120 Specification <http://samtools.sourceforge.net/SAMv1.pdf>`__ for details | |
| 121 about the SAM alignment format. | |
| 122 | |
| 123 By default, samblaster marks duplicates with SAM FLAG 0x400. The | |
| 124 **--removeDups** option will instead remove duplicate alignments from the | |
| 125 output file. | |
| 126 | |
| 127 **ALIGNMENT TYPE DEFINITIONS:** Below, we will use the following | |
| 128 definitions for alignment types. Starting with *samblaster* release | |
| 129 0.1.22, these definitions are affected by the use of the **-M** option. | |
| 130 By default, *samblaster* will use the current definitions of alignment | |
| 131 types as specified in the `SAM | |
| 132 Specification <http://samtools.sourceforge.net/SAMv1.pdf>`__. Namely, | |
| 133 alignments marked with FLAG 0x100 are considered *secondary*, while | |
| 134 those marked with FLAG 0x800 are considered *supplemental*. If the | |
| 135 **-M** option is specified, alignments marked with either FLAG 0x100 or | |
| 136 0x800 are considered *supplemental*, and no alignments are considered | |
| 137 *secondary*. A *primary* alignment is always one that is neither | |
| 138 *secondary* nor *supplemental*. Only *primary* and *supplemental* | |
| 139 alignments are used to find chimeric (split-read) mappings. The **-M** | |
| 140 flag is used for backward compatibility with older SAM/BAM files in | |
| 141 which "chimeric" alignments were marked with FLAG 0x100, and should also | |
| 142 be used with output from more recent runs of *bwa mem* using its **-M** | |
| 143 option. | |
| 144 | |
| 145 **DISCORDANT READ PAIR IDENTIFICATION:** A **discordant** read pair is | |
| 146 one which meets all of the following criteria: | |
| 147 | |
| 148 1. Both sides of the read pair are mapped (neither FLAG 0x4 nor 0x8 is | |
| 149 set). | |
| 150 2. The *properly paired* FLAG (0x2) is not set. | |
| 151 3. *Secondary* or *supplemental* alignments are never output as | |
| 152 discordant, although a discordant read pair can have such alignments | |
| 153 associated with them. | |
| 154 4. Duplicate read pairs that meet the above criteria will be output as | |
| 155 discordant unless the **-e** option is used. | |
| 156 | |
| 157 **UNMAPPED/CLIPPED READ IDENTIFICATION:** An **unmapped** or **clipped** | |
| 158 read is a *primary* alignment that is unaligned over all or part of its | |
| 159 length respectively. The lack of a full alignment may be caused by a SV | |
| 160 breakpoint that falls within the read. Therefore, *samblaster* will | |
| 161 optionally output such reads to a FASTQ file for re-alignment by a tool, | |
| 162 such as `YAHA <https://github.com/GregoryFaust/yaha/>`__, geared toward | |
| 163 finding split-read mappings. *samblaster* applies the following strategy | |
| 164 to identify and output unmapped/clipped reads: | |
| 165 | |
| 166 1. An **unmapped** read has the *unmapped read* FLAG set (0x4). | |
| 167 2. A **clipped** read is a mapped read with a CIGAR string that begins | |
| 168 or ends with at least **--minClipSize** unaligned bases (CIGAR code S | |
| 169 and/or H), and is not from a read that has one or more *supplemental* | |
| 170 alignments. | |
| 171 3. In order for *samblaster* to output the entire sequence for clipped | |
| 172 reads, the input SAM file must have soft clipped primary alignments. | |
| 173 4. *samblaster* will output unmapped/clipped reads into a FASTQ file if | |
| 174 QUAL information is available in the input file, and a FASTA file if | |
| 175 not. | |
| 176 5. Unmapped/clipped reads that are part of a duplicate read pair will be | |
| 177 output unless the **-e** option is used. | |
| 178 | |
| 179 | |
| 180 **Written by:** Greg Faust (gf4ea@virginia.edu) `Ira Hall Lab, | |
| 181 University of Virginia <http://faculty.virginia.edu/irahall/>`__ | |
| 182 | |
| 183 **Please cite:** `Faust, G.G. and Hall, I.M., “\ *SAMBLASTER*: fast | |
| 184 duplicate marking and structural variant read extraction,” | |
| 185 *Bioinformatics* Sept. 2014; **30**\ (17): | |
| 186 2503-2505. <http://bioinformatics.oxfordjournals.org/content/30/17/2503>`__ | |
| 187 | |
| 188 **Also see:** `SAMBLASTER\_Supplemental.pdf | |
| 189 <https://github.com/GregoryFaust/samblaster/raw/master/SAMBLASTER_Supplemental.pdf>`__ | |
| 190 for additional discussion and statistics about the duplicates marked by | |
| 191 *samblaster* vs. *Picard* using the NA12878 sample dataset. Click the | |
| 192 preceding link or download the file from this repository. | |
| 193 **Written by:** Greg Faust (gf4ea@virginia.edu) `Ira Hall Lab, | |
| 194 University of Virginia <http://faculty.virginia.edu/irahall/>`__ | |
| 195 | |
| 196 **Please cite:** `Faust, G.G. and Hall, I.M., “\ *SAMBLASTER*: fast | |
| 197 duplicate marking and structural variant read extraction,” | |
| 198 *Bioinformatics* Sept. 2014; **30**\ (17): | |
| 199 2503-2505. <http://bioinformatics.oxfordjournals.org/content/30/17/2503>`__ | |
| 200 | |
| 201 **Also see:** `SAMBLASTER\_Supplemental.pdf | |
| 202 <https://github.com/GregoryFaust/samblaster/raw/master/SAMBLASTER_Supplemental.pdf>`__ | |
| 203 for additional discussion and statistics about the duplicates marked by | |
| 204 *samblaster* vs. *Picard* using the NA12878 sample dataset. Click the | |
| 205 preceding link or download the file from this repository. | |
| 206 | |
| 207 ]]></help> | |
| 208 <citations> | |
| 209 <citation type="doi">10.1093/bioinformatics/btu314</citation> | |
| 210 </citations> | |
| 211 </tool> |
