Mercurial > repos > devteam > bamtools_filter
annotate bamtools-filter.xml @ 1:5e2fe70292a7 draft
planemo upload commit 5ad726dc73203a704666033cd3bf70b82575978f-dirty
| author | devteam |
|---|---|
| date | Wed, 26 Aug 2015 14:58:44 -0400 |
| parents | db10554eaad9 |
| children | 39e21f756379 |
| rev | line source |
|---|---|
| 0 | 1 <tool id="bamFilter" name="Filter" version="0.0.1"> |
| 2 <description>BAM datasets on a variety of attributes</description> | |
| 3 <requirements> | |
| 4 <requirement type="package" version="2.3.0_2d7685d2ae">bamtools</requirement> | |
| 5 </requirements> | |
| 6 <command> | |
|
1
5e2fe70292a7
planemo upload commit 5ad726dc73203a704666033cd3bf70b82575978f-dirty
devteam
parents:
0
diff
changeset
|
7 cat $script_file > $out_file2; |
| 0 | 8 |
| 9 #for $bam_count, $input_bam in enumerate( $input_bams ): | |
| 10 ln -s "${input_bam.input_bam}" "localbam_${bam_count}.bam" && | |
| 11 ln -s "${input_bam.input_bam.metadata.bam_index}" "localbam_${bam_count}.bam.bai" && | |
| 12 #end for | |
| 13 | |
| 14 bamtools | |
| 15 filter | |
| 16 -script $script_file | |
| 17 | |
| 18 #for $bam_count, $input_bam in enumerate( $input_bams ): | |
| 19 -in "localbam_${bam_count}.bam" | |
| 20 #end for | |
| 21 -out $out_file1 | |
| 22 </command> | |
| 23 <inputs> | |
| 24 <repeat name="input_bams" title="BAM dataset(s) to filter" min="1"> | |
| 25 <param name="input_bam" type="data" format="bam" label="BAM dataset" /> | |
| 26 </repeat> | |
| 27 <repeat name="conditions" title="Condition" min="1"> | |
| 28 <repeat name="filters" title="Filter" min="1"> | |
| 29 <conditional name="bam_property"> | |
| 30 <param name="bam_property_selector" type="select" label="Select BAM property to filter on"> | |
| 31 <option value="alignmentFlag"/> | |
| 32 <option value="cigar"/> | |
| 33 <option value="insertSize"/> | |
| 34 <option value="isDuplicate"/> | |
| 35 <option value="isFailedQC"/> | |
| 36 <option value="isFirstMate"/> | |
| 37 <option value="isMapped"/> | |
| 38 <option value="isMateMapped"/> | |
| 39 <option value="isMateReverseStrand"/> | |
| 40 <option value="isPaired"/> | |
| 41 <option value="isPrimaryAlignment"/> | |
| 42 <option value="isProperPair"/> | |
| 43 <option value="isReverseStrand"/> | |
| 44 <option value="isSecondMate"/> | |
| 45 <option selected="True" value="mapQuality"/> | |
| 46 <option value="matePosition"/> | |
| 47 <option value="mateReference"/> | |
| 48 <option value="name"/> | |
| 49 <option value="position"/> | |
| 50 <option value="queryBases"/> | |
| 51 <option value="reference"/> | |
| 52 <option value="tag"/> | |
| 53 </param> | |
| 54 <!-- would be fantastic to have AND and OR constructs in when statements --> | |
| 55 <when value="alignmentFlag"> | |
| 56 <param name="bam_property_value" type="integer" value="3" label="Filter on this alignment flag" help="Default (3) is for a paired read mapped in a proper pair"/> | |
| 57 </when> | |
| 58 <when value="cigar"> | |
| 59 <param name="bam_property_value" type="text" size="10" value="101M" label="Filter on this CIGAR string" help="Default (101M) is for 101 continuously matched bases"/> | |
| 60 </when> | |
| 61 <when value="insertSize"> | |
| 62 <param name="bam_property_value" type="text" size="10" value=">=250" label="Filter on inster size" help="You can use >, <, =, and ! (not) in your expression. E.g., to select pairs with inster size above 250 nt use ">=250""> | |
| 63 <sanitizer invalid_char=""> | |
| 64 <valid initial="string.letters,string.digits"><add value=">"/><add value="<"/><add value="!="/></valid> | |
| 65 </sanitizer> | |
| 66 </param> | |
| 67 </when> | |
| 68 <when value="isDuplicate"> | |
| 69 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads makwed as duplicates" help="Checked = Read IS Duplicate, Empty = Read is NOT Duplicate" /> | |
| 70 </when> | |
| 71 <when value="isFailedQC"> | |
| 72 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads failing QC" help="Checked = Failed QC, Empty = Passed QC"/> | |
| 73 </when> | |
| 74 <when value="isFirstMate"> | |
| 75 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select first mate in a read pair" help="Checked = is first mate, Empty = is NOT first mate"/> | |
| 76 </when> | |
| 77 <when value="isMapped"> | |
| 78 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Selected mapped reads" help="Checked = Mapped, Empty = NOT mapped"/> | |
| 79 </when> | |
| 80 <when value="isMateMapped"> | |
| 81 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads with mapped mate" help="Checked = Mate IS mapped Empty = Mate is NOT mapped"/> | |
| 82 </when> | |
| 83 <when value="isMateReverseStrand"> | |
| 84 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads with mate on the reverse strand" help="Checked = Mate IS on reverse strand, Empty = Mate is NOT on the reverse strand"/> | |
| 85 </when> | |
| 86 <when value="isPaired"> | |
| 87 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select paired reads" help="Checked = Read IS paired, Empty = Read is NOT paired"/> | |
| 88 </when> | |
| 89 <when value="isPrimaryAlignment"> | |
| 90 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select BAM records for primary alignments" help="Checked = Alignment IS primary, Empty = Alignment is NOT primary"/> | |
| 91 </when> | |
| 92 <when value="isProperPair"> | |
| 93 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select properly paired reads" help="Checked = Read IS in proper pair, Empty = Read is NOT in the proper pair"/> | |
| 94 </when> | |
| 95 <when value="isReverseStrand"> | |
| 96 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads in the reverse strand only" help="Checked = Read IS on the reverse strand, Empty = Read is NOT on the reverse strand"/> | |
| 97 </when> | |
| 98 <when value="isSecondMate"> | |
| 99 <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select second mate in a read pair" help="Checked = Read IS second mate, Empty = Read is NOT second mate"/> | |
| 100 </when> | |
| 101 <when value="mapQuality"> | |
| 102 <param name="bam_property_value" type="text" value="20" label="Filter on read mapping quality (phred scale)" help="You can use >, <, =, and ! (not) in your expression. E.g., to select reads with mapping quality of at least 30 use ">=30""> | |
| 103 <sanitizer invalid_char=""> | |
| 104 <valid initial="string.letters,string.digits"><add value=">"/><add value="<"/><add value="!="/></valid> | |
| 105 </sanitizer> | |
| 106 </param> | |
| 107 </when> | |
| 108 <when value="matePosition"> | |
| 109 <param name="bam_property_value" type="text" value="1000000" label="Filter on the position of the mate" help="You can use >, <, =, and ! (not) in your expression. E.g., to select reads with mate (second end) mapping after position 1,000,000 use ">1000000""> | |
| 110 <sanitizer invalid_char=""> | |
| 111 <valid initial="string.letters,string.digits"><add value=">"/><add value="<"/><add value="!="/></valid> | |
| 112 </sanitizer> | |
| 113 </param> | |
| 114 </when> | |
| 115 <when value="mateReference"> | |
| 116 <param name="bam_property_value" type="text" value="chr22" label="Filter on reference name for the mate" help="You can use = and ! (not) in your expression. E.g., to select reads with mates mapping to chrM use "chr22""> | |
| 117 <sanitizer invalid_char=""> | |
| 118 <valid initial="string.letters,string.digits"><add value=">"/><add value="<"/><add value="!="/></valid> | |
| 119 </sanitizer> | |
| 120 </param> | |
| 121 </when> | |
| 122 <when value="name"> | |
| 123 <param name="bam_property_value" type="text" label="Filter on read name" help="You can use = and ! (not) in your expression."> | |
| 124 <sanitizer invalid_char=""> | |
| 125 <valid initial="string.letters,string.digits"><add value=">"/><add value="<"/><add value="!="/></valid> | |
| 126 </sanitizer> | |
| 127 </param> | |
| 128 </when> | |
| 129 <when value="position"> | |
| 130 <param name="bam_property_value" type="text" value="500000" label="Filter on the position of the read" help="You can use >, <, =, and ! (not) in your expression. E.g., to select reads mapping after position 5,000 use ">5000""> | |
| 131 <sanitizer invalid_char=""> | |
| 132 <valid initial="string.letters,string.digits"><add value=">"/><add value="<"/><add value="!="/></valid> | |
| 133 </sanitizer> | |
| 134 </param> | |
| 135 </when> | |
| 136 <when value="queryBases"> | |
| 137 <param name="bam_property_value" type="text" value="ttagggttagg" label="Filter on a sequence motif" help="You can use ! (not) in your expression"> | |
| 138 <sanitizer invalid_char=""> | |
| 139 <valid initial="string.letters,string.digits"><add value=">"/><add value="<"/><add value="!="/></valid> | |
| 140 </sanitizer> | |
| 141 </param> | |
| 142 </when> | |
| 143 <when value="reference"> | |
| 144 <param name="bam_property_value" type="text" value="chr22" label="Filter on the reference name for the read" help="You can use ! (not) in your expression"> | |
| 145 <sanitizer invalid_char=""> | |
| 146 <valid initial="string.letters,string.digits"><add value=">"/><add value="<"/><add value="!="/></valid> | |
| 147 </sanitizer> | |
| 148 </param> | |
| 149 </when> | |
| 150 <when value="tag"> | |
| 151 <param name="bam_property_value" type="text" value="NM:>1" label="Filter on a particular tag" help="You can use >, <, =, and ! (not). | |
| 152 Tag name and its value must be separated by ":". E.g., to obtain reads with at least one mismatch use "NM:>1""> | |
| 153 <sanitizer invalid_char=""> | |
| 154 <valid initial="string.letters,string.digits"><add value=">"/><add value="<"/><add value=":!="/></valid> | |
| 155 </sanitizer> | |
| 156 </param> | |
| 157 </when> | |
| 158 </conditional> | |
| 159 </repeat> | |
| 160 </repeat> | |
| 161 <conditional name="rule_configuration"> | |
| 162 <param name="rules_selector" type="boolean" truevalue="true" falsevalue="false" label="Would you like to set rules?" help="Allows complex logical constructs. See Example 4 below." /> | |
| 163 <when value="true"> | |
| 164 <param name="rules" type="text" size="20" label="Enter rules here" help="This option can only be used with at least two conditions. Read help below (Example 4) to understand how it works." > | |
| 165 <sanitizer invalid_char=""> | |
| 166 <valid initial="string.printable"/> | |
| 167 </sanitizer> | |
| 168 </param> | |
| 169 </when> | |
| 170 </conditional> | |
| 171 </inputs> | |
| 172 | |
| 173 <configfiles> | |
| 174 <configfile name="script_file"> | |
| 175 ##Sets up a json configfile for bamtools filter | |
| 176 ##If there is more than one condition prints brackets and "filters:" | |
| 177 #if len( $conditions ) > 1 | |
| 178 { | |
| 179 "filters": | |
| 180 [ | |
| 181 #end if | |
| 182 #for $i, $c in enumerate( $conditions, start=1 ) | |
| 183 { "id": "$i", | |
| 184 #for $j, $s in enumerate( $c.filters, start=1 ) | |
| 185 ##The if below takes care of the comma at the end of last condition within group | |
| 186 #if $j != len( $c.filters) | |
| 187 "${s.bam_property.bam_property_selector}":"${s.bam_property.bam_property_value}", | |
| 188 #else | |
| 189 "${s.bam_property.bam_property_selector}":"${s.bam_property.bam_property_value}" | |
| 190 #end if | |
| 191 #end for | |
| 192 ##The if below takes care of the comma at the end of last condition within group | |
| 193 #if $i != len( $conditions ) | |
| 194 }, | |
| 195 #else | |
| 196 } | |
|
1
5e2fe70292a7
planemo upload commit 5ad726dc73203a704666033cd3bf70b82575978f-dirty
devteam
parents:
0
diff
changeset
|
197 #end if |
|
5e2fe70292a7
planemo upload commit 5ad726dc73203a704666033cd3bf70b82575978f-dirty
devteam
parents:
0
diff
changeset
|
198 #end for |
| 0 | 199 #if len( $conditions ) > 1 |
| ##rules_selector is a boolean param with truevalue="true", so its string form is "true" (not "True") | |
| 200 #if str( $rule_configuration.rules_selector ) == "true": | |
| 201 ], | |
| 202 "rule" : "${rule_configuration.rules}" | |
| 203 #else | |
| 204 ] | |
| 205 #end if | |
| 206 } | |
| 207 #end if | |
| 208 </configfile> | |
| 209 </configfiles> | |
| 210 | |
| 211 <outputs> | |
| 212 <data format="txt" name="out_file2" /> | |
| 213 <data format="bam" name="out_file1" /> | |
| 214 </outputs> | |
| 215 <tests> | |
| 216 <test> | |
| 217 <param name="input_bam" ftype="bam" value="bamtools-input1.bam"/> | |
| 218 <param name="bam_property_selector" value="mapQuality"/> | |
| 219 <param name="bam_property_value" value=">20"/> | |
| 220 <output name="out_file1" file="bamtools-test1.bam" ftype="bam"/> | |
| 221 </test> | |
| 222 </tests> | |
| 223 <help> | |
| 224 **What it does** | |
| 225 | |
| 226 BAMTools filter is a very powerful utility to perform complex filtering of BAM files. It is based on the BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools). | |
| 227 | |
| 228 ----- | |
| 229 | |
| 230 **How it works** | |
| 231 | |
| 232 The tool's logic relies on three concepts: (1) input BAM, (2) groups, and (3) filters. | |
| 233 | |
| 234 *Input BAM(s)* | |
| 235 | |
| 236 The input BAM is self-explanatory. This is the dataset you will be filtering. The tool can accept just one or multiple BAM files. To filter on multiple BAMs just add them by clicking **Add new BAM dataset(s) to filter** | |
| 237 | |
| 238 *Conditions and Filters* | |
| 239 | |
| 240 Conditions for filtering BAM files can be arranged in **Groups and Filters**. While it can be confusing at first, this is what gives this tool its power. Take a look at the examples supplied below. | |
| 241 | |
| 242 ----- | |
| 243 | |
| 244 **Example 1. Using a single filter** | |
| 245 | |
| 246 When filtering on a single condition there is no need to worry about filters and conditions. Just choose a filter from the **Select BAM property to filter on:** dropdown and enter a value (or click a checkbox for binary filters). | |
| 247 For example, for retaining reads with mapping quality of at least 20 one would set the tool interface as shown below: | |
| 248 | |
|
1
5e2fe70292a7
planemo upload commit 5ad726dc73203a704666033cd3bf70b82575978f-dirty
devteam
parents:
0
diff
changeset
|
249 .. image:: images/single-filter.png |
| 0 | 250 |
| 251 ----- | |
| 252 | |
| 253 **Example 2. Using multiple filters** | |
| 254 | |
| 255 Now suppose one needs to extract reads that (1) have mapping quality of at least 20, (2) contain at least 1 mismatch, and (3) are mapping onto forward strand only. | |
| 256 To do so we will use three filters as shown below (multiple filters are added to the interface by clicking on the **Add new Filter** button): | |
| 257 | |
|
1
5e2fe70292a7
planemo upload commit 5ad726dc73203a704666033cd3bf70b82575978f-dirty
devteam
parents:
0
diff
changeset
|
258 .. image:: images/multiple-filters.png |
| 0 | 259 |
| 260 In this case (you can see that the three filters are grouped within a single Condition - **Condition 1**) the filter tool uses logical **AND** to perform filtering. | |
| 261 In other words only reads that (1) have mapping quality of at least 20 **AND** (2) contain at least 1 mismatch **AND** (3) are mapping onto the forward strand will be returned in this example. | |
| 262 | |
| 263 ----- | |
| 264 | |
| 265 **Example 3. Complex filtering with multiple conditions** | |
| 266 | |
| 267 Suppose now you would like to select **either** reads that (**1**) have (*1.1*) no mismatches and (*1.2*) are on the forward strand **OR** (**2**) reads that have (*2.1*) | |
| 268 at least one mismatch and (*2.2*) are on the reverse strand. In this scenario we have to set up two conditions: (**1**) and (**2**) each with two filters: *1.1* and *1.2* as well as *2.1* and *2.2*. | |
| 269 The following screenshot explains how this can be done: | |
| 270 | |
|
1
5e2fe70292a7
planemo upload commit 5ad726dc73203a704666033cd3bf70b82575978f-dirty
devteam
parents:
0
diff
changeset
|
271 .. image:: images/complex-filters.png |
| 0 | 272 |
| 273 ----- | |
| 274 | |
| 275 **Example 4. Even more complex filtering with Rules** | |
| 276 | |
| 277 In the above example we have used two conditions (Condition 1 and Condition 2). Using multiple conditions allows you to combine them in a variety of ways to enable even more powerful filtering. | |
| 278 For example, suppose you want to get all reads that (**1**) do NOT map to mitochondria and either (**2**) have mapping quality over 20, or (**3**) are in properly mapped pairs. The logical rule to enable such | |
| 279 filtering will look like this:: | |
| 280 | |
| 281 !(1) & (2 | 3) | |
| 282 | |
| 283 Here, numbers 1, 2, and 3 represent conditions. The following screenshot illustrates how to do this in Galaxy: | |
| 284 | |
|
1
5e2fe70292a7
planemo upload commit 5ad726dc73203a704666033cd3bf70b82575978f-dirty
devteam
parents:
0
diff
changeset
|
285 .. image:: images/rule.png |
| 0 | 286 |
| 287 There are three conditions here, each with a single filter. A text entry area that can be opened by clicking on the **Would you like to set rules?** checkbox enables you to enter a rule. | |
| 288 Here numbers correspond to numbers of conditions as they are shown in the interface. E.g., 1 corresponds to condition 1, 2 to condition 2 and so on... In human language this means:: | |
| 289 | |
| 290 NOT condition 1 AND (condition 2 OR condition 3) | |
| 291 | |
| 292 ----- | |
| 293 | |
| 294 **JSON script file** | |
| 295 | |
| 296 This tool produces two outputs. One of them is a BAM file containing filtered reads. The other is a JSONified script. It can help you to see how your instructions are sent to BAMTools. | |
| 297 For instance, Example 3 looks like this in the JSON form:: | |
| 298 | |
| 299 { | |
| 300 "filters": | |
| 301 [ | |
| 302 { "id": "1", | |
| 303 "tag":"NM:=0", | |
| 304 "isReverseStrand":"false" | |
| 305 }, | |
| 306 { "id": "2", | |
| 307 "tag":"NM:>0", | |
| 308 "isReverseStrand":"true" | |
| 309 } | |
| 310 ] | |
| 311 } | |
|
1
5e2fe70292a7
planemo upload commit 5ad726dc73203a704666033cd3bf70b82575978f-dirty
devteam
parents:
0
diff
changeset
|
312 |
| 0 | 313 |
| 314 ----- | |
| 315 | |
| 316 **More information** | |
| 317 | |
| 318 .. class:: infomark | |
| 319 | |
| 320 Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki | |
| 321 | |
| 322 | |
| 323 </help> | |
| 324 <citations> | |
| 325 <citation type="doi">10.1093/bioinformatics/btr174</citation> | |
| 326 </citations> | |
| 327 </tool> |
