comparison bamtools-split.xml.save @ 0:8c17ddca0eee draft

Uploaded
author jjohnson
date Mon, 15 May 2017 16:27:18 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:8c17ddca0eee
1 <tool id="bamSplit" name="Split" version="2.4.1">
2 <description>BAM datasets on variety of attributes</description>
3 <macros> <!-- macro_output_type: reusable select parameter "output_type" offering the values history_items and dataset_collection. -->
4 <xml name="macro_output_type">
5 <param name="output_type" type="select" label="output as">
6 <option value="history_items">history items</option>
7 <option value="dataset_collection">dataset_collection</option>
8 </param>
9 </xml>
10 </macros>
11 <requirements>
12 <requirement type="package" version="2.4.0">bamtools</requirement>
13 </requirements>
14 <command>
15 <![CDATA[ ## Links every input BAM and its index into the working directory, runs bamtools split on them with the selected option and the split_bam stub, and (for -reference with collection output) renames each per-reference file with a zero-padded counter plus the sanitized name of the first input.
16 echo "BAM" > $report &&
17 #for $bam_count, $input_bam in enumerate( $input_bams ):
18 ln -s "${input_bam}" "localbam_${bam_count}.bam" &&
19 ln -s "${input_bam.metadata.bam_index}" "localbam_${bam_count}.bam.bai" &&
20 #end for
21 bamtools
22 split
23 #if str ( $analysis_type.analysis_type_selector ) == "-tag" :
24 ${analysis_type.analysis_type_selector} "${analysis_type.tag_name}"
25 #else
26 ${analysis_type.analysis_type_selector}
27 #end if
28 -stub split_bam
29 #for $bam_count, $input_bam in enumerate( $input_bams ):
30 -in "localbam_${bam_count}.bam"
31 #end for
32 #if str ( $analysis_type.analysis_type_selector ) == "-reference" and str ( $analysis_type.output_type ) == "dataset_collection":
33 #import re
34 #set $name = $re.sub('\W','_',$re.sub('\.bam$','',$input_bams[0].name))
35 #set $ref_list = ' '.join([$re.sub('^.*__sq__(.+)__sq__.*$','\\1',n) if n.find('__sq__') >= 0 else n for n in str($input_bams[0].metadata.reference_names).split(',')])
36 && (export I=0;
37 for i in $ref_list;
38 do I=\$((I+1)); SN=`printf "split_bam.REF_%s.bam" "\$i"`;
39 if [ -e "\$SN" ];
40 then FN=`printf "split_bam%05d%s.%s.bam" \$((I)) "$name" "\$i"`;
41 mv "\$SN" "\$FN";
42 fi;
43 done)
44 #end if
45 ]]>
46 </command>
47 <inputs> <!-- One or more BAM datasets plus the split criterion. The reference and tag branches add the output_type selector from macro_output_type; the tag branch also asks for the tag name. -->
48 <param name="input_bams" type="data" format="bam" label="BAM dataset(s) to split" min="1" multiple="True"/>
49 <conditional name="analysis_type">
50 <param name="analysis_type_selector" type="select" label="Split BAM dataset(s) by" help="See help below for explanation of each option">
51 <option value="-mapped">Mapping status (-mapped)</option>
52 <option value="-paired">Pairing status (-paired)</option>
53 <option value="-reference">Reference name (-reference)</option>
54 <option value="-tag">Specific tag (-tag)</option>
55 </param>
56 <when value="-mapped" />
57 <when value="-paired" />
58 <when value="-reference" >
59 <expand macro="macro_output_type" />
60 </when>
61 <when value="-tag">
62 <param name="tag_name" type="text" value="NM" label="Enter tag name here" help="For example, to split on NM tag enter &quot;NM&quot;"/>
63 <expand macro="macro_output_type" />
64 </when>
65 </conditional>
66 </inputs>
67 <outputs> <!-- Two mutually exclusive modes selected by the filters: a hidden text report whose discovered split BAM files become visible history datasets, or (reference or tag split with output_type dataset_collection) a list collection of the discovered split BAM files. -->
68 <data format="txt" name="report" label="BAMSplitter Run" hidden="true">
69 <discover_datasets pattern="split_bam\.(?P&lt;designation&gt;.+)\.bam" ext="bam" visible="true"/>
70 <filter>analysis_type['analysis_type_selector'] in ('-mapped','-paired') or analysis_type['output_type'] != 'dataset_collection'</filter>
71 </data>
72 <collection name="output_bams" type="list" label="${input_bams[0].name} by ${analysis_type.analysis_type_selector.replace('-','')}">
73 <discover_datasets pattern="split_bam\d*(?P&lt;designation&gt;.+)\.bam" ext="bam" visible="false"/>
74 <filter>analysis_type['analysis_type_selector'] in ('-reference','-tag') and analysis_type['output_type'] == 'dataset_collection'</filter>
75 </collection>
76 </outputs>
77 <tests> <!-- Test 1 splits bamtools-input1.bam by mapping status and checks the report line and the discovered MAPPED dataset. Test 2 splits bamtools-input2.bam by reference into a collection and checks the chr1 element by approximate size. -->
78 <test>
79 <param name="input_bams" ftype="bam" value="bamtools-input1.bam"/>
80 <param name="analysis_type_selector" value="-mapped"/>
81 <output name="report">
82 <assert_contents>
83 <has_line line="BAM" />
84 </assert_contents>
85 <discovered_dataset designation="MAPPED" file="bamtools-split-test1.bam" ftype="bam"/>
86 </output>
87 </test>
88 <test>
89 <param name="input_bams" ftype="bam" value="bamtools-input2.bam"/>
90 <param name="analysis_type_selector" value="-reference"/>
91 <param name="output_type" value="dataset_collection"/>
92 <output_collection name="output_bams" type="list">
93 <element name="bamtools_input2.chr1" file="bamtools_input2.chr1" compare="sim_size" delta="500" />
94 </output_collection>
95 </test>
96
97 </tests>
98 <help>
99 **What it does**
100
101 BAMTools split is a utility for splitting BAM files. It is based on the BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools).
102
103 -----
104
105 .. class:: warningmark
106
107 **DANGER: Multiple Outputs**
108
109 As described below, splitting BAM dataset(s) on reference name or a tag value can produce a very large number of outputs. Read below and know what you are doing.
110
111 -----
112
113 **How it works**
114
115 The following options can be specified via "**Split BAM dataset(s) by**" dropdown::
116
117 Mapping status (-mapped) split mapped/unmapped and generate two output files
118 named (MAPPED) and (UNMAPPED) containing mapped and unmapped
119 reads, respectively.
120
121 Pairing status (-paired) split single-end/paired-end alignments and generate two output files
122 named (SINGLE_END) and (PAIRED_END) containing unpaired and paired
123 reads, respectively.
124
125 Reference name (-reference) split alignments by reference name. In cases of unfinished genomes with a
126 very large number of reference sequences (scaffolds) it can generate
127 thousands (if not millions) of output datasets.
128
129 Specific tag (-tag) split alignments based on all values of TAG encountered. Choosing this
130 option from the menu will allow you to enter the tag name. As was the
131 case with the reference splitting above, this option can produce a very
132 large number of outputs if a tag has a large number of unique values.
133
134 -----
135
136 .. class:: infomark
137
138 **More information**
139
140 Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki
141
142 </help>
143 <citations>
144 <citation type="doi">10.1093/bioinformatics/btr174</citation>
145 </citations>
146 </tool>