Mercurial > repos > jjohnson > bamtools_split
comparison bamtools-split.xml.save @ 0:8c17ddca0eee draft
Uploaded
author | jjohnson |
---|---|
date | Mon, 15 May 2017 16:27:18 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:8c17ddca0eee |
---|---|
1 <tool id="bamSplit" name="Split" version="2.4.1"> | |
2 <description>BAM datasets on variety of attributes</description> | |
3 <macros> | |
4 <xml name="macro_output_type"> | |
5 <param name="output_type" type="select" label="output as"> | |
6 <option value="history_items">history items</option> | |
7 <option value="dataset_collection">dataset_collection</option> | |
8 </param> | |
9 </xml> | |
10 </macros> | |
11 <requirements> | |
12 <requirement type="package" version="2.4.0">bamtools</requirement> | |
13 </requirements> | |
14 <command> | |
15 <![CDATA[ | |
16 echo "BAM" > $report && | |
17 #for $bam_count, $input_bam in enumerate( $input_bams ): | |
18 ln -s "${input_bam}" "localbam_${bam_count}.bam" && | |
19 ln -s "${input_bam.metadata.bam_index}" "localbam_${bam_count}.bam.bai" && | |
20 #end for | |
21 bamtools | |
22 split | |
23 #if str ( $analysis_type.analysis_type_selector ) == "-tag" : | |
24 ${analysis_type.analysis_type_selector} "${analysis_type.tag_name}" | |
25 #else | |
26 ${analysis_type.analysis_type_selector} | |
27 #end if | |
28 -stub split_bam | |
29 #for $bam_count, $input_bam in enumerate( $input_bams ): | |
30 -in "localbam_${bam_count}.bam" | |
31 #end for | |
32 #if str ( $analysis_type.analysis_type_selector ) == "-reference" and $analysis_type.output_type == "dataset_collection": | |
33 #import re | |
34 #set $name = $re.sub('\W','_',$re.sub('\.bam$','',$input_bams[0].name)) | |
35 #set $ref_list = ' '.join([$re.sub('^.*__sq__(.+)__sq__.*$','\\1',n) if n.find('__sq__') >= 0 else n for n in str($input_bams[0].metadata.reference_names).split(',')]) | |
36 && (export I=0; | |
37 for i in $ref_list; | |
38 do I=\$((I+1)); SN=`printf "split_bam.REF_%s.bam" "\$i"`; | |
39 if [ -e "\$SN" ]; | |
40 then FN=`printf "split_bam%05d%s.%s.bam" \$((I)) "$name" "\$i"`; | |
41 mv "\$SN" "\$FN"; | |
42 fi; | |
43 done) | |
44 #end if | |
45 ]]> | |
46 </command> | |
47 <inputs> | |
48 <param name="input_bams" type="data" format="bam" label="BAM dataset(s) to split" min="1" multiple="True"/> | |
49 <conditional name="analysis_type"> | |
50 <param name="analysis_type_selector" type="select" label="Split BAM dataset(s) by" help="See help below for explanation of each option"> | |
51 <option value="-mapped">Mapping status (-mapped)</option> | |
52 <option value="-paired">Pairing status (-paired)</option> | |
53 <option value="-reference">Reference name (-reference)</option> | |
54 <option value="-tag">Specific tag (-tag)</option> | |
55 </param> | |
56 <when value="-mapped" /> | |
57 <when value="-paired" /> | |
58 <when value="-reference" > | |
59 <expand macro="macro_output_type" /> | |
60 </when> | |
61 <when value="-tag"> | |
62 <param name="tag_name" type="text" value="NM" label="Enter tag name here" help="For example, to split on NM tag enter &quot;NM&quot;"/> | |
63 <expand macro="macro_output_type" /> | |
64 </when> | |
65 </conditional> | |
66 </inputs> | |
67 <outputs> | |
68 <data format="txt" name="report" label="BAMSplitter Run" hidden="true"> | |
69 <discover_datasets pattern="split_bam\.(?P&lt;designation&gt;.+)\.bam" ext="bam" visible="true"/> | |
70 <filter>analysis_type['analysis_type_selector'] in ('-mapped','-paired') or analysis_type['output_type'] != 'dataset_collection'</filter> | |
71 </data> | |
72 <collection name="output_bams" type="list" label="${input_bams[0].name} by ${analysis_type.analysis_type_selector.replace('-','')}"> | |
73 <discover_datasets pattern="split_bam\d*(?P&lt;designation&gt;.+)\.bam" ext="bam" visible="false"/> | |
74 <filter>analysis_type['analysis_type_selector'] in ('-reference','-tag') and analysis_type['output_type'] == 'dataset_collection'</filter> | |
75 </collection> | |
76 </outputs> | |
77 <tests> | |
78 <test> | |
79 <param name="input_bams" ftype="bam" value="bamtools-input1.bam"/> | |
80 <param name="analysis_type_selector" value="-mapped"/> | |
81 <output name="report"> | |
82 <assert_contents> | |
83 <has_line line="BAM" /> | |
84 </assert_contents> | |
85 <discovered_dataset designation="MAPPED" file="bamtools-split-test1.bam" ftype="bam"/> | |
86 </output> | |
87 </test> | |
88 <test> | |
89 <param name="input_bams" ftype="bam" value="bamtools-input2.bam"/> | |
90 <param name="analysis_type_selector" value="-reference"/> | |
91 <param name="output_type" value="dataset_collection"/> | |
92 <output_collection name="output_bams" type="list"> | |
93 <element name="bamtools_input2.chr1" file="bamtools_input2.chr1" compare="sim_size" delta="500" /> | |
94 </output_collection> | |
95 </test> | |
96 | |
97 </tests> | |
98 <help> | |
99 **What it does** | |
100 | |
101 BAMTools split is a utility for splitting BAM files. It is based on the BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools). | |
102 | |
103 ----- | |
104 | |
105 .. class:: warningmark | |
106 | |
107 **DANGER: Multiple Outputs** | |
108 | |
109 As described below, splitting BAM dataset(s) on reference name or a tag value can produce very large numbers of outputs. Read below and know what you are doing. | |
110 | |
111 ----- | |
112 | |
113 **How it works** | |
114 | |
115 The following options can be specified via "**Split BAM dataset(s) by**" dropdown:: | |
116 | |
117 Mapping status (-mapped) split mapped/unmapped and generate two output files | |
118 named (MAPPED) and (UNMAPPED) containing mapped and unmapped | |
119 reads, respectively. | |
120 | |
121 Pairing status (-paired) split single-end/paired-end alignments and generate two output files | |
122 named (SINGLE_END) and (PAIRED_END) containing unpaired and paired | |
123 reads, respectively. | |
124 | |
125 Reference name (-reference) split alignments by reference name. In cases of unfinished genomes with | |
126 very large number of reference sequences (scaffolds) it can generate | |
127 thousands (if not millions) of output datasets. | |
128 | |
129 Specific tag (-tag) split alignments based on all values of TAG encountered. Choosing this | |
130 option from the menu will allow you to enter the tag name. As was the | |
131 case with the reference splitting above, this option can produce very | |
132 large number of outputs if a tag has a large number of unique values. | |
133 | |
134 ----- | |
135 | |
136 .. class:: infomark | |
137 | |
138 **More information** | |
139 | |
140 Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki | |
141 | |
142 </help> | |
143 <citations> | |
144 <citation type="doi">10.1093/bioinformatics/btr174</citation> | |
145 </citations> | |
146 </tool> |