Mercurial > repos > iuc > maxbin2
comparison maxbin2.xml @ 0:fa268dda3c1e draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/maxbin2/ commit e62df5b9d7cdbfd5fc02f5e52bbc2f7284eee555"
| author | iuc |
|---|---|
| date | Mon, 28 Oct 2019 19:36:33 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:fa268dda3c1e |
|---|---|
| 1 <tool id="maxbin2" name="MaxBin2" version="@MAXBIN_VERSION@+galaxy1"> | |
| 2 <description>clusters metagenomic contigs into bins</description> | |
| 3 <macros> | |
| 4 <token name="@MAXBIN_VERSION@">2.2.7</token> | |
| 5 </macros> | |
| 6 <requirements> | |
| 7 <requirement type="package" version="@MAXBIN_VERSION@">maxbin2</requirement> | |
| 8 </requirements> | |
| 9 <version_command><![CDATA[run_MaxBin.pl -version | head -n 1]]></version_command> | |
| 10 <command detect_errors="exit_code"><![CDATA[ | |
| 11 ## write the paths of the selected read or abundance datasets, one per line, to reads_list or abund_list | |
| 12 #if $intype_cond.intype_select == 'rds': | |
| 13 #for $r in $intype_cond.reads | |
| 14 #if $r | |
| 15 echo '$r' >> reads_list && | |
| 16 #end if | |
| 17 #end for | |
| 18 #else if $intype_cond.intype_select == 'abdc': | |
| 19 #for $a in $intype_cond.abund | |
| 20 #if $a | |
| 21 echo '$a' >> abund_list && | |
| 22 #end if | |
| 23 #end for | |
| 24 #end if | |
| 25 | |
| 26 ## in case of reassembly the IDBA stdout and stderr are appended to the job output; | |
| 27 ## to tell them apart, a header is also printed before the | |
| 28 ## MaxBin2 outputs | |
| 29 #if $intype_cond.intype_select == 'rds' and $intype_cond.reassembly != "" | |
| 30 echo "==== MaxBin2 stdout ====" && | |
| 31 echo "==== MaxBin2 stderr ====" 1>&2 && | |
| 32 #end if | |
| 33 | |
| 34 run_MaxBin.pl | |
| 35 -contig '$contig' | |
| 36 -out out | |
| 37 #if $intype_cond.intype_select == 'rds': | |
| 38 -reads_list reads_list | |
| 39 $intype_cond.reassembly | |
| 40 #else if $intype_cond.intype_select == 'abdc': | |
| 41 -abund_list abund_list | |
| 42 #end if | |
| 43 #if $adv_cond.adv_select == 'yes': | |
| 44 -min_contig_length $adv_cond.min_contig_length | |
| 45 -max_iteration $adv_cond.max_iteration | |
| 46 -prob_threshold $adv_cond.prob_threshold | |
| 47 $adv_cond.plotmarker | |
| 48 -markerset $adv_cond.markerset | |
| 49 #end if | |
| 50 -thread \${GALAXY_SLOTS:-1} | |
| 51 | |
| 52 && tar -xf out.marker_of_each_bin.tar.gz | |
| 53 | |
| 54 ## print the IDBA stdout/stderr files even if MaxBin2 failed (hence the ';'), | |
| 55 ## but remember the exit code of the MaxBin2/tar chain in rc and restore it at | |
| 56 ## the end, so that a failure is not masked by a successful cat | |
| 57 #if $intype_cond.intype_select == 'rds' and $intype_cond.reassembly != "" | |
| 58 ; rc=\$? ; echo "==== IDBA stdout ====" | |
| 59 && cat out.idba.out | |
| 60 && echo "==== IDBA stderr ====" 1>&2 | |
| 61 && cat out.idba.err 1>&2 ; (exit \$rc) | |
| 62 #end if | |
| 63 ]]></command> | |
| 64 <inputs> | |
| 65 <param argument="-contig" type="data" format="fasta,fasta.gz" label="Contig file"/> | |
| 66 <conditional name="intype_cond"> | |
| 67 <param name="intype_select" type="select" label="Input type"> | |
| 68 <option value="rds" selected="true">Sequencing Reads</option> | |
| 69 <option value="abdc">Abundances</option> | |
| 70 </param> | |
| 71 <when value="rds"> | |
| 72 <param name="reads" type="data" format="fasta,fastq" multiple="true" label="Reads file(s)" help="(-read/-read2/...)"/> | |
| 73 <param name="output_abundances" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output abundances" help="" /> | |
| 74 <param argument="-reassembly" type="boolean" truevalue="-reassembly" falsevalue="" checked="false" label="Reassembly" help="Reassembly option is still highly experimental. To use this function, you need to feed MaxBin interleaved paired-end fastq or fasta file." /> | |
| 75 </when> | |
| 76 <when value="abdc"> | |
| 77 <param name="abund" type="data" format="tabular" multiple="true" label="Abundance file(s)" help="(-abund/-abund2/...)" /> | |
| 78 </when> | |
| 79 </conditional> | |
| 80 <conditional name="adv_cond"> | |
| 81 <param name="adv_select" type="select" label="Advanced options"> | |
| 82 <option value="yes">Yes</option> | |
| 83 <option value="no" selected="true">No</option> | |
| 84 </param> | |
| 85 <when value="no"/> | |
| 86 <when value="yes"> | |
| 87 <param argument="-min_contig_length" type="integer" min="0" value="1000" label="minimum contig length" /> | |
| 88 <param argument="-max_iteration" type="integer" min="0" value="50" label="Maximum Expectation-Maximization algorithm iteration number" /> | |
| 89 <param argument="-prob_threshold" type="float" min="0" max="1.0" value="0.5" label="Probability threshold for EM final classification" /> | |
| 90 <param argument="-plotmarker" type="boolean" truevalue="-plotmarker" falsevalue="" checked="false" label="Generate visualization of the marker gene presence numbers" /> | |
| 91 <param name="output_marker" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output marker gene presence for bins table" /> | |
| 92 <param name="output_markers" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output marker genes for each bin as fasta" /> | |
| 93 <param name="output_log" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output log" /> | |
| 94 <param argument="-markerset" type="select" label="Marker gene set"> | |
| 95 <option value="107" selected="true">107 marker genes present in >95% of bacteria</option> | |
| 96 <option value="40">40 marker gene sets that are universal among bacteria and archaea</option> | |
| 97 </param> | |
| 98 </when> | |
| 99 </conditional> | |
| 100 </inputs> | |
| 101 <outputs> | |
| 102 <!-- default outputs --> | |
| 103 <collection name="bins" type="list" label="${tool.name} on ${on_string} (bins)"> | |
| 104 <!-- dots are escaped so that only the literal '.' separators match --> <discover_datasets pattern="out\.(?P<designation>[0-9]+)\.fasta" format="fasta" visible="false" /> | |
| 105 </collection> | |
| 106 <data name="noclass" format="fasta" label="${tool.name} on ${on_string} (unclassified sequences)" from_work_dir="out.noclass"/> | |
| 107 <data name="toshort" format="fasta" label="${tool.name} on ${on_string} (too short sequences)" from_work_dir="out.tooshort"/> | |
| 108 <data name="summary" format="tabular" label="${tool.name} on ${on_string} (summary)" from_work_dir="out.summary"/> | |
| 109 | |
| 110 <!-- optional outputs --> | |
| 111 <data name="log" format="txt" label="${tool.name} on ${on_string} (log)" from_work_dir="out.log"> | |
| 112 <filter>adv_cond['adv_select']=='yes' and adv_cond['output_log']</filter> | |
| 113 </data> | |
| 114 <data name="marker" format="tabular" label="${tool.name} on ${on_string} (marker gene presence)" from_work_dir="out.marker"> | |
| 115 <filter>adv_cond['adv_select']=='yes' and adv_cond['output_marker']</filter> | |
| 116 </data> | |
| 117 <data name="abundout" format="tabular" label="${tool.name} on ${on_string} (abundances)" from_work_dir="out.abund1"> | |
| 118 <filter>intype_cond['intype_select']=='rds' and intype_cond['output_abundances']</filter> | |
| 119 </data> | |
| 120 <data name="plot" format="pdf" label="${tool.name} on ${on_string} (marker gene presence plot)" from_work_dir="out.marker.pdf"> | |
| 121 <filter>adv_cond['adv_select']=='yes' and adv_cond['plotmarker']</filter> | |
| 122 </data> | |
| 123 <collection name="markers" type="list" label="${tool.name} on ${on_string} (markers predicted for bins)"> | |
| 124 <!-- dots are escaped so that only the literal '.' separators match --> <discover_datasets pattern="out\.(?P<designation>[0-9]+)\.marker\.fasta" format="fasta" visible="false" /> | |
| 125 <filter>adv_cond['adv_select']=='yes' and adv_cond['output_markers']</filter> | |
| 126 </collection> | |
| 127 | |
| 128 <!-- additional output in case of reassembly --> | |
| 129 <collection name="reassembly_bins" type="list" label="${tool.name} on ${on_string} (reassembly bins)"> | |
| 130 <!-- dots are escaped so that only the literal '.' separators match --> <discover_datasets directory="out.reassem" pattern="out\.(?P<designation>[0-9]+)\.fasta" format="fasta" visible="false" /> | |
| 131 <filter>intype_cond['intype_select']=='rds' and intype_cond['reassembly']</filter> | |
| 132 </collection> | |
| 133 <collection name="reassembly_reads" type="list" label="${tool.name} on ${on_string} (reassembly reads)"> | |
| 134 <!-- dots are escaped so that only the literal '.' separators match --> <discover_datasets directory="out.reassem" pattern="out\.reads\.(?P<designation>[0-9]+)" format="fasta" visible="false" /> | |
| 135 <filter>intype_cond['intype_select']=='rds' and intype_cond['reassembly']</filter> | |
| 136 </collection> | |
| 137 <data name="reassembly_noclass" format="fasta" label="${tool.name} on ${on_string} (reassembly unclassified sequences)" from_work_dir="out.reassem/out.reads.noclass"> | |
| 138 <filter>intype_cond['intype_select']=='rds' and intype_cond['reassembly']</filter> | |
| 139 </data> | |
| 140 <data name="reassembly_n50" format="txt" label="${tool.name} on ${on_string} (reassembly N50)" from_work_dir="out.reassem/N50.txt"> | |
| 141 <filter>intype_cond['intype_select']=='rds' and intype_cond['reassembly']</filter> | |
| 142 </data> | |
| 143 </outputs> | |
| 144 <tests> | |
| 145 <test expect_num_outputs="4"><!-- test w contigs and reads as input --> | |
| 146 <param name="contig" value="Bin_Sample3_contigs.fasta" ftype="fasta" /> | |
| 147 <conditional name="intype_cond"> | |
| 148 <param name="intype_select" value="rds"/> | |
| 149 <param name="reads" value="interleavedPE_unmapped_Sample3_small.fasta" ftype="fasta"/> | |
| 150 </conditional> | |
| 151 <conditional name="adv_cond"> | |
| 152 <param name="adv_select" value="no"/> | |
| 153 </conditional> | |
| 154 <output_collection name="bins" type="list" count="2"> | |
| 155 <element name="001" file="1/out.001.fasta" ftype="fasta"/> | |
| 156 <element name="002" file="1/out.002.fasta" ftype="fasta"/> | |
| 157 </output_collection> | |
| 158 <output name="summary" file="1/out.summary" ftype="tabular" /> | |
| 159 <output name="noclass" file="1/out.noclass" ftype="fasta" /> | |
| 160 <output name="toshort" file="1/out.tooshort" ftype="fasta" /> | |
| 161 </test> | |
| 162 <!-- test w contigs and reads as input test for optional outputs --> | |
| 163 <test expect_num_outputs="9"> | |
| 164 <param name="contig" value="Bin_Sample3_contigs.fasta" ftype="fasta" /> | |
| 165 <conditional name="intype_cond"> | |
| 166 <param name="intype_select" value="rds"/> | |
| 167 <param name="reads" value="interleavedPE_unmapped_Sample3_small.fasta" ftype="fasta"/> | |
| 168 <param name="output_abundances" value="true" /> | |
| 169 </conditional> | |
| 170 <conditional name="adv_cond"> | |
| 171 <param name="adv_select" value="yes"/> | |
| 172 <param name="plotmarker" value="true" /> | |
| 173 <param name="output_marker" value="true" /> | |
| 174 <param name="output_markers" value="true" /> | |
| 175 <param name="output_log" value="true" /> | |
| 176 </conditional> | |
| 177 <output_collection name="bins" type="list" count="2"> | |
| 178 <element name="001" file="1/out.001.fasta" ftype="fasta"/> | |
| 179 <element name="002" file="1/out.002.fasta" ftype="fasta"/> | |
| 180 </output_collection> | |
| 181 <output name="summary" file="1/out.summary" ftype="tabular" /> | |
| 182 <output name="noclass" file="1/out.noclass" ftype="fasta" /> | |
| 183 <output name="toshort" file="1/out.tooshort" ftype="fasta" /> | |
| 184 <output name="log" file="1/out.log" ftype="txt" compare="diff" lines_diff="21" /> | |
| 185 <output name="abundout" file="1/out.abund1" ftype="tabular" /> | |
| 186 <output name="marker" file="1/out.marker" ftype="tabular" /> | |
| 187 <output name="plot" file="1/out.marker.pdf" ftype="pdf" compare="sim_size" /> | |
| 188 <output_collection name="markers" type="list" count="1"> | |
| 189 <element name="001" file="1/out.001.marker.fasta" ftype="fasta"/> | |
| 190 </output_collection> | |
| 191 </test> | |
| 192 <!--test w contigs and abundances as input + advanced options --> | |
| 193 <test expect_num_outputs="5"> | |
| 194 <param name="contig" value="Bin_Sample3_contigs.fasta" ftype="fasta" /> | |
| 195 <conditional name="intype_cond"> | |
| 196 <param name="intype_select" value="abdc"/> | |
| 197 <param name="abund" value="abundances.tsv" ftype="tabular"/> | |
| 198 </conditional> | |
| 199 <conditional name="adv_cond"> | |
| 200 <param name="adv_select" value="yes"/> | |
| 201 <param name="min_contig_length" value="500"/> | |
| 202 <param name="max_iteration" value="10"/> | |
| 203 <param name="prob_threshold" value="0.95"/> | |
| 204 <param name="plotmarker" value="-plotmarker"/> | |
| 205 <param name="markerset" value="107"/> | |
| 206 </conditional> | |
| 207 <output_collection name="bins" type="list" count="2"> | |
| 208 <element name="001" file="2/out.001.fasta" ftype="fasta"/> | |
| 209 <element name="002" file="2/out.002.fasta" ftype="fasta"/> | |
| 210 </output_collection> | |
| 211 <output name="summary" file="2/out.summary" ftype="tabular" /> | |
| 212 <output name="noclass" file="2/out.noclass" ftype="fasta" /> | |
| 213 <output name="toshort" file="2/out.tooshort" ftype="fasta" /> | |
| 214 <output name="plot" file="2/out.marker.pdf" ftype="pdf" compare="sim_size" /> | |
| 215 </test> | |
| 216 <!-- test w contigs and reads as input + reassembly--> | |
| 217 <test expect_num_outputs="8"> | |
| 218 <param name="contig" value="Bin_Sample3_contigs.fasta" ftype="fasta" /> | |
| 219 <conditional name="intype_cond"> | |
| 220 <param name="intype_select" value="rds"/> | |
| 221 <param name="reads" value="interleavedPE_unmapped_Sample3_small.fasta" ftype="fasta"/> | |
| 222 <param name="reassembly" value="-reassembly"/> | |
| 223 </conditional> | |
| 224 <conditional name="adv_cond"> | |
| 225 <param name="adv_select" value="no"/> | |
| 226 </conditional> | |
| 227 <output_collection name="bins" type="list" count="2"> | |
| 228 <element name="001" file="3/out.001.fasta" ftype="fasta"/> | |
| 229 <element name="002" file="3/out.002.fasta" ftype="fasta"/> | |
| 230 </output_collection> | |
| 231 <output name="summary" file="3/out.summary" ftype="tabular" /> | |
| 232 <output name="noclass" file="3/out.noclass" ftype="fasta" /> | |
| 233 <output name="toshort" file="3/out.tooshort" ftype="fasta" /> | |
| 234 <output_collection name="reassembly_bins" type="list" count="2"> | |
| 235 <element name="001" file="3/out.reassem/out.001.fasta" ftype="fasta"/> | |
| 236 <element name="002" file="3/out.reassem/out.002.fasta" ftype="fasta"/> | |
| 237 </output_collection> | |
| 238 <output_collection name="reassembly_reads" type="list" count="2"> | |
| 239 <element name="001" file="3/out.reassem/out.reads.001" ftype="fasta"/> | |
| 240 <element name="002" file="3/out.reassem/out.reads.002" ftype="fasta"/> | |
| 241 </output_collection> | |
| 242 <output name="reassembly_noclass" file="3/out.reassem/out.reads.noclass" ftype="fasta" /> | |
| 243 <output name="reassembly_n50" file="3/out.reassem/N50.txt" ftype="txt" /> | |
| 244 </test> | |
| 245 </tests> | |
| 246 <help><![CDATA[ | |
| 247 MaxBin is a software that clusters metagenomic contigs into different bins, | |
| 248 each consisting (hopefully) of contigs from one species. MaxBin uses the | |
| 249 nucleotide composition information and contig abundance information to | |
| 250 achieve binning through an Expectation-Maximization algorithm. | |
| 251 | |
| 252 **Input**: | |
| 253 | |
| 254 MaxBin needs the contigs and contig abundance information. The contig abundance | |
| 255 information can be provided in two ways: the user can choose to provide | |
| 256 | |
| 257 - the abundance file or | |
| 258 - the sequencing reads in fasta format (and MaxBin will use Bowtie2 to map the | |
| 259 sequencing reads against the contigs and generate the abundance information) | |
| 260 | |
| 261 The abundance information can be provided as tabular file: | |
| 262 | |
| 263 For example, assume I have three contigs named A0001, A0002, and A0003, then my abundance file will look like | |
| 264 | |
| 265 A0001 30.89 | |
| 266 A0002 20.02 | |
| 267 A0003 78.93 | |
| 268 | |
| 269 Reads/abundance data can be given in multiple files. | |
| 270 | |
| 271 By default MaxBin will look for 107 marker genes present in >95% of bacteria. | |
| 272 Alternatively you can also choose 40 marker gene sets that are universal among | |
| 273 bacteria and archaea (Wu et al., PLoS ONE 2013). This option may be better | |
| 274 suited for environments dominated by archaea; however, it tends to split genomes | |
| 275 into more bins. You can choose between different marker gene sets and see which | |
| 276 one works better. | |
| 277 | |
| 278 **Outputs** | |
| 279 | |
| 280 - bins: binned sequences | |
| 281 - summary: a summary file describing which contigs are being classified into which bin. | |
| 282 - log: a log file recording the core steps of MaxBin algorithm | |
| 283 - abundances (only if reads are used as input): the contig abundance information that MaxBin generated by mapping the reads against the contigs | |
| 284 - marker: marker gene presence numbers for each bin. This table is ready to be plotted by R or other 3rd-party software. | |
| 285 - marker plot (only present if selected in the advanced options): visualization of the marker gene presence numbers using R. Will only appear if -plotmarker is specified. | |
| 286 - unclassified sequences: this file stores all sequences that pass the minimum length threshold but are not classified successfully. | |
| 287 - too short sequences: this file stores all sequences that do not meet the minimum length threshold. | |
| 288 - markers predicted for bins: these data sets store all markers predicted from the individual bins. | |
| 289 | |
| 290 **Reassembly** | |
| 291 | |
| 292 This is an experimental feature of MaxBin. For each read bin it calls IDBA_UD with the pre_correction parameter. Of course, this IDBA_UD call can also be done with the corresponding Galaxy tool. | |
| 293 | |
| 294 | |
| 295 **More information** | |
| 296 | |
| 297 https://downloads.jbei.org/data/microbial_communities/MaxBin/MaxBin.html | |
| 298 | |
| 299 ]]></help> | |
| 300 <citations> | |
| 301 <citation type="doi">10.1093/bioinformatics/btv638</citation> | |
| 302 </citations> | |
| 303 </tool> |
