Mercurial > repos > iuc > bmtagger
comparison bmtagger.xml @ 0:55b963dc5f76 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/bmtagger commit e3d492d96b0ffe79370ca090b3f749b0869e8b60
| author | iuc |
|---|---|
| date | Wed, 12 Nov 2025 12:03:11 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:55b963dc5f76 |
|---|---|
| 1 <tool id="bmtagger" name="bmtagger" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT"> | |
| 2 <description>remove contaminant reads</description> | |
| 3 <macros> | |
| 4 <token name="@TOOL_VERSION@">3.101</token> | |
| 5 <token name="@VERSION_SUFFIX@">0</token> | |
| 6 <token name="@PROFILE@">25.0</token> | |
| 7 <!-- shared test assertions: "assert" checks that an output has 2668 lines; "element_assert" wraps that check in a collection element (decompress defaults to false) --> | |
| 8 <xml name="assert"> | |
| 9 <assert_contents> | |
| 10 <has_n_lines n="2668"/> | |
| 11 </assert_contents> | |
| 12 </xml> | |
| 13 <xml name="element_assert" tokens="name,ftype" token_decompress="false"> | |
| 14 <element name="@NAME@" ftype="@FTYPE@" decompress="@DECOMPRESS@"> | |
| 15 <expand macro="assert"/> | |
| 16 </element> | |
| 17 </xml> | |
| 18 </macros> | |
| 19 <xrefs> | |
| 20 <xref type="bio.tools">bmtagger</xref> | |
| 21 </xrefs> | |
| 22 <requirements> | |
| 23 <requirement type="package" version="@TOOL_VERSION@">bmtagger</requirement> | |
| 24 </requirements> | |
| 25 <version_command><![CDATA[bmtagger.sh -V 2> /dev/null | grep version | cut -d" " -f2]]></version_command> | |
| 26 <command detect_errors="exit_code"><![CDATA[ | |
| 27 set -eo pipefail; | |
| 28 #set gz = False | |
| 29 #set fasta = False | |
| 30 #if $sequences.type == "single" | |
| 31 #if $sequences.reads.ext.startswith("fasta") | |
| 32 #set fasta = True | |
| 33 #end if | |
| 34 #if $sequences.reads.ext.endswith(".gz") | |
| 35 gunzip -c '$sequences.reads' > forward && | |
| 36 #set gz = True | |
| 37 #else | |
| 38 ln -s '$sequences.reads' forward && | |
| 39 #end if | |
| 40 ## paired branch below: FASTA and gzip detection look at the forward mate only; the reverse mate is staged the same way | |
| 41 #else | |
| 42 #if $sequences.reads.forward.ext.startswith("fasta") | |
| 43 #set fasta = True | |
| 44 #end if | |
| 45 #if $sequences.reads.forward.ext.endswith(".gz") | |
| 46 gunzip -c '$sequences.reads.forward' > forward && | |
| 47 gunzip -c '$sequences.reads.reverse' > reverse && | |
| 48 #set gz = True | |
| 49 #else | |
| 50 ln -s '$sequences.reads.forward' forward && | |
| 51 ln -s '$sequences.reads.reverse' reverse && | |
| 52 #end if | |
| 53 #end if | |
| 54 ## prepare the host reference: take the path from the data table, or build bitmask, srprism index and BLAST db from a history FASTA | |
| 55 #if $host.source == "cached" | |
| 56 #set reference = $host.reference.fields.path | |
| 57 ## srprism test data is too large (>100MB) to store at IUC | |
| 58 ## hence we generate it on the fly for tool tests using the | |
| 59 ## fasta file which we keep in the path referred by the | |
| 60 ## data table (not needed otherwise) | |
| 61 #if $test == "true" | |
| 62 srprism mkindex -i '${host.reference.fields.path}.fa' -o reference.srprism && | |
| 63 #end if | |
| 64 #else | |
| 65 #if $host.sequence.ext == "fasta.gz" | |
| 66 gunzip -c '$host.sequence' > reference.fa && | |
| 67 #else | |
| 68 ln -s '$host.sequence' reference.fa && | |
| 69 #end if | |
| 70 ## bmtool creates multi GB file if used with default parameters | |
| 71 ## -> use much smaller word size for testing | |
| 72 bmtool -d reference.fa -o reference.bitmask -w #if $test != "" then 10 else 18 # && | |
| 73 srprism mkindex -i reference.fa -o reference.srprism && | |
| 74 makeblastdb -in reference.fa -dbtype nucl && | |
| 75 #set reference = "reference" | |
| 76 #end if | |
| 77 ## run bmtagger.sh on the staged reads; its output is written to host_ids (-q is 0 for FASTA input, 1 otherwise) | |
| 78 bmtagger.sh | |
| 79 -q #if $fasta then 0 else 1# | |
| 80 -1 forward | |
| 81 #if $sequences.type == "paired" | |
| 82 -2 reverse | |
| 83 #end if | |
| 84 -b '${reference}.bitmask' | |
| 85 #if $test == "" or $host.source != "cached" | |
| 86 -x '${reference}.srprism' | |
| 87 #else | |
| 88 -x reference.srprism | |
| 89 #end if | |
| 90 -d '${reference}' | |
| 91 -o host_ids | |
| 92 && | |
| 93 ## extract reads listed in host_ids with extract_fullseq, parsing the input as FASTA or FASTQ to match $fasta; re-compress if the input was gzipped | |
| 94 extract_fullseq host_ids -keep #if $fasta then "-fasta" else "-fastq"# | |
| 95 #if $sequences.type == "single" | |
| 96 -single | |
| 97 #else | |
| 98 -mate1 | |
| 99 #end if | |
| 100 'forward' | |
| 101 #if $gz | |
| 102 | gzip -c | |
| 103 #end if | |
| 104 #if $sequences.type == "single" | |
| 105 > '$out_single' | |
| 106 #else | |
| 107 > '$out_pair.forward' | |
| 108 && | |
| 109 extract_fullseq host_ids -keep #if $fasta then "-fasta" else "-fastq"# -mate2 'reverse' | |
| 110 #if $gz | |
| 111 | gzip -c | |
| 112 #end if | |
| 113 > '$out_pair.reverse' | |
| 114 #end if | |
| 115 ]]></command> | |
| 116 <inputs> | |
| 117 <conditional name="sequences"> | |
| 118 <param name="type" type="select" label="Sequence type"> | |
| 119 <option value="single">Single end data</option> | |
| 120 <option value="paired">Paired end data</option> | |
| 121 </param> | |
| 122 <when value="single"> | |
| 123 <param name="reads" type="data" format="fasta,fasta.gz,fastqsanger,fastqsanger.gz,fastqillumina,fastqillumina.gz" label="Single end reads"/> | |
| 124 </when> | |
| 125 <when value="paired"> | |
| 126 <param name="reads" type="data_collection" collection_type="paired" format="fasta,fasta.gz,fastqsanger,fastqsanger.gz,fastqillumina,fastqillumina.gz" label="Paired end reads" /> | |
| 127 </when> | |
| 128 </conditional> | |
| 129 <conditional name="host"> | |
| 130 <param name="source" type="select" label="Host data source"> | |
| 131 <option value="cached">Precomputed indices</option> | |
| 132 <option value="history">Sequence from History</option> | |
| 133 </param> | |
| 134 <when value="cached"> | |
| 135 <param name="reference" type="select" label="Reference"> | |
| 136 <options from_data_table="bmtagger"> | |
| 137 <filter type="sort_by" column="2"/> | |
| 138 <validator type="no_options" message="No indexes are available for the selected input dataset"/> | |
| 139 </options> | |
| 140 </param> | |
| 141 </when> | |
| 142 <when value="history"> | |
| 143 <param name="sequence" type="data" format="fasta,fasta.gz" label="Host sequence" help="nucleotide sequence" /> | |
| 144 </when> | |
| 145 </conditional> | |
| 146 <param name="test" type="hidden"/> | |
| 147 </inputs> | |
| 148 <outputs> | |
| 149 <data name="out_single" format_source="reads" label="${tool.name} on ${on_string}"> | |
| 150 <filter>sequences["type"] == "single"</filter> | |
| 151 </data> | |
| 152 <collection name="out_pair" type="paired" label="${tool.name} on ${on_string}: pairs"> | |
| 153 <data name="forward" format_source="reads" /> | |
| 154 <data name="reverse" format_source="reads" /> | |
| 155 <filter>sequences["type"] == "paired"</filter> | |
| 156 </collection> | |
| 157 </outputs> | |
| 158 <tests> | |
| 159 <!-- single input, cached reference --> | |
| 160 <test expect_num_outputs="1"> | |
| 161 <conditional name="sequences"> | |
| 162 <param name="type" value="single"/> | |
| 163 <param name="reads" value="host_and_contaminant.fq1.fq" ftype="fastqsanger"/> | |
| 164 </conditional> | |
| 165 <param name="test" value="true"/> | |
| 166 <output name="out_single" ftype="fastqsanger"> | |
| 167 <expand macro="assert"/> | |
| 168 </output> | |
| 169 </test> | |
| 170 <!-- paired input (collection type matches the input's collection_type="paired"), cached reference --> | |
| 171 <test expect_num_outputs="3"> | |
| 172 <conditional name="sequences"> | |
| 173 <param name="type" value="paired"/> | |
| 174 <param name="reads"> | |
| 175 <collection type="paired" name="reads"> | |
| 176 <element name="forward" value="host_and_contaminant.fq1.fq" ftype="fastqsanger"/> | |
| 177 <element name="reverse" value="host_and_contaminant.fq2.fq" ftype="fastqsanger"/> | |
| 178 </collection> | |
| 179 </param> | |
| 180 </conditional> | |
| 181 <param name="test" value="true"/> | |
| 182 <output_collection name="out_pair" count="2"> | |
| 183 <expand macro="element_assert" name="forward" ftype="fastqsanger"/> | |
| 184 <expand macro="element_assert" name="reverse" ftype="fastqsanger"/> | |
| 185 </output_collection> | |
| 186 </test> | |
| 187 <!-- paired gz input (collection type matches the input's collection_type="paired"), cached reference --> | |
| 188 <test expect_num_outputs="3"> | |
| 189 <conditional name="sequences"> | |
| 190 <param name="type" value="paired"/> | |
| 191 <param name="reads"> | |
| 192 <collection type="paired" name="reads"> | |
| 193 <element name="forward" value="host_and_contaminant.fq1.fq.gz" ftype="fastqsanger.gz"/> | |
| 194 <element name="reverse" value="host_and_contaminant.fq2.fq.gz" ftype="fastqsanger.gz"/> | |
| 195 </collection> | |
| 196 </param> | |
| 197 </conditional> | |
| 198 <param name="test" value="true"/> | |
| 199 <output_collection name="out_pair" count="2"> | |
| 200 <expand macro="element_assert" name="forward" ftype="fastqsanger.gz" decompress="true"/> | |
| 201 <expand macro="element_assert" name="reverse" ftype="fastqsanger.gz" decompress="true"/> | |
| 202 </output_collection> | |
| 203 </test> | |
| 204 | |
| 205 <!-- single gz input, fasta reference --> | |
| 206 <test expect_num_outputs="1"> | |
| 207 <conditional name="sequences"> | |
| 208 <param name="type" value="single"/> | |
| 209 <param name="reads" value="host_and_contaminant.fq1.fq.gz" ftype="fastqsanger.gz"/> | |
| 210 </conditional> | |
| 211 <conditional name="host"> | |
| 212 <param name="source" value="history"/> | |
| 213 <param name="sequence" value="host.fa" ftype="fasta"/> | |
| 214 </conditional> | |
| 215 <param name="test" value="true"/> | |
| 216 <output name="out_single" ftype="fastqsanger.gz" decompress="true"> | |
| 217 <expand macro="assert"/> | |
| 218 </output> | |
| 219 </test> | |
| 220 | |
| 221 <!-- paired gz input (collection type matches the input's collection_type="paired"), gzipped fasta reference from history --> | |
| 222 <test expect_num_outputs="3"> | |
| 223 <conditional name="sequences"> | |
| 224 <param name="type" value="paired"/> | |
| 225 <param name="reads"> | |
| 226 <collection type="paired" name="reads"> | |
| 227 <element name="forward" value="host_and_contaminant.fq1.fq.gz" ftype="fastqsanger.gz"/> | |
| 228 <element name="reverse" value="host_and_contaminant.fq2.fq.gz" ftype="fastqsanger.gz"/> | |
| 229 </collection> | |
| 230 </param> | |
| 231 </conditional> | |
| 232 <conditional name="host"> | |
| 233 <param name="source" value="history"/> | |
| 234 <param name="sequence" value="host.fa.gz" ftype="fasta.gz"/> | |
| 235 </conditional> | |
| 236 <param name="test" value="true"/> | |
| 237 <output_collection name="out_pair" count="2"> | |
| 238 <expand macro="element_assert" name="forward" ftype="fastqsanger.gz" decompress="true"/> | |
| 239 <expand macro="element_assert" name="reverse" ftype="fastqsanger.gz" decompress="true"/> | |
| 240 </output_collection> | |
| 241 </test> | |
| 242 </tests> | |
| 243 <help><![CDATA[ | |
| 244 | |
| 245 .. class:: infomark | |
| 246 | |
| 247 **What it does** | |
| 248 | |
| 249 Filter contaminant sequences from input FASTA or FASTQ sequences. | |
| 250 | |
| 251 This is done by iteratively applying | |
| 252 | |
| 253 - bmfilter | |
| 254 - srprism | |
| 255 - blastn (megablast) | |
| 256 | |
| 257 Usage | |
| 258 ..... | |
| 259 | |
| 260 **Input** | |
| 261 | |
| 262 FASTA/FASTQ sequences and a reference database. | |
| 263 | |
| 264 **Output** | |
| 265 | |
| 266 FASTA/FASTQ sequences | |
| 267 | |
| 268 ]]></help> | |
| 269 <citations> | |
| 270 <citation type="bibtex">@article{rotmistrovsky2011bmtagger, | |
| 271 title={BMTagger: Best Match Tagger for removing human reads from metagenomics datasets}, | |
| 272 author={Rotmistrovsky, Kirill and Agarwala, Richa}, | |
| 273 journal={NCBI/NLM, National Institutes of Health}, | |
| 274 year={2011} | |
| 275 }</citation> | |
| 276 </citations> | |
| 277 </tool> |
