Mercurial > repos > iuc > magicblast
comparison magicblast.xml @ 0:0aa444798f96 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blast commit 15fc6c06f743bae276ff02dc405e7da61a07bd08"
| author | iuc |
|---|---|
| date | Tue, 05 Apr 2022 12:10:32 +0000 |
| parents | |
| children | 3db0c572fb04 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:0aa444798f96 |
|---|---|
| 1 <tool id="magicblast" name="Magic-BLAST: map large RNA or DNA sequences" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
| 2 <description>against a whole genome or transcriptome</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="requirements"/> | |
| 7 <command detect_errors="exit_code"><![CDATA[ | |
| 8 #import os | |
| 9 | |
| 10 magicblast | |
| 11 -num_threads \${GALAXY_SLOTS:-8} | |
| 12 #if $query.is_of_type('fasta.gz', 'fastqsanger.gz'): | |
| 13 -query <(gunzip -c '${query}') | |
| 14 #else: | |
| 15 -query '${query}' | |
| 16 #end if | |
| 17 #if $query_mate: | |
| 18 -paired ## the mate dataset is type-checked and passed on its own, never reused from the forward query | |
| 19 #if $query_mate.is_of_type('fasta.gz', 'fastqsanger.gz'): | |
| 20 -query_mate <(gunzip -c '${query_mate}') | |
| 21 #else: | |
| 22 -query_mate '${query_mate}' | |
| 23 #end if | |
| 24 #end if | |
| 25 ## Pass -infmt fastq when the query dataset is fastqsanger (plain or gzipped); no -infmt flag is emitted otherwise. | |
| 26 #if $query.is_of_type('fastqsanger', 'fastqsanger.gz'): | |
| 27 -infmt fastq | |
| 28 #end if | |
| 29 ## Pick the search target: a BLAST database from the history, a locally installed one from the data table, or a subject FASTA file (gunzipped on the fly when it is fasta.gz). | |
| 30 #if $db_opts.db_opts_selector == "histdb": | |
| 31 -db '${os.path.join($db_opts.histdb.extra_files_path, "blastdb")}' | |
| 32 #elif $db_opts.db_opts_selector == "db": | |
| 33 -db '${os.path.join($db_opts.database.fields.path, "blastdb")}' | |
| 34 #else: | |
| 35 #if $db_opts.subject.is_of_type('fasta.gz'): | |
| 36 -subject <(gunzip -c '${$db_opts.subject}') | |
| 37 #else: | |
| 38 -subject '${db_opts.subject}' | |
| 39 #end if | |
| 40 #end if | |
| 41 | |
| 42 ## General search options: word size, gap open/extend costs, mismatch penalty and maximum intron length, each read from the general_search section | |
| 43 -word_size $general_search.word_size | |
| 44 -gapopen $general_search.gapopen | |
| 45 -gapextend $general_search.gapextend | |
| 46 -penalty $general_search.penalty | |
| 47 -max_intron_length $general_search.max_intron_length | |
| 48 | |
| 49 ## Query filtering options. lcase_masking is a boolean that expands to the flag itself or to nothing; validate_seqs and limit_lookup expand to true or false values. | |
| 50 $query_filtering.lcase_masking | |
| 51 -validate_seqs $query_filtering.validate_seqs | |
| 52 -limit_lookup $query_filtering.limit_lookup | |
| 53 -max_db_word_count $query_filtering.max_db_word_count | |
| 54 -lookup_stride $query_filtering.lookup_stride | |
| 55 | |
| 56 ## Restrict database search. Each filter is optional and only emitted when set; all flags use the single-dash form, matching the argument names declared on the parameters. | |
| 57 #if $restrict_search.gilist: | |
| 58 -gilist '$restrict_search.gilist' | |
| 59 #end if | |
| 60 #if $restrict_search.negative_gilist: | |
| 61 -negative_gilist '$restrict_search.negative_gilist' | |
| 62 #end if | |
| 63 #if $restrict_search.seqidlist: | |
| 64 -seqidlist '$restrict_search.seqidlist' | |
| 65 #end if | |
| 66 #if $restrict_search.negative_seqidlist: | |
| 67 -negative_seqidlist '$restrict_search.negative_seqidlist' | |
| 68 #end if | |
| 69 #if str($restrict_search.taxids) != '': | |
| 70 -taxids '$restrict_search.taxids' | |
| 71 #end if | |
| 72 #if $restrict_search.taxidlist: | |
| 73 -taxidlist '$restrict_search.taxidlist' | |
| 74 #end if | |
| 75 #if str($restrict_search.negative_taxids) != '': | |
| 76 -negative_taxids '$restrict_search.negative_taxids' | |
| 77 #end if | |
| 78 #if $restrict_search.negative_taxidlist: | |
| 79 -negative_taxidlist '$restrict_search.negative_taxidlist' | |
| 80 #end if | |
| 81 | |
| 82 ## Mapping options. -max_edit_dist is only passed for values above zero (the parameter help describes zero as unlimited). | |
| 83 -score $mapping.score | |
| 84 #if $mapping.max_edit_dist > 0: | |
| 85 -max_edit_dist $mapping.max_edit_dist | |
| 86 #end if | |
| 87 -splice '$mapping.splice' | |
| 88 -reftype '$mapping.reftype' | |
| 89 | |
| 90 ## Output unaligned options. When a separate file is requested, unaligned reads go to the working file 'out_unaligned'; a bam selection is written as sam here and converted by a later samtools step in this template. -no_unaligned is passed when unaligned reads are not reported. | |
| 91 #if str($output_options.report_unaligned_cond.report_unaligned) == 'yes': | |
| 92 #if str($output_options.report_unaligned_cond.report_unaligned_separately_cond.report_unaligned_separately) == 'yes': | |
| 93 -out_unaligned 'out_unaligned' | |
| 94 #if str($output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.unaligned_fmt) == 'bam': | |
| 95 -unaligned_fmt 'sam' | |
| 96 #else: | |
| 97 -unaligned_fmt '$output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.unaligned_fmt' | |
| 98 #end if | |
| 99 #end if | |
| 100 #else: | |
| 101 -no_unaligned | |
| 102 #end if | |
| 103 | |
| 104 ## Additional output options | |
| 105 $output_options.no_discordant | |
| 106 ## Switch default SAM output to be BAM: magicblast writes 'output.sam', then samtools reads that file and streams BAM on stdout into the output dataset. The SAM file is always the positional input and never the -o target. | |
| 107 #if str($output_options.outfmt_cond.outfmt) == 'bam': | |
| 108 $output_options.outfmt_cond.md_tag | |
| 109 #if $query_mate: | |
| 110 $output_options.outfmt_cond.no_query_id_trim | |
| 111 #end if | |
| 112 -out 'output.sam' | |
| 113 #if str($output_options.outfmt_cond.output_sort) == 'coordinate': | |
| 114 && samtools sort -@\${GALAXY_SLOTS:-4} -O bam 'output.sam' > '$output' | |
| 115 #elif str($output_options.outfmt_cond.output_sort) == 'name': | |
| 116 && samtools sort -n -@\${GALAXY_SLOTS:-4} -O bam 'output.sam' > '$output' | |
| 117 #else: | |
| 118 && samtools view -@\${GALAXY_SLOTS:-4} -bS 'output.sam' > '$output' | |
| 119 #end if | |
| 120 #else: | |
| 121 -out '$output' | |
| 122 -outfmt '$output_options.outfmt_cond.outfmt' | |
| 123 #end if | |
| 124 | |
| 125 ## Convert out_unaligned from SAM to BAM if necessary | |
| 126 ## 'out_unaligned' is the positional input of samtools and the BAM is streamed on stdout into the unaligned output dataset; non-BAM formats are simply moved into place. | |
| 127 #if str($output_options.report_unaligned_cond.report_unaligned) == 'yes': | |
| 128 #if str($output_options.report_unaligned_cond.report_unaligned_separately_cond.report_unaligned_separately) == 'yes': | |
| 129 #if str($output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.unaligned_fmt) == 'bam': | |
| 130 #if str($output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.output_sort) == 'coordinate': | |
| 131 && samtools sort -@\${GALAXY_SLOTS:-4} -O bam 'out_unaligned' > '$output_unaligned' | |
| 132 #elif str($output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.output_sort) == 'name': | |
| 133 && samtools sort -n -@\${GALAXY_SLOTS:-4} -O bam 'out_unaligned' > '$output_unaligned' | |
| 134 #else: | |
| 135 && samtools view -@\${GALAXY_SLOTS:-4} -bS 'out_unaligned' > '$output_unaligned' | |
| 136 #end if | |
| 137 #else: | |
| 138 && mv 'out_unaligned' '$output_unaligned' | |
| 139 #end if | |
| 140 #end if | |
| 141 #end if | |
| 142 ]]></command> | |
| 143 <inputs> | |
| 144 <param argument="-query" type="data" format="fasta,fasta.gz,fastqsanger,fastqsanger.gz" label="Query file" help="Fasta or fastqsanger, optionally gzipped"/> | |
| 145 <param argument="-query_mate" type="data" format="fasta,fasta.gz,fastqsanger,fastqsanger.gz" optional="true" label="Query mate file (optional)" help="Fasta or fastqsanger, optionally gzipped"/> | |
| 146 <conditional name="db_opts"> <!-- Search target selector: the chosen branch decides whether the command template reads histdb, database or subject. --> | |
| 147 <param name="db_opts_selector" type="select" label="Subject database/sequences"> | |
| 148 <option value="histdb" selected="true">blast database from your history</option> | |
| 149 <option value="db">Locally installed blast database</option> | |
| 150 <option value="file">fasta file from your history (see warning in the tool help section below)</option> | |
| 151 </param> | |
| 152 <when value="histdb"> | |
| 153 <param name="histdb" type="data" format="blastdbn" label="Nucleotide blast database"/> | |
| 154 </when> | |
| 155 <when value="db"> | |
| 156 <param name="database" type="select" multiple="true" optional="false" label="Nucleotide blast database"> | |
| 157 <options from_data_table="blastdb"/> | |
| 158 </param> | |
| 159 </when> | |
| 160 <when value="file"> | |
| 161 <param argument="-subject" type="data" format="fasta,fasta.gz" label="Nucleotide fasta subject file to use instead of a database"/> | |
| 162 </when> | |
| 163 </conditional> | |
| 164 <section name="general_search" title="General search"> <!-- Integer parameters that the command template passes to -word_size, -gapopen, -gapextend, -penalty and -max_intron_length. --> | |
| 165 <param argument="-word_size" type="integer" value="18" min="12" label="Minimum number of consecutive bases matching exactly"/> | |
| 166 <param argument="-gapopen" type="integer" value="0" min="0" label="Cost to open a gap"/> | |
| 167 <param argument="-gapextend" type="integer" value="0" min="0" label="Cost to extend a gap"/> | |
| 168 <param argument="-penalty" type="integer" value="-4" max="0" label="Penalty for a nucleotide mismatch"/> | |
| 169 <param argument="-max_intron_length" type="integer" value="500000" min="0" label="Maximum allowed intron length"/> | |
| 170 </section> | |
| 171 <section name="query_filtering" title="Query filtering"> <!-- lcase_masking renders as the bare flag when checked and as nothing otherwise; validate_seqs and limit_lookup render as true or false. --> | |
| 172 <param argument="-lcase_masking" type="boolean" truevalue="-lcase_masking" falsevalue="" checked="false" label="Use lower case filtering in subject sequences?"/> | |
| 173 <param argument="-validate_seqs" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Reject low quality sequences?"/> | |
| 174 <param argument="-limit_lookup" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Remove word seeds with high frequency in the searched database?"/> | |
| 175 <param argument="-max_db_word_count" type="integer" value="30" min="0" label="Words that appear more than this number of times in the database will be masked in the lookup table"/> | |
| 176 <param argument="-lookup_stride" type="integer" value="0" min="0" label="Number of words to skip after collecting one while creating a lookup table"/> | |
| 177 </section> | |
| 178 <section name="restrict_search" title="Restrict database search"> <!-- All filters are optional; the command template only emits a flag for the ones that are set. --> | |
| 179 <param argument="-gilist" type="data" format="tabular" optional="true" label="Tabular file containing list of GIs to which to restrict database search" help="Available only for database searches"/> | |
| 180 <param argument="-negative_gilist" type="data" format="tabular" optional="true" label="Tabular file containing list of GIs to restrict database search to everything except the specified GIs" help="Available only for database searches"/> | |
| 181 <param argument="-seqidlist" type="data" format="tabular" optional="true" label="Tabular file containing list of SeqIDs to which to restrict database search" help="Available only for database searches"/> | |
| 182 <param argument="-negative_seqidlist" type="data" format="tabular" optional="true" label="Tabular file containing list of SeqIDs to restrict database search to everything except the specified SeqIDs" help="Available only for database searches"/> | |
| 183 <param argument="-taxids" type="text" optional="true" label="Comma-separated list of taxonomy IDs to which to restrict database search" help="Available only for database searches"> | |
| 184 <expand macro="sanitize_query" validinitial="string.ascii_letters,string.digits,string.whitespace,string.punctuation"/> | |
| 185 </param> | |
| 186 <param argument="-taxidlist" type="data" format="tabular" optional="true" label="Tabular file containing list of taxonomy IDs to which to restrict database search" help="Available only for database searches"/> | |
| 187 <param argument="-negative_taxids" type="text" optional="true" label="Comma-separated list of taxonomy IDs to restrict database search to everything except the specified taxonomy IDs" help="Available only for database searches"> | |
| 188 <expand macro="sanitize_query" validinitial="string.ascii_letters,string.digits,string.whitespace,string.punctuation"/> | |
| 189 </param> | |
| 190 <param argument="-negative_taxidlist" type="data" format="tabular" optional="true" label="Tabular file containing list of taxonomy IDs to restrict database search to everything except the specified taxonomy IDs" help="Available only for database searches"/> | |
| 191 </section> | |
| 192 <section name="mapping" title="Mapping"> <!-- score and max_edit_dist default to 0; splice renders as true or false; reftype is either genome or transcriptome. --> | |
| 193 <param argument="-score" type="integer" value="0" min="0" label="Cutoff score for accepting alignments" help="Zero value ignores"/> | |
| 194 <param argument="-max_edit_dist" type="integer" value="0" min="0" label="Cutoff edit distance for accepting an alignment" help="Zero value is unlimited"/> | |
| 195 <param argument="-splice" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Search for spliced alignments?"/> | |
| 196 <param argument="-reftype" type="select" label="Type of the reference"> | |
| 197 <option value="genome" selected="true">genome</option> | |
| 198 <option value="transcriptome">transcriptome</option> | |
| 199 </param> | |
| 200 </section> | |
| 201 <section name="output_options" title="Output options"> <!-- Nested conditionals: whether unaligned reads are reported, whether they go to a separate file and in which format, then the main output format. The bam branches expand output_sort_param, presumably defined in the imported macros.xml (not visible here). --> | |
| 202 <conditional name="report_unaligned_cond"> | |
| 203 <param name="report_unaligned" type="select" label="Report unaligned reads?"> | |
| 204 <option value="yes" selected="true">Yes</option> | |
| 205 <option value="no">No</option> | |
| 206 </param> | |
| 207 <when value="yes"> | |
| 208 <conditional name="report_unaligned_separately_cond"> | |
| 209 <param name="report_unaligned_separately" type="select" label="Output unaligned reads to a separate file?" help="Select No to output all reads to the same file"> | |
| 210 <option value="no" selected="true">No</option> | |
| 211 <option value="yes">Yes</option> | |
| 212 </param> | |
| 213 <when value="no"/> | |
| 214 <when value="yes"> | |
| 215 <conditional name="unaligned_fmt_cond"> | |
| 216 <param argument="-unaligned_fmt" type="select" label="Output format for unaligned reads"> | |
| 217 <option value="bam" selected="true">bam</option> | |
| 218 <option value="tabular">tabular</option> | |
| 219 <option value="fasta">fasta</option> | |
| 220 </param> | |
| 221 <when value="bam"> | |
| 222 <expand macro="output_sort_param"/> | |
| 223 </when> | |
| 224 <when value="tabular"/> | |
| 225 <when value="fasta"/> | |
| 226 </conditional> | |
| 227 </when> | |
| 228 </conditional> | |
| 229 </when> | |
| 230 <when value="no"/> | |
| 231 </conditional> | |
| 232 <conditional name="outfmt_cond"> | |
| 233 <param argument="-outfmt" type="select" label="Output format"> | |
| 234 <option value="bam" selected="true">bam</option> | |
| 235 <option value="tabular">tabular</option> | |
| 236 </param> | |
| 237 <when value="bam"> | |
| 238 <expand macro="output_sort_param"/> | |
| 239 <param argument="-md_tag" type="boolean" truevalue="-md_tag" falsevalue="" checked="false" label="Include MD tag in BAM output?"/> | |
| 240 <param argument="-no_query_id_trim" type="boolean" truevalue="-no_query_id_trim" falsevalue="" checked="false" label="Do not trim '.1', '/1', '.2', or '/2' at the end of read ids in BAM output for paired reads?" help="Ignored if no query mate"/> | |
| 241 </when> | |
| 242 <when value="tabular"/> | |
| 243 </conditional> | |
| 244 <param argument="-no_discordant" type="boolean" truevalue="-no_discordant" falsevalue="" checked="false" label="Suppress discordant alignments for paired reads?" help="Ignored if no query mate"/> | |
| 245 </section> | |
| 246 </inputs> | |
| 247 <outputs> <!-- Both datasets default to bam; change_format switches the datatype according to the format selected under the output_options section, so each input path must start with that section name. --> | |
| 248 <data name="output" format="bam" label="${tool.name} on ${on_string}"> | |
| 249 <change_format> | |
| 250 <when input="output_options.outfmt_cond.outfmt" value="tabular" format="tabular"/> | |
| 251 </change_format> | |
| 252 </data> | |
| 253 <data name="output_unaligned" format="bam" label="${tool.name} on ${on_string}: unaligned reads"> | |
| 254 <filter>output_options['report_unaligned_cond']['report_unaligned'] == 'yes' and output_options['report_unaligned_cond']['report_unaligned_separately_cond']['report_unaligned_separately'] == 'yes'</filter> | |
| 255 <change_format> | |
| 256 <when input="output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.unaligned_fmt" value="tabular" format="tabular"/> | |
| 257 <when input="output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.unaligned_fmt" value="fasta" format="fasta"/> | |
| 258 </change_format> | |
| 259 </data> | |
| 260 </outputs> | |
| 261 <tests> | |
| 262 <!-- Single fasta.gz query searched against a fasta.gz subject file with all other options left at their defaults; expects exactly one output, a BAM of about 1247 bytes (tolerance 50). --> | |
| 263 <test expect_num_outputs="1"> | |
| 264 <param name="query" value="query1.fasta.gz" ftype="fasta.gz"/> | |
| 265 <param name="db_opts_selector" value="file"/> | |
| 266 <param name="subject" value="subject1.fasta.gz" ftype="fasta.gz"/> | |
| 267 <output name="output" ftype="bam"> | |
| 268 <assert_contents> | |
| 269 <has_size value="1247" delta="50"/> | |
| 270 </assert_contents> | |
| 271 </output> | |
| 272 </test> | |
| 273 <!-- Single fasta.gz query against a fasta.gz subject file, with unaligned reads written to a separate tabular file; expects two outputs, a BAM of about 492 bytes (tolerance 50) and a tabular file of exactly 959 bytes. --> | |
| 274 <test expect_num_outputs="2"> | |
| 275 <param name="query" value="query1.fasta.gz" ftype="fasta.gz"/> | |
| 276 <param name="db_opts_selector" value="file"/> | |
| 277 <param name="subject" value="subject1.fasta.gz" ftype="fasta.gz"/> | |
| 278 <param name="report_unaligned_separately" value="yes"/> | |
| 279 <param name="unaligned_fmt" value="tabular"/> | |
| 280 <output name="output" ftype="bam"> | |
| 281 <assert_contents> | |
| 282 <has_size value="492" delta="50"/> | |
| 283 </assert_contents> | |
| 284 </output> | |
| 285 <output name="output_unaligned" ftype="tabular"> | |
| 286 <assert_contents> | |
| 287 <has_size value="959"/> | |
| 288 </assert_contents> | |
| 289 </output> | |
| 290 </test> | |
| 291 <!-- Single fasta.gz query against a subject file combined with a gilist file; the job is expected to fail and stderr must contain "Incompatible with argument:". --> | |
| 292 <test expect_failure="true"> | |
| 293 <param name="query" value="query1.fasta.gz" ftype="fasta.gz"/> | |
| 294 <param name="db_opts_selector" value="file"/> | |
| 295 <param name="subject" value="subject1.fasta.gz" ftype="fasta.gz"/> | |
| 296 <param name="report_unaligned_separately" value="yes"/> | |
| 297 <param name="gilist" value="gilist1.tabular" ftype="tabular"/> | |
| 298 <assert_stderr> | |
| 299 <has_text text="Incompatible with argument:"/> | |
| 300 </assert_stderr> | |
| 301 </test> | |
| 302 <!-- Single fasta.gz query against the cached phiX174 database with a taxidlist file; the job is expected to fail and stderr must report that taxonomy filtering is not supported in v4 BLAST dbs. --> | |
| 303 <test expect_failure="true"> | |
| 304 <param name="query" value="query1.fasta.gz" ftype="fasta.gz"/> | |
| 305 <param name="db_opts_selector" value="db"/> | |
| 306 <param name="database" value="phiX174"/> | |
| 307 <param name="taxidlist" value="taxids.tabular" ftype="tabular"/> | |
| 308 <assert_stderr> | |
| 309 <has_text text="Taxonomy filtering is not supported in v4 BLAST dbs"/> | |
| 310 </assert_stderr> | |
| 311 </test> | |
| 312 <!-- Paired fastqsanger.gz reads (forward query plus query_mate) against a fasta.gz subject file; expects one BAM output of about 62080 bytes (tolerance 50). --> | |
| 313 <test expect_num_outputs="1"> | |
| 314 <param name="query" value="query_forward1.fastqsanger.gz" ftype="fastqsanger.gz"/> | |
| 315 <param name="query_mate" value="query_reverse1.fastqsanger.gz" ftype="fastqsanger.gz"/> | |
| 316 <param name="db_opts_selector" value="file"/> | |
| 317 <param name="subject" value="subject1.fasta.gz" ftype="fasta.gz"/> | |
| 318 <output name="output" ftype="bam"> | |
| 319 <assert_contents> | |
| 320 <has_size value="62080" delta="50"/> | |
| 321 </assert_contents> | |
| 322 </output> | |
| 323 </test> | |
| 324 <!-- Paired fastqsanger.gz reads (forward query plus query_mate) against the cached phiX174 blast database; expects one BAM output of about 62079 bytes (tolerance 50). --> | |
| 325 <test expect_num_outputs="1"> | |
| 326 <param name="query" value="query_forward1.fastqsanger.gz" ftype="fastqsanger.gz"/> | |
| 327 <param name="query_mate" value="query_reverse1.fastqsanger.gz" ftype="fastqsanger.gz"/> | |
| 328 <param name="db_opts_selector" value="db"/> | |
| 329 <param name="database" value="phiX174"/> | |
| 330 <output name="output" ftype="bam"> | |
| 331 <assert_contents> | |
| 332 <has_size value="62079" delta="50"/> | |
| 333 </assert_contents> | |
| 334 </output> | |
| 335 </test> | |
| 336 </tests> | |
| 337 <help><![CDATA[ | |
| 338 **What it does** | |
| 339 | |
| 340 .. class:: warningmark | |
| 341 | |
| 342 In addition to a BLAST database, you can also search against a fasta file of subject (target) sequences. However, this is not | |
| 343 advised because it is slower (only one CPU is used), but more importantly gives e-values for pairwise searches (very small | |
| 344 e-values which will look overly significant). In most cases you should convert the fasta file into a blast database using | |
| 345 *makeblastdb* and search against that. | |
| 346 | |
| 347 Magic-BLAST is a tool for mapping large next-generation RNA or DNA sequencing runs against a whole genome or transcriptome. | |
| 348 Each alignment optimizes a composite score, taking into account simultaneously the two reads of a pair, and in case of RNA-seq, | |
| 349 locating the candidate introns and adding up the score of all exons. This is very different from other versions of BLAST, where | |
| 350 each exon is scored as a separate hit and read-pairing is ignored. | |
| 351 | |
| 352 Magic-BLAST incorporates within the NCBI BLAST code framework ideas developed in the NCBI Magic pipeline, in particular hit | |
| 353 extensions by local walk and jump, and recursive clipping of mismatches near the edges of the reads, which avoids accumulating | |
| 354 artefactual mismatches near splice sites and is needed to distinguish short indels from substitutions near the edges. | |
| 355 | |
| 356 The tool accepts a single or paired set of reads in fasta or fastqsanger format and produces bam or tabular output. | |
| 357 | |
| 358 More information about Magic-BLAST is available in the | |
| 359 `online documentation <https://ncbi.github.io/magicblast/>`_. | |
| 360 ]]></help> | |
| 361 <expand macro="citations"/> | |
| 362 </tool> |
