Mercurial > repos > iuc > bbtools_bbmap
comparison bbmap.xml @ 0:2b64db8ec69e draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bbtools commit 3682ff4e2e47438e975fc04f92469eca7814fcfa"
| author | iuc |
|---|---|
| date | Mon, 04 Oct 2021 12:13:52 +0000 |
| parents | |
| children | 7bc91654244d |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:2b64db8ec69e |
|---|---|
| 1 <tool id="bbtools_bbmap" name="BBTools: BBMap" version="@WRAPPER_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
| 2 <description>short-read aligner</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="requirements"/> | |
| 7 <command detect_errors="exit_code"><![CDATA[ | |
| 8 #import os | |
| 9 #import re | |
| 10 ## Resolve the reference: the path field of the cached entry, or the history dataset itself. | |
| 11 #if str($ref_source_cond.ref_source) == 'cached' | |
| 12 #set ref = str($ref_source_cond.reference.fields.path) | |
| 13 #else: | |
| 14 #set ref = $ref_source_cond.reference | |
| 15 #end if | |
| 16 ## Symlink the reads under sanitized names that end in .fastq or .fastq.gz. | |
| 17 #if str($input_type_cond.input_type) in ['single', 'pair']: | |
| 18 #set read1 = $input_type_cond.read1 | |
| 19 #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.element_identifier)) | |
| 20 ## bbmap uses the file extension to determine the input format. NOTE(review): ext also carries the read1 identifier, so link names repeat it. | |
| 21 #set ext = $read1_identifier + '.fastq' | |
| 22 #if $read1.ext.endswith('.gz'): | |
| 23 #set ext = $ext + '.gz' | |
| 24 #end if | |
| 25 #set read1_file = $read1_identifier + $ext | |
| 26 ln -s '${read1}' '${read1_file}' && | |
| 27 #if str($input_type_cond.input_type) == 'pair': | |
| 28 #set read2 = $input_type_cond.read2 | |
| 29 #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.element_identifier)) | |
| 30 #set read2_file = $read2_identifier + $ext | |
| 31 ln -s '${read2}' '${read2_file}' && | |
| 32 #end if | |
| 33 #else: | |
| 34 #set read1 = $input_type_cond.reads_collection['forward'] | |
| 35 #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.name)) | |
| 36 ## bbmap uses the file extension to determine the input format (same naming scheme as the branch above). | |
| 37 #set ext = $read1_identifier + '.fastq' | |
| 38 #if $read1.ext.endswith('.gz'): | |
| 39 #set ext = $ext + '.gz' | |
| 40 #end if | |
| 41 #set read1_file = $read1_identifier + $ext | |
| 42 ln -s '${read1}' '${read1_file}' && | |
| 43 #set read2 = $input_type_cond.reads_collection['reverse'] | |
| 44 #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.name)) | |
| 45 #set read2_file = $read2_identifier + $ext | |
| 46 ln -s '${read2}' '${read2_file}' && | |
| 47 #end if | |
| 48 ## Map with bbmap, then sort with samtools or move the BAM as-is. Steps are chained with the shell AND operator so a failed mapping stops the job. | |
| 49 bbmap.sh t=\${GALAXY_SLOTS:-4} ref='${ref}' | |
| 50 #if str($input_type_cond.input_type) == 'single': | |
| 51 in='${read1_file}' | |
| 52 #else: | |
| 53 in1='${read1_file}' in2='${read2_file}' | |
| 54 #end if | |
| 55 #if str($output_sort) == 'coordinate': | |
| 56 out='mapped.bam' && samtools sort -@\${GALAXY_SLOTS:-4} -T "\${TMPDIR:-.}" -O bam -o '$output' 'mapped.bam' | |
| 57 #elif str($output_sort) == 'name': | |
| 58 out='mapped.bam' && samtools sort -n -@\${GALAXY_SLOTS:-4} -T "\${TMPDIR:-.}" -O bam -o '$output' 'mapped.bam' | |
| 59 #else: | |
| 60 out='mapped.bam' && mv 'mapped.bam' '$output' | |
| 61 #end if | |
| 62 ]]></command> | |
| 63 <inputs> | |
| 64 <conditional name="input_type_cond"> <!-- How the reads are supplied; the command template branches on input_type --> | |
| 65 <param name="input_type" type="select" label="Choose the category of the files to be analyzed"> | |
| 66 <option value="single" selected="true">Single dataset</option> | |
| 67 <option value="pair">Dataset pair</option> | |
| 68 <option value="paired">List of dataset pairs</option> | |
| 69 </param> | |
| 70 <when value="single"> | |
| 71 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/> | |
| 72 </when> | |
| 73 <when value="pair"> | |
| 74 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/> | |
| 75 <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Read2 fastq file"/> | |
| 76 </when> | |
| 77 <when value="paired"> <!-- The command reads the 'forward' and 'reverse' elements of this collection --> | |
| 78 <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="paired" label="Collection of fastqsanger paired read files"/> | |
| 79 </when> | |
| 80 </conditional> | |
| 81 <expand macro="reference_source_cond"/> <!-- Macro not shown here (presumably defined in the imported macros.xml); the command reads ref_source_cond.ref_source and ref_source_cond.reference --> | |
| 82 <param name="output_sort" type="select" label="BAM sorting mode" help="The 'Not sorted' option can significantly extend the run time of the tool (it runs using a single thread)."> <!-- Selects the samtools sort step in the command and the output datatype in change_format --> | |
| 83 <option value="coordinate" selected="True">Sort by chromosomal coordinates</option> | |
| 84 <option value="name">Sort by read names</option> | |
| 85 <option value="unsorted">Not sorted (sorted as input)</option> | |
| 86 </param> | |
| 87 </inputs> | |
| 88 <outputs> | |
| 89 <data format="bam" name="output" label="${tool.name} on ${on_string} (mapped reads in BAM format)"> <!-- Declared as bam; change_format below switches the datatype for name-sorted and unsorted output --> | |
| 90 <expand macro="dbKeyActionsBBMap"/> <!-- Macro not visible here; presumably sets the output dbkey (the tests check the dbkey metadata), TODO confirm --> | |
| 91 <change_format> | |
| 92 <when input="output_sort" value="name" format="qname_sorted.bam" /> | |
| 93 <when input="output_sort" value="unsorted" format="qname_input_sorted.bam" /> | |
| 94 </change_format> | |
| 95 </data> | |
| 96 </outputs> | |
| 97 <tests> | |
| 98 <!-- Single file, cached reference, output coordinate sorted --> | |
| 99 <test expect_num_outputs="1"> <!-- ref_source and output_sort are not set here, so the tool defaults apply --> | |
| 100 <param name="input_type" value="single"/> | |
| 101 <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz"/> | |
| 102 <output name="output" file="output1.bam" ftype="bam" lines_diff="4"> | |
| 103 <metadata name="dbkey" value="89" /> | |
| 104 </output> | |
| 105 </test> | |
| 106 <!-- Paired reads in separate datasets, cached reference, output name sorted --> | |
| 107 <test expect_num_outputs="1"> <!-- ref_source is not set here, so its default applies --> | |
| 108 <param name="input_type" value="pair"/> | |
| 109 <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz"/> | |
| 110 <param name="read2" value="13-1941-6_S4_L001_R2_600000.fastq.gz"/> | |
| 111 <param name="output_sort" value="name"/> | |
| 112 <output name="output" file="output2.bam" ftype="qname_sorted.bam" lines_diff="4"> | |
| 113 <metadata name="dbkey" value="89" /> | |
| 114 </output> | |
| 115 </test> | |
| 116 <!-- Collection of Paired reads, history reference, output unsorted --> | |
| 117 <test expect_num_outputs="1"> <!-- The only test that supplies the reference from the history, with an explicit dbkey --> | |
| 118 <param name="input_type" value="paired"/> | |
| 119 <param name="reads_collection"> | |
| 120 <collection type="paired"> | |
| 121 <element name="forward" value="13-1941-6_S4_L001_R1_600000.fastq.gz"/> | |
| 122 <element name="reverse" value="13-1941-6_S4_L001_R2_600000.fastq.gz"/> | |
| 123 </collection> | |
| 124 </param> | |
| 125 <param name="ref_source" value="history"/> | |
| 126 <param name="reference" value="NC_002945v4.fasta" dbkey="89" ftype="fasta"/> | |
| 127 <param name="output_sort" value="unsorted"/> | |
| 128 <output name="output" file="output3.bam" ftype="qname_input_sorted.bam" lines_diff="4"> | |
| 129 <metadata name="dbkey" value="89" /> | |
| 130 </output> | |
| 131 </test> | |
| 132 </tests> | |
| 133 <help> | |
| 134 **What it does** | |
| 135 | |
| 136 BBMap is a splice-aware global aligner for DNA and RNA sequencing reads. It is fast and extremely accurate, particularly | |
| 137 with highly mutated genomes or reads with long indels, even whole-gene deletions over 100kbp long. It has no upper limit | |
| 138 to genome size or number of contigs and has been successfully used for mapping to an 85 gigabase soil metagenome with over | |
| 139 200 million contigs. The indexing phase is very fast compared to other aligners. | |
| 140 | |
| 141 BBMap can output many different statistics files: an empirical read quality histogram, insert-size distribution, and genome | |
| 142 coverage with or without generating a sam file. It is useful in quality control of libraries and sequencing runs or | |
| 143 evaluating new sequencing platforms. | |
| 144 | |
| 145 **Options** | |
| 146 | |
| 147 *BAM sorting mode* - the generated BAM files can be sorted according to three criteria: coordinates, names and input order. | |
| 148 | |
| 149 * Sort by chromosomal coordinates - the file is sorted by coordinates (i.e., the reads from the beginning of the first | |
| 150 chromosome are first in the file). | |
| 151 * Sort by read names - the file is sorted by the read name (i.e., the QNAME field). | |
| 152 * Not sorted (sorted as input) - the file is sorted in the order of the reads in the input file. | |
| 153 | |
| 154 </help> | |
| 155 <expand macro="citations"/> | |
| 156 </tool> | |
| 157 |
