Mercurial > repos > devteam > samtools_mpileup
annotate samtools_mpileup.xml @ 1:b47a418ccfdc draft
Uploaded tarball for 0.0.2 version on main tool shed.
| author | devteam |
|---|---|
| date | Wed, 12 Mar 2014 12:52:52 -0400 |
| parents | f8ea7725e333 |
| children | 3aa48bcbc599 |
| rev | line source |
|---|---|
|
1
b47a418ccfdc
Uploaded tarball for 0.0.2 version on main tool shed.
devteam
parents:
0
diff
changeset
|
1 <tool id="samtools_mpileup" name="MPileup" version="0.0.2"> |
| 0 | 2 <description>SNP and indel caller</description> |
| <!-- Package dependency of this tool: samtools, pinned to version 0.1.18. --> | |
| 3 <requirements> | |
| 4 <requirement type="package" version="0.1.18">samtools</requirement> | |
| 5 </requirements> | |
| <!-- Command template, run with the python interpreter through samtools_wrapper.py. --> | |
| <!-- Reference: any source other than "history" uses the path field of the selected ref_file entry; "history" hands the dataset to the wrapper with -d. --> | |
| <!-- Every BAM in input_bams is passed with -d, followed by its bam_index metadata file; both share the name bam_input_N. --> | |
| <!-- The final -p '...' argument carries the samtools flags and the redirect to output_mpileup; the -d inside it is the mpileup per-BAM read limit, not the wrapper's -d. --> | |
| <!-- Advanced flags and genotype likelihood flags are emitted only when their selectors equal "advanced" and "perform_genotype_likelihood_computation". --> | |
| <!-- NOTE(review): the exact meaning of the wrapper's -p and -d arguments is defined in samtools_wrapper.py, which is not visible here. --> | |
| 6 <command interpreter="python">samtools_wrapper.py | |
| 7 -p 'samtools mpileup' | |
| 8 --stdout "${output_log}" | |
| 9 #if $reference_source.reference_source_selector != "history": | |
| 10 -p '-f "${reference_source.ref_file.fields.path}"' | |
| 11 #else: | |
| 12 -d "-f" "${reference_source.ref_file}" "fa" "reference_input" | |
| 13 #end if | |
| 14 #for $i, $input_bam in enumerate( $reference_source.input_bams ): | |
| 15 -d " " "${input_bam.input_bam}" "${input_bam.input_bam.ext}" "bam_input_${i}" | |
| 16 -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "bam_input_${i}" ##hardcode galaxy ext type as bam_index | |
| 17 #end for | |
| 18 -p ' | |
| 19 #if str( $advanced_options.advanced_options_selector ) == "advanced": | |
| 20 ${advanced_options.skip_anomalous_read_pairs} | |
| 21 ${advanced_options.disable_probabilistic_realignment} | |
| 22 -C "${advanced_options.coefficient_for_downgrading}" | |
| 23 -d "${advanced_options.max_reads_per_bam}" | |
| 24 ${advanced_options.extended_BAQ_computation} | |
| 25 #if str( $advanced_options.position_list ) != 'None': | |
| 26 -l "${advanced_options.position_list}" | |
| 27 #end if | |
| 28 -q "${advanced_options.minimum_mapping_quality}" | |
| 29 -Q "${advanced_options.minimum_base_quality}" | |
| 30 #if str( $advanced_options.region_string ): | |
| 31 -r "${advanced_options.region_string}" | |
| 32 #end if | |
| 33 ${advanced_options.output_per_sample_read_depth} | |
| 34 ${advanced_options.output_per_sample_strand_bias_p_value} | |
| 35 #end if | |
| 36 #if str( $genotype_likelihood_computation_type.genotype_likelihood_computation_type_selector ) == 'perform_genotype_likelihood_computation': | |
| 37 ##-g or -u | |
| 38 -g | |
| 39 -e "${genotype_likelihood_computation_type.gap_extension_sequencing_error_probability}" | |
| 40 -h "${genotype_likelihood_computation_type.coefficient_for_modeling_homopolymer_errors}" | |
| 41 #if str( $genotype_likelihood_computation_type.perform_indel_calling.perform_indel_calling_selector ) == 'perform_indel_calling': | |
| 42 -L "${genotype_likelihood_computation_type.perform_indel_calling.skip_indel_calling_above_sample_depth}" | |
| 43 #else: | |
| 44 -I | |
| 45 #end if | |
| 46 -o "${genotype_likelihood_computation_type.gap_open_sequencing_error_probability}" | |
| 47 #if len( $genotype_likelihood_computation_type.platform_list_repeat ): | |
| 48 -P "${ ",".join( [ str( platform.platform_entry ) for platform in $genotype_likelihood_computation_type.platform_list_repeat ] ) }" | |
| 49 #end if | |
| 50 #end if | |
| 51 > "${output_mpileup}" | |
| 52 ' | |
| 53 </command> | |
| 54 <inputs> | |
| <!-- Reference source: "cached" picks an entry from the fasta_indexes data table, "history" takes a fasta dataset from the history. Both branches define input_bams (at least one entry) and ref_file, the names the command template reads. --> | |
| 55 <conditional name="reference_source"> | |
| 56 <param name="reference_source_selector" type="select" label="Choose the source for the reference list"> | |
| 57 <option value="cached">Locally cached</option> | |
| 58 <option value="history">History</option> | |
| 59 </param> | |
| 60 <when value="cached"> | |
| <!-- NOTE(review): this branch has no bam_index metadata validator, unlike the history branch, although the command template reads metadata.bam_index for every BAM. Confirm this is intended. --> | |
| 61 <repeat name="input_bams" title="BAM file" min="1"> | |
|
1
b47a418ccfdc
Uploaded tarball for 0.0.2 version on main tool shed.
devteam
parents:
0
diff
changeset
|
62 <param name="input_bam" type="data" format="bam" label="BAM file"> |
|
b47a418ccfdc
Uploaded tarball for 0.0.2 version on main tool shed.
devteam
parents:
0
diff
changeset
|
63 <validator type="unspecified_build" /> |
|
b47a418ccfdc
Uploaded tarball for 0.0.2 version on main tool shed.
devteam
parents:
0
diff
changeset
|
64 <validator type="dataset_metadata_in_data_table" table_name="fasta_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select --> |
|
b47a418ccfdc
Uploaded tarball for 0.0.2 version on main tool shed.
devteam
parents:
0
diff
changeset
|
65 </param> |
| 0 | 66 </repeat> |
| 67 <param name="ref_file" type="select" label="Using reference genome"> | |
|
1
b47a418ccfdc
Uploaded tarball for 0.0.2 version on main tool shed.
devteam
parents:
0
diff
changeset
|
68 <options from_data_table="fasta_indexes"> |
|
b47a418ccfdc
Uploaded tarball for 0.0.2 version on main tool shed.
devteam
parents:
0
diff
changeset
|
69 <!-- <filter type="data_meta" ref="input_bam" key="dbkey" column="1" /> does not yet work in a repeat...--> |
| 0 | 70 </options> |
| 71 </param> | |
| 72 </when> | |
| 73 <when value="history"> <!-- FIX ME!!!! --> | |
| 74 <repeat name="input_bams" title="BAM file" min="1"> | |
|
1
b47a418ccfdc
Uploaded tarball for 0.0.2 version on main tool shed.
devteam
parents:
0
diff
changeset
|
75 <param name="input_bam" type="data" format="bam" label="BAM file"> |
|
b47a418ccfdc
Uploaded tarball for 0.0.2 version on main tool shed.
devteam
parents:
0
diff
changeset
|
76 <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." /> |
|
b47a418ccfdc
Uploaded tarball for 0.0.2 version on main tool shed.
devteam
parents:
0
diff
changeset
|
77 </param> |
| 0 | 78 </repeat> |
| 79 <param name="ref_file" type="data" format="fasta" label="Using reference file" /> | |
| 80 </when> | |
| 81 </conditional> | |
| 82 | |
| 83 | |
| <!-- Genotype likelihood computation switch; off by default. When enabled, the command template adds -g with -e, -h and -o, then either -L (INDEL calling on) or -I (INDEL calling off), and the output format changes to bcf. --> | |
| 84 <conditional name="genotype_likelihood_computation_type"> | |
| 85 <param name="genotype_likelihood_computation_type_selector" type="select" label="Genotype Likelihood Computation"> | |
| 86 <option value="perform_genotype_likelihood_computation">Perform genotype likelihood computation</option> | |
| 87 <option value="do_not_perform_genotype_likelihood_computation" selected="True">Do not perform genotype likelihood computation</option> | |
| 88 </param> | |
| 89 <when value="perform_genotype_likelihood_computation"> | |
| 90 <param name="gap_extension_sequencing_error_probability" type="integer" value="20" label="Phred-scaled gap extension sequencing error probability" /> | |
| 91 <param name="coefficient_for_modeling_homopolymer_errors" type="integer" value="100" label="Coefficient for modeling homopolymer errors." /> | |
| <!-- Nested switch: the depth threshold is only asked for when INDEL calling is performed. --> | |
| 92 <conditional name="perform_indel_calling"> | |
| 93 <param name="perform_indel_calling_selector" type="select" label="Perform INDEL calling"> | |
| 94 <option value="perform_indel_calling" selected="True">Perform INDEL calling</option> | |
| 95 <option value="do_not_perform_indel_calling">Do not perform INDEL calling</option> | |
| 96 </param> | |
| 97 <when value="perform_indel_calling"> | |
| 98 <param name="skip_indel_calling_above_sample_depth" type="integer" value="250" label="Skip INDEL calling if the average per-sample depth is above" /> | |
| 99 </when> | |
| 100 <when value="do_not_perform_indel_calling" /> | |
| 101 </conditional> | |
| 102 <param name="gap_open_sequencing_error_probability" type="integer" value="40" label="Phred-scaled gap open sequencing error probability" /> | |
| <!-- Optional list; the command template joins the entries with commas into a single -P argument and omits -P when the list is empty. --> | |
| 103 <repeat name="platform_list_repeat" title="Platform for INDEL candidates"> | |
| 104 <param name="platform_entry" type="text" value="" label="Platform to use for INDEL candidates" /> | |
| 105 </repeat> | |
| 106 </when> | |
| 107 <when value="do_not_perform_genotype_likelihood_computation"> | |
| 108 <!-- Do nothing here --> | |
| 109 </when> | |
| 110 </conditional> | |
| <!-- Advanced mpileup options; "basic" (the default) contributes no flags to the command template. --> | |
| 111 <conditional name="advanced_options"> | |
| 112 <param name="advanced_options_selector" type="select" label="Set advanced options"> | |
| 113 <option value="basic" selected="True">Basic</option> | |
| 114 <option value="advanced">Advanced</option> | |
| 115 </param> | |
| 116 <when value="advanced"> | |
| <!-- Boolean params expand to their flag when checked and to an empty string otherwise. Integer defaults match the bracketed defaults listed in the help text (-C 0, -d 250, -q 0, -Q 13). --> | |
| 117 <param name="skip_anomalous_read_pairs" type="boolean" truevalue="-A" falsevalue="" checked="False" label="Do not skip anomalous read pairs in variant calling" /> | |
| 118 <param name="disable_probabilistic_realignment" type="boolean" truevalue="-B" falsevalue="" checked="False" label=" Disable probabilistic realignment for the computation of base alignment quality (BAQ)" /> | |
| 119 <param name="coefficient_for_downgrading" type="integer" value="0" label="Coefficient for downgrading mapping quality for reads containing excessive mismatches" /> | |
| 120 <param name="max_reads_per_bam" type="integer" value="250" label="Max reads per BAM" /> | |
| 121 <param name="extended_BAQ_computation" type="boolean" truevalue="-E" falsevalue="" checked="False" label="Extended BAQ computation" /> | |
| <!-- Optional: the command template adds -l only when the string form of this param is not 'None'. --> | |
| 122 <param name="position_list" type="data" format="bed" label="List of regions or sites on which to operate" optional="True" /> | |
| 123 <param name="minimum_mapping_quality" type="integer" value="0" label="Minimum mapping quality for an alignment to be used" /> | |
| 124 <param name="minimum_base_quality" type="integer" value="13" label="Minimum base quality for a base to be considered" /> | |
| <!-- The command template adds -r only when this text is non-empty. --> | |
| 125 <param name="region_string" type="text" value="" label="Only generate pileup in region" /> | |
| 126 <param name="output_per_sample_read_depth" type="boolean" truevalue="-D" falsevalue="" checked="False" label="Output per-sample read depth" /> | |
| 127 <param name="output_per_sample_strand_bias_p_value" type="boolean" truevalue="-S" falsevalue="" checked="False" label="Output per-sample Phred-scaled strand bias P-value" /> | |
| 128 </when> | |
| 129 <when value="basic" /> | |
| 130 </conditional> | |
| 131 </inputs> | |
| <!-- output_mpileup defaults to pileup and switches to bcf when genotype likelihood computation is selected. output_log is the file named by the stdout option in the command template. --> | |
| 132 <outputs> | |
| 133 <data format="pileup" name="output_mpileup" label="${tool.name} on ${on_string}"> | |
| 134 <change_format> | |
| 135 <when input="genotype_likelihood_computation_type.genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" format="bcf" /> | |
| 136 </change_format> | |
| 137 </data> | |
| 138 <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" /> | |
| 139 </outputs> | |
| 140 <tests> | |
| <!-- Test 1: history reference (phiX.fasta), one BAM, no genotype likelihood computation, basic options; expects pileup output. --> | |
| 141 <test> | |
| 142 <param name="reference_source_selector" value="history" /> | |
| 143 <param name="ref_file" value="phiX.fasta" ftype="fasta" /> | |
| 144 <param name="input_bam" value="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" /> | |
| 145 <param name="genotype_likelihood_computation_type_selector" value="do_not_perform_genotype_likelihood_computation" /> | |
| 146 <param name="advanced_options_selector" value="basic" /> | |
| 147 <output name="output_mpileup" file="samtools/mpileup/samtools_mpileup_out_1.pileup" /> | |
| 148 <output name="output_log" file="samtools/mpileup/samtools_mpileup_out_1.log" /> | |
| 149 </test> | |
| <!-- Test 2: same inputs with genotype likelihood computation enabled, using the same values as the param defaults; expects BCF output. --> | |
| <!-- NOTE(review): the expected log is samtools_mpileup_out_1.log, the same file as in the first test. Confirm this reuse is intentional. --> | |
| 150 <test> | |
| 151 <param name="reference_source_selector" value="history" /> | |
| 152 <param name="ref_file" value="phiX.fasta" ftype="fasta" /> | |
| 153 <param name="input_bam" value="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" /> | |
| 154 <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" /> | |
| 155 <param name="gap_extension_sequencing_error_probability" value="20" /> | |
| 156 <param name="coefficient_for_modeling_homopolymer_errors" value="100" /> | |
| 157 <param name="perform_indel_calling_selector" value="perform_indel_calling" /> | |
| 158 <param name="skip_indel_calling_above_sample_depth" value="250" /> | |
| 159 <param name="gap_open_sequencing_error_probability" value="40" /> | |
| 160 <param name="platform_list_repeat" value="0" /> | |
| 161 <param name="advanced_options_selector" value="basic" /> | |
| 162 <output name="output_mpileup" file="samtools/mpileup/samtools_mpileup_out_2.bcf" /> | |
| 163 <output name="output_log" file="samtools/mpileup/samtools_mpileup_out_1.log" /> | |
| 164 </test> | |
| 165 </tests> | |
| 166 <help> | |
| 167 **What it does** | |
| 168 | |
| 169 Generate BCF or pileup for one or multiple BAM files. Alignment records are grouped by sample identifiers in @RG header lines. If sample identifiers are absent, each input file is regarded as one sample. | |
| 170 | |
| 171 ------ | |
| 172 | |
| 173 **Settings**:: | |
| 174 | |
| 175 Input Options: | |
| 176 -6 Assume the quality is in the Illumina 1.3+ encoding. | |
| 177 -A Do not skip anomalous read pairs in variant calling. | |
| 178 -B Disable probabilistic realignment for the computation of base alignment quality (BAQ). BAQ is the Phred-scaled probability of a read base being misaligned. Applying this option greatly helps to reduce false SNPs caused by misalignments. | |
| 179 -b FILE List of input BAM files, one file per line [null] | |
| 180 -C INT Coefficient for downgrading mapping quality for reads containing excessive mismatches. Given a read with a phred-scaled probability q of being generated from the mapped position, the new mapping quality is about sqrt((INT-q)/INT)*INT. A zero value disables this functionality; if enabled, the recommended value for BWA is 50. [0] | |
| 181 -d INT At a position, read maximally INT reads per input BAM. [250] | |
| 182 -E Extended BAQ computation. This option helps sensitivity especially for MNPs, but may hurt specificity a little bit. | |
| 183 -f FILE The faidx-indexed reference file in the FASTA format. The file can be optionally compressed by razip. [null] | |
| 184 -l FILE BED or position list file containing a list of regions or sites where pileup or BCF should be generated [null] | |
| 185 -q INT Minimum mapping quality for an alignment to be used [0] | |
| 186 -Q INT Minimum base quality for a base to be considered [13] | |
| 187 -r STR Only generate pileup in region STR [all sites] | |
| 188 Output Options: | |
| 189 | |
| 190 -D Output per-sample read depth | |
| 191 -g Compute genotype likelihoods and output them in the binary call format (BCF). | |
| 192 -S Output per-sample Phred-scaled strand bias P-value | |
| 193 -u Similar to -g except that the output is uncompressed BCF, which is preferred for piping. | |
| 194 | |
| 195 Options for Genotype Likelihood Computation (for -g or -u): | |
| 196 | |
| 197 -e INT Phred-scaled gap extension sequencing error probability. Reducing INT leads to longer indels. [20] | |
| 198 -h INT Coefficient for modeling homopolymer errors. Given an l-long homopolymer run, the sequencing error of an indel of size s is modeled as INT*s/l. [100] | |
| 199 -I Do not perform INDEL calling | |
| 200 -L INT Skip INDEL calling if the average per-sample depth is above INT. [250] | |
| 201 -o INT Phred-scaled gap open sequencing error probability. Reducing INT leads to more indel calls. [40] | |
| 202 -P STR Comma-delimited list of platforms (determined by @RG-PL) from which indel candidates are obtained. It is recommended to collect indel candidates from sequencing technologies that have a low indel error rate such as ILLUMINA. [all] | |
| 203 | |
| 204 ------ | |
| 205 | |
| 206 **Citation** | |
| 207 | |
| 208 For the underlying tool, please cite `Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. <http://www.ncbi.nlm.nih.gov/pubmed/19505943>`_ | |
| 209 | |
| 210 If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.* | |
| 211 | |
| 212 </help> | |
| 213 </tool> |
