Mercurial > repos > devteam > samtools_mpileup
diff samtools_mpileup.xml @ 3:da0203c3461a draft
planemo upload commit 9c291d7ad7c82e6051fe3e5ddffbe7a5edf6f006
author | devteam |
---|---|
date | Fri, 11 Sep 2015 14:16:52 -0400 |
parents | 3aa48bcbc599 |
children | 5872c9894a37 |
line wrap: on
line diff
--- a/samtools_mpileup.xml Wed Mar 12 12:53:30 2014 -0400 +++ b/samtools_mpileup.xml Fri Sep 11 14:16:52 2015 -0400 @@ -1,9 +1,13 @@ -<tool id="samtools_mpileup" name="MPileup" version="0.0.3"> - <description>SNP and indel caller</description> - <requirements> - <requirement type="package" version="0.1.19">samtools</requirement> - </requirements> - <command><![CDATA[ +<tool id="samtools_mpileup" name="MPileup" version="2.1"> + <description>call variants</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version_command" /> + <command> + <![CDATA[ #if $reference_source.reference_source_selector == "history": ln -s "${reference_source.ref_file}" && samtools faidx `basename "${reference_source.ref_file}"` && samtools mpileup #else: @@ -15,7 +19,7 @@ -f "${reference_source.ref_file}" #end if #for $i, $input_bam in enumerate( $reference_source.input_bams ): - $input_bam.input_bam + "${input_bam.input_bam}" #end for #if str( $advanced_options.advanced_options_selector ) == "advanced": #if str( $advanced_options.filter_by_flags.filter_flags ) == "filter": @@ -29,14 +33,14 @@ #if str( $advanced_options.limit_by_region.limit_by_regions ) == "paste": -l "$pasted_regions" #elif str( $advanced_options.limit_by_region.limit_by_regions ) == "history" - -l "$bed_regions" + -l "$advanced_options.limit_by_region.bed_regions" #end if #if str( $advanced_options.exclude_read_group.exclude_read_groups ) == "paste": -G "$excluded_read_groups" #elif str( $advanced_options.exclude_read_group.exclude_read_groups ) == "history" - -G "$read_groups" + -G "$advanced_options.exclude_read_group.read_groups" #end if - ${advanced_options.skip_anomalous_read_pairs} + ${advanced_options.skip_anomalous_read_pairs} ${advanced_options.disable_probabilistic_realignment} -C "${advanced_options.coefficient_for_downgrading}" -d "${advanced_options.max_reads_per_bam}" @@ -46,106 +50,129 @@ #if str( $advanced_options.region_string ): -r "${advanced_options.region_string}" #end if - ${advanced_options.output_per_sample_read_depth} - ${advanced_options.output_per_sample_strand_bias_p_value} + #end if #if str( $genotype_likelihood_computation_type.genotype_likelihood_computation_type_selector ) == 'perform_genotype_likelihood_computation': - ##-g or -u - -g - -e "${genotype_likelihood_computation_type.gap_extension_sequencing_error_probability}" - -h "${genotype_likelihood_computation_type.coefficient_for_modeling_homopolymer_errors}" + ## + + ${genotype_likelihood_computation_type.output_format} + ${genotype_likelihood_computation_type.compressed} + + #if str( $genotype_likelihood_computation_type.output_tags ) != "None": + --output-tags "${genotype_likelihood_computation_type.output_tags}" + #end if + #if str( $genotype_likelihood_computation_type.perform_indel_calling.perform_indel_calling_selector ) == 'perform_indel_calling': + -o "${genotype_likelihood_computation_type.perform_indel_calling.gap_open_sequencing_error_probability}" + -e "${genotype_likelihood_computation_type.perform_indel_calling.gap_extension_sequencing_error_probability}" + -h "${genotype_likelihood_computation_type.perform_indel_calling.coefficient_for_modeling_homopolymer_errors}" -L "${genotype_likelihood_computation_type.perform_indel_calling.skip_indel_calling_above_sample_depth}" -m "${genotype_likelihood_computation_type.perform_indel_calling.minimum_gapped_reads_for_indel_candidates}" + --open-prob "${genotype_likelihood_computation_type.perform_indel_calling.open_seq_error_probability}" -F "${genotype_likelihood_computation_type.perform_indel_calling.minimum_gapped_read_fraction}" ${genotype_likelihood_computation_type.perform_indel_calling.gapped_read_per_sample} - #else: + #if len( $genotype_likelihood_computation_type.perform_indel_calling.platform_list_repeat ): + -P "${ ",".join( [ str( platform.platform_entry ) for platform in $genotype_likelihood_computation_type.perform_indel_calling.platform_list_repeat ] ) }" + #end if + #elif str( $genotype_likelihood_computation_type.perform_indel_calling.perform_indel_calling_selector ) == 'do_not_perform_indel_calling': -I #end if - -o "${genotype_likelihood_computation_type.gap_open_sequencing_error_probability}" - #if len( $genotype_likelihood_computation_type.platform_list_repeat ): - -P "${ ",".join( [ str( platform.platform_entry ) for platform in $genotype_likelihood_computation_type.platform_list_repeat ] ) }" - #end if + + #else: ${genotype_likelihood_computation_type.base_position_on_reads} ${genotype_likelihood_computation_type.output_mapping_quality} #end if - > "$output_mpileup" 2> "$output_log" - ]]></command> - <stdio> - <exit_code range="1:" level="fatal" description="Error" /> - </stdio> + --output "$output_mpileup" 2> "$output_log" + ]]> + </command> <inputs> <conditional name="reference_source"> - <param name="reference_source_selector" type="select" label="Choose the source for the reference list"> - <option value="cached">Locally cached</option> - <option value="history">History</option> + <param label="Choose the source for the reference genome" name="reference_source_selector" type="select"> + <option value="cached">Use a built-in genome</option> + <option value="history">Use a genome from the history</option> </param> <when value="cached"> - <repeat name="input_bams" title="BAM file" min="1"> - <param name="input_bam" type="data" format="bam" label="BAM file"> + <repeat min="1" name="input_bams" title="BAM file"> + <param format="bam" label="BAM file" name="input_bam" type="data"> <validator type="unspecified_build" /> - <validator type="dataset_metadata_in_data_table" table_name="fasta_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select --> + <validator message="Sequences are not currently available for the specified build." metadata_column="1" metadata_name="dbkey" table_name="fasta_indexes" type="dataset_metadata_in_data_table" /> </param> </repeat> - <param name="ref_file" type="select" label="Using reference genome"> + <param label="Using reference genome" name="ref_file" type="select"> <options from_data_table="fasta_indexes" /> - <!-- <filter type="data_meta" ref="input_bam" key="dbkey" column="1" /> does not yet work in a repeat...--> </param> </when> <when value="history"> - <repeat name="input_bams" title="BAM file" min="1"> - <param name="input_bam" type="data" format="bam" label="BAM file"> - <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." /> + <repeat min="1" name="input_bams" title="BAM file"> + <param format="bam" label="BAM file" name="input_bam" type="data"> + <validator check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." type="metadata" /> </param> </repeat> - <param name="ref_file" type="data" format="fasta" label="Using reference file" /> + <param format="fasta" label="Using reference genome" name="ref_file" type="data" /> </when> </conditional> <conditional name="genotype_likelihood_computation_type"> - <param name="genotype_likelihood_computation_type_selector" type="select" label="Genotype Likelihood Computation"> - <option value="perform_genotype_likelihood_computation">Perform genotype likelihood computation</option> - <option value="do_not_perform_genotype_likelihood_computation" selected="True">Do not perform genotype likelihood computation</option> + <param label="Genotype Likelihood Computation" name="genotype_likelihood_computation_type_selector" type="select"> + <option selected="True" value="perform_genotype_likelihood_computation">Perform genotype likelihood computation (--VCF, --BCF options)</option> + <option value="do_not_perform_genotype_likelihood_computation">Do not perform genotype likelihood computation (output pileup)</option> </param> <when value="perform_genotype_likelihood_computation"> - <param name="gap_extension_sequencing_error_probability" type="integer" value="20" label="Phred-scaled gap extension sequencing error probability" /> - <param name="coefficient_for_modeling_homopolymer_errors" type="integer" value="100" label="Coefficient for modeling homopolymer errors." /> + <param label="Choose the output format" name="output_format" type="select"> + <option value="--VCF">VCF</option> + <option value="--BCF">BCF</option> + </param> + <param checked="False" falsevalue="--uncompressed" label="Compress output" name="compressed" truevalue="" type="boolean" help="--incompressed; default=False"/> + <param name="output_tags" optional="True" type="select" multiple="True" display="checkboxes" label="Optional tags to output" help="--output-tags"> + <option value="DP">DP (Number of high-quality bases)</option> + <option value="DPR">DRP (Number of high-quality bases for each observed allele)</option> + <option value="DV">DV (Number of high-quality non-reference bases)</option> + <option value="DP4">DP4 (Number of high-quality ref-forward, ref-reverse, alt-forward and alt-reverse bases)</option> + <option value="INFO/DPR">INFO/DPR (Number of high-quality bases for each observed allele)</option> + <option value="SP">SP (Phred-scaled strand bias P-value)</option> + </param> <conditional name="perform_indel_calling"> - <param name="perform_indel_calling_selector" type="select" label="Perform INDEL calling"> - <option value="perform_indel_calling" selected="True">Perform INDEL calling</option> + <param label="Perform INDEL calling" name="perform_indel_calling_selector" type="select"> + <option selected="True" value="perform_indel_calling_def">Perform INDEL calling using default options</option> + <option value="perform_indel_calling">Perform INDEL calling and set advanced options</option> <option value="do_not_perform_indel_calling">Do not perform INDEL calling</option> </param> + <when value="perform_indel_calling_def" /> <when value="perform_indel_calling"> - <param name="skip_indel_calling_above_sample_depth" type="integer" value="250" label="Skip INDEL calling if the average per-sample depth is above" /> - <param name="minimum_gapped_reads_for_indel_candidates" type="integer" value="1" label="Minimum gapped reads for indel candidates" /> - <param name="minimum_gapped_read_fraction" type="float" value="0.002" label="Minimum fraction of gapped reads for candidates" /> - <param name="gapped_read_per_sample" type="boolean" truevalue="-p" falsevalue="" checked="False" label="Apply minimum values on a per-sample basis" /> + <param label="Phred-scaled gap open sequencing error probability" name="gap_open_sequencing_error_probability" type="integer" value="40" help="--open-prob; Reducing this value leads to more indel calls; default=40"/> + <param label="Phred-scaled gap extension sequencing error probability" name="gap_extension_sequencing_error_probability" type="integer" value="20" help="--ext-prob; Reducing this value leads to longer indels. default=20"/> + <param label="Coefficient for modeling homopolymer errors." name="coefficient_for_modeling_homopolymer_errors" type="integer" value="100" help="--tandem-qual; default=100"/> + <param label="Skip INDEL calling if the average per-sample depth is above" name="skip_indel_calling_above_sample_depth" type="integer" value="250" help="--max-idepth; default=250"/> + <param label="Minimum gapped reads for indel candidates" name="minimum_gapped_reads_for_indel_candidates" type="integer" value="1" help="--min-ireads; default=1"/> + <param label="Phred-scaled gap open sequencing error probability" name="open_seq_error_probability" type="integer" value="40" help="--open-prob; Reducing this value leads to more indel calls; default=40"/> + <param label="Minimum fraction of gapped reads" name="minimum_gapped_read_fraction" type="float" value="0.002" help="--gap-frac; default=0.002"/> + <param checked="False" falsevalue="" label="Apply --min-ireads and --gap-frac values on a per-sample basis" name="gapped_read_per_sample" truevalue="-p" type="boolean" help="--per-sample-mF; by default both options are applied to reads pooled from all samples"/> + <repeat name="platform_list_repeat" title="Platform for INDEL candidates"> + <param label="Platform to use for INDEL candidates" name="platform_entry" type="text" value="" help="It is recommended to collect indel candidates from sequencing technologies that have low indel error rate such as ILLUMINA"/> + </repeat> </when> <when value="do_not_perform_indel_calling" /> </conditional> - <param name="gap_open_sequencing_error_probability" type="integer" value="40" label="Phred-scaled gap open sequencing error probability" /> - <repeat name="platform_list_repeat" title="Platform for INDEL candidates"> - <param name="platform_entry" type="text" value="" label="Platform to use for INDEL candidates" /> - </repeat> + </when> <when value="do_not_perform_genotype_likelihood_computation"> - <param name="base_position_on_reads" type="boolean" truevalue="-O" falsevalue="" checked="False" label="Output base positions on reads" /> - <param name="output_mapping_quality" type="boolean" truevalue="-s" falsevalue="" checked="False" label="Output mapping quality" /> + <param checked="False" falsevalue="" label="Output base positions on reads" name="base_position_on_reads" truevalue="-O" type="boolean" help="--output-BP"/> + <param checked="False" falsevalue="" label="Output mapping quality" name="output_mapping_quality" truevalue="-s" type="boolean" help="--output-MQ"/> </when> </conditional> <conditional name="advanced_options"> - <param name="advanced_options_selector" type="select" label="Set advanced options"> - <option value="basic" selected="True">Basic</option> + <param label="Set advanced options" name="advanced_options_selector" type="select"> + <option selected="True" value="basic">Basic</option> <option value="advanced">Advanced</option> </param> <when value="advanced"> <conditional name="filter_by_flags"> - <param name="filter_flags" type="select" label="Set filter by flags"> - <option value="nofilter" selected="True">Do not filter</option> + <param label="Set filter by flags" name="filter_flags" type="select"> + <option selected="True" value="nofilter">Do not filter</option> <option value="filter">Filter by flags to exclude or require</option> </param> <when value="filter"> - <param name="require_flags" type="select" display="checkboxes" multiple="True" label="Require"> + <param display="checkboxes" label="Require" multiple="True" name="require_flags" type="select" help="--incl-flags"> <option value="1">Read is paired</option> <option value="2">Read is mapped in a proper pair</option> <option value="4">The read is unmapped</option> @@ -158,7 +185,7 @@ <option value="512">The read fails platform/vendor quality checks</option> <option value="1024">The read is a PCR or optical duplicate</option> </param> - <param name="exclude_flags" type="select" display="checkboxes" multiple="True" label="Exclude"> + <param display="checkboxes" label="Exclude" multiple="True" name="exclude_flags" type="select" help="--excl-flags"> <option value="1">Read is paired</option> <option value="2">Read is mapped in a proper pair</option> <option value="4">The read is unmapped</option> @@ -175,121 +202,75 @@ <when value="nofilter" /> </conditional> <conditional name="limit_by_region"> - <param name="limit_by_regions" type="select" label="Select regions to call"> - <option value="no_limit" selected="True">Do not limit</option> - <option value="history">From an uploaded BED file</option> - <option value="paste">Paste a list of regions or BED</option> + <param label="Select regions to call" name="limit_by_regions" type="select"> + <option selected="True" value="no_limit">Do not limit</option> + <option value="history">From an uploaded BED file (--positions)</option> + <option value="paste">Paste a list of regions or BED (--region)</option> </param> <when value="history"> - <param name="bed_regions" type="data" format="bed" label="BED file"> + <param format="bed" label="BED file" name="bed_regions" type="data" help="--positions"> <validator type="dataset_ok_validator" /> </param> </when> <when value="paste"> - <param name="region_paste" type="text" area="true" size="10x35" label="Regions" help="Paste a list of regions in BED format or as a list of chromosomes and positions"/> + <param area="true" help="Paste a list of regions in BED format or as a list of chromosomes and positions" label="Regions" name="region_paste" size="10x35" type="text"/> </when> <when value="no_limit" /> </conditional> <conditional name="exclude_read_group"> - <param name="exclude_read_groups" type="select" label="Select read groups to exclude"> - <option value="no_limit" selected="True">Do not exclude</option> + <param label="Select read groups to exclude" name="exclude_read_groups" type="select" help="--exclude-RG"> + <option selected="True" value="no_limit">Do not exclude</option> <option value="history">From an uploaded text file</option> <option value="paste">Paste a list of read groups</option> </param> <when value="history"> - <param name="read_groups" type="data" format="txt" label="Text file"> + <param format="txt" label="Text file" name="read_groups" type="data"> <validator type="dataset_ok_validator" /> </param> </when> <when value="paste"> - <param name="group_paste" type="text" area="true" size="10x35" label="Read groups" help="Paste a list of read groups"/> + <param area="true" help="Paste a list of read groups" label="Read groups" name="group_paste" size="10x35" type="text" /> </when> <when value="no_limit" /> </conditional> - <param name="skip_anomalous_read_pairs" type="boolean" truevalue="-A" falsevalue="" checked="False" label="Do not skip anomalous read pairs in variant calling" /> - <param name="disable_probabilistic_realignment" type="boolean" truevalue="-B" falsevalue="" checked="False" label=" Disable probabilistic realignment for the computation of base alignment quality (BAQ)" /> - <param name="coefficient_for_downgrading" type="integer" value="0" label="Coefficient for downgrading mapping quality for reads containing excessive mismatches" /> - <param name="max_reads_per_bam" type="integer" value="250" label="Max reads per BAM" /> - <param name="extended_BAQ_computation" type="boolean" truevalue="-E" falsevalue="" checked="False" label="Extended BAQ computation" /> - <param name="minimum_mapping_quality" type="integer" value="0" label="Minimum mapping quality for an alignment to be used" /> - <param name="minimum_base_quality" type="integer" value="13" label="Minimum base quality for a base to be considered" /> - <param name="region_string" type="text" value="" label="Only generate pileup in region" /> - <param name="output_per_sample_read_depth" type="boolean" truevalue="-D" falsevalue="" checked="False" label="Output per-sample read depth" /> - <param name="output_per_sample_strand_bias_p_value" type="boolean" truevalue="-S" falsevalue="" checked="False" label="Output per-sample Phred-scaled strand bias P-value" /> + <param checked="False" falsevalue="" label="Disable read-pair overlap detection" name="ignore_overlaps" truevalue="-x" type="boolean" help="--ignore-overlaps"/> + <param checked="False" falsevalue="" label="Do not skip anomalous read pairs in variant calling" name="skip_anomalous_read_pairs" truevalue="-A" type="boolean" help="--count-orphans"/> + <param checked="False" falsevalue="" label="Disable probabilistic realignment for the computation of base alignment quality (BAQ)" name="disable_probabilistic_realignment" truevalue="-B" type="boolean" help="--no-BAQ; BAQ is the Phred-scaled probability of a read base being misaligned. Applying this option greatly helps to reduce false SNPs caused by misalignments"/> + <param label="Coefficient for downgrading mapping quality for reads containing excessive mismatches" name="coefficient_for_downgrading" type="integer" value="0" help="--adjust-MQ; Given a read with a phred-scaled probability q of being generated from the mapped position, the new mapping quality is about sqrt((INT-q)/INT)*INT. A zero value disables this functionality; if enabled, the recommended value for BWA is 50. default=0"/> + <param label="Max reads per BAM" max="1024" min="1" name="max_reads_per_bam" type="integer" value="250" help="--max-depth; default=250"/> + <param checked="False" falsevalue="" label="Redo BAQ computation" name="extended_BAQ_computation" truevalue="-E" type="boolean" help="--redo-BAQ; ignore existing BQ tags"/> + <param label="Minimum mapping quality for an alignment to be used" name="minimum_mapping_quality" type="integer" value="0" help="-min-MQ; default=0"/> + <param label="Minimum base quality for a base to be considered" name="minimum_base_quality" type="integer" value="13" help="--min-BQ; default=13"/> + <param label="Only generate pileup in region" name="region_string" type="text" value="" help="--region; If used in conjunction with --positions, then considers the intersection of the two requests. Defaults to all sites" /> </when> <when value="basic" /> </conditional> </inputs> - <configfiles> - <configfile name="excluded_read_groups"> -<![CDATA[ -<% -import re -%> -#set pasted_data = '' -#if str( $advanced_options.advanced_options_selector ) == "advanced": - #if str( $advanced_options.exclude_read_group.exclude_read_groups ) == "paste": - #set regex=re.compile("\\s+") - #set pasted_data = '\t'.join( regex.split( str( $advanced_options.exclude_read_group['read_groups'] ) ) ) - #end if -#end if -${pasted_data} -]]> - </configfile> - <configfile name="pasted_regions"> -<![CDATA[ -<% -import re -%> -#set pasted_data = '' -#if str( $advanced_options.advanced_options_selector ) == "advanced": - #if str( $advanced_options.limit_by_region.limit_by_regions ) == "paste": - #set regex=re.compile("\\s+") - #set pasted_data = '\t'.join( regex.split( str( $advanced_options.limit_by_region['region_paste'] ) ) ) - #end if -#end if -${pasted_data} -]]> - </configfile> - </configfiles> <outputs> - <data format="pileup" name="output_mpileup" label="${tool.name} on ${on_string}"> + <data format="pileup" label="${tool.name} on ${on_string}" name="output_mpileup"> <change_format> - <when input="genotype_likelihood_computation_type.genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" format="bcf" /> + <when format="bcf" input="genotype_likelihood_computation_type.output_format" value="--BCF" /> + <when format="vcf" input="genotype_likelihood_computation_type.output_format" value="--VCF" /> </change_format> </data> - <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" /> + <data format="txt" label="${tool.name} on ${on_string} (log)" name="output_log" /> </outputs> <tests> <test> <param name="reference_source_selector" value="history" /> - <param name="ref_file" value="phiX.fasta" ftype="fasta" /> - <param name="input_bam" value="samtools_mpileup_in_1.bam" ftype="bam" /> + <param ftype="fasta" name="ref_file" value="phiX.fasta" /> + <param ftype="bam" name="input_bam" value="samtools_mpileup_in_1.bam" /> <param name="genotype_likelihood_computation_type_selector" value="do_not_perform_genotype_likelihood_computation" /> <param name="advanced_options_selector" value="basic" /> <param name="base_position_on_reads" value="true" /> <param name="output_mapping_quality" value="true" /> - <output name="output_mpileup" file="samtools_mpileup_out_1.pileup" /> - <output name="output_log" file="samtools_mpileup_out_1.log" /> + <output file="samtools_mpileup_out_1.pileup" name="output_mpileup" /> + <output file="samtools_mpileup_out_1.log" name="output_log" /> </test> <test> <param name="reference_source_selector" value="history" /> - <param name="ref_file" value="phiX.fasta" ftype="fasta" /> - <param name="input_bam" value="samtools_mpileup_in_1.bam" ftype="bam" /> - <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" /> - <param name="gap_extension_sequencing_error_probability" value="20" /> - <param name="coefficient_for_modeling_homopolymer_errors" value="100" /> - <param name="perform_indel_calling_selector" value="perform_indel_calling" /> - <param name="skip_indel_calling_above_sample_depth" value="250" /> - <param name="gap_open_sequencing_error_probability" value="40" /> - <param name="platform_list_repeat" value="0" /> - <param name="advanced_options_selector" value="basic" /> - <output name="output_mpileup" ftype="bcf" file="samtools_mpileup_out_2.bcf" lines_diff="1" /> - <output name="output_log" file="samtools_mpileup_out_2.log" /> - </test> - <test> - <param name="reference_source_selector" value="cached" /> - <param name="input_bam" value="samtools_mpileup_in_3.bam" ftype="bam" dbkey="phiX" /> + <param ftype="fasta" name="ref_file" value="phiX.fasta" /> + <param ftype="bam" name="input_bam" value="phiX.bam" /> <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" /> <param name="gap_extension_sequencing_error_probability" value="20" /> <param name="coefficient_for_modeling_homopolymer_errors" value="100" /> @@ -298,204 +279,94 @@ <param name="gap_open_sequencing_error_probability" value="40" /> <param name="platform_list_repeat" value="0" /> <param name="advanced_options_selector" value="basic" /> - <output name="output_mpileup" ftype="bcf" file="samtools_mpileup_out_2.bcf" lines_diff="1" /> - <output name="output_log" file="samtools_mpileup_out_3.log" /> - </test> - <test> - <param name="reference_source_selector" value="cached" /> - <param name="input_bam" value="samtools_mpileup_in_1.bam" ftype="bam" dbkey="phiX" /> - <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" /> - <param name="gap_extension_sequencing_error_probability" value="20" /> - <param name="coefficient_for_modeling_homopolymer_errors" value="100" /> - <param name="perform_indel_calling_selector" value="perform_indel_calling" /> - <param name="skip_indel_calling_above_sample_depth" value="250" /> - <param name="gap_open_sequencing_error_probability" value="40" /> - <param name="platform_list_repeat" value="0" /> - <param name="advanced_options_selector" value="advanced" /> - <param name="advanced_options|filter_by_flags|filter_flags" value="nofilter" /> - <param name="advanced_options|limit_by_region|limit_by_regions" value="no_limit" /> - <param name="advanced_options|coefficient_for_downgrading" value="true" /> - <param name="advanced_options|max_reads_per_bam" value="200" /> - <param name="advanced_options|extended_BAQ_computation" value="true" /> - <param name="advanced_options|minimum_mapping_quality" value="0" /> - <param name="advanced_options|minimum_base_quality" value="43" /> - <output name="output_mpileup" ftype="bcf" file="samtools_mpileup_out_4.bcf" lines_diff="1" /> - <output name="output_log" file="samtools_mpileup_out_4.log" /> + <param name="genotype_likelihood_computation_type|output_format" value="VCF" /> + <output file="samtools_mpileup_out_2.vcf" ftype="vcf" lines_diff="8" name="output_mpileup" /> + <output file="samtools_mpileup_out_2.log" name="output_log" /> </test> </tests> <help> +<![CDATA[ **What it does** - Generate BCF or pileup for one or multiple BAM files. Alignment records are grouped by sample identifiers in @RG header lines. If sample identifiers are absent, each input file is regarded as one sample. +Report variants for one or multiple BAM files. Alignment records are grouped by sample identifiers in @RG header lines. If sample identifiers are absent, each input file is regarded as one sample. ------ -.. list-table:: **Input options** - :widths: 5 5 40 10 - :header-rows: 1 +**Input options**:: - * - Flag - - Type - - Description - - Default - * - -6 - - *BOOLEAN* - - assume the quality is in the Illumina-1.3+ encoding - - off - * - -A - - *BOOLEAN* - - count anomalous read pairs - - off - * - -B - - *BOOLEAN* - - disable BAQ computation - - off - * - -b - - *FILE* - - list of input BAM filenames, one per line - - *null* - * - -C - - *INT* - - parameter for adjusting mapQ; 0 to disable - - 0 - * - -d - - *INT* - - max per-BAM depth to avoid excessive memory usage - - 250 - * - -E - - *BOOLEAN* - - recalculate extended BAQ on the fly thus ignoring existing BQs - - off - * - -f - - *FILE* - - faidx indexed reference sequence file - - *null* - * - -G - - *FILE* - - exclude read groups listed in FILE - - *null* - * - -l - - *FILE* - - list of positions (chr pos) or regions (BED) - - *null* - * - -M - - *INT* - - cap mapping quality at INT - - 60 - * - -r - - *STR* - - region in which pileup is generated - - *null* - * - -R - - *BOOLEAN* - - ignore RG tags - - off - * - -q - - *INT* - - skip alignments with mapQ smaller than INT - - 0 - * - -Q - - *INT* - - skip bases with baseQ/BAQ smaller than INT - - 13 - * - --rf - - *INT* - - required flags: skip reads with mask bits unset - - 0 - * - --ff - - *INT* - - filter flags: skip reads with mask bits set - - 0 + -6, --illumina1.3+ quality is in the Illumina-1.3+ encoding + -A, --count-orphans do not discard anomalous read pairs + -b, --bam-list FILE list of input BAM filenames, one per line + -B, --no-BAQ disable BAQ (per-Base Alignment Quality) + -C, --adjust-MQ INT adjust mapping quality; recommended:50, disable:0 [0] + -d, --max-depth INT max per-BAM depth; avoids excessive memory usage [250] + -E, --redo-BAQ recalculate BAQ on the fly, ignore existing BQs + -f, --fasta-ref FILE faidx indexed reference sequence file + -G, --exclude-RG FILE exclude read groups listed in FILE + -l, --positions FILE skip unlisted positions (chr pos) or regions (BED) + -q, --min-MQ INT skip alignments with mapQ smaller than INT [0] + -Q, --min-BQ INT skip bases with baseQ/BAQ smaller than INT [13] + -r, --region REG region in which pileup is generated + -R, --ignore-RG ignore RG tags (one BAM = one sample) + --rf, --incl-flags STR|INT required flags: skip reads with mask bits unset [] + --ff, --excl-flags STR|INT filter flags: skip reads with mask bits set + [UNMAP,SECONDARY,QCFAIL,DUP] + -x, --ignore-overlaps disable read-pair overlap detection ------- +**Output options**:: + + -o, --output FILE write output to FILE [standard output] + -g, --BCF generate genotype likelihoods in BCF format + -v, --VCF generate genotype likelihoods in VCF format + +**Output options for mpileup format** (without -g/-v):: -.. list-table:: **Output options** - :widths: 5 5 40 10 - :header-rows: 1 + -O, --output-BP output base positions on reads + -s, --output-MQ output mapping quality + +**Output options for genotype likelihoods** (when -g/-v is used):: + + -t, --output-tags LIST optional tags to output: DP,DPR,DV,DP4,INFO/DPR,SP [] + -u, --uncompressed generate uncompressed VCF/BCF output + +**SNP/INDEL genotype likelihoods options** (effective with -g/-v):: - * - Flag - - Type - - Description - - Default - * - -D - - *BOOLEAN* - - output per-sample DP in BCF (require -g/-u) - - off - * - -g - - *BOOLEAN* - - generate BCF output (genotype likelihoods) - - off - * - -O - - *BOOLEAN* - - output base positions on reads (disabled by -g/-u) - - off - * - -s - - *BOOLEAN* - - output mapping quality (disabled by -g/-u) - - off - * - -S - - *BOOLEAN* - - output per-sample strand bias P-value in BCF (require -g/-u) - - off - * - -u - - *BOOLEAN* - - generate uncompressed BCF output - - off - ------- - -.. list-table:: **SNP/INDEL genotype likelihoods options (effective with '-g' or '-u')** - :widths: 5 5 40 10 - :header-rows: 1 + -e, --ext-prob INT Phred-scaled gap extension seq error probability [20] + -F, --gap-frac FLOAT minimum fraction of gapped reads [0.002] + -h, --tandem-qual INT coefficient for homopolymer errors [100] + -I, --skip-indels do not perform indel calling + -L, --max-idepth INT maximum per-sample depth for INDEL calling [250] + -m, --min-ireads INT minimum number gapped reads for indel candidates [1] + -o, --open-prob INT Phred-scaled gap open seq error probability [40] + -p, --per-sample-mF apply -m and -F per-sample for increased sensitivity + -P, --platforms STR comma separated list of platforms for indels [all] - * - Flag - - Type - - Description - - Default - * - -e - - *INT* - - Phred-scaled gap extension seq error probability - - 20 - * - -F - - *FLOAT* - - minimum fraction of gapped reads for candidates - - 0.002 - * - -h - - *INT* - - coefficient for homopolymer errors - - 100 - * - -I - - *BOOLEAN* - - do not perform indel calling - - off - * - -L - - *INT* - - max per-sample depth for INDEL calling - - 250 - * - -m - - *INT* - - minimum gapped reads for indel candidates - - 1 - * - -o - - *INT* - - Phred-scaled gap open sequencing error probability - - 40 - * - -p - - *BOOLEAN* - - apply -m and -F per-sample to increase sensitivity - - off - * - -P - - *STR* - - comma separated list of platforms for indels - - all - ------- - -**Citation** - -For the underlying tool, please cite `Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. <http://www.ncbi.nlm.nih.gov/pubmed/19505943>`_ - - -If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.* +**Notes**: Assuming diploid individuals. +]]> </help> -</tool> \ No newline at end of file + <configfiles> + <configfile name="excluded_read_groups"> +<![CDATA[ +#set pasted_data = '' +#if str( $advanced_options.advanced_options_selector ) == "advanced": + #if str( $advanced_options.exclude_read_group.exclude_read_groups ) == "paste": + #set pasted_data = '\t'.join( str( $advanced_options.exclude_read_group['read_groups'] ).split() ) + #end if +#end if +${pasted_data} +]]> + </configfile> + <configfile name="pasted_regions"> +<![CDATA[ +#set pasted_data = '' +#if str( $advanced_options.advanced_options_selector ) == "advanced": + #if str( $advanced_options.limit_by_region.limit_by_regions ) == "paste": + #set pasted_data = '\t'.join( str( $advanced_options.limit_by_region['region_paste'] ).split() ) + #end if +#end if +${pasted_data} +]]> + </configfile> + </configfiles> + <expand macro="citations" /> +</tool>