# HG changeset patch # User devteam # Date 1444752807 14400 # Node ID 5cfa4b6db588d401f47e547b5f79905752d626e9 # Parent c5a6f28a3e85ce347af8926e8b33cdb0d105feca planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734 diff -r c5a6f28a3e85 -r 5cfa4b6db588 bowtie2_wrapper.xml --- a/bowtie2_wrapper.xml Thu Dec 04 13:05:09 2014 -0500 +++ b/bowtie2_wrapper.xml Tue Oct 13 12:13:27 2015 -0400 @@ -1,13 +1,16 @@ - + - map reads against reference genome + + read_group_macros.xml + bowtie2 --version bowtie2 samtools - + ## prepare bowtie2 index #set index_path = '' #if str($reference_genome.source) == "history": @@ -17,27 +20,26 @@ #else: #set index_path = $reference_genome.index.fields.path #end if - + ## execute bowtie2 - + bowtie2 - + ## number of threads -p \${GALAXY_SLOTS:-4} ## index file path -x $index_path - - + ## Fastq inputs #if str( $library.type ) == "single": - -U "${input_1}" + -U "${library.input_1}" #if str( $library.unaligned_file ) == "true": --un $output_unaligned_reads_l #end if #elif str( $library.type ) == "paired": - -1 "${input_1}" - -2 "${input_2}" + -1 "${library.input_1}" + -2 "${library.input_2}" #if str( $library.paired_options.paired_options_selector ) == "yes": -I "${library.paired_options.I}" -X "${library.paired_options.X}" @@ -69,15 +71,33 @@ --un-conc $output_unaligned_reads_l #end if #end if - - ## Readgroups - #if str( $read_group.read_group_selector ) == "yes": - --rg-id "${read_group.rgid}" - --rg "SM:${read_group.rgsm}" - --rg "LB:${read_group.rglb}" - --rg "PL:${read_group.rgpl}" + + ## Read group information. + @define_read_group_helpers@ + #if str( $library.type ) == "single": + #set $rg_auto_name = $read_group_name_default($library.input_1) + #elif str( $library.type ) == "paired": + #set $rg_auto_name = $read_group_name_default($library.input_1, $library.input_2) + #else + #set $rg_auto_name = $read_group_name_default($library.input_1) #end if - + @set_use_rg_var@ + @set_read_group_vars@ + #if $use_rg + $format_read_group("", $rg_id, '"', arg='--rg-id ') + $format_read_group("SM:", $rg_sm, '"', arg='--rg ') + $format_read_group("PL:", $rg_pl, '"', arg='--rg ') + $format_read_group("LB:", $rg_lb, '"', arg='--rg ') + $format_read_group("CN:", $rg_cn, '"', arg='--rg ') + $format_read_group("DS:", $rg_ds, '"', arg='--rg ') + $format_read_group("DT:", $rg_dt, '"', arg='--rg ') + $format_read_group("FO:", $rg_fo, '"', arg='--rg ') + $format_read_group("KS:", $rg_ks, '"', arg='--rg ') + $format_read_group("PG:", $rg_pg, '"', arg='--rg ') + $format_read_group("PI:", $rg_pi, '"', arg='--rg ') + $format_read_group("PU:", $rg_pu, '"', arg='--rg ') + #end if + ## Analysis type #if ( str( $analysis_type.analysis_type_selector ) == "simple" and str( $analysis_type.presets ) != "no_presets" ): $analysis_type.presets @@ -88,83 +108,87 @@ --trim5 "${analysis_type.input_options.trim5}" --trim3 "${analysis_type.input_options.trim3}" ${analysis_type.input_options.qv_encoding} - ${analysis_type.input_options.solexa-quals} - ${analysis_type.input_options.int-quals} + ${analysis_type.input_options.solexa_quals} + ${analysis_type.input_options.int_quals} #end if - + #if str( $analysis_type.alignment_options.alignment_options_selector ) == "yes": - -N "${$analysis_type.alignment_options.N}" - -L "${$analysis_type.alignment_options.L}" - -i "${$analysis_type.alignment_options.i}" - --n_ceil "${$analysis_type.alignment_options.n_ceil}" - --dpad "${$analysis_type.alignment_options.dpad}" - --gbar "${$analysis_type.alignment_options.gbar}" - ${analysis_type.alignment_options.ignore-quals} + -N "${analysis_type.alignment_options.N}" + -L "${analysis_type.alignment_options.L}" + -i "${analysis_type.alignment_options.i}" + --n-ceil "${analysis_type.alignment_options.n_ceil}" + --dpad "${analysis_type.alignment_options.dpad}" + --gbar "${analysis_type.alignment_options.gbar}" + ${analysis_type.alignment_options.ignore_quals} ${analysis_type.alignment_options.nofw} ${analysis_type.alignment_options.norc} ${analysis_type.alignment_options.no_1mm_upfront} #if str( $analysis_type.alignment_options.align_mode.align_mode_selector ) == "end-to-end": --end-to-end - --score-min "${$analysis_type.alignment_options.align_mode.core-min}" + --score-min "${analysis_type.alignment_options.align_mode.score_min_ete}" #elif str( $analysis_type.alignment_options.align_mode.align_mode_selector ) == "local": --local - --score-min "${$analysis_type.alignment_options.align_mode.core-min}" + --score-min "${analysis_type.alignment_options.align_mode.score_min_loc}" #end if #end if - + #if str( $analysis_type.scoring_options.scoring_options_selector ) == "yes": - --ma "${analysis_type.scoring_options.ma}" + #if ( str( $analysis_type.alignment_options.alignment_options_selector ) == "yes" and str( $analysis_type.alignment_options.align_mode.align_mode_selector ) == "local" ): + --ma "${analysis_type.scoring_options.ma}" + #end if --mp "${analysis_type.scoring_options.mp}" --np "${analysis_type.scoring_options.np}" --rdg "${analysis_type.scoring_options.rdg_read_open},${analysis_type.scoring_options.rdg_read_extend}" --rfg "${analysis_type.scoring_options.rfg_ref_open},${analysis_type.scoring_options.rfg_ref_extend}" #end if - + #if str( $analysis_type.reporting_options.reporting_options_selector ) == "k": -k "${analysis_type.reporting_options.k}" #elif str( $analysis_type.reporting_options.reporting_options_selector ) == "a": -a #end if - + #if str( $analysis_type.effort_options.effort_options_selector ) == "yes": -D "${analysis_type.effort_options.D}" -R "${analysis_type.effort_options.R}" #end if - + #if str( $analysis_type.sam_options.sam_options_selector ) == "yes": - ${analysis_type.sam_options.no-unal} - ${analysis_type.sam_options.omit-sec-seq} + ${analysis_type.sam_options.no_unal} + ${analysis_type.sam_options.omit_sec_seq} #end if - + #if str( $analysis_type.other_options.other_options_selector ) == "yes": ${analysis_type.other_options.reorder} - ${analysis_type.other_options.non-deterministic} + ${analysis_type.other_options.non_deterministic} --seed "${analysis_type.other_options.seed}" #end if - + #elif str( $analysis_type.analysis_type_selector ) == "cline": ${analysis_type.cline} - #end if - - ## view/sort and output BAM file - | samtools view -Su - | samtools sort -o - - > $output - + #end if + + ## output file + #if ( str( $analysis_type.analysis_type_selector ) != "full" or str( $analysis_type.sam_opt ) != "true" ): + | samtools view -Su - | samtools sort -o - - > $output + #else + > $output_sam + #end if + ## rename unaligned sequence files #if $library.type == "paired" and $output_unaligned_reads_l and $output_unaligned_reads_r: - #set left = str($output_unaligned_reads_l).replace( '.dat', '.1.dat' ) - #set right = str($output_unaligned_reads_l).replace( '.dat', '.2.dat' ) - - ; mv $left $output_unaligned_reads_l; - mv $right $output_unaligned_reads_r + #from os.path import splitext + #set _unaligned_root, _unaligned_ext = splitext( str( $output_unaligned_reads_l ) ) + && mv "${ _unaligned_root }.1${_unaligned_ext}" "${ output_unaligned_reads_l }" + && mv "${ _unaligned_root }.2${_unaligned_ext}" "${ output_unaligned_reads_r }" #end if - + - - + @@ -175,12 +199,12 @@ - + - - + + @@ -189,17 +213,17 @@ - - + + - + - - - + + + @@ -207,7 +231,7 @@ - + @@ -216,17 +240,17 @@ - - + + - + - - - + + + @@ -255,20 +279,7 @@ - - - - - - - - - - - - - - + @@ -294,16 +305,16 @@ - - + + - - + + - - + + @@ -316,25 +327,25 @@ - - - - + + + + - + - + - + - + @@ -349,7 +360,7 @@ - + @@ -369,10 +380,10 @@ - - + + - + @@ -382,7 +393,7 @@ - + @@ -396,8 +407,8 @@ - - + + @@ -411,12 +422,13 @@ - + + @@ -428,21 +440,45 @@ library['unaligned_file'] is True - - + + + + + + + + + + + + + + ( library['type'] == "paired" or library['type'] == "paired_collection" ) and library['unaligned_file'] is True - - + + + + + + + + + + - + + analysis_type['analysis_type_selector'] == "simple" or analysis_type['sam_opt'] is False @@ -461,6 +497,28 @@ + + + analysis_type['analysis_type_selector'] == "full" and analysis_type['sam_opt'] is True + + + + + + + + + + + + + + + @@ -477,6 +535,22 @@ + + + + + + + + + + + + + + + + @@ -541,7 +615,7 @@ pipelines. --phred64 - Input qualities are ASCII chars equal to the [Phred quality] plus 64. This is + Input qualities are ASCII chars equal to the Phred quality plus 64. This is also called the "Phred+64" encoding. --solexa-quals @@ -551,7 +625,7 @@ --int-quals Quality values are represented in the read input file as space-separated ASCII integers, e.g., `40 40 30 40`..., rather than ASCII characters, e.g., `II?I`.... - Integers are treated as being on the [Phred quality] scale unless + Integers are treated as being on the Phred quality scale unless `--solexa-quals` is also specified. Default: off. ------ @@ -591,19 +665,19 @@ **Alignment options**:: -N <int> - Sets the number of mismatches to allowed in a seed alignment during [multiseed - alignment]. Can be set to 0 or 1. Setting this higher makes alignment slower + Sets the number of mismatches to allowed in a seed alignment during multiseed + alignment. Can be set to 0 or 1. Setting this higher makes alignment slower (often much slower) but increases sensitivity. Default: 0. -L <int> - Sets the length of the seed substrings to align during [multiseed alignment]. - Smaller values make alignment slower but more senstive. Default: the - `--sensitive` preset is used by default, which sets `-L` to 20 both in - `--end-to-end` mode and in `--local` mode. + Sets the length of the seed substrings to align during multiseed alignment. + Smaller values make alignment slower but more sensitive. Default: the + `--sensitive` preset is used by default, which sets `-L` to 22 in + `--end-to-end` mode and to 20 in `--local` mode. -i <func> Sets a function governing the interval between seed substrings to use during - [multiseed alignment]. For instance, if the read has 30 characers, and seed + multiseed alignment. For instance, if the read has 30 characers, and seed length is 10, and the seed interval is 6, the seeds extracted will be: Read: TAGCTACGCTCTACGCTATCATGCATAAAC @@ -620,7 +694,7 @@ the interval as a function of the read length, rather than a single one-size-fits-all number. For instance, specifying `-i S,1,2.5` sets the interval function `f` to `f(x) = 1 + 2.5 * sqrt(x)`, where x is the read length. - See also: [setting function options]. If the function returns a result less than + If the function returns a result less than 1, it is rounded up to 1. Default: the `--sensitive` preset is used by default, which sets `-i` to `S,1,1.15` in `--end-to-end` mode to `-i S,1,0.75` in `--local` mode. @@ -629,8 +703,8 @@ Sets a function governing the maximum number of ambiguous characters (usually `N`s and/or `.`s) allowed in a read as a function of read length. For instance, specifying `-L,0,0.15` sets the N-ceiling function `f` to `f(x) = 0 + 0.15 * x`, - where x is the read length. See also: [setting function options]. Reads - exceeding this ceiling are [filtered out]. Default: `L,0,0.15`. + where x is the read length. Reads exceeding this ceiling are filtered out. + Default: `L,0,0.15`. --dpad <int> "Pads" dynamic programming problems by `<int>` columns on either side to allow @@ -658,14 +732,14 @@ --no-1mm-upfront By default, Bowtie 2 will attempt to find either an exact or a 1-mismatch - end-to-end alignment for the read *before* trying the [multiseed heuristic]. Such + end-to-end alignment for the read *before* trying the multiseed heuristic. Such alignments can be found very quickly, and many short read alignments have exact or near-exact end-to-end alignments. However, this can lead to unexpected - alignments when the user also sets options governing the [multiseed heuristic], + alignments when the user also sets options governing the multiseed heuristic, like `-L` and `-N`. For instance, if the user specifies `-N 0` and `-L` equal to the length of the read, the user will be surprised to find 1-mismatch alignments reported. This option prevents Bowtie 2 from searching for 1-mismatch end-to-end - alignments before using the [multiseed heuristic], which leads to the expected + alignments before using the multiseed heuristic, which leads to the expected behavior when combined with options such as `-L` and `-N`. This comes at the expense of speed. @@ -721,8 +795,7 @@ Sets a function governing the minimum alignment score needed for an alignment to be considered "valid" (i.e. good enough to report). This is a function of read length. For instance, specifying `L,0,-0.6` sets the minimum-score function `f` - to `f(x) = 0 + -0.6 * x`, where `x` is the read length. See also: [setting - function options]. The default in `--end-to-end` mode is `L,-0.6,-0.6` and + to `f(x) = 0 + -0.6 * x`, where `x` is the read length. The default in `--end-to-end` mode is `L,-0.6,-0.6` and the default in `--local` mode is `G,20,8`. ----- @@ -840,18 +913,15 @@ --dovetail If the mates "dovetail", that is if one mate alignment extends past the beginning of the other such that the wrong mate begins upstream, consider that - to be concordant. See also: [Mates can overlap, contain or dovetail each - other]. Default: mates cannot dovetail in a concordant alignment. + to be concordant. Default: mates cannot dovetail in a concordant alignment. --no-contain If one mate alignment contains the other, consider that to be non-concordant. - See also: [Mates can overlap, contain or dovetail each other]. Default: a mate - can contain the other in a concordant alignment. + Default: a mate can contain the other in a concordant alignment. --no-overlap If one mate alignment overlaps the other at all, consider that to be - non-concordant. See also: [Mates can overlap, contain or dovetail each other]. - Default: mates can overlap in a concordant alignment. + non-concordant. Default: mates can overlap in a concordant alignment. ------ @@ -866,9 +936,9 @@ --rg <text> Add `<text>` (usually of the form `TAG:VAL`, e.g. `SM:Pool1`) as a field on the `@RG` header line. Note: in order for the `@RG` line to appear, `--rg-id` - must also be specified. This is because the `ID` tag is required by the [SAM - Spec][SAM]. Specify `--rg` multiple times to set multiple fields. See the - [SAM Spec][SAM] for details about what fields are legal. + must also be specified. This is because the `ID` tag is required by the SAM + Specification. Specify `--rg` multiple times to set multiple fields. See the + SAM Specification for details about what fields are legal. --omit-sec-seq When printing secondary alignments, Bowtie 2 by default will write out the `SEQ` diff -r c5a6f28a3e85 -r 5cfa4b6db588 read_group_macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/read_group_macros.xml Tue Oct 13 12:13:27 2015 -0400 @@ -0,0 +1,294 @@ + + + +#def identifier_or_name($input1) + #if hasattr($input1, 'element_identifier') + #return $input1.element_identifier + #else + #return $input1.name.rstrip('.gz').rstrip('.fastq').rstrip('.fq') + #end if +#end def + +#def clean(name) + #import re + #set $name_clean = re.sub('[^\w\-_\.]', '_', $name) + #return $name_clean +#end def + +#def read_group_name_default($input1, $input2=None) + #if $input2 is None + #return $clean($identifier_or_name($input1)) + #else + #import itertools + #set $input_name1 = $clean($identifier_or_name($input1)) + #set $input_name2 = $clean($identifier_or_name($input2)) + #set $common_prefix = ''.join([c[0] for c in itertools.takewhile(lambda x: all(x[0] == y for y in x), itertools.izip(*[$input_name1, $input_name2]))]) + #if len($common_prefix) > 3 + #return $common_prefix + #else + #return $input_name1 + #end if + #end if +#end def + +#def format_read_group(prefix, value, quote='', arg='') + #if $value + #return $arg + $quote + $prefix + $value + $quote + #else + #return '' + #end if +#end def + +#def rg_param(name) + #if $varExists("rg") + #return $rg.get($name, None) + #else + #return $getVar($name, None) + #end if +#end def + +#set $use_rg = True + + + +#if $use_rg + #if $rg_param('read_group_id_conditional') is None + #set $rg_id = $rg_auto_name + #elif $rg_param('read_group_id_conditional').do_auto_name + #set $rg_id = $rg_auto_name + #else + #set $rg_id = str($rg_param('read_group_id_conditional').ID) + #end if + + #if $rg_param('read_group_sm_conditional') is None + #set $rg_sm = '' + #elif $rg_param('read_group_sm_conditional').do_auto_name + #set $rg_sm = $rg_auto_name + #else + #set $rg_sm = str($rg_param('read_group_sm_conditional').SM) + #end if + + #if $rg_param('PL') + #set $rg_pl = str($rg_param('PL')) + #else + #set $rg_pl = '' + #end if + + #if $rg_param('read_group_lb_conditional') is None + #set $rg_lb = '' + #elif $rg_param('read_group_lb_conditional').do_auto_name + #set $rg_lb = $rg_auto_name + #else + #set $rg_lb = str($rg_param('read_group_lb_conditional').LB) + #end if + + #if $rg_param('CN') + #set $rg_cn = str($rg_param('CN')) + #else + #set $rg_cn = '' + #end if + + #if $rg_param("DS") + #set $rg_ds = str($rg_param("DS")) + #else + #set $rg_ds = '' + #end if + + #if $rg_param("DT") + #set $rg_dt = str($rg_param("DT")) + #else + #set $rg_dt = '' + #end if + + #if $rg_param("FO") + #set $rg_fo = str($rg_param("FO")) + #else + #set $rg_fo = '' + #end if + + #if $rg_param("KS") + #set $rg_ks = str($rg_param("KS")) + #else + #set $rg_ks = '' + #end if + + #if $rg_param("PG") + #set $rg_pg = str($rg_param("PG")) + #else + #set $rg_pg = '' + #end if + + #if str($rg_param("PI")) + #set $rg_pi = str($rg_param("PI")) + #else + #set $rg_pi = '' + #end if + + #if $rg_param("PU") + #set $rg_pu = str($rg_param("PU")) + #else + #set $rg_pu = '' + #end if +#end if + + +#set $use_rg = str($rg.rg_selector) != "do_not_set" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \*|[ACMGRSVTWYHKDBN]+$ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r c5a6f28a3e85 -r 5cfa4b6db588 test-data/bowtie2-test2.bam Binary file test-data/bowtie2-test2.bam has changed