Mercurial > repos > matthias > dada2_plotqualityprofile
diff dada2_plotQualityProfile.xml @ 9:d908015e5889 draft
planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit a54770771e567c7ad8a9dd75cc4689c3935ef11c
author | matthias |
---|---|
date | Tue, 28 May 2019 12:15:38 -0400 |
parents | 7970dfbedde3 |
children | 36224cf72a7b |
line wrap: on
line diff
--- a/dada2_plotQualityProfile.xml Mon May 27 13:23:01 2019 -0400 +++ b/dada2_plotQualityProfile.xml Tue May 28 12:15:38 2019 -0400 @@ -8,121 +8,98 @@ <command detect_errors="exit_code"><![CDATA[ ##name files by linking #import re -#if "batch" in str($paired_cond.paired_select) - #set elid = re.sub('[^\w\-\.]', '_', str($paired_cond.fl.element_identifier)) - #if "single" in str($paired_cond.paired_select) - ln -s '$paired_cond.fl' '$elid' && - #else - ln -s '$paired_cond.fl.forward' '$elid'_forward && - ln -s '$paired_cond.fl.reverse' '$elid'_reverse && - #end if +mkdir forward && +#if $batch_cond.paired_cond.paired_select != "single" + mkdir reverse && +#end if + +#if $batch_cond.batch_select == "batch": + #set elid = re.sub('[^\w\-\.]', '_', str($batch_cond.paired_cond.reads.element_identifier)) + #if $batch_cond.paired_cond.paired_select != "paired" + ln -s '$batch_cond.paired_cond.reads' forward/'$elid' && + #else + ln -s '$batch_cond.paired_cond.reads.forward' forward/'$elid' && + ln -s '$batch_cond.paired_cond.reads.reverse' reverse/'$elid' && + #end if + #if $batch_cond.paired_cond.paired_select == "separate" + ln -s '$batch_cond.paired_cond.sdaer' reverse/'$elid' && + #end if #else - #for $read in $paired_cond.fl: - #set elid = re.sub('[^\w\-\.]', '_', str($read.element_identifier)) - #if "single" in str($paired_cond.paired_select) - ln -s '$read' '$elid' && - #else - ln -s '$read.forward' '$elid'_forward && - ln -s '$read.reverse' '$elid'_reverse && + #for $read in $batch_cond.paired_cond.reads: + #set elid = re.sub('[^\w\-\.]', '_', str($read.element_identifier)) + #if $batch_cond.paired_cond.paired_select != "paired" + ln -s '$read' forward/'$elid' && + #else + ln -s '$read.forward' forward/'$elid' && + ln -s '$read.reverse' reverse/'$elid' && + #end if + #end for + #if $batch_cond.paired_cond.paired_select == "separate" + #for $read in $batch_cond.paired_cond.sdaer: + #set elid = re.sub('[^\w\-\.]', '_', str($read.element_identifier)) + ln -s '$read' reverse/'$elid' && + #end for #end if - #end for #end if - - Rscript --slave '$dada2_script' + + Rscript --slave '$dada2_script' ]]></command> <configfiles> <configfile name="dada2_script"><![CDATA[ #import re -fwd_files = c() -rev_files = c() -#if "batch" in str($paired_cond.paired_select) - #set elid = re.sub('[^\w\-\.]', '_', str($paired_cond.fl.element_identifier)) - #if "single" in str($paired_cond.paired_select) - fwd_files = c(fwd_files, '$elid') - #else - fwd_files = c(fwd_files, paste('$elid', 'forward', sep = "_")) - rev_files = c(rev_files, paste('$elid', 'reverse', sep = "_")) - #end if -#else - #for $read in $paired_cond.fl: - #set elid = re.sub('[^\w\-\.]', '_', str($read.element_identifier)) - #if "single" in str($paired_cond.paired_select) - fwd_files = c(fwd_files, '$elid') - #else - fwd_files = c(fwd_files, paste('$elid', 'forward', sep = "_")) - rev_files = c(rev_files, paste('$elid', 'reverse', sep = "_")) - #end if - #end for -#end if +library(ggplot2, quietly=T) +library(dada2, quietly=T) -#if not "batch" in str($paired_cond.paired_select) -agg = $paired_cond.aggregate +#if $batch_cond.batch_select != "batch" +agg = $batch_cond.aggregate #else agg = FALSE #end if -library(ggplot2, quietly=T) -library(dada2, quietly=T) - -qp <- plotQualityProfile(fwd_files, -#if str($n) != "" - n=$n, -#end if - aggregate = agg) +fwd_files = list.files("forward", full.names=T) +qp <- plotQualityProfile(fwd_files, n=$n, aggregate = agg) ggsave('output.pdf', qp, width = 20,height = 15,units = c("cm")) -#if "paired" in str($paired_cond.paired_select) -qp <- plotQualityProfile(rev_files, -#if str($n) != "" - n=$n, -#end if - aggregate = agg) +#if $batch_cond.paired_cond.paired_select != "single" +rev_files = list.files("reverse", full.names=T) +qp <- plotQualityProfile(rev_files, n=$n, aggregate = agg) ggsave('output_rev.pdf', qp, width = 20,height = 15,units = c("cm")) #end if ]]></configfile> </configfiles> <inputs> - <conditional name="paired_cond"> - <param name="paired_select" type="select" label="Input data organisation and processing mode" help="Select if data is organized in a paired collection or not (note that the pairing of the data sets is not used by the tool); batch will create a separate pdf for each input data set or data set pair; non-batch will create one pdf containing a plot for each data set"> - <option value="paired">paired - non batch</option> - <option value="single">single - non batch</option> - <option value="paired_batch">paired - batch</option> - <option value="single_batch">single - batch</option> + <conditional name="batch_cond"> + <param name="batch_select" type="select" label="Processing mode" help="Joint processing processes all reads at once in a single job creating a single output (two in the case of paired data). Batch processes the samples in separate jobs and creates separate output for each"> + <option value="joint">Joint</option> + <option value="batch">Batch</option> </param> - <when value="paired"> - <param argument="fl" type="data_collection" collection_type="list:paired" format="fastq,fastq.gz" label="Short read data"/> + <when value="joint"> + <expand macro="fastq_input" multiple="True" collection_type="list:paired" argument_fwd="fl" argument_rev="fl"/> <param argument="aggregate" type="boolean" label="Aggregate data" checked="True" truevalue="TRUE" falsevalue="FALSE" help="Create a single plot for all data sets (default) or a separate plot for each data set"/> </when> - <when value="single"> - <param argument="fl" type="data" multiple="true" format="fastq,fastq.gz" label="Short read data"/> - <param argument="aggregate" type="boolean" label="Aggregate data" checked="True" truevalue="TRUE" falsevalue="FALSE" help="Create a single plot for all data sets (default) or a separate plot for each data set"/> - </when> - <when value="paired_batch"> - <param argument="fl" type="data_collection" collection_type="paired" format="fastq,fastq.gz" label="Short read data"/> - </when> - <when value="single_batch"> - <param argument="fl" type="data" format="fastq,fastq.gz" label="Short read data"/> + <when value="batch"> + <expand macro="fastq_input" multiple="False" collection_type="paired" argument_fwd="fl" argument_rev="fl"/> </when> </conditional> <param argument="n" type="integer" value="500000" label="sample number" help="number of records to sample from the fastq file"/> </inputs> <outputs> <data name="output" format="pdf" from_work_dir="output.pdf"> - <filter>"single" in paired_cond['paired_select']</filter> - </data> - <data name="output_fwd" format="pdf" from_work_dir="output.pdf" label="${tool.name} on ${on_string}: forward reads"> - <filter>"paired" in paired_cond['paired_select']</filter> + <filter>batch_cond['paired_cond']['paired_select'] == "single"</filter> </data> - <data name="output_rev" format="pdf" from_work_dir="output_rev.pdf" label="${tool.name} on ${on_string}: reverse reads"> - <filter>"paired" in paired_cond['paired_select']</filter> + <data name="output_fwd" format="pdf" from_work_dir="output.pdf" label="${tool.name} on ${on_string}: forward reads"> + <filter>batch_cond['paired_cond']['paired_select'] != "single"</filter> + </data> + <data name="output_rev" format="pdf" from_work_dir="output_rev.pdf" label="${tool.name} on ${on_string}: reverse reads"> + <filter>batch_cond['paired_cond']['paired_select'] != "single"</filter> </data> </outputs> <tests> - <!-- paired non-batch, aggregate --> - <test> - <param name="paired_cond|paired_select" value="paired"/> - <param name="paired_cond|aggregate" value="TRUE"/> - <param name="paired_cond|fl"> + <!-- paired joint, no-aggregate --> + <test expect_num_outputs="2"> + <param name="batch_cond|batch_select" value="joint"/> + <param name="batch_cond|paired_cond|paired_select" value="paired"/> + <param name="batch_cond|paired_cond|reads"> <collection type="list:paired"> <element name="F3D0_S188_L001"> <collection type="paired"> @@ -132,13 +109,35 @@ </element> </collection> </param> + <param name="batch_cond|aggregate" value="FALSE"/> <output name="output_fwd" value="qualityProfileMultiple.pdf" ftype="pdf"/> <output name="output_rev" value="qualityProfileMultiple_rev.pdf" ftype="pdf"/> </test> - <!-- paired, batch, no aggregate--> - <test> - <param name="paired_cond|paired_select" value="paired_batch"/> - <param name="paired_cond|fl"> + <!-- paired-separate joint, no-aggregate (sim_size because element ids differ) --> + <test expect_num_outputs="2"> + <param name="batch_cond|batch_select" value="joint"/> + <param name="batch_cond|paired_cond|paired_select" value="separate"/> + <param name="batch_cond|paired_cond|reads" value="F3D0_S188_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/> + <param name="batch_cond|paired_cond|sdaer" value="F3D0_S188_L001_R2_001.fastq.gz" ftype="fastqsanger.gz"/> + <param name="batch_cond|aggregate" value="FALSE"/> + <output name="output_fwd" value="qualityProfileMultiple.pdf" ftype="pdf" compare="sim_size"/> + <output name="output_rev" value="qualityProfileMultiple_rev.pdf" ftype="pdf" compare="sim_size"/> + </test> + <!-- single, non-batch, aggregate, small sample --> + <test expect_num_outputs="1"> + <param name="batch_cond|batch_select" value="joint"/> + <param name="batch_cond|paired_cond|paired_select" value="single"/> + <param name="batch_cond|paired_cond|reads" value="F3D0_S188_L001_R1_001.fastq.gz,F3D0_S188_L001_R2_001.fastq.gz" ftype="fastqsanger.gz"/> + <param name="n" value="10000"/> + <param name="batch_cond|aggregate" value="TRUE"/> + <output name="output" value="qualityProfileSmallSample.pdf" ftype="pdf"/> + </test> + + <!-- paired, batch --> + <test expect_num_outputs="2"> + <param name="batch_cond|batch_select" value="batch"/> + <param name="batch_cond|paired_cond|paired_select" value="paired"/> + <param name="batch_cond|paired_cond|reads"> <collection type="paired"> <element name="forward" value="F3D0_S188_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/> <element name="reverse" value="F3D0_S188_L001_R2_001.fastq.gz" ftype="fastqsanger.gz"/> @@ -147,22 +146,24 @@ <output name="output_fwd" value="qualityProfile.pdf" ftype="pdf"/> <output name="output_rev" value="qualityProfile_rev.pdf" ftype="pdf"/> </test> - <!-- single, non-batch, aggregate --> - <test> - <param name="paired_cond|paired_select" value="single"/> - <param name="paired_cond|aggregate" value="TRUE"/> - <param name="paired_cond|fl" value="F3D0_S188_L001_R1_001.fastq.gz,F3D0_S188_L001_R2_001.fastq.gz" ftype="fastqsanger.gz"/> - <param name="n" value="10000"/> - <output name="output" value="qualityProfileSmallSample.pdf" ftype="pdf"/> + <!-- paired-separate batch (sim_size because element ids differ)--> + <test expect_num_outputs="2"> + <param name="batch_cond|batch_select" value="batch"/> + <param name="batch_cond|paired_cond|paired_select" value="separate"/> + <param name="batch_cond|paired_cond|reads" value="F3D0_S188_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/> + <param name="batch_cond|paired_cond|sdaer" value="F3D0_S188_L001_R2_001.fastq.gz" ftype="fastqsanger.gz"/> + <output name="output_fwd" value="qualityProfile.pdf" ftype="pdf" compare="sim_size"/> + <output name="output_rev" value="qualityProfile_rev.pdf" ftype="pdf" compare="sim_size"/> </test> - <!-- single, batch, no aggregate --> - <test> - <param name="aggregate" value="FALSE"/> - <param name="paired_cond|paired_select" value="single_batch"/> - <param name="paired_cond|fl" value="F3D0_S188_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/> + <!-- single, batch --> + <test expect_num_outputs="1"> + <param name="batch_cond|batch_select" value="batch"/> + <param name="batch_cond|paired_cond|paired_select" value="single"/> + <param name="batch_cond|paired_cond|reads" value="F3D0_S188_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/> <param name="n" value="10000"/> <output name="output" value="qualityProfileSmallSample.pdf" ftype="pdf" compare="sim_size"/> - </test> </tests> + </test> + </tests> <help><![CDATA[ Summary ....... @@ -174,8 +175,6 @@ The distribution of quality scores at each position is shown as a grey-scale heat map, with dark colors corresponding to higher frequency. The plotted lines show positional summary statistics: green is the mean, orange is the median, and the dashed orange lines are the 25th and 75th quantiles. If the sequences vary in length, a red line will be plotted showing the percentage of reads that extend to at least that position. - -Note this tool ignores the pairing of the reads, but the data is just processed as list. ]]></help> <expand macro="citations"/> </tool>