Mercurial > repos > dfornika > snippy
changeset 29:62329bafeaef draft
"planemo upload commit f1b3e36f79747fca391321389276ac196d3f7cd0-dirty"
| author | dfornika | 
|---|---|
| date | Sat, 25 Jan 2020 00:00:54 +0000 | 
| parents | 04f229b754cd | 
| children | 20b52007c4dc | 
| files | macros.xml snippy-core.xml snippy.xml test-data/a_fna_ref_mincov_2_minqual_60.snps.txt test-data/all_fasta.loc test-data/b_2_fna_ref_mincov_2_minqual_60.snps.gff test-data/b_fna_ref_mincov_2_minqual_60.snps.gff test-data/b_fna_ref_mincov_2_minqual_60.snps.txt tool-data/all_fasta.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test | 
| diffstat | 11 files changed, 185 insertions(+), 59 deletions(-) [+] | 
line wrap: on
 line diff
--- a/macros.xml Fri Jun 21 14:38:05 2019 -0400 +++ b/macros.xml Sat Jan 25 00:00:54 2020 +0000 @@ -2,6 +2,7 @@ <xml name="requirements"> <requirements> <requirement type="package" version="@VERSION@">snippy</requirement> + <requirement type="package" version="1.32">tar</requirement> <yield /> </requirements> </xml> @@ -10,7 +11,53 @@ <version_command><![CDATA[snippy --version]]></version_command> </xml> - <token name="@VERSION@">4.3.6</token> + <token name="@REFERENCE_SOURCE_FILE@"> + <![CDATA[ + #if $reference_source.reference_source_selector == 'history' + #if $reference_source.ref_file.is_of_type("fasta") + ln -sf '$reference_source.ref_file' 'ref.fna' && + #elif $reference_source.ref_file.is_of_type("genbank") + ln -sf '$reference_source.ref_file' 'ref.gbk' && + #end if + #elif $reference_source.reference_source_selector == 'cached' + ln -sf '$reference_source.ref_file.fields.path' 'ref.fna' && + #end if]]> + </token> + + <token name="@REFERENCE_COMMAND@"> + <![CDATA[ + #if $reference_source.reference_source_selector == 'history' + #if $reference_source.ref_file.is_of_type("fasta") + --ref 'ref.fna' + #elif $reference_source.ref_file.is_of_type("genbank") + --ref 'ref.gbk' + #end if + #elif $reference_source.reference_source_selector == 'cached' + --ref 'ref.fna' + #end if + ]]> + </token> + + <xml name="reference_selector"> + <conditional name="reference_source"> + <param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options. See `Indexes` section of help below. 
If you would like to perform self-mapping select `history` here, then choose your input file as reference."> + <option value="cached">Use a built-in genome index</option> + <option value="history">Use a genome from history and build index</option> + </param> + <when value="cached"> + <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list"> + <options from_data_table="all_fasta"> + <validator type="no_options" message="No reference genomes are available" /> + </options> + </param> + </when> + <when value="history"> + <param name="ref_file" type="data" format="fasta,genbank" label="Use the following dataset as the reference sequence" help="You can upload a FASTA or GenBank sequence to the history and use it as reference" /> + </when> + </conditional> + </xml> + + <token name="@VERSION@">4.4.5</token> <xml name="citations"> <citations>
--- a/snippy-core.xml Fri Jun 21 14:38:05 2019 -0400 +++ b/snippy-core.xml Sat Jan 25 00:00:54 2020 +0000 @@ -1,5 +1,5 @@ <?xml version="1.0" encoding="utf-8"?> -<tool id="snippy_core" name="snippy-core" version="@VERSION@+galaxy3"> +<tool id="snippy_core" name="snippy-core" version="@VERSION@+galaxy1"> <description> Combine multiple Snippy outputs into a core SNP alignment </description> @@ -8,17 +8,20 @@ </macros> <expand macro="requirements" /> <command detect_errors="exit_code"><![CDATA[ + @REFERENCE_SOURCE_FILE@ #for $indir in $indirs - tar -xf '$indir' && + #set $sample_name = os.path.splitext(os.path.basename(str($indir.name)))[0] + mkdir '$sample_name' && tar -xf '$indir' -C '$sample_name' --strip-components=1 && #end for + #set snippy_dirs = " ".join(["'{0}'".format(os.path.splitext(os.path.basename(str($indir.name)))[0]) for $indir in $indirs]) snippy-core - --ref '$ref' - `ls -1 -I "*.dat" -I "*.log" | tr '\n' ' '` + @REFERENCE_COMMAND@ + ${snippy_dirs} ]]></command> <inputs> - <param name="indirs" type="data" multiple="true" format="tar" label="Snippy input zipped dirs" help="Select all the snippy inputs for alignment" /> - <param name="ref" type="data" format="fasta,genbank" label="Reference File (either in fasta or genbank format)" help="Fasta or Genbank file to use as the reference" /> + <param name="indirs" type="data" multiple="true" format="zip" label="Snippy input zipped dirs" help="Select all the snippy inputs for alignment" /> + <expand macro="reference_selector" /> <param name="outputs" type="select" multiple="true" display="checkboxes" label="Output selection"> <option value="outaln" selected="True">A core SNP alignment in the fasta format</option> <option value="outfull" selected="False">A whole genome SNP alignment (includes invariant sites)</option> @@ -38,7 +41,7 @@ <data format="tabular" name="alignment_table" label="${tool.name} on ${on_string} core alignment table" from_work_dir="core.tab"> <filter>outputs and 'outtab' in 
outputs</filter> </data> - <data format="tabular" name="alignment_summary" label="${tool.name} on ${on_string} core alignment summary" from_work_dir="core.txt"> + <data format="txt" name="alignment_summary" label="${tool.name} on ${on_string} core alignment summary" from_work_dir="core.txt"> <filter>outputs and 'outtxt' in outputs</filter> </data> </outputs> @@ -46,9 +49,21 @@ <tests> <test><!-- Test #1 - test with 3 zipped directories --> <param name="indirs" value="a.tgz,b.tgz,c.tgz" /> - <param name="ref" value="reference.fasta" /> + <conditional name="reference_source"> + <param name="reference_source_selector" value="history"/> + <param name="ref_file" value="reference.fasta" ftype="fasta"/> + </conditional> <param name="outputs" value="outtxt" /> - <output name="alignment_summary" ftype="tabular" file="a_b_c.core.txt" /> + <output name="alignment_summary" ftype="txt" file="a_b_c.core.txt" /> + </test> + <test><!-- Test #2 - test with 3 zipped directories --> + <param name="indirs" value="a.tgz,b.tgz,c.tgz" /> + <conditional name="reference_source"> + <param name="reference_source_selector" value="cached"/> + <param name="ref_file" value="test_id"/> + </conditional> + <param name="outputs" value="outtxt" /> + <output name="alignment_summary" ftype="txt" file="a_b_c.core.txt" /> </test> </tests>
--- a/snippy.xml Fri Jun 21 14:38:05 2019 -0400 +++ b/snippy.xml Sat Jan 25 00:00:54 2020 +0000 @@ -1,15 +1,17 @@ -<tool id="snippy" name="snippy" version="@VERSION@+galaxy3"> +<tool id="snippy" name="snippy" version="@VERSION@+galaxy1"> <description> Snippy finds SNPs between a haploid reference genome and your NGS sequence reads. - </description> - <macros> - <import>macros.xml</import> - </macros> - <expand macro="requirements" /> - <expand macro="version_command" /> + </description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="version_command" /> <command detect_errors="exit_code"><![CDATA[ + @REFERENCE_SOURCE_FILE@ + #import re #if str( $fastq_input.fastq_input_selector ) == "paired" #set $dir_name = re.sub('[^\w_]', '_', $fastq_input.fastq_input1.element_identifier) @@ -21,22 +23,11 @@ #set $dir_name = re.sub('[^\w_]', '_', $fastq_input.fastq_input_interleaved.element_identifier) #end if - #if $ref.is_of_type("fasta") - cp '$ref' 'ref.fna' && - #end if - #if $ref.is_of_type("genbank") - cp '$ref' 'ref.gbk' && - #end if snippy - --outdir '$dir_name' + --outdir '${dir_name}' --cpus \${GALAXY_SLOTS:-1} --ram \$((\${GALAXY_MEMORY_MB:-4096}/1024)) - #if $ref.is_of_type("fasta") - --ref 'ref.fna' - #end if - #if $ref.is_of_type("genbank") - --ref 'ref.gbk' - #end if + @REFERENCE_COMMAND@ --mapqual $adv.mapqual --mincov $adv.mincov --minfrac $adv.minfrac @@ -62,20 +53,16 @@ && - cp -r '$dir_name' 'out' && - - tar -czf 'out.tgz' '${dir_name}' #if "outcon" in str($outputs) and $adv.rename_cons - && sed -i 's/>.*/>${dir_name}/' out/snps.consensus.fa + && sed -i 's/>.*/>${dir_name}/' ${dir_name}/snps.consensus.fa #end if - - - ]]></command> + + && mv ${dir_name} out + && tar -czf out.tgz out + ]]> </command> <inputs> - - <param name="ref" type="data" format="fasta,genbank" label="Reference File (either in fasta or genbank format)" help="Fasta or Genbank file to use as the reference" /> - + <expand 
macro="reference_selector" /> <conditional name="fastq_input"> <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data"> <option value="paired">Paired</option> @@ -116,7 +103,6 @@ <option value="outlog" selected="False">A log file with the commands run and their outputs</option> <option value="outaln" selected="False">A version of the reference but with - at position with depth=0 and N for 0 to depth to --mincov (does not have variants)</option> <option value="outcon" selected="False">A version of the reference genome with all variants instantiated</option> - <option value="outdep" selected="False">Output of samtools depth for the .bam file</option> <option value="outbam" selected="False">The alignments in BAM format. Note that multi-mapping and unmapped reads are not present.</option> <option value="outzip" selected="True">Zipped files needed for input into snippy-core</option> </param> @@ -146,13 +132,10 @@ <data format="fasta" name="snpconsensus" label="${tool.name} on ${on_string} consensus fasta" from_work_dir="out/snps.consensus.fa"> <filter>outputs and 'outcon' in outputs</filter> </data> - <data format="tabular" name="snpsdepth" label="${tool.name} on ${on_string} mapping depth" from_work_dir="out/snps.depth"> - <filter>outputs and 'outdep' in outputs</filter> - </data> <data format="bam" name="snpsbam" label="${tool.name} on ${on_string} mapped reads (bam)" from_work_dir="out/snps.bam"> <filter>outputs and 'outbam' in outputs</filter> </data> - <data format="tar" name="outdir" label="${tool.name} on ${on_string} dir for snippy core" from_work_dir="out.tgz"> + <data format="zip" name="outdir" label="${tool.name} on ${on_string} dir for snippy core" from_work_dir="out.tgz"> <filter>outputs and 'outzip' in outputs</filter> </data> @@ -160,8 +143,12 @@ <tests> - <test> <!-- test 0 - fasta ref no snps --> - <param name="ref" value="reference.fasta" ftype="fasta" /> + <test> <!-- test 0 
- fasta ref no snps --> + <!-- <param name="ref" value="reference.fasta" ftype="fasta" /> --> + <conditional name="reference_source"> + <param name="reference_source_selector" value="history"/> + <param name="ref_file" value="reference.fasta" ftype="fasta"/> + </conditional> <param name="fastq_input_selector" value="paired" /> <param name="fastq_input1" ftype="fastqsanger" value="a_1.fastq" /> <param name="fastq_input2" ftype="fastqsanger" value="a_2.fastq" /> @@ -172,8 +159,11 @@ <output name="snpgff" ftype="gff3" file="a_fna_ref_mincov_2_minqual_60.snps.gff" /> </test> - <test> <!-- test 1 - fasta ref one snp --> - <param name="ref" value="reference.fasta" ftype="fasta" /> + <test> <!-- test 1 - fasta ref one snp --> + <conditional name="reference_source"> + <param name="reference_source_selector" value="history"/> + <param name="ref_file" value="reference.fasta" ftype="fasta"/> + </conditional> <param name="fastq_input_selector" value="paired" /> <param name="fastq_input1" ftype="fastqsanger" value="b_1.fastq" /> <param name="fastq_input2" ftype="fastqsanger" value="b_2.fastq" /> @@ -184,8 +174,11 @@ <output name="snpgff" ftype="gff3" file="b_fna_ref_mincov_2_minqual_60.snps.gff" /> </test> - <test> <!-- test 2 - fasta ref one snp paired_collection --> - <param name="ref" value="reference.fasta" ftype="fasta" /> + <test> <!-- test 2 - fasta ref one snp paired_collection --> + <conditional name="reference_source"> + <param name="reference_source_selector" value="history"/> + <param name="ref_file" value="reference.fasta" ftype="fasta"/> + </conditional> <param name="fastq_input_selector" value="paired_collection" /> <param name="fastq_input"> <collection type="paired"> @@ -200,8 +193,25 @@ <output name="snpgff" ftype="gff3" file="b_fna_ref_mincov_2_minqual_60.snps.gff" /> </test> - <test> <!-- test 3 - fasta ref one snp single --> - <param name="ref" value="reference.fasta" ftype="fasta" /> + <test> <!-- test 3 - fasta ref one snp single --> + <conditional 
name="reference_source"> + <param name="reference_source_selector" value="history"/> + <param name="ref_file" value="reference.fasta" ftype="fasta"/> + </conditional> + <param name="fastq_input_selector" value="single" /> + <param name="fastq_input_single" value="b_2.fastq" ftype="fastqsanger" /> + <param name="mincov" value="2" /> + <param name="minqual" value="60" /> + <param name="outputs" value="outgff,outsum" /> + <output name="snpsum" ftype="tabular" file="b_fna_ref_mincov_2_minqual_60.snps.txt" lines_diff="6" /> + <output name="snpgff" ftype="gff3" file="b_2_fna_ref_mincov_2_minqual_60.snps.gff" /> + </test> + + <test> <!-- test 4 - reference source as cached --> + <conditional name="reference_source"> + <param name="reference_source_selector" value="cached"/> + <param name="ref_file" value="test_id"/> + </conditional> <param name="fastq_input_selector" value="single" /> <param name="fastq_input_single" value="b_2.fastq" ftype="fastqsanger" /> <param name="mincov" value="2" /> @@ -248,7 +258,7 @@ For a much more in depth description of snippy and how it works, see https://github.com/tseemann/snippy - ]]></help> - <expand macro="citations"/> + ]]> </help> + <expand macro="citations"/> </tool>
--- a/test-data/a_fna_ref_mincov_2_minqual_60.snps.txt Fri Jun 21 14:38:05 2019 -0400 +++ b/test-data/a_fna_ref_mincov_2_minqual_60.snps.txt Sat Jan 25 00:00:54 2020 +0000 @@ -2,5 +2,5 @@ ReadFiles a_1.fastq a_2.fastq Reference reference.fasta ReferenceSize 700 -Software snippy 4.3.6 +Software snippy 4.4.5 VariantTotal 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/all_fasta.loc Sat Jan 25 00:00:54 2020 +0000 @@ -0,0 +1,20 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +# +test_id test_dbkey test display name ${__HERE__}/ref.fna +
--- a/test-data/b_2_fna_ref_mincov_2_minqual_60.snps.gff Fri Jun 21 14:38:05 2019 -0400 +++ b/test-data/b_2_fna_ref_mincov_2_minqual_60.snps.gff Sat Jan 25 00:00:54 2020 +0000 @@ -1,2 +1,2 @@ ##gff-version 3 -reference snippy:4.3.6 variation 4 4 . . 0 note=snp A=>T T:5 A:0 +reference snippy:4.4.5 variation 4 4 . . 0 note=snp A=>T T:5 A:0
--- a/test-data/b_fna_ref_mincov_2_minqual_60.snps.gff Fri Jun 21 14:38:05 2019 -0400 +++ b/test-data/b_fna_ref_mincov_2_minqual_60.snps.gff Sat Jan 25 00:00:54 2020 +0000 @@ -1,2 +1,2 @@ ##gff-version 3 -reference snippy:4.3.6 variation 4 4 . . 0 note=snp A=>T T:10 A:0 +reference snippy:4.4.5 variation 4 4 . . 0 note=snp A=>T T:10 A:0
--- a/test-data/b_fna_ref_mincov_2_minqual_60.snps.txt Fri Jun 21 14:38:05 2019 -0400 +++ b/test-data/b_fna_ref_mincov_2_minqual_60.snps.txt Sat Jan 25 00:00:54 2020 +0000 @@ -2,6 +2,6 @@ ReadFiles b_1.fastq b_2.fastq Reference reference.fasta ReferenceSize 700 -Software snippy 4.3.6 +Software snippy 4.4.5 Variant-SNP 1 VariantTotal 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/all_fasta.loc.sample Sat Jan 25 00:00:54 2020 +0000 @@ -0,0 +1,18 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +# \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Sat Jan 25 00:00:54 2020 +0000 @@ -0,0 +1,8 @@ +<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> +<tables> + <!-- Locations of all fasta files under genome directory --> + <table name="all_fasta" comment_char="#" allow_duplicate_entries="False"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/all_fasta.loc" /> + </table> +</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Sat Jan 25 00:00:54 2020 +0000 @@ -0,0 +1,8 @@ +<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> +<tables> + <!-- Locations of all fasta files under genome directory --> + <table name="all_fasta" comment_char="#" allow_duplicate_entries="False"> + <columns>value, dbkey, name, path</columns> + <file path="${__HERE__}/test-data/all_fasta.loc" /> + </table> +</tables>
