Mercurial > repos > greg > extract_genomic_dna
view extract_genomic_dna.xml @ 6:3d40351fc9ac draft
Uploaded
author | greg |
---|---|
date | Thu, 14 Jan 2016 10:51:45 -0500 |
parents | cc1879e0b0ae |
children | 338e991cdd1f |
line wrap: on
line source
<!--
    Galaxy tool wrapper around extract_genomic_dna.py: fetches genomic DNA for
    the regions listed in a gff or interval dataset, using either a locally
    cached reference genome or a fasta dataset from the history.

    The tool id (including its spaces) is kept unchanged so that existing
    references to this tool keep resolving.
-->
<tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="3.0.0">
    <description>using coordinates from assembled/unassembled genomes</description>
    <requirements>
        <requirement type="package" version="35x1">faToTwoBit</requirement>
    </requirements>
    <!--
        Cheetah command template.
        gff inputs are read with the fixed column list 1,4,5,7; interval inputs
        take their column numbers from the dataset metadata.
        Path-valued arguments are double-quoted so that paths containing
        whitespace reach the script as a single argument.
    -->
    <command>
        <![CDATA[
            #set genome = $input.metadata.dbkey
            #set datatype = $input.datatype
            mkdir -p output_dir &&
            python "$__tool_directory__/extract_genomic_dna.py"
            --input "$input"
            --genome "$genome"
            #if isinstance($datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__):
                --input_format "gff"
                --columns "1,4,5,7"
                --interpret_features $interpret_features
            #else:
                --input_format "interval"
                --columns "${input.metadata.chromCol},${input.metadata.startCol},${input.metadata.endCol},${input.metadata.strandCol},${input.metadata.nameCol}"
            #end if
            --reference_genome_source $reference_genome_cond.reference_genome_source
            #if str($reference_genome_cond.reference_genome_source) == "cached":
                --reference_genome "$reference_genome_cond.reference_genome.fields.path"
            #else:
                --reference_genome "$reference_genome_cond.reference_genome"
            #end if
            --output_format $output_format
            --output "$output"
        ]]>
    </command>
    <inputs>
        <!-- The input must carry a genome build (dbkey); it selects the reference genome below. -->
        <param name="input" type="data" format="gff,interval" label="Fetch sequences for intervals in" help="Supported formats are gff, interval">
            <validator type="unspecified_build" />
        </param>
        <param name="interpret_features" type="select" label="Interpret features when possible" help="Applicable only when input dataset format is gff">
            <option value="yes">Yes</option>
            <option value="no">No</option>
        </param>
        <conditional name="reference_genome_cond">
            <param name="reference_genome_source" type="select" label="Choose the source for the reference genome">
                <option value="cached">locally cached</option>
                <option value="history">from history</option>
            </param>
            <when value="cached">
                <!-- Entries of the twobit data table, filtered to the build of the selected input. -->
                <param name="reference_genome" type="select" label="Using reference genome">
                    <options from_data_table="twobit">
                        <filter type="data_meta" key="dbkey" ref="input" column="0"/>
                    </options>
                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
                </param>
            </when>
            <when value="history">
                <!-- fasta datasets from the history, filtered to the build of the selected input. -->
                <param name="reference_genome" type="data" format="fasta" label="Using reference genome">
                    <options>
                        <filter type="data_meta" key="dbkey" ref="input"/>
                    </options>
                    <validator type="no_options" message="The current history does not include a fasta dataset with the build associated with the selected input file"/>
                </param>
            </when>
        </conditional>
        <param name="output_format" type="select" label="Select output format">
            <option value="fasta" selected="True">fasta</option>
            <option value="interval">interval</option>
        </param>
    </inputs>
    <outputs>
        <!--
            The default datatype matches the default output_format selection (fasta);
            change_format switches it to interval when that option is chosen.
        -->
        <data name="output" format="fasta">
            <change_format>
                <when input="output_format" value="interval" format="interval" />
            </change_format>
        </data>
    </outputs>
    <!-- Test parameter and output names match the names declared in inputs and outputs above. -->
    <tests>
        <test>
            <param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
            <param name="interpret_features" value="yes"/>
            <param name="reference_genome_source" value="cached"/>
            <param name="output_format" value="fasta"/>
            <output name="output" file="extract_genomic_dna_out1.fasta">
                <assert_contents>
                    <!-- First few lines... -->
                    <has_text text=">hg17_chr1_147962192_147962580_- CCDS989.1_cds_0_0_chr1_147962193_r" />
                    <has_text text="ACTTGATCCTGCTCCCTCGGTGTCTGCATTGACTCCTCATGCTGGGACTG" />
                    <has_text text="GACCCGTCAACCCCCCTGCTCGCTGCTCACGTACCTTCATCACTTTTAGT" />
                    <has_text text="GATGATGCAACTTTCGAGGAATGGTTCCCCCAAGGGCGGCCCCCAAAAGT" />
                    <!-- Last few lines... -->
                    <has_text text="GCTGTGGCACAGAACATGGACTCTGTGTTTAAGGAGCTCTTGGGAAAGAC" />
                    <has_text text="CTCTGTCCGCCAGGGCCTTGGGCCAGCATCTACCACCTCTCCCAGTCCTG" />
                    <has_text text="GGCCCCGAAGCCCAAAGGCCCCGCCCAGCAGCCGCCTGGGCAGGAACAAA" />
                    <has_text text="GGCTTCTCCCGGGGCCCTGGGGCCCCAGCCTCACCCTCAGCTTCCCACCC" />
                    <has_text text="CCAGGGCCTAGACACGACCCCCAAGCCACACTGA" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="input" value="droPer1.bed" dbkey="droPer1" ftype="bed" />
            <param name="interpret_features" value="yes"/>
            <param name="reference_genome_source" value="cached"/>
            <param name="output_format" value="fasta"/>
            <output name="output" file="extract_genomic_dna_out2.fasta" compare="contains"/>
        </test>
        <test>
            <param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
            <param name="interpret_features" value="yes"/>
            <param name="reference_genome_source" value="cached"/>
            <param name="output_format" value="interval"/>
            <output name="output" file="extract_genomic_dna_out3.interval" />
        </test>
        <!-- Test GFF file support. -->
        <test>
            <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
            <param name="interpret_features" value="no"/>
            <param name="reference_genome_source" value="cached"/>
            <param name="output_format" value="interval"/>
            <output name="output" file="extract_genomic_dna_out4.gff" compare="contains"/>
        </test>
        <test>
            <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
            <param name="interpret_features" value="no"/>
            <param name="output_format" value="fasta"/>
            <param name="reference_genome_source" value="cached"/>
            <output name="output" file="extract_genomic_dna_out5.fasta" compare="contains"/>
        </test>
        <!-- Test custom sequences support and GFF feature interpretation. -->
        <test>
            <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
            <param name="interpret_features" value="no"/>
            <param name="reference_genome_source" value="history"/>
            <param name="reference_genome" value="tophat_in1.fasta"/>
            <param name="output_format" value="fasta"/>
            <output name="output" file="extract_genomic_dna_out6.fasta" compare="contains"/>
        </test>
        <test>
            <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
            <param name="interpret_features" value="yes"/>
            <param name="reference_genome_source" value="history"/>
            <param name="reference_genome" value="tophat_in1.fasta"/>
            <param name="output_format" value="fasta"/>
            <output name="output" file="extract_genomic_dna_out7.fasta" compare="contains"/>
        </test>
    </tests>
    <help>
.. class:: warningmark

The following will cause a line from the input dataset to be skipped and a warning generated.

 - Sequences that fall outside of the range of a line's start and end coordinates.
 - Chromosome start or end coordinates that are invalid for the specified build.

-----

**What it does**

This tool uses coordinate, strand, and build information to fetch genomic DNA from gff data, producing fasta data.

-----

**Example**

If the input dataset is::

    chr7  127475281  127475310  NM_000230  0  +
    chr7  127485994  127486166  NM_000230  0  +
    chr7  127486011  127486166  D49487     0  +

Extracting sequences returns::

    >hg17_chr7_127475281_127475310_+ NM_000230
    GTAGGAATCGCAGCGCCAGCGGTTGCAAG
    >hg17_chr7_127485994_127486166_+ NM_000230
    GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCG
    GATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATC
    CAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAG
    GATCAATGACATTTCACACACG
    >hg17_chr7_127486011_127486166_+ D49487
    TGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGG
    CCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGA
    CACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCAC
    ACACG
    </help>
    <citations>
        <!-- NOTE(review): placeholder BibTeX entry (key, title and year are "None"); replace with a real citation. -->
        <citation type="bibtex">
            @unpublished{None,
            author = {},
            title = {None},
            year = {None},
            eprint = {None},
            url = {http://www.bx.psu.edu/~anton/labSite/}
        }</citation>
    </citations>
</tool>