view MirParser.xml @ 3:bf31563d22a1 draft

planemo upload for repository https://bitbucket.org/drosofff/gedtools/
author drosofff
date Mon, 17 Aug 2015 14:00:06 -0400
parents c68bfbff72d5
children 933475df0fd2
line wrap: on
line source

<tool id="MirParser" name="Parse miRNAs" version="0.9.3">
    <description>from alignment files</description>
    <!--
        External packages requested for this tool. Each entry pins a package
        name to one exact version.
        NOTE(review): biocbasics presumably supplies the R runtime and the
        lattice library loaded by the plotCode configfile; bowtie, samtools,
        pysam and numpy are presumably used by MirParser.py itself. Confirm
        against the script.
    -->
    <requirements>
        <requirement type="package" version="0.12.7">bowtie</requirement>
        <requirement type="package" version="0.7.7">pysam</requirement>
        <requirement type="package" version="1.9">numpy</requirement>
        <requirement type="package" version="2.14">biocbasics</requirement>
        <requirement type="package" version="1.2">samtools</requirement>
    </requirements>
    <!--
        Command line for MirParser.py (Cheetah template). Positional layout:
          argv[1]   reference (fasta from history, or built-in bowtie index path)
          argv[2]   flag telling the script whether the index must be extracted
          argv[3]   pre-mir counts output
          argv[4]   mature mir counts output
          argv[5]   GFF3 guide
          argv[6-8] lattice dataframe, R plot code, PDF (dummy strings when plotting is off)
          argv[9..] one (path, extension, name) triplet per alignment dataset
        Only Cheetah "##" comments are used inside the element so that the
        command text itself is never split by XML comment nodes.
    -->
    <command interpreter="python">
        MirParser.py
          #if $refGenomeSource.genomeSource == "history":
            $refGenomeSource.ownFile ## index source sys.argv[1]
            --do_not_extract_index  ## sys.argv[2]
          #else:
            ## input_list lives inside the refGenomeSource conditional and accepts
            ## several datasets, so the dbkey is read from the first selected
            ## dataset through its fully qualified name. The matching row of the
            ## bowtie_indexes data table provides the index path (last column).
            #silent reference= filter( lambda x: str( x[0] ) == str( $refGenomeSource.input_list[0].dbkey ), $__app__.tool_data_tables[ 'bowtie_indexes' ].get_fields() )[0][-1]
            $reference   ## sys.argv[1]
            --extract_index  ## sys.argv[2]
          #end if
          $output1 ## for pre-mirs  ## sys.argv[3]
          $output2 ## for mature mirs  ## sys.argv[4]
          $GFF3    ## sys.argv[5]
          #if $plotting.plottingOption == "yes":
            $lattice_dataframe   ## sys.argv[6]
            $plotCode   ## sys.argv[7]
            $latticePDF ## sys.argv[8]
          #else:
            ## Placeholders keep the positional layout stable when plotting is off.
            "dummy_dataframe_path"   ## sys.argv[6]
            "dummy_plotCode"   ## sys.argv[7]
            "dummy_latticePDF" ## sys.argv[8]
          #end if
          #for $i in $refGenomeSource.input_list
            ## NOTE(review): the dataset name is interpolated inside double quotes
            ## on the shell command line and dataset names are user-editable.
            ## Confirm that it is sanitized upstream.
            $i $i.ext "$i.name" ## sys.argv[9,10,11] modulo 3
          #end for
    </command>
    <inputs>
        <!-- Reference selection: either a built-in bowtie index or a fasta dataset from the history. -->
        <conditional name="refGenomeSource">
            <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
                <option value="indexed">Use a built-in index</option>
                <option value="history">Use one from the history</option>
            </param>
            <!-- Built-in index: each alignment's dbkey metadata must appear in column 0 of the bowtie_indexes table. -->
            <when value="indexed">
                <param name="input_list" type="data" format="tabular,sam,bam" multiple="true" label="Select multiple alignments to parse">
                    <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="database not set for this bowtie output. Select the database(=genome used for matching) manually, or select a reference fasta from your history." />
                </param>
            </when>
            <!-- History reference: alignments plus the fasta file to use as reference. -->
            <when value="history">
                <param name="input_list" type="data" format="tabular,sam,bam" multiple="true" label="Select multiple alignments to parse" />
                <param name="ownFile" type="data" format="fasta" label="Select the fasta reference" />
            </when>
        </conditional>
        <!-- GFF3 annotation that guides the parsing. -->
        <param name="GFF3" type="data" format="gff,gff3" label="miRbase GFF3 guide" />
        <!-- Optional coverage plots; the display mode is only asked for when plotting is enabled. -->
        <conditional name="plotting">
            <param name="plottingOption" type="select" label="Additional miRNA coverage graphs">
                <option value="no" selected="True">no</option>
                <option value="yes">yes</option>
            </param>
            <when value="yes">
                <param name="display" type="select" label="Display Coverage with absolute number of reads or relatively to the total number of read matching the gene or mir">
                    <option value="relative" selected="True">Relative Coverage</option>
                    <option value="absolute">Absolute Coverage</option>
                </param>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Count tables: always produced. -->
        <data name="output1" format="tabular" label="Premirs Count  Lists" />
        <data name="output2" format="tabular" label="Mature Mirs Count  Lists" />
        <!-- Plotting artefacts: only created when the plotting option is "yes". -->
        <data name="lattice_dataframe" format="tabular" label="Lattice Dataframe">
            <filter>plotting['plottingOption'] == "yes"</filter>
        </data>
        <data name="latticePDF" format="pdf" label="Mir coverage">
            <filter>plotting['plottingOption'] == "yes"</filter>
        </data>
    </outputs>
    <tests>
        <!-- Single functional test: history fasta reference, one BAM alignment, plotting enabled. -->
        <test>
            <!-- Reference and alignment inputs -->
            <param name="genomeSource" value="history" />
            <param name="ownFile" ftype="fasta" value="Dmel_r5.49_short.fa" />
            <param name="input_list" ftype="bam" value="input.bam" />
            <param name="GFF3" ftype="gff3" value="dme.gff3" />
            <!-- Enable plotting so that all four outputs are generated -->
            <param name="plottingOption" value="yes" />
            <!-- Expected results -->
            <output name="output1" ftype="tabular" file="output1.tab" />
            <output name="output2" ftype="tabular" file="output2.tab" />
            <output name="lattice_dataframe" ftype="tabular" file="lattice_dataframe.tab" />
            <output name="latticePDF" ftype="pdf" file="latticePDF.pdf" />
        </test>
    </tests>
    <configfiles>
        <configfile name="plotCode">
        ## R script rendered by Cheetah; its path is handed to MirParser.py on the
        ## command line. The body is only emitted when plotting is enabled, because
        ## the display parameter exists only in that branch of the conditional.
        #if  $plotting.plottingOption == "yes":
          graph_type = "${plotting.display}" ## "relative" or "absolute"
          ## Setup R error handling to go to stderr
          options( show.error.messages=F,
                 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
          library(lattice)
          coverage = read.delim("${lattice_dataframe}", header=T)
          ## Count distinct biosamples with unique() rather than levels(): levels()
          ## yields NULL (hence 0) whenever the sample column is not a factor, e.g.
          ## with R 4.x defaults or purely numeric sample labels. For a factor column
          ## read from file both forms give the same count.
          Numb_of_biosamples = length(unique(coverage\$sample))
          ## One panel per mir, with Numb_of_biosamples columns and 15 rows per page
          if (graph_type=="relative") {
          graph = xyplot(countsNorm~offsetNorm | mir, data=coverage, groups=polarity, col=c("red", "blue"), type="l", lwd=1,
                        scales=list(x=list(cex=.5), y=list(cex=.5)), par.strip.text=list(cex=.5), strip=strip.custom(which.given=1, bg="lightblue"), layout=c(Numb_of_biosamples,15), as.table=TRUE, main="miRNA coverage maps")
          } else {
          graph = xyplot(counts~offset | mir, data=coverage, groups=polarity, col=c("red", "blue"), type="l", lwd=1,
                        scales=list(x=list(cex=.5), y=list(cex=.5)), par.strip.text=list(cex=.5), strip=strip.custom(which.given=1, bg="lightblue"), layout=c(Numb_of_biosamples,15), as.table=TRUE, main="miRNA coverage maps")
          }
          ## pdf output
          pdf(file="${latticePDF}", paper="special", height=11.69, width=8.2677)
          plot(graph, newpage = T)
          dev.off()
        #end if
     </configfile>
    </configfiles>
    <help>

**What it does**

This tool uses a species-specific GFF3 file from mirBase_ to guide the parsing of an alignment file produced with the sRbowtie tool.

.. _mirBase: ftp://mirbase.org/pub/mirbase/CURRENT/genomes/

------

.. class:: warningmark

The guide GFF3 file must be in the following format:

2L	.	miRNA_primary_transcript	243035	243141	.	-	.	ID=MI0005821;Alias=MI0005821;Name=dme-mir-965

2L	.	miRNA	243055	243076	.	-	.	ID=MIMAT0005480;Alias=MIMAT0005480;Name=dme-miR-965-3p;Derives_from=MI0005821

2L	.	miRNA	243096	243118	.	-	.	ID=MIMAT0020861;Alias=MIMAT0020861;Name=dme-miR-965-5p;Derives_from=MI0005821

2L	.	miRNA_primary_transcript	857542	857632	.	+	.	ID=MI0005813;Alias=MI0005813;Name=dme-mir-375

2L	.	miRNA	857596	857617	.	+	.	ID=MIMAT0005472;Alias=MIMAT0005472;Name=dme-miR-375-3p;Derives_from=MI0005813

2L	.	miRNA	857556	857579	.	+	.	ID=MIMAT0020853;Alias=MIMAT0020853;Name=dme-miR-375-5p;Derives_from=MI0005813

2L	.	miRNA_primary_transcript	1831685	1831799	.	-	.	ID=MI0011290;Alias=MI0011290;Name=dme-mir-2280

The Name attribute of each mature miRNA (3rd column = miRNA) must contain either the -3p or the -5p string (Name=dme-miR-965-3p, for instance)

------

**Input formats**

1. One or several alignment files generated with the sRbowtie tool and **renamed** according to the name of the biosample (avoid spaces in biosample labels)

.. class:: warningmark

Alignment datasets generated with sRbowtie must be renamed according to a biosample name

2. A GFF3 file retrieved from mirBase_

------

**Outputs**

Two count list files for counts of reads aligned to pre-mir or mature miRNA

A pdf of pre-mir coverages. Red coverages indicate that the mir gene is in the genomic up strand, blue coverages indicate that the mir gene is in the genomic down strand.

  </help>
</tool>