comparison RPKM_saturation.xml @ 1:8dab2cfd456f default tip

first commit
author nilesh
date Thu, 18 Jul 2013 11:24:42 -0500
parents
children
comparison
equal deleted inserted replaced
0:0e4ef5fef2c5 1:8dab2cfd456f
1 <tool id="RPKM_saturation" name="RPKM Saturation">
2 <description>calculates raw count and RPKM values for transcript at exon, intron, and mRNA level</description>
3 <requirements> <!-- Packages this tool declares as dependencies: R 2.15.1 and rseqc 2.3.7 -->
4 <requirement type="package" version="2.15.1">R</requirement>
5 <requirement type="package" version="2.3.7">rseqc</requirement>
6 </requirements>
7 <command interpreter="python"> RPKM_saturation.py -i $input -o output -r $refgene
8 ## Paired-end libraries: pass -d with the rule string selected by pair_type
9 #if str($strand_type.strand_specific) == "pair"
10 -d
11 #if str($strand_type.pair_type) == "sd"
12 '1++,1--,2+-,2-+'
13 #else
14 '1+-,1-+,2++,2--'
15 #end if
16 #end if
17 ## Single-end libraries: pass -d with the rule string selected by single_type
18 #if str($strand_type.strand_specific) == "single"
19 -d
20 #if str($strand_type.single_type) == "s"
21 '++,--'
22 #else
23 '+-,-+'
24 #end if
25 #end if
26 ## When strand_specific is "none" neither block above fires and no -d option is emitted
27 -l $percentileFloor -u $percentileCeiling -s $percentileStep -c $rpkmCutoff
28 ## The literal "output" given to -o matches the output.* file names listed under from_work_dir
29 </command>
30 <inputs> <!-- Form fields; each name is referenced as $name in the command template -->
31 <param name="input" type="data" format="bam" label="input bam/sam file" />
32 <param name="refgene" type="data" format="bed" label="Reference gene model" />
33 <conditional name="strand_type">
34 <param name="strand_specific" type="select" label="Strand-specific?" value="none"> <!-- default must equal an option value exactly; the command template compares these strings case-sensitively -->
35 <option value="none">None</option>
36 <option value="pair">Pair-End RNA-seq</option>
37 <option value="single">Single-End RNA-seq</option>
38 </param>
39 <when value="pair">
40 <param name="pair_type" type="select" display="radio" label="Pair-End Read Type (format: mapped --> parent)" value="sd">
41 <option value="sd"> read1 (positive --> positive; negative --> negative), read2 (positive --> negative; negative --> positive)</option>
42 <option value="ds">read1 (positive --> negative; negative --> positive), read2 (positive --> positive; negative --> negative)</option>
43 </param>
44 </when>
45 <when value="single">
46 <param name="single_type" type="select" display="radio" label="Single-End Read Type (format: mapped --> parent)" value="s">
47 <option value="s">positive --> positive; negative --> negative</option>
48 <option value="d">positive --> negative; negative --> positive</option>
49 </param>
50 </when>
51 <when value="none"></when>
52 </conditional>
53 <param name="percentileFloor" type="integer" value="5" label="Begin sampling from this percentile (default=5)" />
54 <param name="percentileCeiling" type="integer" value="100" label="End sampling at this percentile (default=100)" />
55 <param name="percentileStep" type="integer" value="5" label="Sampling step size (default=5)" />
56 <param name="rpkmCutoff" type="float" value="0.01" label="Ignore transcripts with RPKM smaller than this number (default=0.01)" /> <!-- numeric type so that only a number can reach the -c option on the command line -->
57 </inputs>
58 <outputs> <!-- Each dataset is picked up from the working directory; the file names start with the literal "output" that the command passes to -o -->
59 <data format="xls" name="outputxls" from_work_dir="output.eRPKM.xls"/>
60 <data format="xls" name="outputrawxls" from_work_dir="output.rawCount.xls"/>
61 <data format="r" name="outputr" from_work_dir="output.saturation.r"/>
62 <data format="pdf" name="outputpdf" from_work_dir="output.saturation.pdf"/>
63 </outputs>
64 <tests> <!-- One functional test; parameters not listed here keep the defaults declared in the inputs section -->
65 <test>
66 <param name="input" value="Pairend_nonStrandSpecific_36mer_Human_hg19.bam" />
67 <param name="refgene" value="hg19_RefSeq.bed" />
68 <output name="outputxls" file="rpkmsatout.eRPKM.xls" />
69 <output name="outputrawxls" file="rpkmsatout.rawCount.xls" />
70 <output name="outputr" file="rpkmsatout.saturation.r" />
71 <output name="outputpdf" file="rpkmsatout.saturation.pdf" compare="sim_size" /> <!-- a regenerated PDF is not byte-stable (embedded creation metadata), so compare by size instead of by exact content -->
72 </test>
73 </tests>
74 <help>
75 .. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
76
77 -----
78
79 About RSeQC
80 +++++++++++
81
82 The RSeQC package provides a number of useful modules that can comprehensively evaluate high-throughput sequence data, especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
83
84 The RSeQC package is licensed under the GNU GPL v3 license.
85
86 Inputs
87 ++++++++++++++
88
89 Input BAM/SAM file
90 Alignment file in BAM/SAM format.
91
92 Reference gene model
93 Gene model in BED format.
94
95 Strand sequencing type (default=none)
96 See Infer Experiment tool if uncertain.
97
98 Options
99 ++++++++++++++
100
101 Skip Multiple Hit Reads
102 Use Multiple hit reads or use only uniquely mapped reads.
103
104 Only use exonic reads
105 Makes the program use only exonic (UTR exons and CDS exons) reads; otherwise all reads are used.
106
107 Output
108 ++++++++++++++
109
110 1. output.eRPKM.xls: RPKM values for each transcript
111 2. output.rawCount.xls: Raw count for each transcript
112 3. output.saturation.r: R script to generate plot
113 4. output.saturation.pdf:
114
115 .. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/saturation.png
116
117 - All transcripts are sorted in ascending order according to expression level (RPKM). They are then divided into 4 groups:
118 1. Q1 (0-25%): Transcripts with expression level ranked below the 25th percentile.
119 2. Q2 (25-50%): Transcripts with expression level ranked between the 25th and 50th percentiles.
120 3. Q3 (50-75%): Transcripts with expression level ranked between the 50th and 75th percentiles.
121 4. Q4 (75-100%): Transcripts with expression level ranked above the 75th percentile.
122 - A BAM/SAM file containing more than 100 million alignments will make the module very slow.
123 - Follow example below to visualize a particular transcript (using R console)::
124 - output example
125 .. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/saturation_eg.png
126
127 </help>
128 </tool>