annotate cufflinks_wrapper.xml @ 1:b9d29fdd1190 draft

planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
author devteam
date Tue, 13 Oct 2015 12:37:52 -0400
parents 1fffcfe2fb35
children a6f581469476
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
1 <tool id="cufflinks" name="Cufflinks" version="@VERSION@.0">
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
2 <description>transcript assembly and FPKM (RPKM) estimates for RNA-Seq data</description>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
3 <expand macro="requirements" />
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
4 <expand macro="stdio" />
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
5 <macros>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
6 <import>cuff_macros.xml</import>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
7 </macros>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
8 <version_command>cufflinks 2>&amp;1 | head -n 1</version_command>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
9 <command interpreter="python">
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
10 cufflinks_wrapper.py
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
11 --input=$input
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
12 --assembled-isoforms-output=$assembled_isoforms
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
13 --num-threads="\${GALAXY_SLOTS:-4}"
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
14 -I $max_intron_len
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
15 -F $min_isoform_fraction
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
16 -j $pre_mrna_fraction
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
17 $length_correction
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
18
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
19 ## Include reference annotation?
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
20 #if $reference_annotation.use_ref == "Use reference annotation":
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
21 -G $reference_annotation.reference_annotation_file
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
22 $reference_annotation.compatible_hits_norm
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
23 #end if
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
24 #if $reference_annotation.use_ref == "Use reference annotation guide":
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
25 -g $reference_annotation.reference_annotation_guide_file
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
26 --3-overhang-tolerance=$reference_annotation.three_overhang_tolerance
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
27 --intron-overhang-tolerance=$reference_annotation.intron_overhang_tolerance
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
28 $reference_annotation.no_faux_reads
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
29 #end if
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
30
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
31 ## Bias correction?
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
32 #if $bias_correction.do_bias_correction == "Yes":
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
33 -b
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
34 #if $bias_correction.seq_source.index_source == "history":
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
35 --ref_file=$bias_correction.seq_source.ref_file
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
36 #else:
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
37 --index=${bias_correction.seq_source.index.fields.path}
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
38 #end if
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
39 #end if
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
40
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
41 ## Multi-read correct?
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
42 #if str($multiread_correct) == "Yes":
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
43 -u
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
44 #end if
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
45
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
46 ## Include global model if available.
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
47 #if $global_model:
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
48 --global_model=$global_model
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
49 #end if
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
50
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
51 ## advanced settings
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
52 #if $advanced_settings.use_advanced_settings == "Yes":
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
53 --library-type=$advanced_settings.library_type
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
54 #if $advanced_settings.mask_file:
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
55 --mask-file=$advanced_settings.mask_file
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
56 #end if
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
57 --inner-mean-dist=$advanced_settings.inner_mean_dist
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
58 --inner-dist-std-dev=$advanced_settings.inner_dist_std_dev
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
59 --max-mle-iterations=$advanced_settings.max_mle_iterations
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
60 --junc-alpha=$advanced_settings.junc_alpha
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
61 --small-anchor-fraction=$advanced_settings.small_anchor_fraction
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
62 --overhang-tolerance=$advanced_settings.overhang_tolerance
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
63 --max-bundle-length=$advanced_settings.max_bundle_length
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
64 --max-bundle-frags=$advanced_settings.max_bundle_frags
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
65 --min-intron-length=$advanced_settings.min_intron_length
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
66 --trim-3-avgcov-thresh=$advanced_settings.trim_three_avgcov_thresh
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
67 --trim-3-dropoff-frac=$advanced_settings.trim_three_dropoff_frac
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
68 #end if
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
69
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
70 </command>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
71 <inputs>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
72 <param format="sam,bam" name="input" type="data" label="SAM or BAM file of aligned RNA-Seq reads" help=""/>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
73 <param name="max_intron_len" type="integer" value="300000" min="1" max="600000" label="Max Intron Length" help="ignore alignments with gaps longer than this"/>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
74 <param name="min_isoform_fraction" type="float" value="0.10" min="0" max="1" label="Min Isoform Fraction" help="suppress transcripts below this abundance level"/>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
75 <param name="pre_mrna_fraction" type="float" value="0.15" min="0" max="1" label="Pre MRNA Fraction" help="suppress intra-intronic transcripts below this level"/>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
76 <conditional name="reference_annotation">
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
77 <param name="use_ref" type="select" label="Use Reference Annotation">
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
78 <option value="No" selected="true">No</option>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
79 <option value="Use reference annotation">Use reference annotation</option>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
80 <option value="Use reference annotation guide">Use reference annotation as guide</option>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
81 </param>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
82 <when value="No"></when>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
83 <when value="Use reference annotation">
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
84 <param format="gff3,gtf" name="reference_annotation_file" type="data" label="Reference Annotation" help="Gene annotation dataset in GTF or GFF3 format."/>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
85 <param name="compatible_hits_norm" type="select" label="Count hits compatible with reference RNAs only"
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
86 help="With this option, Cufflinks counts only those fragments compatible with some reference transcript towards the number of mapped hits used in the FPKM denominator. This option can only be used in combination with --GTF.">
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
87 <option value="" selected="True">No</option>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
88 <option value="--compatible-hits-norm">Yes</option>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
89 </param>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
90 </when>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
91 <when value="Use reference annotation guide">
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
92 <param format="gff3,gtf" name="reference_annotation_guide_file" type="data" label="Reference Annotation" help="Gene annotation dataset in GTF or GFF3 format."/>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
93 <param name="three_overhang_tolerance" type="integer" value="600" label="3prime overhang tolerance"
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
94 help="The number of bp allowed to overhang the 3prime end of a reference transcript when determining if an assembled transcript should be merged with it (ie, the assembled transcript is not novel). The default is 600 bp." />
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
95 <param name="intron_overhang_tolerance" type="integer" value="50" label="Intronic overhang tolerance" help="The number of bp allowed to enter the intron of a reference transcript when determining if an assembled transcript should be merged with it (ie, the assembled transcript is not novel). The default is 50 bp." />
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
96 <param name="no_faux_reads" type="select" label="Disable tiling of reference transcripts" help="This option disables tiling of the reference transcripts with faux reads. Use this if you only want to use sequencing reads in assembly but do not want to output assembled transcripts that lay within reference transcripts. All reference transcripts in the input annotation will also be included in the output.">
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
97 <option value="" selected="True">No</option>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
98 <option value="--no-faux-reads">Yes</option>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
99 </param>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
100 </when>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
101 </conditional>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
102 <conditional name="bias_correction">
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
103 <param name="do_bias_correction" type="select" label="Perform Bias Correction"
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
104 help="Bias detection and correction can significantly improve accuracy of transcript abundance estimates.">
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
105 <option value="No" selected="true">No</option>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
106 <option value="Yes">Yes</option>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
107 </param>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
108 <when value="Yes">
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
109 <conditional name="seq_source">
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
110 <param name="index_source" type="select" label="Reference sequence data">
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
111 <option value="cached" selected="true">Locally cached</option>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
112 <option value="history">History</option>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
113 </param>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
114 <when value="cached">
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
115 <param name="index" type="select" label="Using reference genome">
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
116 <options from_data_table="fasta_indexes">
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
117 <filter type="data_meta" ref="input" key="dbkey" column="1" />
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
118 <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" />
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
119 </options>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
120 </param>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
121 </when>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
122 <when value="history">
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
123 <param name="ref_file" type="data" format="fasta" label="Using reference file" />
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
124 </when>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
125 </conditional>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
126 </when>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
127 <when value="No"></when>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
128 </conditional>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
129
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
130 <param name="multiread_correct" type="select" label="Use multi-read correct"
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
131 help="Tells Cufflinks to do an initial estimation procedure to more accurately weight reads mapping to multiple locations in the genome.">
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
132 <option value="No" selected="true">No</option>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
133 <option value="Yes">Yes</option>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
134 </param>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
135
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
136 <param name="length_correction" type="select" label="Apply length correction" help="Mode of length normalization to transcript FPKM.">
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
137 <option value="" selected="true">Cufflinks Effective Length Correction</option>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
138 <option value="--no-effective-length-correction">Standard Length Correction</option>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
139 <option value="--no-length-correction">No Length Correction at all (use raw counts)</option>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
140 </param>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
141
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
142 <param name="global_model" type="hidden_data" label="Global model (for use in Trackster)" optional="True"/>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
143
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
144 <!-- advanced settings -->
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
145 <conditional name="advanced_settings">
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
146 <param name="use_advanced_settings" type="select" label="Set advanced Cufflinks options" help="">
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
147 <option value="No" selected="true">No</option>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
148 <option value="Yes" >Yes</option>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
149 </param>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
150 <when value="No"></when>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
151 <when value="Yes">
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
152
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
153 <param type="select" name="library_type" label="Library prep used for input reads" help="">
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
154 <option value="auto" selected="True">Auto Detect</option>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
155 <option value="ff-firststrand">ff-firststrand</option>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
156 <option value="ff-secondstrand">ff-secondstrand</option>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
157 <option value="ff-unstranded">ff-unstranded</option>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
158 <option value="fr-firststrand">fr-firststrand</option>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
159 <option value="fr-secondstrand">fr-secondstrand</option>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
160 <option value="fr-unstranded" >fr-unstranded</option>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
161 <option value="transfrags">transfrags</option>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
162 </param>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
163
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
164 <param name="mask_file" type="data" format="gff3,gtf" label="Mask File" help="Ignore all alignment within transcripts in this file " optional="True" />
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
165 <param name="inner_mean_dist" type="integer" value="45" label="Inner mean distance" help="This is the expected (mean) inner distance between mate pairs. For, example, for paired end runs with fragments selected at 300bp,where each end is 50bp, you should set it as 200. The default is 45bp." />
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
166 <param name="inner_dist_std_dev" type="integer" value="20" label="Inner distance standard deviation" help="The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp." />
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
167 <param name="max_mle_iterations" type="integer" value="5000" label="Max MLE iterations" help="Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000" />
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
168 <param name="junc_alpha" type="float" value="0.001" min="0" max="1" label="Alpha value for the binomial test used during false positive spliced alignment filtration" help="Default: 0.001" />
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
169 <param name="small_anchor_fraction" type="float" value="0.09" min="0" max="1" label="percent read overhang taken as suspiciously small" help="Spliced reads with less than this percent of their length on each side of the junction are considered suspicious and are candidates for filtering prior to assembly. Default: 0.09." />
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
170 <param name="overhang_tolerance" type="integer" value="8" label="Intronic overhang tolerance" help="The number of bp allowed to enter the intron of a transcript when determining if a read or another transcript is mappable to/compatible with it. The default is 8 bp based on the default bowtie/TopHat parameters." />
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
171 <param name="max_bundle_length" type="integer" value="3500000" label="Maximum genomic length of a given bundle" help="Default: 3,500,000bp" />
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
172 <param name="max_bundle_frags" type="integer" value="1000000" label="Maximum number of fragments per locus" help="Sets the maximum number of fragments a locus may have before being skipped. Skipped loci are listed in skipped.gtf. Default: 1,000,000" />
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
173 <param name="min_intron_length" type="integer" value="50" label="Minimal allowed intron size" help="Default: 50bp" />
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
174 <param name="trim_three_avgcov_thresh" type="integer" value="10" label="Minimum average coverage required to attempt 3prime trimming." help="Default: 10" />
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
175 <param name="trim_three_dropoff_frac" type="float" value="0.1" min="0" max="1" label="The fraction of average coverage below which to trim the 3prime end of an assembled transcript." help="Default: 0.1"/>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
176 </when>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
177 </conditional>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
178 </inputs>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
179 <outputs>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
180 <data format="tabular" name="genes_expression" label="${tool.name} on ${on_string}: gene expression" from_work_dir="genes.fpkm_tracking"/>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
181 <data format="tabular" name="transcripts_expression" label="${tool.name} on ${on_string}: transcript expression" from_work_dir="isoforms.fpkm_tracking"/>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
182 <data format="gtf" name="assembled_isoforms" label="${tool.name} on ${on_string}: assembled transcripts"/>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
183 <data format="txt" name="total_map_mass" label="${tool.name} on ${on_string}: total map mass" hidden="true" from_work_dir="global_model.txt"/>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
184 <data format="gtf" name="skipped" label="${tool.name} on ${on_string}: Skipped Transcripts" from_work_dir="skipped.gtf"/> <!-- from_work_dir (not from_working_dir) is the attribute Galaxy reads to pick up skipped.gtf from the job working directory -->
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
185 </outputs>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
186
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
187 <trackster_conf>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
188 <action type="set_param" name="global_model" output_name="total_map_mass"/>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
189 </trackster_conf>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
190 <tests>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
191 <!--
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
192 Simple test that uses test data included with cufflinks.
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
193 -->
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
194 <test>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
195 <param name="input" value="cufflinks_in.bam"/>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
196 <param name="max_intron_len" value="300000"/>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
197 <param name="min_isoform_fraction" value="0.05"/>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
198 <param name="pre_mrna_fraction" value="0.05"/>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
199 <param name="use_ref" value="No"/>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
200 <param name="do_bias_correction" value="No"/>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
201 <param name="multiread_correct" value="No"/>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
202 <param name="length_correction" value=""/>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
203 <param name="use_advanced_settings" value="No" />
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
204 <output name="genes_expression" format="tabular" lines_diff="2" file="cufflinks_out3.fpkm_tracking"/>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
205 <output name="transcripts_expression" format="tabular" lines_diff="2" file="cufflinks_out2.fpkm_tracking"/>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
206 <output name="assembled_isoforms" file="cufflinks_out1.gtf"/>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
207 <output name="total_map_mass" file="cufflinks_out4.txt"/> <!-- total_map_mass is the declared output backed by global_model.txt; global_model is an input param, not an output -->
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
208 <output name="skipped" file="cufflinks_out4.gtf"/>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
209 </test>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
210 </tests>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
211
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
212 <help>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
213 **Cufflinks Overview**
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
214
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
215 Cufflinks_ assembles transcripts, estimates their abundances, and tests for differential expression and regulation in RNA-Seq samples. It accepts aligned RNA-Seq reads and assembles the alignments into a parsimonious set of transcripts. Cufflinks then estimates the relative abundances of these transcripts based on how many reads support each one. Please cite: Trapnell C, Williams BA, Pertea G, Mortazavi AM, Kwan G, van Baren MJ, Salzberg SL, Wold B, Pachter L. Transcript assembly and abundance estimation from RNA-Seq reveals thousands of new transcripts and switching among isoforms. Nature Biotechnology doi:10.1038/nbt.1621
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
216
1
b9d29fdd1190 planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents: 0
diff changeset
217 .. _Cufflinks: http://cole-trapnell-lab.github.io/cufflinks/
b9d29fdd1190 planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents: 0
diff changeset
218
0
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
219 ------
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
220
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
221 **Know what you are doing**
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
222
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
223 .. class:: warningmark
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
224
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
225 There is no such thing (yet) as an automated gearshift in expression analysis. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
226
1
b9d29fdd1190 planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents: 0
diff changeset
227 .. __: http://cole-trapnell-lab.github.io/cufflinks/cufflinks/
0
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
228
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
229 ------
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
230
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
231 **Input formats**
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
232
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
233 Cufflinks takes a text file of SAM alignments as input. The RNA-Seq read mapper TopHat produces output in this format, and is recommended for use with Cufflinks. However Cufflinks will accept SAM alignments generated by any read mapper. Here's an example of an alignment Cufflinks will accept::
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
234
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
235 s6.25mer.txt-913508 16 chr1 4482736 255 14M431N11M * 0 0 \
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
236 CAAGATGCTAGGCAAGTCTTGGAAG IIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 XS:A:-
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
237
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
238 Note the use of the custom tag XS. This attribute, which must have a value of "+" or "-", indicates which strand the RNA that produced this read came from. While this tag can be applied to any alignment, including unspliced ones, it must be present for all spliced alignment records (those with a 'N' operation in the CIGAR string).
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
239 The SAM file supplied to Cufflinks must be sorted by reference position. If you aligned your reads with TopHat, your alignments will be properly sorted already. If you used another tool, you may want to make sure they are properly sorted as follows::
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
240
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
241 sort -k 3,3 -k 4,4n hits.sam > hits.sam.sorted
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
242
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
243 NOTE: Cufflinks currently only supports SAM alignments with the CIGAR match ('M') and reference skip ('N') operations. Support for the other operations, such as insertions, deletions, and clipping, will be added in the future.
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
244
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
245 ------
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
246
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
247 **Outputs**
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
248
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
249 Cufflinks produces three output files:
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
250
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
251 Transcripts and Genes:
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
252
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
253 This GTF file contains Cufflinks' assembled isoforms. The first 7 columns are standard GTF, and the last column contains attributes, some of which are also standardized (e.g. gene_id, transcript_id). There is one GTF record per row, and each record represents either a transcript or an exon within a transcript. The columns are defined as follows::
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
254
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
255 Column number Column name Example Description
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
256 -----------------------------------------------------
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
257 1 seqname chrX Chromosome or contig name
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
258 2 source Cufflinks The name of the program that generated this file (always 'Cufflinks')
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
259 3 feature exon The type of record (always either "transcript" or "exon").
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
260 4 start 77696957 The leftmost coordinate of this record (where 0 is the leftmost possible coordinate)
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
261 5 end 77712009 The rightmost coordinate of this record, inclusive.
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
262 6 score 1000 The most abundant isoform for each gene is assigned a score of 1000. Minor isoforms are scored by the ratio (minor FPKM/major FPKM)
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
263 7 strand + Cufflinks' guess for which strand the isoform came from. Always one of '+', '-', or '.'
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
264 8 frame . Cufflinks does not predict where the start and stop codons (if any) are located within each transcript, so this field is not used.
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
265 9 attributes See below
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
266
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
267 Each GTF record is decorated with the following attributes::
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
268
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
269 Attribute Example Description
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
270 -----------------------------------------
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
271 gene_id CUFF.1 Cufflinks gene id
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
272 transcript_id CUFF.1.1 Cufflinks transcript id
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
273 FPKM 101.267 Isoform-level relative abundance in Fragments Per Kilobase of exon model per Million mapped fragments
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
274 frac 0.7647 Reserved. Please ignore, as this attribute may be deprecated in the future
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
275 conf_lo 0.07 Lower bound of the 95% confidence interval of the abundance of this isoform, as a fraction of the isoform abundance. That is, lower bound = FPKM * (1.0 - conf_lo)
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
276 conf_hi 0.1102 Upper bound of the 95% confidence interval of the abundance of this isoform, as a fraction of the isoform abundance. That is, upper bound = FPKM * (1.0 + conf_hi)
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
277 cov 100.765 Estimate for the absolute depth of read coverage across the whole transcript
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
278
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
279
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
280 Transcripts only:
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
281 This file is simply a tab delimited file containing one row per transcript and with columns containing the attributes above. There are a few additional attributes not in the table above, but these are reserved for debugging, and may change or disappear in the future.
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
282
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
283 Genes only:
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
284 This file contains gene-level coordinates and expression values.
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
285
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
286 -------
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
287
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
288 **Cufflinks settings**
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
289
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
290 All of the options have a default value. You can change any of them. Most of the options in Cufflinks have been implemented here.
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
291
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
292 ------
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
293
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
294 **Cufflinks parameter list**
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
295
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
296 This is a list of implemented Cufflinks options::
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
297
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
298 -m INT This is the expected (mean) inner distance between mate pairs. For example, for paired end runs with fragments selected at 300bp, where each end is 50bp, you should set -m to be 200. The default is 45bp.
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
299 -s INT The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp.
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
300 -I INT The maximum intron length. Cufflinks will not report transcripts with introns longer than this, and will ignore SAM alignments with REF_SKIP CIGAR operations longer than this. The default is 300,000.
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
301 -F After calculating isoform abundance for a gene, Cufflinks filters out transcripts that it believes are very low abundance, because isoforms expressed at extremely low levels often cannot reliably be assembled, and may even be artifacts of incompletely spliced precursors of processed transcripts. This parameter is also used to filter out introns that have far fewer spliced alignments supporting them. The default is 0.05, or 5% of the most abundant isoform (the major isoform) of the gene.
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
302 -j Some RNA-Seq protocols produce a significant amount of reads that originate from incompletely spliced transcripts, and these reads can confound the assembly of fully spliced mRNAs. Cufflinks uses this parameter to filter out alignments that lie within the intronic intervals implied by the spliced alignments. The minimum depth of coverage in the intronic region covered by the alignment is divided by the number of spliced reads, and if the result is lower than this parameter value, the intronic alignments are ignored. The default is 5%.
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
303 -G Tells Cufflinks to use the supplied reference annotation to estimate isoform expression. It will not assemble novel transcripts, and the program will ignore alignments not structurally compatible with any reference transcript.
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
304 -N With this option, Cufflinks excludes the contribution of the top 25 percent most highly expressed genes from the number of mapped fragments used in the FPKM denominator. This can improve robustness of differential expression calls for less abundant genes and transcripts.
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
305 </help>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
306 <citations>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
307 <citation type="doi">10.1038/nbt.1621</citation>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
308 </citations>
1fffcfe2fb35 Uploaded
devteam
parents:
diff changeset
309 </tool>